ba9cddf9a1
- Stripe: StripeClient.php, checkout/portal/webhook endpoints, idempotent event handling - FreeTier: tier-aware credits (free/light/pro/pro_plus), bonus_balance, hourly caps per tier - pricing.php + billing.php: 4-tier cards, 3 topups, Customer Portal, balance breakdown - Min Sak: CaseStore.php, AzureDocIntelligence.php, AzureSearchAdmin.php — per-user hybrid RAG - api/case/: upload, list, delete, ingest-callback (HMAC-auth'd from n8n) - award-survey-credits: inter-site HMAC endpoint for dobetternorge.no survey bonus - dashboard.php: tier badge, balance breakdown card, Min Sak CTA, survey CTA - KorrespondAgent + all 3 other agents: use_my_case toggle wired to dbnToolsCaseContext() - bootstrap.php: dbnToolsCaseContext(), dbnToolsIntersiteSecret(), dbnToolsCurrentTier() Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
205 lines
8.4 KiB
PHP
205 lines
8.4 KiB
PHP
<?php
|
|
declare(strict_types=1);
|
|
|
|
/**
 * Per-user Azure AI Search index management.
 *
 * Each user gets their own private index: case-{user_id}
 *   - Hybrid search (BM25 + vector + Norwegian Bokmål analyzer nb.microsoft)
 *   - Vector dim 1536 (text-embedding-3-small)
 *   - Schema: id, doc_id, user_id, filename, page, chunk_text, vector, doc_type, detected_date
 *
 * Isolation is enforced at the INDEX level — no index is shared between users,
 * so a cross-user data leak is structurally impossible.
 */
|
|
final class AzureSearchAdmin
{
    private const API_VERSION = '2024-07-01';

    /** Service URL used when neither the config file nor the environment provides one. */
    private const DEFAULT_ENDPOINT = 'https://bnl-legal-search.search.windows.net';

    /** Upper bound used both for actions per indexing request and for results per search page. */
    private const BATCH_LIMIT = 1000;

    /** Largest `skip` value this class will send when paging through search results. */
    private const MAX_SKIP = 100000;

    private string $endpoint;

    private string $adminKey;

    /**
     * @param string|null $endpoint Service URL; when null the value from loadConfig() is used.
     * @param string|null $adminKey Admin API key; when null the value from loadConfig() is used.
     *
     * @throws RuntimeException When the resolved endpoint or admin key is empty.
     */
    public function __construct(?string $endpoint = null, ?string $adminKey = null)
    {
        $cfg = self::loadConfig();
        // Trailing slashes are stripped because every request path starts with '/'.
        $this->endpoint = rtrim($endpoint ?? ($cfg['endpoint'] ?? ''), '/');
        $this->adminKey = $adminKey ?? ($cfg['admin_key'] ?? '');
        if ($this->endpoint === '' || $this->adminKey === '') {
            throw new RuntimeException('AzureSearchAdmin: endpoint or admin key not configured.');
        }
    }

    /**
     * Resolve endpoint and admin key from /etc/bnl/azure.php, falling back to
     * the AZURE_SEARCH_ENDPOINT / AZURE_SEARCH_ADMIN_KEY environment variables.
     *
     * @return array{endpoint: string, admin_key: string}
     */
    private static function loadConfig(): array
    {
        $path = '/etc/bnl/azure.php';
        if (is_readable($path)) {
            $cfg = require $path;
            // A config file without a `return [...]` makes require yield a non-array;
            // in that case use the environment instead of indexing into a scalar.
            if (is_array($cfg)) {
                return [
                    'endpoint'  => (string)($cfg['SEARCH_ENDPOINT'] ?? self::DEFAULT_ENDPOINT),
                    'admin_key' => (string)($cfg['SEARCH_ADMIN_KEY'] ?? ''),
                ];
            }
        }

        return [
            'endpoint'  => (string)(getenv('AZURE_SEARCH_ENDPOINT') ?: self::DEFAULT_ENDPOINT),
            'admin_key' => (string)(getenv('AZURE_SEARCH_ADMIN_KEY') ?: ''),
        ];
    }

    /**
     * Name of the private index belonging to a user: "case-{userId}".
     *
     * Non-positive ids are rejected: 0 is what a missing id becomes after an
     * (int) cast and would funnel such callers into one shared "case-0" index,
     * and a negative id would produce a name with a doubled dash.
     * NOTE(review): assumes real user ids start at 1 — confirm against the user table.
     *
     * @throws InvalidArgumentException When $userId is smaller than 1.
     */
    public static function indexName(int $userId): string
    {
        if ($userId < 1) {
            throw new InvalidArgumentException(
                'AzureSearchAdmin: user id must be a positive integer, got ' . $userId . '.'
            );
        }

        return 'case-' . $userId;
    }

    /**
     * Create the per-user index if it does not exist. Idempotent.
     *
     * @return string The index name.
     *
     * @throws RuntimeException On transport errors or an error response from the service.
     */
    public function ensureUserIndex(int $userId): string
    {
        $name = self::indexName($userId);
        if ($this->indexExists($name)) {
            return $name;
        }

        $body = [
            'name' => $name,
            'fields' => [
                ['name' => 'id', 'type' => 'Edm.String', 'key' => true, 'filterable' => true],
                ['name' => 'doc_id', 'type' => 'Edm.Int32', 'filterable' => true, 'facetable' => true],
                ['name' => 'user_id', 'type' => 'Edm.Int32', 'filterable' => true],
                ['name' => 'filename', 'type' => 'Edm.String', 'filterable' => true, 'sortable' => true, 'searchable' => true, 'analyzer' => 'standard.lucene'],
                ['name' => 'page', 'type' => 'Edm.Int32', 'filterable' => true, 'sortable' => true],
                ['name' => 'chunk_text', 'type' => 'Edm.String', 'searchable' => true, 'analyzer' => 'nb.microsoft'],
                ['name' => 'doc_type', 'type' => 'Edm.String', 'filterable' => true, 'facetable' => true],
                ['name' => 'detected_date', 'type' => 'Edm.DateTimeOffset', 'filterable' => true, 'sortable' => true],
                [
                    'name' => 'vector',
                    'type' => 'Collection(Edm.Single)',
                    'searchable' => true,
                    'dimensions' => 1536,
                    'vectorSearchProfile' => 'caseVectorProfile',
                ],
            ],
            'vectorSearch' => [
                'algorithms' => [[
                    'name' => 'caseHnsw',
                    'kind' => 'hnsw',
                    'hnswParameters' => ['m' => 4, 'efConstruction' => 400, 'efSearch' => 500, 'metric' => 'cosine'],
                ]],
                'profiles' => [['name' => 'caseVectorProfile', 'algorithm' => 'caseHnsw']],
            ],
            'semantic' => [
                'configurations' => [[
                    'name' => 'caseSemantic',
                    'prioritizedFields' => [
                        'contentFields' => [['fieldName' => 'chunk_text']],
                        'titleField' => ['fieldName' => 'filename'],
                    ],
                ]],
            ],
        ];
        $this->request('PUT', '/indexes/' . rawurlencode($name) . '?api-version=' . self::API_VERSION, $body);

        return $name;
    }

    /**
     * Tell whether an index with the given name exists.
     *
     * Only 200 (exists) and 404 (missing) are treated as answers. Any other
     * status, or a transport failure, raises instead of being reported as
     * "missing", so an outage or a bad key is not mistaken for a new user.
     *
     * @throws RuntimeException On transport errors or an unexpected HTTP status.
     */
    public function indexExists(string $name): bool
    {
        $code = $this->request('GET', '/indexes/' . rawurlencode($name) . '?api-version=' . self::API_VERSION, null, true);
        if ($code === 200) {
            return true;
        }
        if ($code === 404) {
            return false;
        }

        throw new RuntimeException("AzureSearch HTTP {$code} while checking whether index exists.");
    }

    /**
     * Upsert documents (chunks) into the user's index.
     *
     * Chunks are sent as "mergeOrUpload" actions in batches of at most
     * BATCH_LIMIT; a chunk that carries its own '@search.action' keeps it.
     *
     * @param array<int|string, array<string, mixed>> $chunks Documents matching the index schema.
     *
     * @throws RuntimeException On transport/HTTP errors or when the service rejects any document.
     */
    public function upsertChunks(int $userId, array $chunks): void
    {
        if ($chunks === []) {
            return;
        }

        $name = self::indexName($userId);
        // array_chunk() re-indexes each batch, so "value" always serialises as a JSON array
        // even when the caller passed a keyed array.
        foreach (array_chunk($chunks, self::BATCH_LIMIT) as $batch) {
            $actions = array_map(
                static fn(array $chunk): array => array_merge(['@search.action' => 'mergeOrUpload'], $chunk),
                $batch
            );
            $this->indexBatch($name, $actions);
        }
    }

    /**
     * Delete all chunks for a given doc_id (used on document deletion).
     *
     * All matching keys are collected first by paging with `skip`, then deleted
     * in batches, so documents with more than one page of chunks are removed
     * completely. Paging stops after MAX_SKIP; anything beyond that would need
     * a second call.
     *
     * @throws RuntimeException On transport/HTTP errors or when the service rejects any delete.
     */
    public function deleteDoc(int $userId, int $docId): void
    {
        $name = self::indexName($userId);
        $searchPath = '/indexes/' . rawurlencode($name) . '/docs/search?api-version=' . self::API_VERSION;

        $ids = [];
        for ($skip = 0; $skip <= self::MAX_SKIP; $skip += self::BATCH_LIMIT) {
            $resp = $this->request('POST', $searchPath, [
                'search' => '*',
                'filter' => 'doc_id eq ' . $docId,
                'select' => 'id',
                'top' => self::BATCH_LIMIT,
                'skip' => $skip,
            ]);
            $page = is_array($resp['value'] ?? null) ? $resp['value'] : [];
            foreach ($page as $hit) {
                $id = is_array($hit) ? ($hit['id'] ?? null) : null;
                // Compare against '' explicitly: a plain truthiness filter would drop the valid key '0'.
                if (is_string($id) && $id !== '') {
                    $ids[] = $id;
                }
            }
            if (count($page) < self::BATCH_LIMIT) {
                break; // short page => no further results
            }
        }

        $ids = array_values(array_unique($ids));
        foreach (array_chunk($ids, self::BATCH_LIMIT) as $batch) {
            $actions = array_map(
                static fn(string $id): array => ['@search.action' => 'delete', 'id' => $id],
                $batch
            );
            $this->indexBatch($name, $actions);
        }
    }

    /**
     * Delete the entire index (account deletion / GDPR).
     *
     * A 2xx response means the index was removed and 404 means it was already
     * gone; both count as success. Every other outcome raises so that a failed
     * erasure is never reported as done.
     *
     * @throws RuntimeException On transport errors or an unexpected HTTP status.
     */
    public function deleteIndex(int $userId): void
    {
        $name = self::indexName($userId);
        $code = $this->request('DELETE', '/indexes/' . rawurlencode($name) . '?api-version=' . self::API_VERSION, null, true);
        if ($code === 404 || ($code >= 200 && $code < 300)) {
            return;
        }

        throw new RuntimeException("AzureSearch HTTP {$code} while deleting index.");
    }

    /**
     * Hybrid search: BM25 (Norwegian analyzer) + vector + semantic ranker.
     * Returns ['value' => [{id, doc_id, filename, page, chunk_text, @search.score, @search.rerankerScore}, ...]]
     *
     * @param string $query       Free-text query.
     * @param array  $queryVector Embedding of the query, sent as the vector query against field "vector".
     * @param int    $k           Used both as `top` and as the vector query's `k`.
     *
     * @return array Decoded JSON response from the service.
     *
     * @throws RuntimeException On transport errors or an error response from the service.
     */
    public function hybridSearch(int $userId, string $query, array $queryVector, int $k = 5): array
    {
        $name = self::indexName($userId);
        $body = [
            'search' => $query,
            'queryType' => 'semantic',
            'semanticConfiguration' => 'caseSemantic',
            'searchFields' => 'chunk_text,filename',
            'select' => 'id,doc_id,filename,page,chunk_text,doc_type,detected_date',
            'top' => $k,
            'vectorQueries' => [[
                'kind' => 'vector',
                'vector' => $queryVector,
                'k' => $k,
                'fields' => 'vector',
            ]],
        ];

        return $this->request('POST', '/indexes/' . rawurlencode($name) . '/docs/search?api-version=' . self::API_VERSION, $body);
    }

    /**
     * Send one batch of indexing actions and verify that every action succeeded.
     *
     * @param array<int, array<string, mixed>> $actions Documents that already carry '@search.action'.
     *
     * @throws RuntimeException On transport/HTTP errors or per-document failures.
     */
    private function indexBatch(string $indexName, array $actions): void
    {
        $resp = $this->request(
            'POST',
            '/indexes/' . rawurlencode($indexName) . '/docs/index?api-version=' . self::API_VERSION,
            ['value' => $actions]
        );
        self::assertBatchSucceeded($resp);
    }

    /**
     * Inspect a /docs/index response for per-document failures.
     *
     * A partially failed batch is answered with a status below 400, so it would
     * otherwise pass the HTTP-level check in request() unnoticed.
     *
     * @throws RuntimeException When any result item has `status === false`.
     */
    private static function assertBatchSucceeded(array $resp): void
    {
        $items = is_array($resp['value'] ?? null) ? $resp['value'] : [];
        $failures = [];
        foreach ($items as $item) {
            if (is_array($item) && ($item['status'] ?? true) === false) {
                $key = is_scalar($item['key'] ?? null) ? (string)$item['key'] : '?';
                $why = is_scalar($item['errorMessage'] ?? null) ? (string)$item['errorMessage'] : 'unknown error';
                $failures[] = $key . ': ' . $why;
            }
        }
        if ($failures === []) {
            return;
        }

        // Only the first few failures are quoted to keep the message bounded.
        $sample = substr(implode('; ', array_slice($failures, 0, 5)), 0, 300);
        throw new RuntimeException('AzureSearch indexing failed for ' . count($failures) . ' document(s): ' . $sample);
    }

    /**
     * Low-level HTTP. If $returnStatusOnly, returns http code instead of decoded body.
     *
     * Transport errors always raise, in both modes. In status-only mode the
     * caller is responsible for interpreting the HTTP status; otherwise any
     * status >= 400 raises and the JSON body is decoded (an empty or non-JSON
     * body yields []).
     *
     * @param string     $method           HTTP verb.
     * @param string     $path             Path plus query string, appended to the endpoint.
     * @param array|null $body             JSON-encoded as the request body when not null.
     * @param bool       $returnStatusOnly Return the HTTP status code instead of the body.
     *
     * @return int|array
     *
     * @throws RuntimeException On encoding, transport, or (in body mode) HTTP errors.
     */
    private function request(string $method, string $path, ?array $body = null, bool $returnStatusOnly = false)
    {
        $payload = null;
        if ($body !== null) {
            try {
                // Without JSON_THROW_ON_ERROR a failed encode returns false and an empty body
                // would be sent, surfacing later as an unrelated-looking HTTP 400.
                $payload = json_encode($body, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR);
            } catch (JsonException $e) {
                throw new RuntimeException('AzureSearch: request body is not JSON-encodable: ' . $e->getMessage(), 0, $e);
            }
        }

        $ch = curl_init();
        if ($ch === false) {
            throw new RuntimeException('AzureSearch: unable to initialise cURL.');
        }
        curl_setopt_array($ch, [
            CURLOPT_URL => $this->endpoint . $path,
            CURLOPT_CUSTOMREQUEST => strtoupper($method),
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HTTPHEADER => [
                'api-key: ' . $this->adminKey,
                'Content-Type: application/json',
            ],
            CURLOPT_TIMEOUT => 30,
        ]);
        if ($payload !== null) {
            curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        }

        $raw = curl_exec($ch);
        $status = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
        $errno = curl_errno($ch);
        curl_close($ch);

        // Checked before the status-only return: on a transport failure $status is 0,
        // which callers must not mistake for a real HTTP answer.
        if ($errno !== 0) {
            throw new RuntimeException('AzureSearch curl error: ' . curl_strerror($errno));
        }
        if ($returnStatusOnly) {
            return $status;
        }
        if ($status >= 400) {
            throw new RuntimeException("AzureSearch HTTP {$status}: " . substr((string)$raw, 0, 300));
        }

        $decoded = json_decode((string)$raw, true);

        return is_array($decoded) ? $decoded : [];
    }
}
|