Wire Azure AI Search into dobetternorge-tools

health.php: Add azure_search check — calls /$count endpoint and
  reports doc count in the index. Reads DBN_AZURE_SEARCH_{ENDPOINT,KEY,INDEX}.

corpus-search.php: Add azure mode — semantic + vector hybrid search
  via Azure AI Search bnl-legal-v2. Embeds query with LiteLLM
  nomic-embed-text; expands keepCats to include government-policy,
  health-law, social-services, labour-law, immigration (previously
  blocked by contamination workaround, now safe to include).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-15 13:32:15 +02:00
parent d5e61d656a
commit 464b8572d3
2 changed files with 109 additions and 1 deletions
+23
View File
@@ -66,6 +66,29 @@ try {
$checks['family_legal_subscription'] = ['ok' => false, 'detail' => 'Not checked'];
}
// Azure AI Search health check: ask the index's docs/$count endpoint how many
// documents it holds and report that number, or the reason it could not be read.
// Configuration comes from DBN_AZURE_SEARCH_ENDPOINT / _KEY / _INDEX.
$searchEndpoint = rtrim((string)dbnToolsEnv('DBN_AZURE_SEARCH_ENDPOINT', ''), '/');
$searchKey = (string)dbnToolsEnv('DBN_AZURE_SEARCH_KEY', '');
$searchIndex = (string)dbnToolsEnv('DBN_AZURE_SEARCH_INDEX', '');
if ($searchEndpoint && $searchKey && $searchIndex) {
    // Encode the index name so a malformed env value cannot alter the URL path
    // or query string; valid index names pass through unchanged.
    $countUrl = $searchEndpoint . '/indexes/' . rawurlencode($searchIndex)
        . '/docs/$count?api-version=2024-05-01-preview';

    $resp = false;
    $code = 0;
    $curlError = 'curl_init failed';
    $ch = curl_init($countUrl);
    if ($ch !== false) {
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 8,
            CURLOPT_HTTPHEADER => ["api-key: $searchKey"],
        ]);
        $resp = curl_exec($ch);
        $code = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
        // Capture the transport error before the handle is closed.
        $curlError = curl_error($ch);
        curl_close($ch);
    }

    // The count arrives as a plain-text body. Strip surrounding whitespace and
    // tolerate a UTF-8 byte-order mark, which would otherwise make an
    // otherwise-valid number fail validation.
    $body = is_string($resp) ? trim($resp, "\xEF\xBB\xBF \t\n\r\0\x0B") : '';
    // A document count is a non-negative integer; reject anything else
    // (empty body, error payloads, "1e3", negative values).
    $count = ctype_digit($body) ? (int)$body : null;

    if ($code === 200 && $count !== null) {
        $detail = "$count docs in $searchIndex";
    } elseif ($code === 200) {
        // 200 with an unparsable body: say so instead of printing an empty count.
        $detail = "Unexpected count response from $searchIndex";
    } elseif ($code === 0 && $curlError !== '') {
        // No HTTP response at all (DNS failure, timeout, TLS error, ...).
        $detail = "Request failed: $curlError";
    } else {
        $detail = "HTTP $code";
    }

    $checks['azure_search'] = [
        'ok' => $code === 200 && $count !== null,
        'detail' => $detail,
    ];
} else {
    $checks['azure_search'] = ['ok' => false, 'detail' => 'Azure Search env vars not configured'];
}
$logPath = dbnToolsMetadataLogPath();
$dir = dirname($logPath);
$checks['metadata_log'] = [