ba9cddf9a1
- Stripe: StripeClient.php, checkout/portal/webhook endpoints, idempotent event handling - FreeTier: tier-aware credits (free/light/pro/pro_plus), bonus_balance, hourly caps per tier - pricing.php + billing.php: 4-tier cards, 3 topups, Customer Portal, balance breakdown - Min Sak: CaseStore.php, AzureDocIntelligence.php, AzureSearchAdmin.php — per-user hybrid RAG - api/case/: upload, list, delete, ingest-callback (HMAC-auth'd from n8n) - award-survey-credits: inter-site HMAC endpoint for dobetternorge.no survey bonus - dashboard.php: tier badge, balance breakdown card, Min Sak CTA, survey CTA - KorrespondAgent + all 3 other agents: use_my_case toggle wired to dbnToolsCaseContext() - bootstrap.php: dbnToolsCaseContext(), dbnToolsIntersiteSecret(), dbnToolsCurrentTier() Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
120 lines
4.7 KiB
PHP
120 lines
4.7 KiB
PHP
<?php
|
|
declare(strict_types=1);
|
|
|
|
/**
 * Azure Document Intelligence (formerly Form Recognizer) — Read API for OCR + layout.
 *
 * Endpoint: https://bnl-doc-intelligence.cognitiveservices.azure.com/
 * Auth:     Ocp-Apim-Subscription-Key header
 *
 * The Read API is asynchronous:
 *   1. POST the document to /documentintelligence/documentModels/prebuilt-read:analyze
 *   2. the service answers 202 with an Operation-Location header holding the poll URL
 *   3. poll that URL until status == "succeeded", then read analyzeResult.content
 */
final class AzureDocIntelligence
{
    /** REST API version appended to the analyze URL. */
    private const API_VERSION = '2024-11-30';

    /** Pause before every poll request, in microseconds (1.5 s). */
    private const POLL_INTERVAL_MICROSECONDS = 1500_000;

    private string $endpoint;

    private string $key;

    /**
     * @param string|null $endpoint Service base URL; when null the configured value is used.
     * @param string|null $key      Subscription key; when null the configured value is used.
     *
     * @throws RuntimeException When the resulting endpoint or key is empty.
     */
    public function __construct(?string $endpoint = null, ?string $key = null)
    {
        $cfg = self::loadConfig();
        // A trailing slash is stripped so the request path can be appended verbatim.
        $this->endpoint = rtrim($endpoint ?? ($cfg['endpoint'] ?? ''), '/');
        $this->key = $key ?? ($cfg['key'] ?? '');
        if ($this->endpoint === '' || $this->key === '') {
            throw new RuntimeException('AzureDocIntelligence: endpoint or key not configured.');
        }
    }

    /**
     * Resolve the default endpoint and key.
     *
     * Reads /etc/bnl/azure.php when that file is readable (keys
     * DOC_INTELLIGENCE_ENDPOINT / DOC_INTELLIGENCE_KEY); otherwise falls back to the
     * AZURE_DOC_INTELLIGENCE_ENDPOINT / AZURE_DOC_INTELLIGENCE_KEY environment variables.
     *
     * @return array{endpoint: string, key: string}
     */
    private static function loadConfig(): array
    {
        $path = '/etc/bnl/azure.php';
        if (is_readable($path)) {
            $cfg = require $path;
            // A config file that does not return an array yields empty settings.
            if (!is_array($cfg)) {
                $cfg = [];
            }

            return [
                'endpoint' => (string)($cfg['DOC_INTELLIGENCE_ENDPOINT'] ?? ''),
                'key' => (string)($cfg['DOC_INTELLIGENCE_KEY'] ?? ''),
            ];
        }

        return [
            'endpoint' => (string)(getenv('AZURE_DOC_INTELLIGENCE_ENDPOINT') ?: ''),
            'key' => (string)(getenv('AZURE_DOC_INTELLIGENCE_KEY') ?: ''),
        ];
    }

    /**
     * OCR a local PDF file using the prebuilt-read model.
     *
     * Blocks while polling: sleeps 1.5 s before each poll request and gives up once
     * $pollTimeoutSeconds have elapsed since the document was submitted.
     *
     * @param string $localPath          Path of the PDF to upload.
     * @param int    $pollTimeoutSeconds Maximum time to wait for the analysis to finish.
     *
     * @return array{content: string, pages: array, languages: array, page_count: int}
     *
     * @throws InvalidArgumentException When $localPath is not readable.
     * @throws RuntimeException         When the file cannot be read or is empty, when the
     *                                  service rejects a request, when the analysis fails,
     *                                  or when polling times out.
     */
    public function readPdf(string $localPath, int $pollTimeoutSeconds = 120): array
    {
        if (!is_readable($localPath)) {
            throw new InvalidArgumentException("Unreadable file: {$localPath}");
        }

        $body = file_get_contents($localPath);
        if ($body === false) {
            throw new RuntimeException("DocIntelligence: could not read file: {$localPath}");
        }
        // Fail before any network traffic: an empty upload can never be analysed.
        if ($body === '') {
            throw new RuntimeException("DocIntelligence: file is empty: {$localPath}");
        }

        $pollUrl = $this->submitForAnalysis($body);

        $deadline = time() + $pollTimeoutSeconds;
        while (time() < $deadline) {
            usleep(self::POLL_INTERVAL_MICROSECONDS);

            $data = $this->pollOnce($pollUrl);
            if ($data === null) {
                // Throttled or temporarily unavailable — the analysis itself is still alive.
                continue;
            }

            $state = $data['status'] ?? '';
            if ($state === 'succeeded') {
                return self::buildResult($data);
            }
            if ($state === 'failed') {
                $err = $data['error']['message'] ?? 'unknown';
                if (!is_string($err)) {
                    $err = 'unknown';
                }
                throw new RuntimeException("DocIntelligence analysis failed: {$err}");
            }
            // 'running' or 'notStarted' — continue polling
        }

        throw new RuntimeException("DocIntelligence poll timeout after {$pollTimeoutSeconds}s.");
    }

    /**
     * Upload the document and return the poll URL from the Operation-Location header.
     *
     * @param string $body Raw PDF bytes.
     *
     * @throws RuntimeException When the response is not a 202 or lacks the header.
     */
    private function submitForAnalysis(string $body): string
    {
        $url = $this->endpoint
            . '/documentintelligence/documentModels/prebuilt-read:analyze?api-version='
            . self::API_VERSION;

        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_HTTPHEADER => [
                'Content-Type: application/pdf',
                'Ocp-Apim-Subscription-Key: ' . $this->key,
            ],
            CURLOPT_RETURNTRANSFER => true,
            // Headers are kept in the returned string; the poll URL arrives as a header.
            CURLOPT_HEADER => true,
            CURLOPT_TIMEOUT => 60,
        ]);
        $response = curl_exec($ch);
        $headerSize = (int)curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        $status = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
        $curlError = curl_error($ch);
        curl_close($ch);

        if (!is_string($response)) {
            throw new RuntimeException(
                "DocIntelligence analyze failed: HTTP {$status}" . self::transportDetail($curlError)
            );
        }
        if ($status !== 202) {
            $detail = self::serviceDetail((string)substr($response, $headerSize));
            throw new RuntimeException("DocIntelligence analyze failed: HTTP {$status}{$detail}");
        }

        $headers = (string)substr($response, 0, $headerSize);
        // Anchored to the start of a header line so a longer header name cannot match.
        if (!preg_match('/^Operation-Location:[ \t]*(\S+)/mi', $headers, $m)) {
            throw new RuntimeException('DocIntelligence: missing Operation-Location header.');
        }

        return trim($m[1]);
    }

    /**
     * Issue a single poll request.
     *
     * @return array|null Decoded JSON body, or null when the service answered 429/503
     *                    and the caller should simply poll again.
     *
     * @throws RuntimeException On any other non-200 answer or a body that is not JSON.
     */
    private function pollOnce(string $pollUrl): ?array
    {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $pollUrl,
            CURLOPT_HTTPHEADER => ['Ocp-Apim-Subscription-Key: ' . $this->key],
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 30,
        ]);
        $response = curl_exec($ch);
        $status = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
        $curlError = curl_error($ch);
        curl_close($ch);

        if ($status === 429 || $status === 503) {
            return null;
        }
        if (!is_string($response)) {
            throw new RuntimeException(
                "DocIntelligence poll failed: HTTP {$status}" . self::transportDetail($curlError)
            );
        }
        if ($status !== 200) {
            $detail = self::serviceDetail($response);
            throw new RuntimeException("DocIntelligence poll failed: HTTP {$status}{$detail}");
        }

        $data = json_decode($response, true);
        if (!is_array($data)) {
            throw new RuntimeException('DocIntelligence poll failed: response body is not valid JSON.');
        }

        return $data;
    }

    /**
     * Shape a "succeeded" poll payload into the public result array.
     *
     * @param array $data Decoded poll response.
     *
     * @return array{content: string, pages: array, languages: array, page_count: int}
     */
    private static function buildResult(array $data): array
    {
        $result = $data['analyzeResult'] ?? [];
        if (!is_array($result)) {
            $result = [];
        }
        // Normalised to arrays so count() cannot fail on an unexpected payload.
        $pages = is_array($result['pages'] ?? null) ? $result['pages'] : [];
        $languages = is_array($result['languages'] ?? null) ? $result['languages'] : [];

        return [
            'content' => (string)($result['content'] ?? ''),
            'pages' => $pages,
            'languages' => $languages,
            'page_count' => count($pages),
        ];
    }

    /** Format a cURL transport error as a message suffix ('' when there is none). */
    private static function transportDetail(string $curlError): string
    {
        return $curlError !== '' ? " (cURL: {$curlError})" : '';
    }

    /** Extract error.message from a JSON error body as a message suffix ('' when absent). */
    private static function serviceDetail(string $body): string
    {
        $decoded = json_decode($body, true);
        $message = is_array($decoded) ? ($decoded['error']['message'] ?? null) : null;

        return is_string($message) && $message !== '' ? ": {$message}" : '';
    }
}
|