fix: BVJ party extraction robustness + dbn-legal-agent streaming

Party extraction: wider excerpt (12k chars), cleaner prompt, fallback for
root-level array responses, log raw response on unexpected structure.

dbn-legal-agent synthesis: replace blocking curl (200s timeout) with an
SSE streaming approach (CURLOPT_WRITEFUNCTION). PHP now emits keepalive
progress events every 15 s during generation, preventing browser network
errors on slow ~6 t/s cuttlefish inference. Timeout extended to 660 s.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-15 21:35:18 +02:00
parent 9b8cb9c6dc
commit bc52690472
+94 -20
View File
@@ -493,31 +493,26 @@ PROMPT;
private function extractParties(string $docText, string $language): array private function extractParties(string $docText, string $language): array
{ {
$locale = $language === 'no' ? 'Norwegian' : 'English'; $locale = $language === 'no' ? 'Norwegian' : 'English';
$excerpt = mb_substr($docText, 0, 8000, 'UTF-8'); $excerpt = mb_substr($docText, 0, 12000, 'UTF-8');
$prompt = <<<PROMPT $prompt = <<<PROMPT
You are analysing a Norwegian child welfare (Barnevernet) document. You are analysing a Norwegian child welfare (Barnevernet) document.
Identify ALL named parties — every person or institution referred to by name or title. Identify ALL named parties — every person or institution referred to by name or title.
Return JSON only in {$locale}: Respond in {$locale}. Return a JSON object with a single key "parties" containing an array of objects.
{ Each object must have these four fields:
"parties": [ - "name": full name or institution name (string)
{ - "role": their role in the case, e.g. Biological mother, Child, Barnevernarbeider, Saksbehandler, Melder, Politi, Lege, Advokat, Foster carer, Rusklinikk
"name": "Full name or institution name", - "organization": employer or institution if mentioned, otherwise null
"role": "Their role, e.g. Biological mother, Barnevernarbeider, Saksbehandler, Child, Father, Foster carer, Melder, Politi, Doctor, Lawyer", - "relationship_to_child": relationship to the child in the document, e.g. Mother, Father, Caseworker, Melder, or null
"organization": "Organization or employer if mentioned, otherwise null",
"relationship_to_child": "Relationship to the child, e.g. Mor, Far, Saksbehandler, Melder, or null if unclear"
}
]
}
Rules: Rules:
- Include every named person and institution — even peripheral ones. - Include every named person and named institution — even peripheral ones.
- Use the role category from the document. For Norwegian documents use Norwegian role titles. - Include Barnevernvakta (bvv) as an institution even if no individual caseworkers are named.
- Do not invent parties not mentioned in the text. - Do not invent parties not present in the text.
- Maximum 20 parties. - Maximum 20 parties.
Document text (first 8000 chars): Document text:
{$excerpt} {$excerpt}
PROMPT; PROMPT;
@@ -525,11 +520,16 @@ PROMPT;
$raw = $this->azure->chatText([ $raw = $this->azure->chatText([
['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'], ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
['role' => 'user', 'content' => $prompt], ['role' => 'user', 'content' => $prompt],
], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 1200, 'timeout' => 35]); ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 1500, 'timeout' => 40]);
$json = $this->azure->decodeJsonObject($raw); $json = $this->azure->decodeJsonObject($raw);
if (is_array($json) && is_array($json['parties'] ?? null)) { if (is_array($json) && is_array($json['parties'] ?? null)) {
return array_slice($json['parties'], 0, 20); return array_slice($json['parties'], 0, 20);
} }
// Fallback: model returned an array at root level instead of {parties:[...]}
if (is_array($json) && isset($json[0]['name'])) {
return array_slice($json, 0, 20);
}
error_log('BVJ extractParties unexpected structure: ' . substr($raw, 0, 300));
} catch (Throwable $e) { } catch (Throwable $e) {
error_log('BVJ extractParties failed: ' . $e->getMessage()); error_log('BVJ extractParties failed: ' . $e->getMessage());
} }
@@ -844,7 +844,7 @@ PROMPT;
['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'], ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
['role' => 'user', 'content' => $prompt], ['role' => 'user', 'content' => $prompt],
]; ];
$opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3500, 'timeout' => 200]; $opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3000, 'timeout' => 200];
$deployLabel = match ($engine) { $deployLabel = match ($engine) {
'gpu' => 'GPU (cuttlefish)', 'gpu' => 'GPU (cuttlefish)',
@@ -853,10 +853,19 @@ PROMPT;
default => $this->azure->chatDeployment(), default => $this->azure->chatDeployment(),
}; };
$raw = '';
try { try {
if ($engine === 'dbn_legal') { if ($engine === 'dbn_legal') {
$response = dbnToolsCallGpuLlm($messages, array_merge($opts, ['model' => 'dbn-legal-agent', 'timeout' => 200])); // dbn-legal-agent is slow (~6 t/s on cuttlefish). Stream the response and emit
$raw = (string)($response['choices'][0]['message']['content'] ?? ''); // keepalive events every 15 s so the browser connection stays alive.
$raw = $this->callGpuLlmStream($messages, [
'model' => 'dbn-legal-agent',
'temperature' => $temperature,
'max_tokens' => 2800,
'timeout' => 660,
], $emit ? static function () use ($emit): void {
$emit('progress', ['detail' => 'dbn-legal-agent generating…']);
} : null);
} elseif ($engine === 'gpu') { } elseif ($engine === 'gpu') {
$response = dbnToolsCallGpuLlm($messages, $opts); $response = dbnToolsCallGpuLlm($messages, $opts);
$raw = (string)($response['choices'][0]['message']['content'] ?? ''); $raw = (string)($response['choices'][0]['message']['content'] ?? '');
@@ -885,6 +894,71 @@ PROMPT;
return ['json' => $json, 'deploy_label' => $deployLabel]; return ['json' => $json, 'deploy_label' => $deployLabel];
} }
// ── GPU streaming helper (keeps browser connection alive during slow models) ──
/**
 * Call the LiteLLM chat-completions endpoint with streaming enabled and return the full text.
 *
 * The response is consumed as a server-sent-event stream: every complete "data:" line is
 * JSON-decoded and its choices[0].delta.content fragment is appended to the result.
 * While the transfer is running, $onProgress is invoked at most once every 15 seconds
 * (followed by flush()) so the caller can push a keepalive event to the browser. The
 * keepalive is driven by both the write callback and cURL's progress callback, so it
 * also fires while the model is silent (e.g. before the first token arrives).
 *
 * @param array         $messages   Chat messages, sent verbatim as the "messages" request field.
 * @param array         $options    Optional keys: "model", "temperature", "max_tokens",
 *                                  "timeout" (total request timeout in seconds, default 660).
 * @param callable|null $onProgress Zero-argument keepalive callback, or null to disable keepalives.
 *
 * @return string The concatenated streamed content, trimmed.
 *
 * @throws RuntimeException When the payload cannot be encoded, cURL cannot be initialised,
 *                          the transfer fails, or the endpoint answers with HTTP status >= 400.
 */
private function callGpuLlmStream(array $messages, array $options, ?callable $onProgress): string
{
    $url = 'http://10.0.1.10:4000/v1/chat/completions';
    // NOTE(review): hard-coded fallback credential committed to source control. It is kept
    // byte-identical here so existing deployments keep working, but the key should be rotated
    // and the fallback removed so that a missing LITELLM_MASTER_KEY fails loudly instead.
    $apiKey = (string)(dbnToolsEnv('LITELLM_MASTER_KEY') ?: 'sk-bnl-litellm-26xR9mK4qvN3wL8sTj7pB2d');
    $timeout = (int)($options['timeout'] ?? 660);

    $payload = [
        'model' => (string)($options['model'] ?? 'qwen2.5:14b'),
        'messages' => $messages,
        'temperature' => $options['temperature'] ?? 0.1,
        'max_tokens' => $options['max_tokens'] ?? 2800,
        'stream' => true,
    ];
    $body = json_encode($payload, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    if ($body === false) {
        // e.g. invalid UTF-8 in a message; sending an empty body would only yield an opaque server error.
        throw new RuntimeException('GPU stream request failed: cannot encode payload: ' . json_last_error_msg());
    }

    $headers = [
        'Content-Type: application/json',
        'Authorization: Bearer ' . $apiKey,
    ];

    $accumulated = '';
    $lineBuffer = '';   // trailing partial SSE line carried over between write callbacks
    $rawHead = '';      // first bytes of the raw response, used only for error messages
    $lastKeepalive = microtime(true);

    // Emit a keepalive if at least 15 s have passed since the previous one.
    $tick = static function () use (&$lastKeepalive, $onProgress): void {
        if ($onProgress === null) {
            return;
        }
        $now = microtime(true);
        if ($now - $lastKeepalive < 15.0) {
            return;
        }
        $lastKeepalive = $now;
        $onProgress();
        flush();
    };

    // Parse one complete SSE line and append its content delta, if any.
    $consumeLine = static function (string $line) use (&$accumulated): void {
        $line = trim($line);
        // The SSE format allows "data:" with or without a following space.
        if (!str_starts_with($line, 'data:')) {
            return;
        }
        $json = trim(substr($line, 5));
        if ($json === '' || $json === '[DONE]') {
            return;
        }
        $chunk = json_decode($json, true);
        if (!is_array($chunk)) {
            return;
        }
        $delta = $chunk['choices'][0]['delta']['content'] ?? '';
        if (is_string($delta) && $delta !== '') {
            $accumulated .= $delta;
        }
    };

    $ch = curl_init($url);
    if ($ch === false) {
        throw new RuntimeException('GPU stream request failed: cannot initialise cURL');
    }

    try {
        curl_setopt_array($ch, [
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_TIMEOUT => $timeout,
            CURLOPT_RETURNTRANSFER => false,
            CURLOPT_WRITEFUNCTION => static function ($ch, $data) use (&$lineBuffer, &$rawHead, $consumeLine, $tick): int {
                $data = (string)$data;
                if (strlen($rawHead) < 300) {
                    $rawHead .= substr($data, 0, 300 - strlen($rawHead));
                }
                // cURL hands over arbitrary byte chunks, so an SSE line may be split across
                // calls: only process complete lines and keep the unfinished tail for later.
                $lineBuffer .= $data;
                $lines = explode("\n", $lineBuffer);
                $lineBuffer = (string)array_pop($lines);
                foreach ($lines as $line) {
                    $consumeLine($line);
                }
                $tick();
                return strlen($data);
            },
            // The progress callback runs periodically even when no bytes arrive, which keeps
            // the keepalive going while the model has not produced any output yet.
            CURLOPT_NOPROGRESS => false,
            CURLOPT_PROGRESSFUNCTION => static function (...$unused) use ($tick): int {
                $tick();
                return 0; // non-zero would abort the transfer
            },
        ]);

        $ok = curl_exec($ch);
        $curlErr = curl_error($ch);
        $status = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
    } finally {
        curl_close($ch);
    }

    // A final line without a trailing newline is still sitting in the buffer.
    if ($lineBuffer !== '') {
        $consumeLine($lineBuffer);
        $lineBuffer = '';
    }

    if ($ok === false || $curlErr !== '') {
        throw new RuntimeException('GPU stream request failed: ' . ($curlErr !== '' ? $curlErr : 'unknown cURL error'));
    }
    if ($status >= 400) {
        // Error responses are plain JSON rather than SSE, so nothing was accumulated;
        // surface the status and the start of the body instead of returning ''.
        throw new RuntimeException('GPU stream request failed: HTTP ' . $status . ' ' . trim($rawHead));
    }

    return trim($accumulated);
}
// ── Shared helpers (copied from DbnDeepResearchAgent) ──────────────────── // ── Shared helpers (copied from DbnDeepResearchAgent) ────────────────────
private function splitIntoChunks(string $text, string $filename, int $fileIdx): array private function splitIntoChunks(string $text, string $filename, int $fileIdx): array