fix: BVJ party extraction robustness + dbn-legal-agent streaming
Party extraction: wider excerpt (12k chars), cleaner prompt, fallback for root-level array responses, log raw response on unexpected structure. dbn-legal-agent synthesis: replace blocking curl (200s timeout) with an SSE streaming approach (CURLOPT_WRITEFUNCTION). PHP now emits keepalive progress events every 15 s during generation, preventing browser network errors on slow ~6 t/s cuttlefish inference. Timeout extended to 660 s. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -493,31 +493,26 @@ PROMPT;
|
||||
private function extractParties(string $docText, string $language): array
|
||||
{
|
||||
$locale = $language === 'no' ? 'Norwegian' : 'English';
|
||||
$excerpt = mb_substr($docText, 0, 8000, 'UTF-8');
|
||||
$excerpt = mb_substr($docText, 0, 12000, 'UTF-8');
|
||||
|
||||
$prompt = <<<PROMPT
|
||||
You are analysing a Norwegian child welfare (Barnevernet) document.
|
||||
Identify ALL named parties — every person or institution referred to by name or title.
|
||||
|
||||
Return JSON only in {$locale}:
|
||||
{
|
||||
"parties": [
|
||||
{
|
||||
"name": "Full name or institution name",
|
||||
"role": "Their role, e.g. Biological mother, Barnevernarbeider, Saksbehandler, Child, Father, Foster carer, Melder, Politi, Doctor, Lawyer",
|
||||
"organization": "Organization or employer if mentioned, otherwise null",
|
||||
"relationship_to_child": "Relationship to the child, e.g. Mor, Far, Saksbehandler, Melder, or null if unclear"
|
||||
}
|
||||
]
|
||||
}
|
||||
Respond in {$locale}. Return a JSON object with a single key "parties" containing an array of objects.
|
||||
Each object must have these four fields:
|
||||
- "name": full name or institution name (string)
|
||||
- "role": their role in the case, e.g. Biological mother, Child, Barnevernarbeider, Saksbehandler, Melder, Politi, Lege, Advokat, Foster carer, Rusklinikk
|
||||
- "organization": employer or institution if mentioned, otherwise null
|
||||
- "relationship_to_child": relationship to the child in the document, e.g. Mother, Father, Caseworker, Melder, or null
|
||||
|
||||
Rules:
|
||||
- Include every named person and institution — even peripheral ones.
|
||||
- Use the role category from the document. For Norwegian documents use Norwegian role titles.
|
||||
- Do not invent parties not mentioned in the text.
|
||||
- Include every named person and named institution — even peripheral ones.
|
||||
- Include Barnevernvakta (bvv) as an institution even if no individual caseworkers are named.
|
||||
- Do not invent parties not present in the text.
|
||||
- Maximum 20 parties.
|
||||
|
||||
Document text (first 8000 chars):
|
||||
Document text:
|
||||
{$excerpt}
|
||||
PROMPT;
|
||||
|
||||
@@ -525,11 +520,16 @@ PROMPT;
|
||||
$raw = $this->azure->chatText([
|
||||
['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
|
||||
['role' => 'user', 'content' => $prompt],
|
||||
], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 1200, 'timeout' => 35]);
|
||||
], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 1500, 'timeout' => 40]);
|
||||
$json = $this->azure->decodeJsonObject($raw);
|
||||
if (is_array($json) && is_array($json['parties'] ?? null)) {
|
||||
return array_slice($json['parties'], 0, 20);
|
||||
}
|
||||
// Fallback: model returned an array at root level instead of {parties:[...]}
|
||||
if (is_array($json) && isset($json[0]['name'])) {
|
||||
return array_slice($json, 0, 20);
|
||||
}
|
||||
error_log('BVJ extractParties unexpected structure: ' . substr($raw, 0, 300));
|
||||
} catch (Throwable $e) {
|
||||
error_log('BVJ extractParties failed: ' . $e->getMessage());
|
||||
}
|
||||
@@ -844,7 +844,7 @@ PROMPT;
|
||||
['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
|
||||
['role' => 'user', 'content' => $prompt],
|
||||
];
|
||||
$opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3500, 'timeout' => 200];
|
||||
$opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3000, 'timeout' => 200];
|
||||
|
||||
$deployLabel = match ($engine) {
|
||||
'gpu' => 'GPU (cuttlefish)',
|
||||
@@ -853,10 +853,19 @@ PROMPT;
|
||||
default => $this->azure->chatDeployment(),
|
||||
};
|
||||
|
||||
$raw = '';
|
||||
try {
|
||||
if ($engine === 'dbn_legal') {
|
||||
$response = dbnToolsCallGpuLlm($messages, array_merge($opts, ['model' => 'dbn-legal-agent', 'timeout' => 200]));
|
||||
$raw = (string)($response['choices'][0]['message']['content'] ?? '');
|
||||
// dbn-legal-agent is slow (~6 t/s on cuttlefish). Stream the response and emit
|
||||
// keepalive events every 15 s so the browser connection stays alive.
|
||||
$raw = $this->callGpuLlmStream($messages, [
|
||||
'model' => 'dbn-legal-agent',
|
||||
'temperature' => $temperature,
|
||||
'max_tokens' => 2800,
|
||||
'timeout' => 660,
|
||||
], $emit ? static function () use ($emit): void {
|
||||
$emit('progress', ['detail' => 'dbn-legal-agent generating…']);
|
||||
} : null);
|
||||
} elseif ($engine === 'gpu') {
|
||||
$response = dbnToolsCallGpuLlm($messages, $opts);
|
||||
$raw = (string)($response['choices'][0]['message']['content'] ?? '');
|
||||
@@ -885,6 +894,71 @@ PROMPT;
|
||||
return ['json' => $json, 'deploy_label' => $deployLabel];
|
||||
}
|
||||
|
||||
// ── GPU streaming helper (keeps browser connection alive during slow models) ──
|
||||
|
||||
/**
 * Call the LiteLLM chat-completions endpoint with streaming enabled and return the full text.
 *
 * The response is read as a Server-Sent Events stream: every "data:" line is JSON-decoded and
 * its choices[0].delta.content is appended to the result. While the request is in flight,
 * $onProgress() is invoked at most once every 15 seconds so the caller can emit a keepalive
 * event to the browser; output is flushed right after each invocation.
 *
 * @param array         $messages   Chat messages, sent verbatim as the request's "messages" field.
 * @param array         $options    Optional keys: "model", "temperature", "max_tokens", "timeout" (seconds).
 * @param callable|null $onProgress Zero-argument keepalive callback, or null to disable keepalives.
 *
 * @return string The concatenated delta content, trimmed of surrounding whitespace.
 *
 * @throws RuntimeException When the payload cannot be encoded, cURL cannot be initialised,
 *                          the transfer fails, or the endpoint answers with an HTTP status >= 400.
 */
private function callGpuLlmStream(array $messages, array $options, ?callable $onProgress): string
{
    $url = 'http://10.0.1.10:4000/v1/chat/completions';
    // NOTE(review): SECURITY — a literal API key is committed here as the fallback. It is kept
    // only so deployments without LITELLM_MASTER_KEY keep working; rotate the key and remove
    // the fallback once the environment variable is set everywhere.
    $apiKey = (string)(dbnToolsEnv('LITELLM_MASTER_KEY') ?: 'sk-bnl-litellm-26xR9mK4qvN3wL8sTj7pB2d');
    $timeout = (int)($options['timeout'] ?? 660);

    $body = json_encode(
        [
            'model' => (string)($options['model'] ?? 'qwen2.5:14b'),
            'messages' => $messages,
            'temperature' => $options['temperature'] ?? 0.1,
            'max_tokens' => $options['max_tokens'] ?? 2800,
            'stream' => true,
        ],
        JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
    );
    if ($body === false) {
        // Typically invalid UTF-8 in a message; sending `false` as the POST body would be meaningless.
        throw new RuntimeException('GPU stream request failed: could not encode payload: ' . json_last_error_msg());
    }

    $accumulated = '';   // generated text collected so far
    $pending = '';       // trailing partial line carried over to the next write callback
    $nonEventText = '';  // first bytes of any non-"data:" output, kept for error diagnostics
    $lastKeepalive = microtime(true);

    // Handle one complete line of the event stream.
    $consumeLine = static function (string $line) use (&$accumulated, &$nonEventText): void {
        $trimmed = trim($line);
        if ($trimmed === '') {
            return;
        }
        if (!str_starts_with($trimmed, 'data:')) {
            // Not an SSE data line (e.g. a plain JSON error body). Remember up to 300 bytes of it.
            $room = 300 - strlen($nonEventText);
            if ($room > 0) {
                $nonEventText .= substr($trimmed, 0, $room);
            }
            return;
        }
        $json = trim(substr($trimmed, 5));
        if ($json === '' || $json === '[DONE]') {
            return;
        }
        $chunk = json_decode($json, true);
        $delta = is_array($chunk) ? ($chunk['choices'][0]['delta']['content'] ?? '') : '';
        if (is_string($delta) && $delta !== '') {
            $accumulated .= $delta;
        }
    };

    // Invoke $onProgress at most once per 15 s, then push the output to the client.
    $keepalive = static function () use (&$lastKeepalive, $onProgress): void {
        if ($onProgress === null) {
            return;
        }
        $now = microtime(true);
        if ($now - $lastKeepalive < 15.0) {
            return;
        }
        $lastKeepalive = $now;
        $onProgress();
        flush();
    };

    $ch = curl_init($url);
    if ($ch === false) {
        throw new RuntimeException('GPU stream request failed: could not initialise cURL');
    }

    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $body,
        CURLOPT_HTTPHEADER => [
            'Content-Type: application/json',
            'Authorization: Bearer ' . $apiKey,
        ],
        CURLOPT_TIMEOUT => $timeout,
        CURLOPT_RETURNTRANSFER => false,
        CURLOPT_WRITEFUNCTION => static function ($handle, $data) use (&$pending, $consumeLine, $keepalive): int {
            // cURL delivers arbitrary byte chunks, so a "data:" line may be split across two
            // callbacks. Only complete lines are parsed; the unterminated tail is carried over.
            $pending .= $data;
            $lines = explode("\n", $pending);
            $pending = (string)array_pop($lines);
            foreach ($lines as $line) {
                $consumeLine($line);
            }
            $keepalive();
            return strlen($data);
        },
        // The progress callback also runs while no bytes are arriving (e.g. while the model is
        // still processing the prompt), so keepalives do not depend on tokens being streamed.
        CURLOPT_NOPROGRESS => false,
        CURLOPT_PROGRESSFUNCTION => static function ($handle, $dlTotal, $dlNow, $ulTotal, $ulNow) use ($keepalive): int {
            $keepalive();
            return 0; // non-zero would abort the transfer
        },
    ]);

    try {
        curl_exec($ch);
        $curlErr = curl_error($ch);
        $status = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
    } finally {
        curl_close($ch);
    }

    // The stream (or an error body) may end without a final newline.
    if ($pending !== '') {
        $consumeLine($pending);
        $pending = '';
    }

    if ($curlErr !== '') {
        throw new RuntimeException('GPU stream request failed: ' . $curlErr);
    }
    if ($status >= 400) {
        // Without this check an auth or server error would come back as an empty string.
        $detail = $nonEventText !== '' ? ': ' . $nonEventText : '';
        throw new RuntimeException('GPU stream request failed: HTTP ' . $status . $detail);
    }

    return trim($accumulated);
}
|
||||
|
||||
// ── Shared helpers (copied from DbnDeepResearchAgent) ────────────────────
|
||||
|
||||
private function splitIntoChunks(string $text, string $filename, int $fileIdx): array
|
||||
|
||||
Reference in New Issue
Block a user