fix: BVJ party extraction robustness + dbn-legal-agent streaming
Party extraction: wider excerpt (12k chars), cleaner prompt, fallback for root-level array responses, log raw response on unexpected structure. dbn-legal-agent synthesis: replace blocking curl (200s timeout) with an SSE streaming approach (CURLOPT_WRITEFUNCTION). PHP now emits keepalive progress events every 15 s during generation, preventing browser network errors on slow ~6 t/s cuttlefish inference. Timeout extended to 660 s. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -493,31 +493,26 @@ PROMPT;
|
|||||||
private function extractParties(string $docText, string $language): array
|
private function extractParties(string $docText, string $language): array
|
||||||
{
|
{
|
||||||
$locale = $language === 'no' ? 'Norwegian' : 'English';
|
$locale = $language === 'no' ? 'Norwegian' : 'English';
|
||||||
$excerpt = mb_substr($docText, 0, 8000, 'UTF-8');
|
$excerpt = mb_substr($docText, 0, 12000, 'UTF-8');
|
||||||
|
|
||||||
$prompt = <<<PROMPT
|
$prompt = <<<PROMPT
|
||||||
You are analysing a Norwegian child welfare (Barnevernet) document.
|
You are analysing a Norwegian child welfare (Barnevernet) document.
|
||||||
Identify ALL named parties — every person or institution referred to by name or title.
|
Identify ALL named parties — every person or institution referred to by name or title.
|
||||||
|
|
||||||
Return JSON only in {$locale}:
|
Respond in {$locale}. Return a JSON object with a single key "parties" containing an array of objects.
|
||||||
{
|
Each object must have these four fields:
|
||||||
"parties": [
|
- "name": full name or institution name (string)
|
||||||
{
|
- "role": their role in the case, e.g. Biological mother, Child, Barnevernarbeider, Saksbehandler, Melder, Politi, Lege, Advokat, Foster carer, Rusklinikk
|
||||||
"name": "Full name or institution name",
|
- "organization": employer or institution if mentioned, otherwise null
|
||||||
"role": "Their role, e.g. Biological mother, Barnevernarbeider, Saksbehandler, Child, Father, Foster carer, Melder, Politi, Doctor, Lawyer",
|
- "relationship_to_child": relationship to the child in the document, e.g. Mother, Father, Caseworker, Melder, or null
|
||||||
"organization": "Organization or employer if mentioned, otherwise null",
|
|
||||||
"relationship_to_child": "Relationship to the child, e.g. Mor, Far, Saksbehandler, Melder, or null if unclear"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
- Include every named person and institution — even peripheral ones.
|
- Include every named person and named institution — even peripheral ones.
|
||||||
- Use the role category from the document. For Norwegian documents use Norwegian role titles.
|
- Include Barnevernvakta (bvv) as an institution even if no individual caseworkers are named.
|
||||||
- Do not invent parties not mentioned in the text.
|
- Do not invent parties not present in the text.
|
||||||
- Maximum 20 parties.
|
- Maximum 20 parties.
|
||||||
|
|
||||||
Document text (first 8000 chars):
|
Document text:
|
||||||
{$excerpt}
|
{$excerpt}
|
||||||
PROMPT;
|
PROMPT;
|
||||||
|
|
||||||
@@ -525,11 +520,16 @@ PROMPT;
|
|||||||
$raw = $this->azure->chatText([
|
$raw = $this->azure->chatText([
|
||||||
['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
|
['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
|
||||||
['role' => 'user', 'content' => $prompt],
|
['role' => 'user', 'content' => $prompt],
|
||||||
], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 1200, 'timeout' => 35]);
|
], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 1500, 'timeout' => 40]);
|
||||||
$json = $this->azure->decodeJsonObject($raw);
|
$json = $this->azure->decodeJsonObject($raw);
|
||||||
if (is_array($json) && is_array($json['parties'] ?? null)) {
|
if (is_array($json) && is_array($json['parties'] ?? null)) {
|
||||||
return array_slice($json['parties'], 0, 20);
|
return array_slice($json['parties'], 0, 20);
|
||||||
}
|
}
|
||||||
|
// Fallback: model returned an array at root level instead of {parties:[...]}
|
||||||
|
if (is_array($json) && isset($json[0]['name'])) {
|
||||||
|
return array_slice($json, 0, 20);
|
||||||
|
}
|
||||||
|
error_log('BVJ extractParties unexpected structure: ' . substr($raw, 0, 300));
|
||||||
} catch (Throwable $e) {
|
} catch (Throwable $e) {
|
||||||
error_log('BVJ extractParties failed: ' . $e->getMessage());
|
error_log('BVJ extractParties failed: ' . $e->getMessage());
|
||||||
}
|
}
|
||||||
@@ -844,7 +844,7 @@ PROMPT;
|
|||||||
['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
|
['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
|
||||||
['role' => 'user', 'content' => $prompt],
|
['role' => 'user', 'content' => $prompt],
|
||||||
];
|
];
|
||||||
$opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3500, 'timeout' => 200];
|
$opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3000, 'timeout' => 200];
|
||||||
|
|
||||||
$deployLabel = match ($engine) {
|
$deployLabel = match ($engine) {
|
||||||
'gpu' => 'GPU (cuttlefish)',
|
'gpu' => 'GPU (cuttlefish)',
|
||||||
@@ -853,10 +853,19 @@ PROMPT;
|
|||||||
default => $this->azure->chatDeployment(),
|
default => $this->azure->chatDeployment(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
$raw = '';
|
||||||
try {
|
try {
|
||||||
if ($engine === 'dbn_legal') {
|
if ($engine === 'dbn_legal') {
|
||||||
$response = dbnToolsCallGpuLlm($messages, array_merge($opts, ['model' => 'dbn-legal-agent', 'timeout' => 200]));
|
// dbn-legal-agent is slow (~6 t/s on cuttlefish). Stream the response and emit
|
||||||
$raw = (string)($response['choices'][0]['message']['content'] ?? '');
|
// keepalive events every 15 s so the browser connection stays alive.
|
||||||
|
$raw = $this->callGpuLlmStream($messages, [
|
||||||
|
'model' => 'dbn-legal-agent',
|
||||||
|
'temperature' => $temperature,
|
||||||
|
'max_tokens' => 2800,
|
||||||
|
'timeout' => 660,
|
||||||
|
], $emit ? static function () use ($emit): void {
|
||||||
|
$emit('progress', ['detail' => 'dbn-legal-agent generating…']);
|
||||||
|
} : null);
|
||||||
} elseif ($engine === 'gpu') {
|
} elseif ($engine === 'gpu') {
|
||||||
$response = dbnToolsCallGpuLlm($messages, $opts);
|
$response = dbnToolsCallGpuLlm($messages, $opts);
|
||||||
$raw = (string)($response['choices'][0]['message']['content'] ?? '');
|
$raw = (string)($response['choices'][0]['message']['content'] ?? '');
|
||||||
@@ -885,6 +894,71 @@ PROMPT;
|
|||||||
return ['json' => $json, 'deploy_label' => $deployLabel];
|
return ['json' => $json, 'deploy_label' => $deployLabel];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── GPU streaming helper (keeps browser connection alive during slow models) ──
|
||||||
|
|
||||||
|
/**
 * Call the LiteLLM chat-completions endpoint with streaming enabled and return the full text.
 *
 * The response is consumed as a server-sent event stream: every complete "data:" line is
 * JSON-decoded and its choices[0].delta.content is appended to the result. Lines that are
 * split across two network chunks are buffered until the rest of the line arrives.
 *
 * While the request is in flight, $onProgress() is invoked at most once every 15 seconds so
 * the caller can push a keepalive event to the browser. The check runs both when data
 * arrives and from cURL's progress callback, so keepalives are also sent while the model
 * has not produced its first token yet.
 *
 * @param array         $messages   Chat messages, sent verbatim as the "messages" payload field.
 * @param array         $options    Optional keys: model, temperature, max_tokens, timeout (seconds).
 * @param callable|null $onProgress Keepalive hook called with no arguments; null disables keepalives.
 *
 * @return string The concatenated delta content, trimmed.
 *
 * @throws RuntimeException If the payload cannot be encoded, cURL fails, or the endpoint
 *                          answers with an HTTP status >= 400.
 */
private function callGpuLlmStream(array $messages, array $options, ?callable $onProgress): string
{
    $url = 'http://10.0.1.10:4000/v1/chat/completions';
    // NOTE(review): the fallback below is a credential committed to source control. It is kept
    // so existing deployments without LITELLM_MASTER_KEY keep working, but the key should be
    // rotated and this fallback removed once the environment variable is set everywhere.
    $apiKey = (string)(dbnToolsEnv('LITELLM_MASTER_KEY') ?: 'sk-bnl-litellm-26xR9mK4qvN3wL8sTj7pB2d');
    $timeout = (int)($options['timeout'] ?? 660);

    $body = json_encode([
        'model' => (string)($options['model'] ?? 'qwen2.5:14b'),
        'messages' => $messages,
        'temperature' => $options['temperature'] ?? 0.1,
        'max_tokens' => $options['max_tokens'] ?? 2800,
        'stream' => true,
    ], JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    if ($body === false) {
        throw new RuntimeException('GPU stream request failed: could not encode payload: ' . json_last_error_msg());
    }

    $accumulated = '';               // generated text collected so far
    $pending = '';                   // tail of the stream that does not end in "\n" yet
    $responseHead = '';              // first bytes of the raw response, for error messages
    $lastKeepalive = microtime(true);

    // Parse one complete SSE line and append its delta content, if any.
    $consumeLine = static function (string $line) use (&$accumulated): void {
        $line = trim($line);
        if (!str_starts_with($line, 'data:')) {
            return;
        }
        $json = trim(substr($line, 5));
        if ($json === '' || $json === '[DONE]') {
            return;
        }
        $chunk = json_decode($json, true);
        $delta = is_array($chunk) ? ($chunk['choices'][0]['delta']['content'] ?? '') : '';
        if (is_string($delta) && $delta !== '') {
            $accumulated .= $delta;
        }
    };

    // Fire the caller's keepalive hook when at least 15 seconds have passed since the last one.
    $keepalive = static function () use (&$lastKeepalive, $onProgress): void {
        if ($onProgress === null) {
            return;
        }
        $now = microtime(true);
        if ($now - $lastKeepalive < 15.0) {
            return;
        }
        $lastKeepalive = $now;
        $onProgress();
        flush();
    };

    $ch = curl_init($url);
    if ($ch === false) {
        throw new RuntimeException('GPU stream request failed: could not initialise cURL');
    }

    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $body,
        CURLOPT_HTTPHEADER => [
            'Content-Type: application/json',
            'Authorization: Bearer ' . $apiKey,
        ],
        CURLOPT_TIMEOUT => $timeout,
        CURLOPT_RETURNTRANSFER => false,
        CURLOPT_WRITEFUNCTION => static function ($handle, string $data) use (&$pending, &$responseHead, $consumeLine, $keepalive): int {
            if (strlen($responseHead) < 300) {
                $responseHead .= substr($data, 0, 300 - strlen($responseHead));
            }

            // cURL delivers arbitrary byte chunks, so only lines terminated by "\n" are
            // complete; the last piece stays buffered until the next callback.
            $pending .= $data;
            $lines = explode("\n", $pending);
            $pending = (string)array_pop($lines);
            foreach ($lines as $line) {
                $consumeLine($line);
            }

            $keepalive();

            return strlen($data);
        },
        // The progress callback runs periodically even when no data is arriving, which covers
        // the silent phase before the first token.
        CURLOPT_NOPROGRESS => false,
        CURLOPT_PROGRESSFUNCTION => static function (...$unused) use ($keepalive): int {
            $keepalive();

            return 0; // non-zero would abort the transfer
        },
    ]);

    $ok = curl_exec($ch);
    $curlErr = curl_error($ch);
    $status = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
    unset($ch);

    if ($ok === false || $curlErr !== '') {
        throw new RuntimeException('GPU stream request failed: ' . ($curlErr !== '' ? $curlErr : 'unknown cURL error'));
    }

    // A final line without a trailing newline is still valid data.
    if ($pending !== '') {
        $consumeLine($pending);
    }

    // Error responses are plain JSON rather than SSE, so without this check they would
    // surface as an empty result instead of a failure.
    if ($status >= 400) {
        throw new RuntimeException('GPU stream request failed: HTTP ' . $status . ' ' . trim($responseHead));
    }

    return trim($accumulated);
}
|
||||||
|
|
||||||
// ── Shared helpers (copied from DbnDeepResearchAgent) ────────────────────
|
// ── Shared helpers (copied from DbnDeepResearchAgent) ────────────────────
|
||||||
|
|
||||||
private function splitIntoChunks(string $text, string $filename, int $fileIdx): array
|
private function splitIntoChunks(string $text, string $filename, int $fileIdx): array
|
||||||
|
|||||||
Reference in New Issue
Block a user