diff --git a/includes/BvjAnalyzerAgent.php b/includes/BvjAnalyzerAgent.php
index 7eb0d61..467bf92 100644
--- a/includes/BvjAnalyzerAgent.php
+++ b/includes/BvjAnalyzerAgent.php
@@ -493,31 +493,26 @@ PROMPT;
     private function extractParties(string $docText, string $language): array
     {
         $locale = $language === 'no' ? 'Norwegian' : 'English';
-        $excerpt = mb_substr($docText, 0, 8000, 'UTF-8');
+        $excerpt = mb_substr($docText, 0, 12000, 'UTF-8');
         $prompt = <<azure->chatText([
                 ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
                 ['role' => 'user', 'content' => $prompt],
-            ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 1200, 'timeout' => 35]);
+            ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 1500, 'timeout' => 40]);
             $json = $this->azure->decodeJsonObject($raw);
             if (is_array($json) && is_array($json['parties'] ?? null)) {
                 return array_slice($json['parties'], 0, 20);
             }
+            // Fallback: model returned an array at root level instead of {parties:[...]}
+            if (is_array($json) && isset($json[0]['name'])) {
+                return array_slice($json, 0, 20);
+            }
+            error_log('BVJ extractParties unexpected structure: ' . substr($raw, 0, 300));
         } catch (Throwable $e) {
             error_log('BVJ extractParties failed: ' . $e->getMessage());
         }
@@ -844,7 +844,7 @@ PROMPT;
             ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
             ['role' => 'user', 'content' => $prompt],
         ];
-        $opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3500, 'timeout' => 200];
+        $opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3000, 'timeout' => 200];
 
         $deployLabel = match ($engine) {
             'gpu' => 'GPU (cuttlefish)',
@@ -853,10 +853,19 @@ PROMPT;
             default => $this->azure->chatDeployment(),
         };
 
+        $raw = '';
         try {
             if ($engine === 'dbn_legal') {
-                $response = dbnToolsCallGpuLlm($messages, array_merge($opts, ['model' => 'dbn-legal-agent', 'timeout' => 200]));
-                $raw = (string)($response['choices'][0]['message']['content'] ?? '');
+                // dbn-legal-agent is slow (~6 t/s on cuttlefish). Stream the response and emit
+                // keepalive events every 15 s so the browser connection stays alive.
+                $raw = $this->callGpuLlmStream($messages, [
+                    'model' => 'dbn-legal-agent',
+                    'temperature' => $temperature,
+                    'max_tokens' => 2800,
+                    'timeout' => 660,
+                ], $emit ? static function () use ($emit): void {
+                    $emit('progress', ['detail' => 'dbn-legal-agent generating…']);
+                } : null);
             } elseif ($engine === 'gpu') {
                 $response = dbnToolsCallGpuLlm($messages, $opts);
                 $raw = (string)($response['choices'][0]['message']['content'] ?? '');
@@ -885,6 +894,137 @@ PROMPT;
         return ['json' => $json, 'deploy_label' => $deployLabel];
     }
 
+    // ── GPU streaming helper (keeps browser connection alive during slow models) ──
+
+    /**
+     * Call the LiteLLM chat-completions endpoint with streaming enabled and return the
+     * accumulated assistant text, trimmed.
+     *
+     * The response is read as server-sent events. A network chunk can end in the middle of
+     * a "data: ..." line, so bytes are buffered and only complete lines are decoded.
+     *
+     * When $onProgress is given it is invoked, followed by flush(), at most once every
+     * 15 seconds while the transfer runs (including the wait for the first token) so a
+     * keepalive event can reach the browser.
+     *
+     * NOTE(review): the payload carries no response_format, so JSON-only output rests on
+     * the system prompt alone; confirm that is intended.
+     *
+     * @param array         $messages   Chat messages, sent as the "messages" field.
+     * @param array         $options    Optional keys: model, temperature, max_tokens, timeout (seconds).
+     * @param callable|null $onProgress Keepalive hook; null disables keepalives.
+     *
+     * @throws RuntimeException If the API key is missing, the payload cannot be encoded, the
+     *                          transfer fails, or the endpoint answers with HTTP status >= 400.
+     */
+    private function callGpuLlmStream(array $messages, array $options, ?callable $onProgress): string
+    {
+        $url = 'http://10.0.1.10:4000/v1/chat/completions';
+        // The key must come from the environment; a credential must not be committed to source.
+        $apiKey = (string)(dbnToolsEnv('LITELLM_MASTER_KEY') ?: '');
+        if ($apiKey === '') {
+            throw new RuntimeException('GPU stream request failed: LITELLM_MASTER_KEY is not set');
+        }
+        $timeout = (int)($options['timeout'] ?? 660);
+
+        $payload = [
+            'model' => (string)($options['model'] ?? 'qwen2.5:14b'),
+            'messages' => $messages,
+            'temperature' => $options['temperature'] ?? 0.1,
+            'max_tokens' => $options['max_tokens'] ?? 2800,
+            'stream' => true,
+        ];
+        $body = json_encode($payload, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
+        if ($body === false) {
+            throw new RuntimeException('GPU stream request failed: ' . json_last_error_msg());
+        }
+        $headers = [
+            'Content-Type: application/json',
+            'Authorization: Bearer ' . $apiKey,
+        ];
+
+        $accumulated = '';
+        $pending = ''; // bytes of an SSE line that has not been terminated by "\n" yet
+        $rawHead = ''; // first bytes of the raw response, kept only for error messages
+        $lastKeepalive = microtime(true);
+
+        // Decode one complete SSE line and append its delta text, if it carries any.
+        $consumeLine = static function (string $line) use (&$accumulated): void {
+            $trimmed = ltrim($line);
+            if (!str_starts_with($trimmed, 'data: ')) {
+                return;
+            }
+            $json = trim(substr($trimmed, 6));
+            if ($json === '' || $json === '[DONE]') {
+                return;
+            }
+            $chunk = json_decode($json, true);
+            $delta = is_array($chunk) ? ($chunk['choices'][0]['delta']['content'] ?? '') : '';
+            if (is_string($delta) && $delta !== '') {
+                $accumulated .= $delta;
+            }
+        };
+
+        // Rate-limited keepalive shared by the write and progress callbacks.
+        $keepalive = static function () use (&$lastKeepalive, $onProgress): void {
+            if ($onProgress !== null && microtime(true) - $lastKeepalive >= 15.0) {
+                $lastKeepalive = microtime(true);
+                $onProgress();
+                flush();
+            }
+        };
+
+        $ch = curl_init($url);
+        if ($ch === false) {
+            throw new RuntimeException('GPU stream request failed: could not initialise cURL');
+        }
+        curl_setopt_array($ch, [
+            CURLOPT_POST => true,
+            CURLOPT_POSTFIELDS => $body,
+            CURLOPT_HTTPHEADER => $headers,
+            CURLOPT_TIMEOUT => $timeout,
+            CURLOPT_RETURNTRANSFER => false,
+            CURLOPT_WRITEFUNCTION => static function ($ch, $data) use (&$pending, &$rawHead, $consumeLine, $keepalive): int {
+                if (strlen($rawHead) < 300) {
+                    $rawHead .= substr($data, 0, 300 - strlen($rawHead));
+                }
+                $pending .= $data;
+                $lastNewline = strrpos($pending, "\n");
+                if ($lastNewline !== false) {
+                    foreach (explode("\n", substr($pending, 0, $lastNewline)) as $line) {
+                        $consumeLine($line);
+                    }
+                    $pending = substr($pending, $lastNewline + 1);
+                }
+                $keepalive();
+                return strlen($data);
+            },
+            // The progress callback also runs while no data arrives, covering the wait for the first token.
+            CURLOPT_NOPROGRESS => false,
+            CURLOPT_PROGRESSFUNCTION => static function () use ($keepalive): int {
+                $keepalive();
+                return 0;
+            },
+        ]);
+
+        curl_exec($ch);
+        $curlErr = curl_error($ch);
+        $status = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
+        curl_close($ch);
+
+        if ($curlErr !== '') {
+            throw new RuntimeException('GPU stream request failed: ' . $curlErr);
+        }
+        if ($status >= 400) {
+            throw new RuntimeException('GPU stream request failed: HTTP ' . $status . ' ' . trim($rawHead));
+        }
+        // A final event without a trailing newline is complete once the transfer has ended.
+        if ($pending !== '') {
+            $consumeLine($pending);
+        }
+        return trim($accumulated);
+    }
+
     // ── Shared helpers (copied from DbnDeepResearchAgent) ────────────────────
 
     private function splitIntoChunks(string $text, string $filename, int $fileIdx): array