Azure: raise chat timeout 45s → 90s default; timeline uses 120s

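The timeline request did not set an explicit timeout, so it fell back to the gateway's 45s default and timed out on long Norwegian legal documents. It now passes 'timeout' => 120 in its chat options.

callGpuLlm() no longer carries its own inline HTTP client; it delegates to dbnToolsCallGpuLlm(), which removes the hard-coded endpoint URL, API key and model name from this method.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>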
2026-05-15 02:09:02 +02:00
parent f183678f35
commit 85c3cee719
2 changed files with 3 additions and 67 deletions
@@ -356,7 +356,7 @@ PROMPT;
            ['role' => 'system', 'content' => $system],
            ['role' => 'user',   'content' => $prompt],
        ];
-        $chatOptions = ['json' => true, 'temperature' => 0.1, 'max_tokens' => 4000];
+        $chatOptions = ['json' => true, 'temperature' => 0.1, 'max_tokens' => 4000, 'timeout' => 120];
        $deployLabel = $this->azure->chatDeployment();

        try {
@@ -1074,71 +1074,7 @@ PROMPT;

    private function callGpuLlm(array $messages, array $options = []): array
    {
-        $url     = 'http://10.0.1.10:4000/v1/chat/completions';
-        $apiKey  = 'sk-bnl-litellm-26xR9mK4qvN3wL8sTj7pB2d';
-        $model   = 'qwen2.5:14b';
-        $timeout = (int)($options['timeout'] ?? 90);
-
-        $payload = [
-            'model'       => $model,
-            'messages'    => $messages,
-            'temperature' => $options['temperature'] ?? 0.1,
-            'max_tokens'  => $options['max_tokens']  ?? 8000,
-        ];
-        if (!empty($options['json'])) {
-            $payload['response_format'] = ['type' => 'json_object'];
-        }
-
-        $body    = json_encode($payload, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
-        $headers = [
-            'Content-Type: application/json',
-            'Authorization: Bearer ' . $apiKey,
-        ];
-
-        if (function_exists('curl_init')) {
-            $ch = curl_init($url);
-            curl_setopt_array($ch, [
-                CURLOPT_RETURNTRANSFER => true,
-                CURLOPT_POST           => true,
-                CURLOPT_POSTFIELDS     => $body,
-                CURLOPT_HTTPHEADER     => $headers,
-                CURLOPT_TIMEOUT        => $timeout,
-            ]);
-            $response = curl_exec($ch);
-            $code     = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
-            $err      = curl_error($ch);
-            curl_close($ch);
-
-            if ($response === false) {
-                throw new RuntimeException('GPU LiteLLM request failed: ' . $err);
-            }
-        } else {
-            $ctx      = stream_context_create(['http' => [
-                'method'        => 'POST',
-                'header'        => implode("\r\n", $headers),
-                'content'       => $body,
-                'timeout'       => $timeout,
-                'ignore_errors' => true,
-            ]]);
-            $response = @file_get_contents($url, false, $ctx);
-            $code     = 0;
-            if (isset($http_response_header[0]) && preg_match('/\s(\d{3})\s/', $http_response_header[0], $m)) {
-                $code = (int)$m[1];
-            }
-            if ($response === false) {
-                throw new RuntimeException('GPU LiteLLM request failed.');
-            }
-        }
-
-        $decoded = json_decode($response, true);
-        if (!is_array($decoded)) {
-            throw new RuntimeException('GPU LiteLLM returned non-JSON response.');
-        }
-        if ($code < 200 || $code >= 300) {
-            $msg = $decoded['error']['message'] ?? ('HTTP ' . $code);
-            throw new RuntimeException('GPU LiteLLM error: ' . $msg);
-        }
-        return $decoded;
+        return dbnToolsCallGpuLlm($messages, $options);
    }

    private function applyGenericTags(string $text): string