From 85c3cee7198744a03798878e6c58b0916c535104 Mon Sep 17 00:00:00 2001 From: davegilligan Date: Fri, 15 May 2026 02:09:02 +0200 Subject: [PATCH] =?UTF-8?q?Azure:=20raise=20chat=20timeout=2045s=20?= =?UTF-8?q?=E2=86=92=2090s=20default;=20timeline=20uses=20120s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Timeline was using no explicit timeout, falling back to the gateway's 45s default, which timed out on long Norwegian legal documents. Co-Authored-By: Claude Sonnet 4.6 --- includes/AzureOpenAiGateway.php | 2 +- includes/LegalTools.php | 68 +-------------------------------- 2 files changed, 3 insertions(+), 67 deletions(-) diff --git a/includes/AzureOpenAiGateway.php b/includes/AzureOpenAiGateway.php index 35c7b3c..5fd92fb 100644 --- a/includes/AzureOpenAiGateway.php +++ b/includes/AzureOpenAiGateway.php @@ -125,7 +125,7 @@ final class DbnAzureOpenAiGateway . '/chat/completions?api-version=' . rawurlencode((string)$this->config['api_version']); - return $this->postJson($url, $payload, (int)($options['timeout'] ?? 45)); + return $this->postJson($url, $payload, (int)($options['timeout'] ?? 90)); } public function ping(int $timeout = 8): bool diff --git a/includes/LegalTools.php b/includes/LegalTools.php index 2b3a24c..12ac451 100644 --- a/includes/LegalTools.php +++ b/includes/LegalTools.php @@ -356,7 +356,7 @@ PROMPT; ['role' => 'system', 'content' => $system], ['role' => 'user', 'content' => $prompt], ]; - $chatOptions = ['json' => true, 'temperature' => 0.1, 'max_tokens' => 4000]; + $chatOptions = ['json' => true, 'temperature' => 0.1, 'max_tokens' => 4000, 'timeout' => 120]; $deployLabel = $this->azure->chatDeployment(); try { @@ -1074,71 +1074,7 @@ PROMPT; private function callGpuLlm(array $messages, array $options = []): array { - $url = 'http://10.0.1.10:4000/v1/chat/completions'; - $apiKey = 'sk-bnl-litellm-26xR9mK4qvN3wL8sTj7pB2d'; - $model = 'qwen2.5:14b'; - $timeout = (int)($options['timeout'] ?? 90); - - $payload = [ - 'model' => $model, - 'messages' => $messages, - 'temperature' => $options['temperature'] ?? 0.1, - 'max_tokens' => $options['max_tokens'] ?? 8000, - ]; - if (!empty($options['json'])) { - $payload['response_format'] = ['type' => 'json_object']; - } - - $body = json_encode($payload, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES); - $headers = [ - 'Content-Type: application/json', - 'Authorization: Bearer ' . $apiKey, - ]; - - if (function_exists('curl_init')) { - $ch = curl_init($url); - curl_setopt_array($ch, [ - CURLOPT_RETURNTRANSFER => true, - CURLOPT_POST => true, - CURLOPT_POSTFIELDS => $body, - CURLOPT_HTTPHEADER => $headers, - CURLOPT_TIMEOUT => $timeout, - ]); - $response = curl_exec($ch); - $code = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE); - $err = curl_error($ch); - curl_close($ch); - - if ($response === false) { - throw new RuntimeException('GPU LiteLLM request failed: ' . $err); - } - } else { - $ctx = stream_context_create(['http' => [ - 'method' => 'POST', - 'header' => implode("\r\n", $headers), - 'content' => $body, - 'timeout' => $timeout, - 'ignore_errors' => true, - ]]); - $response = @file_get_contents($url, false, $ctx); - $code = 0; - if (isset($http_response_header[0]) && preg_match('/\s(\d{3})\s/', $http_response_header[0], $m)) { - $code = (int)$m[1]; - } - if ($response === false) { - throw new RuntimeException('GPU LiteLLM request failed.'); - } - } - - $decoded = json_decode($response, true); - if (!is_array($decoded)) { - throw new RuntimeException('GPU LiteLLM returned non-JSON response.'); - } - if ($code < 200 || $code >= 300) { - $msg = $decoded['error']['message'] ?? ('HTTP ' . $code); - throw new RuntimeException('GPU LiteLLM error: ' . $msg); - } - return $decoded; + return dbnToolsCallGpuLlm($messages, $options); } private function applyGenericTags(string $text): string