Azure: raise chat timeout 45s → 90s default; timeline uses 120s
The timeline call was passing no explicit timeout, so it fell back to the gateway's 45s default, which timed out on long Norwegian legal documents.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -125,7 +125,7 @@ final class DbnAzureOpenAiGateway
|
|||||||
. '/chat/completions?api-version='
|
. '/chat/completions?api-version='
|
||||||
. rawurlencode((string)$this->config['api_version']);
|
. rawurlencode((string)$this->config['api_version']);
|
||||||
|
|
||||||
return $this->postJson($url, $payload, (int)($options['timeout'] ?? 45));
|
return $this->postJson($url, $payload, (int)($options['timeout'] ?? 90));
|
||||||
}
|
}
|
||||||
|
|
||||||
public function ping(int $timeout = 8): bool
|
public function ping(int $timeout = 8): bool
|
||||||
|
|||||||
+2
-66
@@ -356,7 +356,7 @@ PROMPT;
|
|||||||
['role' => 'system', 'content' => $system],
|
['role' => 'system', 'content' => $system],
|
||||||
['role' => 'user', 'content' => $prompt],
|
['role' => 'user', 'content' => $prompt],
|
||||||
];
|
];
|
||||||
$chatOptions = ['json' => true, 'temperature' => 0.1, 'max_tokens' => 4000];
|
$chatOptions = ['json' => true, 'temperature' => 0.1, 'max_tokens' => 4000, 'timeout' => 120];
|
||||||
$deployLabel = $this->azure->chatDeployment();
|
$deployLabel = $this->azure->chatDeployment();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -1074,71 +1074,7 @@ PROMPT;
|
|||||||
|
|
||||||
private function callGpuLlm(array $messages, array $options = []): array
|
private function callGpuLlm(array $messages, array $options = []): array
|
||||||
{
|
{
|
||||||
$url = 'http://10.0.1.10:4000/v1/chat/completions';
|
return dbnToolsCallGpuLlm($messages, $options);
|
||||||
$apiKey = 'sk-bnl-litellm-26xR9mK4qvN3wL8sTj7pB2d';
|
|
||||||
$model = 'qwen2.5:14b';
|
|
||||||
$timeout = (int)($options['timeout'] ?? 90);
|
|
||||||
|
|
||||||
$payload = [
|
|
||||||
'model' => $model,
|
|
||||||
'messages' => $messages,
|
|
||||||
'temperature' => $options['temperature'] ?? 0.1,
|
|
||||||
'max_tokens' => $options['max_tokens'] ?? 8000,
|
|
||||||
];
|
|
||||||
if (!empty($options['json'])) {
|
|
||||||
$payload['response_format'] = ['type' => 'json_object'];
|
|
||||||
}
|
|
||||||
|
|
||||||
$body = json_encode($payload, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
|
|
||||||
$headers = [
|
|
||||||
'Content-Type: application/json',
|
|
||||||
'Authorization: Bearer ' . $apiKey,
|
|
||||||
];
|
|
||||||
|
|
||||||
if (function_exists('curl_init')) {
|
|
||||||
$ch = curl_init($url);
|
|
||||||
curl_setopt_array($ch, [
|
|
||||||
CURLOPT_RETURNTRANSFER => true,
|
|
||||||
CURLOPT_POST => true,
|
|
||||||
CURLOPT_POSTFIELDS => $body,
|
|
||||||
CURLOPT_HTTPHEADER => $headers,
|
|
||||||
CURLOPT_TIMEOUT => $timeout,
|
|
||||||
]);
|
|
||||||
$response = curl_exec($ch);
|
|
||||||
$code = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
|
|
||||||
$err = curl_error($ch);
|
|
||||||
curl_close($ch);
|
|
||||||
|
|
||||||
if ($response === false) {
|
|
||||||
throw new RuntimeException('GPU LiteLLM request failed: ' . $err);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
$ctx = stream_context_create(['http' => [
|
|
||||||
'method' => 'POST',
|
|
||||||
'header' => implode("\r\n", $headers),
|
|
||||||
'content' => $body,
|
|
||||||
'timeout' => $timeout,
|
|
||||||
'ignore_errors' => true,
|
|
||||||
]]);
|
|
||||||
$response = @file_get_contents($url, false, $ctx);
|
|
||||||
$code = 0;
|
|
||||||
if (isset($http_response_header[0]) && preg_match('/\s(\d{3})\s/', $http_response_header[0], $m)) {
|
|
||||||
$code = (int)$m[1];
|
|
||||||
}
|
|
||||||
if ($response === false) {
|
|
||||||
throw new RuntimeException('GPU LiteLLM request failed.');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$decoded = json_decode($response, true);
|
|
||||||
if (!is_array($decoded)) {
|
|
||||||
throw new RuntimeException('GPU LiteLLM returned non-JSON response.');
|
|
||||||
}
|
|
||||||
if ($code < 200 || $code >= 300) {
|
|
||||||
$msg = $decoded['error']['message'] ?? ('HTTP ' . $code);
|
|
||||||
throw new RuntimeException('GPU LiteLLM error: ' . $msg);
|
|
||||||
}
|
|
||||||
return $decoded;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function applyGenericTags(string $text): string
|
private function applyGenericTags(string $text): string
|
||||||
|
|||||||
Reference in New Issue
Block a user