Files
dobetternorge-tools/includes/AzureOpenAiGateway.php
T
daveadmin c84ed2ed78 fix(tools): parse-harden Do Better Legal ask against leaky fine-tune output
The dbn-legal-agent-v3 fine-tune (Track 1 / family) emits a labelled-prose
template — duplicate `answer:` prefixes, markdown-escaped underscores (`\_`),
and a trailing raw JSON blob — rather than the strict JSON the Azure/gpt-4o
path produces via response_format. decodeJsonObject() returned null on that
invalid JSON, so ask() dumped the entire raw blob into `answer`.

Fix at the parse layer (no upstream response_format change, to avoid fighting
the fine-tune's training):
- dbnToolsRepairJsonText(): strip fences, drop only invalid `\_`/`\*` escapes,
  then balanced-brace scan collecting every top-level {...} (longest first) to
  recover an appended JSON object. Shared by both gateways' decodeJsonObject(),
  so all JSON tools benefit.
- dbnToolsParseLabeledFields(): parse labelled-prose into real fields when no
  JSON decodes, tolerating escaped key names and collapsing duplicate prefixes.
- ask() null-fallback now builds clean structured fields from the parsed prose
  instead of dumping raw; what_remains_uncertain becomes a proper list.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 17:36:35 +02:00

219 lines
7.1 KiB
PHP

<?php
declare(strict_types=1);
require_once __DIR__ . '/bootstrap.php';
/**
 * Minimal HTTP client for Azure OpenAI chat-completion and embedding deployments.
 *
 * Configuration is a flat array with the keys `endpoint`, `api_key`,
 * `api_version`, `chat_deployment` and `embedding_deployment`. Requests are
 * sent with cURL when the extension is loaded and with the PHP HTTP stream
 * wrapper otherwise. Transport and API failures surface as RuntimeException.
 */
final class DbnAzureOpenAiGateway
{
    /** Config keys that must be non-blank before a chat request is sent. */
    private const CHAT_KEYS = ['endpoint', 'api_key', 'api_version', 'chat_deployment'];

    /** Config keys that must be non-blank before an embedding request is sent. */
    private const EMBEDDING_KEYS = ['endpoint', 'api_key', 'api_version', 'embedding_deployment'];

    /** @var array<string, mixed> */
    private array $config;

    /**
     * @param array<string, mixed>|null $config Explicit settings. When null or
     *        empty, every setting is read through dbnToolsEnv() from the
     *        DBN_AZURE_OPENAI_* variables (the endpoint has trailing slashes
     *        removed). A non-empty array is used as given, without merging.
     */
    public function __construct(?array $config = null)
    {
        $this->config = $config ?: [
            'endpoint' => rtrim((string)dbnToolsEnv('DBN_AZURE_OPENAI_ENDPOINT', ''), '/'),
            'api_key' => (string)dbnToolsEnv('DBN_AZURE_OPENAI_API_KEY', ''),
            'api_version' => (string)dbnToolsEnv('DBN_AZURE_OPENAI_API_VERSION', ''),
            'chat_deployment' => (string)dbnToolsEnv('DBN_AZURE_OPENAI_CHAT_DEPLOYMENT', ''),
            'embedding_deployment' => (string)dbnToolsEnv('DBN_AZURE_OPENAI_EMBEDDING_DEPLOYMENT', ''),
        ];
    }

    /**
     * Lists the chat-related config keys whose value is absent or blank.
     *
     * @return list<string>
     */
    public function missingChatConfig(): array
    {
        return $this->missingKeys(self::CHAT_KEYS);
    }

    /**
     * Lists the embedding-related config keys whose value is absent or blank.
     *
     * @return list<string>
     */
    public function missingEmbeddingConfig(): array
    {
        return $this->missingKeys(self::EMBEDDING_KEYS);
    }

    /**
     * Returns a copy of this gateway that targets another chat deployment.
     * The receiver itself is left unchanged.
     */
    public function withDeployment(string $deployment): static
    {
        $clone = clone $this;
        $clone->config['chat_deployment'] = $deployment;

        return $clone;
    }

    /** Configured chat deployment name, or '' when none is set. */
    public function chatDeployment(): string
    {
        // `?? ''` keeps a partially populated caller-supplied config from
        // raising an "undefined array key" warning.
        return (string)($this->config['chat_deployment'] ?? '');
    }

    /** Configured embedding deployment name, or '' when none is set. */
    public function embeddingDeployment(): string
    {
        return (string)($this->config['embedding_deployment'] ?? '');
    }

    /**
     * Hands off to dbnToolsAbort() (status 503, code `azure_config_missing`)
     * when any chat setting is blank.
     *
     * NOTE(review): dbnToolsAbort() is defined outside this file; it presumably
     * ends the request or throws — confirm in bootstrap.php.
     */
    public function requireChat(): void
    {
        $missing = $this->missingChatConfig();
        if ($missing) {
            dbnToolsAbort(
                'Azure OpenAI chat gateway is missing configuration: ' . implode(', ', $missing) . '.',
                503,
                'azure_config_missing',
                ['missing' => $missing]
            );
        }
    }

    /**
     * Hands off to dbnToolsAbort() (status 503, code
     * `azure_embedding_config_missing`) when any embedding setting is blank.
     */
    public function requireEmbedding(): void
    {
        $missing = $this->missingEmbeddingConfig();
        if ($missing) {
            dbnToolsAbort(
                'Azure OpenAI embedding gateway is missing configuration: ' . implode(', ', $missing) . '.',
                503,
                'azure_embedding_config_missing',
                ['missing' => $missing]
            );
        }
    }

    /**
     * Posts `$input` to the configured embedding deployment.
     *
     * @param array|string $input   Sent verbatim as the request's `input` field.
     * @param array        $options `timeout` in seconds (default 30).
     * @return array Decoded JSON response body.
     * @throws RuntimeException On transport failure, non-JSON body or non-2xx status.
     */
    public function embeddings(array|string $input, array $options = []): array
    {
        $this->requireEmbedding();
        $url = $this->deploymentUrl('embedding_deployment', 'embeddings');

        return $this->postJson($url, ['input' => $input], (int)($options['timeout'] ?? 30));
    }

    /**
     * Runs a chat completion and returns the first choice's trimmed text.
     *
     * @param array $messages Chat messages, forwarded to chat().
     * @param array $options  Same options as chat().
     * @throws RuntimeException When the first choice has no non-blank string
     *         content, or when chat() fails.
     */
    public function chatText(array $messages, array $options = []): string
    {
        $response = $this->chat($messages, $options);
        $content = $response['choices'][0]['message']['content'] ?? '';
        if (!is_string($content) || trim($content) === '') {
            throw new RuntimeException('Azure OpenAI returned an empty chat response.');
        }

        return trim($content);
    }

    /**
     * Posts a chat-completion request to the configured chat deployment.
     *
     * @param array $messages Sent verbatim as the request's `messages` field.
     * @param array $options  `temperature` (default 0.2), `max_tokens`
     *        (default 1200), `timeout` in seconds (default 90), and `json`:
     *        when truthy, `response_format` is set to `json_object`.
     * @return array Decoded JSON response body.
     * @throws RuntimeException On transport failure, non-JSON body or non-2xx status.
     */
    public function chat(array $messages, array $options = []): array
    {
        $this->requireChat();
        $payload = [
            'messages' => $messages,
            'temperature' => $options['temperature'] ?? 0.2,
            'max_tokens' => $options['max_tokens'] ?? 1200,
        ];
        if (!empty($options['json'])) {
            $payload['response_format'] = ['type' => 'json_object'];
        }
        $url = $this->deploymentUrl('chat_deployment', 'chat/completions');

        return $this->postJson($url, $payload, (int)($options['timeout'] ?? 90));
    }

    /**
     * Health probe: sends a tiny chat request and reports whether any text came back.
     *
     * Any Throwable raised on the way is written to error_log() and reported
     * as `false` rather than propagated.
     *
     * @param int $timeout Request timeout in seconds.
     */
    public function ping(int $timeout = 8): bool
    {
        try {
            $text = $this->chatText([
                ['role' => 'system', 'content' => 'Return one word only: ok'],
                ['role' => 'user', 'content' => 'health'],
            ], [
                'temperature' => 0,
                'max_tokens' => 5,
                'timeout' => $timeout,
            ]);

            return trim($text) !== '';
        } catch (Throwable $e) {
            error_log('DBN Azure health check failed: ' . $e->getMessage());

            return false;
        }
    }

    /**
     * Decodes model output into an array by delegating to dbnToolsRepairJsonText().
     *
     * @return array|null Whatever dbnToolsRepairJsonText() returns; null is
     *         presumably "no JSON object recovered" — confirm against that helper.
     */
    public function decodeJsonObject(string $content): ?array
    {
        return dbnToolsRepairJsonText($content);
    }

    /**
     * Returns the subset of `$keys` whose config value is absent or blank after trimming.
     *
     * @param list<string> $keys
     * @return list<string>
     */
    private function missingKeys(array $keys): array
    {
        $missing = [];
        foreach ($keys as $key) {
            if (trim((string)($this->config[$key] ?? '')) === '') {
                $missing[] = $key;
            }
        }

        return $missing;
    }

    /**
     * Builds `<endpoint>/openai/deployments/<deployment>/<operation>?api-version=<version>`.
     *
     * @param string $deploymentKey Config key holding the deployment name.
     * @param string $operation     Path suffix, e.g. `chat/completions`.
     */
    private function deploymentUrl(string $deploymentKey, string $operation): string
    {
        // Trailing slashes are stripped here as well as in the constructor so
        // a caller-supplied endpoint such as "https://host/" does not produce
        // "//openai/...".
        return rtrim((string)($this->config['endpoint'] ?? ''), '/')
            . '/openai/deployments/'
            . rawurlencode((string)($this->config[$deploymentKey] ?? ''))
            . '/' . $operation . '?api-version='
            . rawurlencode((string)($this->config['api_version'] ?? ''));
    }

    /**
     * POSTs `$payload` as JSON and returns the decoded response.
     *
     * @param int $timeout Overall request timeout in seconds.
     * @throws RuntimeException When the payload cannot be encoded, the
     *         transport fails, or decodeResponse() rejects the reply.
     */
    private function postJson(string $url, array $payload, int $timeout): array
    {
        $body = json_encode($payload, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
        if ($body === false) {
            throw new RuntimeException('Unable to encode Azure OpenAI request: ' . json_last_error_msg());
        }
        $headers = [
            'Content-Type: application/json',
            'api-key: ' . (string)($this->config['api_key'] ?? ''),
        ];

        if (function_exists('curl_init')) {
            $ch = curl_init($url);
            if ($ch === false) {
                // Passing false on to curl_setopt_array() would raise a TypeError.
                throw new RuntimeException('Azure OpenAI request failed: unable to initialise cURL.');
            }
            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_POST => true,
                CURLOPT_POSTFIELDS => $body,
                CURLOPT_HTTPHEADER => $headers,
                CURLOPT_TIMEOUT => $timeout,
            ]);
            $response = curl_exec($ch);
            $code = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
            $error = curl_error($ch);
            // curl_close() is a no-op on PHP 8.0+ and deprecated in 8.5;
            // dropping the handle releases it.
            unset($ch);
            if ($response === false) {
                throw new RuntimeException('Azure OpenAI request failed: ' . $error);
            }

            return $this->decodeResponse($response, $code);
        }

        $context = stream_context_create([
            'http' => [
                'method' => 'POST',
                'header' => implode("\r\n", $headers),
                'content' => $body,
                'timeout' => $timeout,
                // Keep the body of 4xx/5xx replies so the API error message can be read.
                'ignore_errors' => true,
            ],
        ]);
        // The warning is suppressed but captured below, so the failure reason
        // still reaches the exception message.
        error_clear_last();
        $response = @file_get_contents($url, false, $context);
        if ($response === false) {
            $detail = (string)(error_get_last()['message'] ?? '');
            throw new RuntimeException(
                $detail !== '' ? 'Azure OpenAI request failed: ' . $detail : 'Azure OpenAI request failed.'
            );
        }

        return $this->decodeResponse($response, self::statusCodeFromHeaders($http_response_header ?? []));
    }

    /**
     * Extracts the final HTTP status code from stream-wrapper response headers.
     *
     * The header list holds one status line per hop when redirects are
     * followed, so the last status line wins. A status line without a reason
     * phrase ("HTTP/1.1 200") is accepted.
     *
     * @param array $headers Header lines as exposed by `$http_response_header`.
     * @return int Status code, or 0 when no status line is present.
     */
    private static function statusCodeFromHeaders(array $headers): int
    {
        $code = 0;
        foreach ($headers as $line) {
            if (is_string($line) && preg_match('#^HTTP/\S+\s+(\d{3})(?:\s|$)#', $line, $m) === 1) {
                $code = (int)$m[1];
            }
        }

        return $code;
    }

    /**
     * Decodes a response body and converts API-level failures into exceptions.
     *
     * @param string $response Raw response body.
     * @param int    $code     HTTP status code (0 when unknown).
     * @return array Decoded JSON body of a 2xx response.
     * @throws RuntimeException For a non-JSON body or a non-2xx status; the
     *         HTTP status is attached as the exception code.
     */
    private function decodeResponse(string $response, int $code): array
    {
        $decoded = json_decode($response, true);
        $success = $code >= 200 && $code < 300;

        if (!is_array($decoded)) {
            if (!$success) {
                // A proxy or gateway error page is not JSON; report the status
                // instead of hiding it behind a generic parse failure.
                throw new RuntimeException(
                    'Azure OpenAI request failed: HTTP ' . $code . ' (non-JSON response).',
                    $code
                );
            }
            throw new RuntimeException('Azure OpenAI returned non-JSON response.', $code);
        }

        if (!$success) {
            $message = $decoded['error']['message'] ?? null;
            if (!is_string($message) || trim($message) === '') {
                $message = 'HTTP ' . $code;
            }
            throw new RuntimeException('Azure OpenAI request failed: ' . $message, $code);
        }

        return $decoded;
    }
}