Files
dobetternorge-tools/api/translate.php
T

177 lines
6.5 KiB
PHP

<?php
declare(strict_types=1);
require_once __DIR__ . '/../includes/bootstrap.php';
require_once __DIR__ . '/../includes/AzureOpenAiGateway.php';
// Request gating. NOTE(review): presumably these helpers reject non-POST and
// unauthenticated requests and stop execution themselves — confirm in bootstrap.php.
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

// Best-effort attempt to switch off PHP-level buffering and compression so each
// NDJSON line reaches the client as soon as it is written. Failures are ignored
// on purpose (hence the @): the script still works, it just streams less smoothly.
@ini_set('output_buffering', '0');
@ini_set('zlib.output_compression', '0');
@ini_set('implicit_flush', '1');

// Discard every active output buffer. ob_end_clean() returns false when the top
// buffer cannot be removed; in that case ob_get_level() never decreases, so we
// must stop instead of spinning forever.
while (ob_get_level() > 0) {
    if (!@ob_end_clean()) {
        break;
    }
}
ob_implicit_flush(true);

// Response headers for a newline-delimited JSON stream.
header('Content-Type: application/x-ndjson; charset=utf-8');
header('Cache-Control: no-store');
// Asks nginx-style reverse proxies not to buffer the response.
header('X-Accel-Buffering: no');

// Reference point for the t_ms / latency_ms values reported later in the script.
$startTime = microtime(true);

// Default UI language; used for failure logging if the request body cannot be
// parsed, and overwritten once the input has been read.
$language = 'en';
/**
 * Writes one NDJSON event line to the response and flushes it immediately.
 *
 * The payload is extended with two keys before encoding:
 *   - "event": the event name passed in $event
 *   - "t_ms":  whole milliseconds elapsed since $startTime
 *
 * Invalid UTF-8 byte sequences inside the payload are replaced with U+FFFD
 * rather than making the whole encode fail; without that, json_encode() would
 * return false and the client would receive an empty line instead of the event.
 *
 * @param string $event   Event name, e.g. "start", "progress", "final", "error".
 * @param array  $payload Additional event fields.
 */
$emit = function (string $event, array $payload = []) use ($startTime): void {
    $payload['event'] = $event;
    $payload['t_ms'] = (int)round((microtime(true) - $startTime) * 1000);

    $line = json_encode(
        $payload,
        JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
    );

    if ($line === false) {
        // Encoding can still fail for other reasons (e.g. NAN/INF values or
        // excessive nesting). Never write a blank line: report a generic,
        // always-encodable error event so the client is not left waiting.
        error_log('translate emit: json_encode failed for event "' . $event . '": ' . json_last_error_msg());
        $line = json_encode([
            'code' => 'internal_error',
            'message' => 'Translation could not complete this request.',
            'event' => 'error',
            't_ms' => $payload['t_ms'],
        ]);
    }

    echo $line . "\n";
    @flush();
};
// Main request pipeline: validate input, charge the free tier, call the model,
// validate its JSON answer and stream the result. Every failure is reported to
// the client as an "error" event on the NDJSON stream (the HTTP status line is
// not changed here; the intended status travels inside the event payload).
try {
    // ---- Input ------------------------------------------------------------
    $input = dbnToolsJsonInput(400000);
    $language = dbnToolsNormalizeLanguage($input['language'] ?? 'en');
    $sourceLang = dbnToolsNormalizeLanguage($input['source_lang'] ?? 'no');
    $targetLang = dbnToolsNormalizeLanguage($input['target_lang'] ?? 'en');

    // Unknown or non-scalar document types fall back to 'auto' instead of
    // failing the request.
    $allowedDocTypes = ['auto', 'barnevernet', 'adopsjon', 'emergency', 'samvær', 'fylkesnemnd', 'other'];
    $rawDocType = $input['doc_type'] ?? 'auto';
    $docType = is_scalar($rawDocType) ? (string)$rawDocType : 'auto';
    if (!in_array($docType, $allowedDocTypes, true)) {
        $docType = 'auto';
    }

    if ($sourceLang === $targetLang) {
        throw new DbnToolsHttpException(
            'Source and target languages must be different.',
            422, 'same_language'
        );
    }

    $text = dbnToolsInjectDocContent($input, dbnToolsString($input, 'text', 200000, false));
    if (mb_strlen(trim($text), 'UTF-8') < 10) {
        throw new DbnToolsHttpException(
            'Please paste text or upload a file to translate.',
            422, 'empty_text'
        );
    }
    // Character count of the untrimmed text; reported to the client and used
    // to size the completion budget below.
    $chars = mb_strlen($text, 'UTF-8');

    // ---- Free-tier accounting ----------------------------------------------
    // NOTE(review): the credit is deducted before the model call, so a failed
    // translation still consumes it — confirm this is intended.
    $ftUid = dbnToolsFreeTierCheck('translate');
    $ftRemaining = dbnToolsFreeTierDeduct($ftUid, 'translate');
    // Only non-negative balances are exposed to the client.
    if ($ftRemaining >= 0) {
        header('X-Credits-Remaining: ' . $ftRemaining);
    }

    $emit('start', [
        'mode' => 'translate',
        'language' => $language,
        'source_lang' => $sourceLang,
        'target_lang' => $targetLang,
        'doc_type' => $docType,
        'chars' => $chars,
    ]);
    $emit('progress', ['step' => 'translating', 'detail' => 'Translating…']);

    // ---- Prompt ------------------------------------------------------------
    $sourceName = dbnToolsLanguageName($sourceLang);
    $targetName = dbnToolsLanguageName($targetLang);
    // $docType is restricted to the allow-list above, so it is safe to interpolate.
    $docTypeHint = $docType !== 'auto'
        ? "The document is of type: {$docType}. Apply appropriate Norwegian family-law terminology for this context."
        : '';

    // The heredoc body and its closing marker stay at column 0 so the prompt
    // text is sent without leading indentation on any PHP version.
    $systemPrompt = <<<PROMPT
You are a professional legal translator specialising in Norwegian family law, ECHR, and child-welfare proceedings.
Task: Translate the provided text from {$sourceName} into {$targetName}.
Rules:
1. Preserve ALL Norwegian statute references verbatim as proper nouns (barnevernsloven, bvl., BRL, EMK, barnekonvensjonen, § numbers, Høyesterett, Fylkesnemnda, Barnevernet, Statsforvalteren, Bufdir, NAV, etc.).
2. Maintain formal legal register throughout.
3. Translate the complete text faithfully — do NOT summarise, add commentary, or omit any content.
4. If a Norwegian legal term has no natural equivalent in {$targetName}, translate it as closely as possible and add a brief translator's note in square brackets, e.g. [barnevernstjenesten = the Child Welfare Service, local authority].
5. After translating, list any such terms requiring explanation in the annotations array.
{$docTypeHint}
Respond with a valid JSON object:
{
"translated_text": "<full translation>",
"annotations": [
{"term": "<Norwegian term>", "explanation": "<plain-language explanation in {$targetName}>"}
],
"disclaimer": "<one-sentence AI disclaimer in {$targetName}>"
}
If no terms require annotation, return an empty array for "annotations".
PROMPT;

    // ---- Model call ----------------------------------------------------------
    $deployment = 'gpt-4o-mini';
    $azure = (new DbnAzureOpenAiGateway())->withDeployment($deployment);
    // Completion budget scales with input length, clamped to [1500, 8000] tokens.
    $maxTokens = min(8000, max(1500, (int)($chars * 1.4)));
    $response = $azure->chat([
        ['role' => 'system', 'content' => $systemPrompt],
        ['role' => 'user', 'content' => $text],
    ], [
        'json' => true,
        'temperature' => 0.05,
        'max_tokens' => $maxTokens,
        'timeout' => 120,
    ]);

    // A finish_reason of "length" means the model stopped at the token limit,
    // so the translation is incomplete. Retrying the same text cannot succeed,
    // so tell the user to shorten it instead of the generic "try again" error.
    $finishReason = $response['choices'][0]['finish_reason'] ?? null;
    if ($finishReason === 'length') {
        throw new DbnToolsHttpException(
            'The text is too long to translate in one request. Please split it into smaller parts and try again.',
            422, 'output_truncated'
        );
    }

    // ---- Response validation -------------------------------------------------
    $rawContent = $response['choices'][0]['message']['content'] ?? '';
    $decoded = $azure->decodeJsonObject($rawContent);

    // The translation must be a non-blank string; arrays, numbers or
    // whitespace-only values are treated as a malformed model response.
    $rawTranslation = $decoded === null ? null : ($decoded['translated_text'] ?? null);
    $translatedText = is_string($rawTranslation) ? trim($rawTranslation) : '';
    if ($translatedText === '') {
        throw new DbnToolsHttpException(
            'Translation model returned an unexpected response. Please try again.',
            502, 'bad_response'
        );
    }

    // Keep only well-formed annotations and normalise them to a plain list of
    // {term, explanation} string pairs, which is the shape the prompt asks for.
    $annotations = [];
    $rawAnnotations = $decoded['annotations'] ?? null;
    if (is_array($rawAnnotations)) {
        foreach ($rawAnnotations as $entry) {
            if (!is_array($entry) || !is_string($entry['term'] ?? null)) {
                continue;
            }
            $term = trim($entry['term']);
            if ($term === '') {
                continue;
            }
            $explanation = $entry['explanation'] ?? '';
            $annotations[] = [
                'term' => $term,
                'explanation' => is_string($explanation) ? trim($explanation) : '',
            ];
        }
    }

    $rawDisclaimer = $decoded['disclaimer'] ?? '';

    $result = [
        'ok' => true,
        'translated_text' => $translatedText,
        'annotations' => $annotations,
        'disclaimer' => is_scalar($rawDisclaimer) ? (string)$rawDisclaimer : '',
        'source_lang' => $sourceLang,
        'target_lang' => $targetLang,
        'doc_type' => $docType,
        'model' => $deployment,
        'latency_ms' => (int)round((microtime(true) - $startTime) * 1000),
    ];

    dbnToolsLogMetadata([
        'tool' => 'translate',
        'language' => $language,
        'ok' => true,
        'latency_ms' => $result['latency_ms'],
        'source_lang' => $sourceLang,
        'target_lang' => $targetLang,
        'deployment' => $deployment,
    ]);

    $emit('final', ['result' => $result]);
} catch (DbnToolsHttpException $e) {
    // Expected failures (validation, model errors): log the error code and
    // pass the exception's code, message and status on to the client.
    $latency = (int)round((microtime(true) - $startTime) * 1000);
    dbnToolsLogMetadata([
        'tool' => 'translate',
        'language' => $language,
        'ok' => false,
        'latency_ms' => $latency,
        'error_code' => $e->errorCode,
    ]);
    $emit('error', ['code' => $e->errorCode, 'message' => $e->getMessage(), 'status' => $e->status]);
} catch (Throwable $e) {
    // Unexpected failures: the detail goes to the server log only; the client
    // gets a generic message so internals are not leaked.
    error_log('translate fatal: ' . $e->getMessage());
    $latency = (int)round((microtime(true) - $startTime) * 1000);
    dbnToolsLogMetadata([
        'tool' => 'translate',
        'language' => $language,
        'ok' => false,
        'latency_ms' => $latency,
        'error_code' => 'internal_error',
    ]);
    $emit('error', ['code' => 'internal_error', 'message' => 'Translation could not complete this request.']);
}