feat(transcribe): GPT cleanup pass + advanced options i18n

Adds optional post-transcription cleanup via GPT-4o/GPT-4o-mini to fix mishearing errors, punctuation, and domain terms. Speaker role labelling now accepts a deployment param. Adds i18n strings for advanced options panel (task, VAD filter, Whisper model, AI cleanup) in all four languages. Updates BvjAnalyzerAgent and DeepResearchAgent. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-18 07:23:01 +02:00
parent e32ee60e78
commit c4362738c1
5 changed files with 345 additions and 112 deletions
@@ -27,6 +27,11 @@ $task        = ($_POST['task'] ?? 'transcribe') === 'translate' ? 'translate' :
 $vadFilter   = !empty($_POST['vad_filter']) && $_POST['vad_filter'] !== '0';
 $initPrompt  = substr(trim((string)($_POST['initial_prompt'] ?? '')), 0, 500);

+$allowedPostModels = ['', 'gpt-4o-mini', 'gpt-4o'];
+$postModel = in_array($_POST['post_model'] ?? '', $allowedPostModels, true)
+    ? (string)($_POST['post_model'] ?? '')
+    : '';
+
 // ── Validate upload ───────────────────────────────────────────────────────────

 if (empty($_FILES['audio']) || $_FILES['audio']['error'] !== UPLOAD_ERR_OK) {
@@ -114,6 +119,17 @@ if ($timeOffset > 0.0 && !empty($result['segments'])) {
    unset($seg);
 }

+// ── Optional GPT cleanup pass ─────────────────────────────────────────────────
+
+$cleanedBy = null;
+if ($postModel !== '' && !empty($result['text'])) {
+    $cleaned = dbnCleanupTranscript($result['text'], $language, $initPrompt, $postModel);
+    if ($cleaned !== null) {
+        $result['text'] = $cleaned;
+        $cleanedBy = $postModel;
+    }
+}
+
 // ── Speaker role labelling (diarize + multiple speakers only) ─────────────────

 $segments    = $result['segments']    ?? [];
@@ -126,7 +142,8 @@ if ($numDetected < 2 && $segments) {

 $speakerRoles = null;
 if ($diarize && $numDetected > 1 && $segments) {
-    $speakerRoles = dbnLabelSpeakerRoles($segments);
+    $labelDeployment = $postModel ?: 'gpt-4o-mini';
+    $speakerRoles = dbnLabelSpeakerRoles($segments, $labelDeployment);
 }

 // ── Friendly engine label ─────────────────────────────────────────────────────
@@ -161,6 +178,7 @@ dbnToolsRespond([
    'model'         => $engineLabel,
    'engine'        => $engineUsed,
    'latency_ms'    => $latencyMs,
+    'cleaned_by'    => $cleanedBy,
 ]);


@@ -313,7 +331,7 @@ function transcribeViaWhisperGpu(array $file, string $language, bool $diarize, i
 }


-function dbnLabelSpeakerRoles(array $segments): array
+function dbnLabelSpeakerRoles(array $segments, string $deployment = 'gpt-4o-mini'): array
 {
    $sample = array_slice(
        array_values(array_filter($segments, fn($s) => isset($s['speaker']))),
@@ -322,16 +340,11 @@ function dbnLabelSpeakerRoles(array $segments): array
    if (!$sample) return [];

    $lines  = array_map(fn($s) => "[{$s['speaker']}] " . trim((string)($s['text'] ?? '')), $sample);
-    $azure  = new DbnAzureOpenAiGateway();
-    $system = 'You are analyzing a legal proceeding transcript. '
-        . 'Based on the first segments, identify the role of each speaker. '
-        . 'Common roles in Norwegian legal proceedings: dommer (judge), advokat (lawyer), '
-        . 'forelder (parent), barn (child), sakkyndig (expert witness), '
-        . 'saksbehandler (caseworker), tolk (interpreter), vitne (witness), '
-        . 'prosessfullmektig (counsel). '
+    $azure  = (new DbnAzureOpenAiGateway())->withDeployment($deployment);
+    $system = 'Label speakers in this Norwegian legal transcript. '
        . 'Return ONLY valid JSON: {"SPEAKER_00":"dommer","SPEAKER_01":"forelder"}. '
-        . 'Use Norwegian role names. Use "ukjent" if role cannot be determined. '
-        . 'Only include speakers present in the input.';
+        . 'Norwegian role names only — dommer, advokat, forelder, barn, sakkyndig, '
+        . 'saksbehandler, tolk, vitne, prosessfullmektig. Use "ukjent" if unclear.';

    try {
        $text    = $azure->chatText([
@@ -345,3 +358,36 @@ function dbnLabelSpeakerRoles(array $segments): array
        return [];
    }
 }
+
+
+/**
+ * Runs an optional GPT clean-up pass over a finished transcript.
+ *
+ * Best-effort: a gateway exception, an empty reply, or a reply that looks
+ * truncated all yield null so the caller can keep the raw transcript.
+ *
+ * @param string $text       Raw transcript text to correct.
+ * @param string $language   Language code; codes not listed below are described to the model as Norwegian.
+ * @param string $vocabulary Optional domain terms; appended to the system prompt when non-empty.
+ * @param string $deployment Deployment name handed to DbnAzureOpenAiGateway::withDeployment().
+ *
+ * @return string|null Corrected transcript, or null when no usable correction was produced.
+ */
+function dbnCleanupTranscript(string $text, string $language, string $vocabulary, string $deployment): ?string
+{
+    $langName = match ($language) {
+        'no', 'nb', 'nn' => 'Norwegian',
+        'en'             => 'English',
+        'pl'             => 'Polish',
+        'uk'             => 'Ukrainian',
+        'sv'             => 'Swedish',
+        'da'             => 'Danish',
+        'de'             => 'German',
+        'fr'             => 'French',
+        default          => 'Norwegian',
+    };
+    $vocabHint = $vocabulary !== '' ? " Domain terms to preserve correctly: {$vocabulary}." : '';
+    $system = "Fix transcription errors in this {$langName} text.{$vocabHint} "
+        . "Correct mishearing errors, run-on sentences, and punctuation. "
+        . "Preserve all meaning and the original language exactly. "
+        . "Return only the corrected transcript text, no commentary.";
+
+    try {
+        $azure = (new DbnAzureOpenAiGateway())->withDeployment($deployment);
+        $reply = $azure->chatText(
+            [['role' => 'system', 'content' => $system],
+             ['role' => 'user',   'content' => $text]],
+            ['temperature' => 0.1, 'max_tokens' => 4096]
+        );
+    } catch (Throwable $e) {
+        // Clean-up is optional: record why it failed, then fall back to the raw transcript.
+        error_log('dbnCleanupTranscript failed: ' . $e::class . ': ' . $e->getMessage());
+        return null;
+    }
+
+    if (!is_string($reply)) {
+        return null;
+    }
+
+    // Whitespace-only replies are as useless as empty ones.
+    $cleaned = trim($reply);
+    if ($cleaned === '') {
+        return null;
+    }
+
+    // The reply is capped at max_tokens, while a correction pass should return
+    // text of roughly the same size as its input. A reply under half the input
+    // length means the output was cut off (or summarised); reject it so the
+    // caller does not overwrite the full transcript with a partial one.
+    if (strlen($cleaned) * 2 < strlen($text)) {
+        error_log('dbnCleanupTranscript: reply much shorter than input, discarding (possible truncation)');
+        return null;
+    }
+
+    return $cleaned;
+}