feat(transcribe): GPT cleanup pass + advanced options i18n

Adds optional post-transcription cleanup via GPT-4o/GPT-4o-mini to fix
mishearing errors, punctuation, and domain terms. Speaker role labelling
now accepts a deployment param. Adds i18n strings for advanced options
panel (task, VAD filter, Whisper model, AI cleanup) in all four languages.
Updates BvjAnalyzerAgent and DeepResearchAgent.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-18 07:23:01 +02:00
parent e32ee60e78
commit c4362738c1
5 changed files with 345 additions and 112 deletions
+57 -11
View File
@@ -27,6 +27,11 @@ $task = ($_POST['task'] ?? 'transcribe') === 'translate' ? 'translate' :
// Voice-activity-detection flag: true only when the field is present and non-empty.
// Note: empty('0') is already true in PHP, so the explicit '0' comparison is a belt-and-braces check.
$vadFilter = !empty($_POST['vad_filter']) && $_POST['vad_filter'] !== '0';
// Free-text prompt supplied by the client, trimmed and capped at 500.
// NOTE(review): substr() counts bytes, not characters, so the cap can split a
// multi-byte UTF-8 character (e.g. æ/ø/å) and leave an invalid trailing byte — confirm
// whether downstream consumers tolerate that.
$initPrompt = substr(trim((string)($_POST['initial_prompt'] ?? '')), 0, 500);
// Whitelist of post-processing models the client may request.
$allowedPostModels = ['', 'gpt-4o-mini', 'gpt-4o'];
// Strict in_array(): anything that is not exactly one of the whitelisted strings
// (including non-string input) falls back to ''.
$postModel = in_array($_POST['post_model'] ?? '', $allowedPostModels, true)
? (string)($_POST['post_model'] ?? '')
: '';
// ── Validate upload ───────────────────────────────────────────────────────────
if (empty($_FILES['audio']) || $_FILES['audio']['error'] !== UPLOAD_ERR_OK) {
@@ -114,6 +119,17 @@ if ($timeOffset > 0.0 && !empty($result['segments'])) {
unset($seg);
}
// ── Optional GPT cleanup pass ─────────────────────────────────────────────────
// $cleanedBy stays null unless a cleanup actually replaced the transcript text;
// otherwise it records which model produced the replacement.
$cleanedBy = null;
// Only run when a post-processing model was selected and there is text to clean.
if ($postModel !== '' && !empty($result['text'])) {
// $initPrompt is passed as the vocabulary hint argument of the cleanup helper.
$cleaned = dbnCleanupTranscript($result['text'], $language, $initPrompt, $postModel);
// A null return leaves the original transcript text untouched.
if ($cleaned !== null) {
// Only the 'text' entry is replaced here; no other entry of $result is modified in this block.
$result['text'] = $cleaned;
$cleanedBy = $postModel;
}
}
// ── Speaker role labelling (diarize + multiple speakers only) ─────────────────
$segments = $result['segments'] ?? [];
@@ -126,7 +142,8 @@ if ($numDetected < 2 && $segments) {
$speakerRoles = null;
if ($diarize && $numDetected > 1 && $segments) {
$speakerRoles = dbnLabelSpeakerRoles($segments);
$labelDeployment = $postModel ?: 'gpt-4o-mini';
$speakerRoles = dbnLabelSpeakerRoles($segments, $labelDeployment);
}
// ── Friendly engine label ─────────────────────────────────────────────────────
@@ -161,6 +178,7 @@ dbnToolsRespond([
'model' => $engineLabel,
'engine' => $engineUsed,
'latency_ms' => $latencyMs,
'cleaned_by' => $cleanedBy,
]);
@@ -313,7 +331,7 @@ function transcribeViaWhisperGpu(array $file, string $language, bool $diarize, i
}
function dbnLabelSpeakerRoles(array $segments): array
function dbnLabelSpeakerRoles(array $segments, string $deployment = 'gpt-4o-mini'): array
{
$sample = array_slice(
array_values(array_filter($segments, fn($s) => isset($s['speaker']))),
@@ -322,16 +340,11 @@ function dbnLabelSpeakerRoles(array $segments): array
if (!$sample) return [];
$lines = array_map(fn($s) => "[{$s['speaker']}] " . trim((string)($s['text'] ?? '')), $sample);
$azure = new DbnAzureOpenAiGateway();
$system = 'You are analyzing a legal proceeding transcript. '
. 'Based on the first segments, identify the role of each speaker. '
. 'Common roles in Norwegian legal proceedings: dommer (judge), advokat (lawyer), '
. 'forelder (parent), barn (child), sakkyndig (expert witness), '
. 'saksbehandler (caseworker), tolk (interpreter), vitne (witness), '
. 'prosessfullmektig (counsel). '
$azure = (new DbnAzureOpenAiGateway())->withDeployment($deployment);
$system = 'Label speakers in this Norwegian legal transcript. '
. 'Return ONLY valid JSON: {"SPEAKER_00":"dommer","SPEAKER_01":"forelder"}. '
. 'Use Norwegian role names. Use "ukjent" if role cannot be determined. '
. 'Only include speakers present in the input.';
. 'Norwegian role names only — dommer, advokat, forelder, barn, sakkyndig, '
. 'saksbehandler, tolk, vitne, prosessfullmektig. Use "ukjent" if unclear.';
try {
$text = $azure->chatText([
@@ -345,3 +358,36 @@ function dbnLabelSpeakerRoles(array $segments): array
return [];
}
}
/**
 * Best-effort cleanup of a raw transcript through an Azure OpenAI chat deployment.
 *
 * Sends the transcript together with a system prompt asking the model to fix
 * mishearings, run-on sentences and punctuation while preserving meaning and
 * language. Every failure mode yields null so the caller can keep the raw
 * transcript instead of replacing it with something worse.
 *
 * @param string $text       Raw transcript text.
 * @param string $language   Language code; unrecognised codes fall back to Norwegian.
 * @param string $vocabulary Optional domain terms to mention in the prompt ('' for none).
 * @param string $deployment Deployment name handed to the gateway.
 *
 * @return string|null The trimmed cleaned transcript, or null when the input is
 *                     blank, the request fails, the reply is blank, or the reply
 *                     looks cut off.
 */
function dbnCleanupTranscript(string $text, string $language, string $vocabulary, string $deployment): ?string
{
    // Nothing to clean: skip the API round-trip entirely.
    if (trim($text) === '') {
        return null;
    }

    $maxTokens = 4096;

    $langName = match ($language) {
        'no', 'nb', 'nn' => 'Norwegian',
        'en' => 'English',
        'pl' => 'Polish',
        'uk' => 'Ukrainian',
        'sv' => 'Swedish',
        'da' => 'Danish',
        'de' => 'German',
        'fr' => 'French',
        default => 'Norwegian',
    };

    // The vocabulary may have been capped byte-wise upstream, which can leave a
    // dangling partial UTF-8 sequence (at most 3 bytes). Drop it so the prompt
    // stays valid UTF-8; if the string is still invalid, omit the hint.
    $dropped = 0;
    while ($vocabulary !== '' && $dropped < 3 && preg_match('//u', $vocabulary) !== 1) {
        $vocabulary = substr($vocabulary, 0, -1);
        $dropped++;
    }
    if (preg_match('//u', $vocabulary) !== 1) {
        $vocabulary = '';
    }

    $vocabHint = $vocabulary !== '' ? " Domain terms to preserve correctly: {$vocabulary}." : '';
    $system = "Fix transcription errors in this {$langName} text.{$vocabHint} "
        . "Correct mishearing errors, run-on sentences, and punctuation. "
        . "Preserve all meaning and the original language exactly. "
        . "Return only the corrected transcript text, no commentary.";

    try {
        $azure = (new DbnAzureOpenAiGateway())->withDeployment($deployment);
        $result = $azure->chatText(
            [
                ['role' => 'system', 'content' => $system],
                ['role' => 'user', 'content' => $text],
            ],
            ['temperature' => 0.1, 'max_tokens' => $maxTokens]
        );
    } catch (Throwable $e) {
        // Cleanup is optional: log the reason and let the caller keep the raw text.
        error_log('dbnCleanupTranscript: cleanup failed, keeping raw transcript: ' . $e->getMessage());
        return null;
    }

    if (!is_string($result)) {
        return null;
    }

    $cleaned = trim($result);
    if ($cleaned === '') {
        return null;
    }

    // The reply is capped at $maxTokens, so a long transcript can come back cut
    // off. A cleanup should never halve the text, so treat a large shrink on a
    // long input as truncation. Short inputs are exempt so that a legitimately
    // tightened short transcript is not rejected.
    if (strlen($text) > $maxTokens && strlen($cleaned) * 2 < strlen($text)) {
        error_log('dbnCleanupTranscript: reply much shorter than input, keeping raw transcript');
        return null;
    }

    return $cleaned;
}