feat(transcribe): GPT cleanup pass + advanced options i18n
Adds optional post-transcription cleanup via GPT-4o/GPT-4o-mini to fix mishearing errors, punctuation, and domain terms. Speaker role labelling now accepts a deployment param. Adds i18n strings for advanced options panel (task, VAD filter, Whisper model, AI cleanup) in all four languages. Updates BvjAnalyzerAgent and DeepResearchAgent. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+57
-11
@@ -27,6 +27,11 @@ $task = ($_POST['task'] ?? 'transcribe') === 'translate' ? 'translate' :
|
||||
$vadFilter = !empty($_POST['vad_filter']) && $_POST['vad_filter'] !== '0';
|
||||
$initPrompt = substr(trim((string)($_POST['initial_prompt'] ?? '')), 0, 500);
|
||||
|
||||
$allowedPostModels = ['', 'gpt-4o-mini', 'gpt-4o'];
|
||||
$postModel = in_array($_POST['post_model'] ?? '', $allowedPostModels, true)
|
||||
? (string)($_POST['post_model'] ?? '')
|
||||
: '';
|
||||
|
||||
// ── Validate upload ───────────────────────────────────────────────────────────
|
||||
|
||||
if (empty($_FILES['audio']) || $_FILES['audio']['error'] !== UPLOAD_ERR_OK) {
|
||||
@@ -114,6 +119,17 @@ if ($timeOffset > 0.0 && !empty($result['segments'])) {
|
||||
unset($seg);
|
||||
}
|
||||
|
||||
// ── Optional GPT cleanup pass ─────────────────────────────────────────────────
|
||||
|
||||
$cleanedBy = null;
|
||||
if ($postModel !== '' && !empty($result['text'])) {
|
||||
$cleaned = dbnCleanupTranscript($result['text'], $language, $initPrompt, $postModel);
|
||||
if ($cleaned !== null) {
|
||||
$result['text'] = $cleaned;
|
||||
$cleanedBy = $postModel;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Speaker role labelling (diarize + multiple speakers only) ─────────────────
|
||||
|
||||
$segments = $result['segments'] ?? [];
|
||||
@@ -126,7 +142,8 @@ if ($numDetected < 2 && $segments) {
|
||||
|
||||
$speakerRoles = null;
|
||||
if ($diarize && $numDetected > 1 && $segments) {
|
||||
$speakerRoles = dbnLabelSpeakerRoles($segments);
|
||||
$labelDeployment = $postModel ?: 'gpt-4o-mini';
|
||||
$speakerRoles = dbnLabelSpeakerRoles($segments, $labelDeployment);
|
||||
}
|
||||
|
||||
// ── Friendly engine label ─────────────────────────────────────────────────────
|
||||
@@ -161,6 +178,7 @@ dbnToolsRespond([
|
||||
'model' => $engineLabel,
|
||||
'engine' => $engineUsed,
|
||||
'latency_ms' => $latencyMs,
|
||||
'cleaned_by' => $cleanedBy,
|
||||
]);
|
||||
|
||||
|
||||
@@ -313,7 +331,7 @@ function transcribeViaWhisperGpu(array $file, string $language, bool $diarize, i
|
||||
}
|
||||
|
||||
|
||||
function dbnLabelSpeakerRoles(array $segments): array
|
||||
function dbnLabelSpeakerRoles(array $segments, string $deployment = 'gpt-4o-mini'): array
|
||||
{
|
||||
$sample = array_slice(
|
||||
array_values(array_filter($segments, fn($s) => isset($s['speaker']))),
|
||||
@@ -322,16 +340,11 @@ function dbnLabelSpeakerRoles(array $segments): array
|
||||
if (!$sample) return [];
|
||||
|
||||
$lines = array_map(fn($s) => "[{$s['speaker']}] " . trim((string)($s['text'] ?? '')), $sample);
|
||||
$azure = new DbnAzureOpenAiGateway();
|
||||
$system = 'You are analyzing a legal proceeding transcript. '
|
||||
. 'Based on the first segments, identify the role of each speaker. '
|
||||
. 'Common roles in Norwegian legal proceedings: dommer (judge), advokat (lawyer), '
|
||||
. 'forelder (parent), barn (child), sakkyndig (expert witness), '
|
||||
. 'saksbehandler (caseworker), tolk (interpreter), vitne (witness), '
|
||||
. 'prosessfullmektig (counsel). '
|
||||
$azure = (new DbnAzureOpenAiGateway())->withDeployment($deployment);
|
||||
$system = 'Label speakers in this Norwegian legal transcript. '
|
||||
. 'Return ONLY valid JSON: {"SPEAKER_00":"dommer","SPEAKER_01":"forelder"}. '
|
||||
. 'Use Norwegian role names. Use "ukjent" if role cannot be determined. '
|
||||
. 'Only include speakers present in the input.';
|
||||
. 'Norwegian role names only — dommer, advokat, forelder, barn, sakkyndig, '
|
||||
. 'saksbehandler, tolk, vitne, prosessfullmektig. Use "ukjent" if unclear.';
|
||||
|
||||
try {
|
||||
$text = $azure->chatText([
|
||||
@@ -345,3 +358,36 @@ function dbnLabelSpeakerRoles(array $segments): array
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Runs an optional GPT clean-up pass over a finished transcript.
 *
 * The model is asked to repair mishearings, run-on sentences and punctuation
 * without translating or rephrasing the text. The function is best-effort:
 * any failure yields null so the caller can keep the untouched transcript.
 *
 * @param string $text       Raw transcript text to clean.
 * @param string $language   Language code of the transcript; codes not listed
 *                           below are described to the model as Norwegian.
 * @param string $vocabulary Optional domain terms that must be spelled correctly
 *                           (empty string for none).
 * @param string $deployment Deployment name handed to the gateway.
 *
 * @return string|null The cleaned transcript, or null when the input is blank,
 *                     the gateway call throws, the reply is empty, or the reply
 *                     looks truncated.
 */
function dbnCleanupTranscript(string $text, string $language, string $vocabulary, string $deployment): ?string
{
    // Nothing to clean: avoid a pointless model call.
    $original = trim($text);
    if ($original === '') {
        return null;
    }

    $langName = match ($language) {
        'no', 'nb', 'nn' => 'Norwegian',
        'en' => 'English',
        'pl' => 'Polish',
        'uk' => 'Ukrainian',
        'sv' => 'Swedish',
        'da' => 'Danish',
        'de' => 'German',
        'fr' => 'French',
        default => 'Norwegian',
    };
    $vocabHint = $vocabulary !== '' ? " Domain terms to preserve correctly: {$vocabulary}." : '';
    $system = "Fix transcription errors in this {$langName} text.{$vocabHint} "
        . "Correct mishearing errors, run-on sentences, and punctuation. "
        . "Preserve all meaning and the original language exactly. "
        . "Return only the corrected transcript text, no commentary.";

    try {
        $azure = (new DbnAzureOpenAiGateway())->withDeployment($deployment);
        $result = $azure->chatText(
            [
                ['role' => 'system', 'content' => $system],
                ['role' => 'user', 'content' => $text],
            ],
            ['temperature' => 0.1, 'max_tokens' => 4096],
        );
    } catch (Throwable) {
        // Deliberate best-effort: a failed clean-up must never fail the transcription.
        return null;
    }

    if (!is_string($result)) {
        return null;
    }
    $cleaned = trim($result);
    if ($cleaned === '') {
        return null;
    }

    // The completion is capped at max_tokens, so a long transcript comes back cut
    // off mid-text. A faithful clean-up keeps roughly the same length; a reply under
    // half the input size is treated as truncated (or otherwise lossy) and rejected
    // rather than allowed to overwrite the full transcript. Byte lengths are fine
    // here because only the ratio matters.
    if (strlen($cleaned) * 2 < strlen($original)) {
        return null;
    }

    return $cleaned;
}
|
||||
|
||||
Reference in New Issue
Block a user