Transcribe: audio-to-text tool with diarization and speaker role labelling
New sixth tool in the hub. Accepts MP3/WAV/OGG/M4A/FLAC/WEBM up to 200 MB, proxies to Whisper on cuttlefish GPU. Optional speaker separation with LLM role labelling (dommer, advokat, forelder, sakkyndig, etc. via GPT-4o-mini). Client-side TXT / SRT / VTT download from segment data. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,190 @@
|
||||
<?php
|
||||
declare(strict_types=1);
|
||||
|
||||
require_once __DIR__ . '/../includes/LegalTools.php';
|
||||
|
||||
// Gate the endpoint: POST only, and only for an authenticated session.
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

// Language hint. Anything outside the whitelist falls back to auto-detection.
$validLangs = ['auto', 'no', 'en', 'sv', 'da', 'de', 'fr', 'es', 'pl'];
$requestedLang = (string)($_POST['language'] ?? 'auto');
$language = strtolower(trim($requestedLang));
if (!in_array($language, $validLangs, true)) {
    $language = 'auto';
}

// Diarization is requested by any non-empty flag value other than the string "0".
$diarizeFlag = $_POST['diarize'] ?? null;
$diarize = !(empty($diarizeFlag) || $diarizeFlag === '0');

// Optional speaker-count hint, clamped to the range 0..20; 0 when the field is absent.
$numSpeakers = 0;
if (isset($_POST['num_speakers'])) {
    $numSpeakers = max(0, min(20, (int)$_POST['num_speakers']));
}
|
||||
|
||||
// ── Validate upload ───────────────────────────────────────────────────────────
|
||||
|
||||
// Fetch the upload descriptor once. A missing field is treated like a failed upload.
// NOTE(review): this flow assumes dbnToolsError() ends the request, as the original code did.
$file = $_FILES['audio'] ?? null;
$uploadError = is_array($file) ? ($file['error'] ?? -1) : -1;

// A field posted as `audio[]` makes PHP report an array for every key of the
// descriptor. Using that array as an offset into the message map below would
// raise a TypeError, so reject multi-file uploads explicitly.
if (is_array($uploadError)) {
    dbnToolsError('Upload exactly one audio file.', 400, 'upload_error');
}

if (empty($file) || $uploadError !== UPLOAD_ERR_OK) {
    $messages = [
        UPLOAD_ERR_INI_SIZE => 'File exceeds server upload limit.',
        UPLOAD_ERR_FORM_SIZE => 'File exceeds form size limit.',
        UPLOAD_ERR_PARTIAL => 'File was only partially uploaded.',
        UPLOAD_ERR_NO_FILE => 'No audio file received.',
    ];
    dbnToolsError($messages[$uploadError] ?? "Upload error (code {$uploadError}).", 400, 'upload_error');
}

// Defence in depth: only accept a temp file that PHP itself received over HTTP.
if (!is_string($file['tmp_name'] ?? null) || !is_uploaded_file($file['tmp_name'])) {
    dbnToolsError('No audio file received.', 400, 'upload_error');
}

$maxBytes = 200 * 1024 * 1024;
$sizeBytes = (int)($file['size'] ?? 0);

// An empty file cannot be transcribed; fail here instead of as a 502 from Whisper.
if ($sizeBytes <= 0) {
    dbnToolsError('Uploaded file is empty.', 400, 'upload_error');
}

if ($sizeBytes > $maxBytes) {
    dbnToolsError('File too large. Maximum 200 MB.', 413, 'file_too_large');
}

// The format check is extension-based only; the file contents are not inspected.
$allowedExts = ['mp3', 'wav', 'ogg', 'oga', 'm4a', 'mp4', 'flac', 'webm', 'aac'];
$ext = strtolower(pathinfo((string)($file['name'] ?? ''), PATHINFO_EXTENSION));
if (!in_array($ext, $allowedExts, true)) {
    dbnToolsError("Unsupported format: .{$ext}. Use MP3, WAV, OGG, M4A, FLAC, or WebM.", 415, 'unsupported_format');
}
|
||||
|
||||
// ── Build Whisper request ─────────────────────────────────────────────────────
|
||||
|
||||
// NOTE(review): the Whisper host is hard-coded and reached over plain HTTP;
// consider moving it into configuration.
$whisperBase = 'http://194.93.49.14:20019';
$endpoint = $whisperBase . ($diarize ? '/transcribe/diarize' : '/transcribe');

if (!is_readable($file['tmp_name'])) {
    dbnToolsError('Could not read uploaded file.', 500, 'file_read_error');
}

// Optional form fields, sent after the file part.
$fields = [];
if ($language !== 'auto') {
    $fields['language'] = $language;
}
if ($diarize && $numSpeakers > 1) {
    $fields['num_speakers'] = (string)$numSpeakers;
}

// Reduce the client-supplied filename to letters, digits, dot, dash, underscore
// and space, so it cannot break out of the quoted `filename` parameter or inject
// header lines. The extension, which was validated earlier, is kept.
$uploadName = preg_replace('/[^\p{L}\p{N}_.\- ]+/u', '_', basename((string)$file['name']));
if (!is_string($uploadName) || $uploadName === '') {
    $uploadName = 'audio.' . $ext;
}

// ── Call Whisper ──────────────────────────────────────────────────────────────

$t0 = microtime(true);

if (function_exists('curl_init')) {
    $ch = curl_init($endpoint);
    if ($ch === false) {
        dbnToolsError('Whisper service unreachable. The GPU may be offline.', 502, 'whisper_unreachable');
    }

    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        // CURLFile makes curl stream the upload from disk and build the multipart
        // body (including the Content-Type boundary) itself, so a 200 MB file is
        // never held in a PHP string.
        CURLOPT_POSTFIELDS => [
            'file' => new CURLFile($file['tmp_name'], 'application/octet-stream', $uploadName),
        ] + $fields,
        CURLOPT_HTTPHEADER => ['Accept: application/json'],
        // Fail fast when the GPU host is down instead of waiting out the full timeout.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 600,
    ]);

    $whisperBody = curl_exec($ch);
    $httpCode = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
    $curlErr = curl_error($ch);

    if ($whisperBody === false || $httpCode !== 200) {
        // The transport error can contain the internal host and port, so it is
        // logged server-side rather than returned to the client.
        if ($curlErr !== '') {
            error_log('transcribe: Whisper request failed: ' . $curlErr);
        }
        dbnToolsError('Whisper service error (HTTP ' . $httpCode . ').', 502, 'whisper_error');
    }
} else {
    // Fallback without ext-curl: the multipart body has to be assembled in memory.
    $fileContents = file_get_contents($file['tmp_name']);
    if ($fileContents === false) {
        dbnToolsError('Could not read uploaded file.', 500, 'file_read_error');
    }

    $boundary = '----DBN' . bin2hex(random_bytes(8));
    $body = "--{$boundary}\r\n"
        . 'Content-Disposition: form-data; name="file"; filename="' . $uploadName . '"' . "\r\n"
        . "Content-Type: application/octet-stream\r\n\r\n"
        . $fileContents . "\r\n";
    unset($fileContents); // release the first copy of the audio as early as possible

    foreach ($fields as $fieldName => $fieldValue) {
        $body .= "--{$boundary}\r\n";
        $body .= "Content-Disposition: form-data; name=\"{$fieldName}\"\r\n\r\n";
        $body .= $fieldValue . "\r\n";
    }
    $body .= "--{$boundary}--\r\n";

    $ctx = stream_context_create([
        'http' => [
            'method' => 'POST',
            'timeout' => 600,
            'header' => "Content-Type: multipart/form-data; boundary={$boundary}\r\nAccept: application/json\r\n",
            'content' => $body,
            'ignore_errors' => true,
        ],
    ]);

    // The warning is suppressed on purpose; failure is detected through the return value.
    $whisperBody = @file_get_contents($endpoint, false, $ctx);
    if ($whisperBody === false) {
        dbnToolsError('Whisper service unreachable. The GPU may be offline.', 502, 'whisper_unreachable');
    }

    // `ignore_errors` returns the body for 4xx/5xx responses too, so read the
    // status line explicitly. The last status line wins after redirects.
    $responseHeaders = function_exists('http_get_last_response_headers')
        ? (http_get_last_response_headers() ?? [])
        : ($http_response_header ?? []);
    $httpCode = 0;
    foreach ($responseHeaders as $headerLine) {
        if (preg_match('#^HTTP/\S+\s+(\d{3})#', (string)$headerLine, $m) === 1) {
            $httpCode = (int)$m[1];
        }
    }
    if ($httpCode !== 200) {
        dbnToolsError('Whisper service error (HTTP ' . $httpCode . ').', 502, 'whisper_error');
    }
}

$latencyMs = (int)round((microtime(true) - $t0) * 1000);

// A usable response is a JSON object with a non-empty `text` field.
$whisper = json_decode($whisperBody, true);
if (!is_array($whisper) || empty($whisper['text'])) {
    dbnToolsError('Empty or invalid response from Whisper.', 502, 'whisper_empty');
}
|
||||
|
||||
// ── Speaker role labelling ────────────────────────────────────────────────────
|
||||
|
||||
// Segment list from Whisper; anything that is not an array becomes an empty list.
$segments = $whisper['segments'] ?? null;
if (!is_array($segments)) {
    $segments = [];
}

// Prefer the speaker count reported by Whisper. When it reports fewer than two,
// fall back to counting the distinct non-empty `speaker` labels on the segments.
$numDetected = (int)($whisper['num_speakers'] ?? 1);
if ($segments && $numDetected < 2) {
    $distinctSpeakers = count(array_filter(array_unique(array_column($segments, 'speaker'))));
    if ($distinctSpeakers > 1) {
        $numDetected = $distinctSpeakers;
    }
}

// Role labelling runs only for diarized requests with more than one speaker;
// otherwise the response carries null.
$speakerRoles = ($diarize && $numDetected > 1 && $segments)
    ? dbnLabelSpeakerRoles($segments)
    : null;
|
||||
|
||||
// ── Respond ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// Record request metadata for the successful run.
$logEntry = [
    'tool' => 'transcribe',
    'language' => $language,
    'ok' => true,
    'latency_ms' => $latencyMs,
];
dbnToolsLogMetadata($logEntry);

// Build the response payload; missing Whisper fields fall back to defaults.
$durationSec = round((float)($whisper['duration_seconds'] ?? 0), 2);
$detectedLanguage = (string)($whisper['language'] ?? $language);
$modelName = (string)($whisper['model'] ?? 'whisper');

$payload = [
    'ok' => true,
    'tool' => 'transcribe',
    'transcript' => (string)$whisper['text'],
    'segments' => $segments,
    'speaker_roles' => $speakerRoles,
    'num_speakers' => $numDetected,
    'language' => $detectedLanguage,
    'duration_sec' => $durationSec,
    'model' => $modelName,
    'latency_ms' => $latencyMs,
];
dbnToolsRespond($payload);
|
||||
|
||||
// ── Speaker role labelling helper ─────────────────────────────────────────────
|
||||
|
||||
/**
 * Ask the LLM to guess the role of each diarized speaker.
 *
 * Only the first 20 speaker-tagged segments are sent. Labelling is best-effort:
 * any failure (gateway error, unparsable reply) yields an empty map, so the
 * transcription result is still returned.
 *
 * The model reply is untrusted. Only entries whose key is a speaker id that was
 * actually sent, and whose value is a non-empty string, are kept.
 *
 * @param array $segments Whisper segments; entries without a scalar `speaker` are ignored.
 *
 * @return array<string, string> Map of speaker id => role name, possibly empty.
 */
function dbnLabelSpeakerRoles(array $segments): array
{
    $lines = [];
    $knownSpeakers = [];

    foreach ($segments as $segment) {
        if (!is_array($segment) || !isset($segment['speaker']) || !is_scalar($segment['speaker'])) {
            continue;
        }
        $speaker = (string)$segment['speaker'];
        $text = $segment['text'] ?? '';
        $lines[] = "[{$speaker}] " . trim(is_scalar($text) ? (string)$text : '');
        $knownSpeakers[$speaker] = true;

        if (count($lines) >= 20) {
            break;
        }
    }

    if (!$lines) {
        return [];
    }

    $system = 'You are analyzing a legal proceeding transcript. '
        . 'Based on the first segments, identify the role of each speaker. '
        . 'Common roles in Norwegian legal proceedings: dommer (judge), advokat (lawyer), '
        . 'forelder (parent), barn (child), sakkyndig (expert witness), '
        . 'saksbehandler (caseworker), tolk (interpreter), vitne (witness), '
        . 'prosessfullmektig (counsel). '
        . 'Return ONLY valid JSON: {"SPEAKER_00":"dommer","SPEAKER_01":"forelder"}. '
        . 'Use Norwegian role names. Use "ukjent" if role cannot be determined. '
        . 'Only include speakers present in the input.';

    try {
        // The gateway is constructed inside the try block so that a failure to
        // construct it also degrades to "no roles" instead of failing the request.
        $azure = new DbnAzureOpenAiGateway();
        $text = $azure->chatText([
            ['role' => 'system', 'content' => $system],
            ['role' => 'user', 'content' => implode("\n", $lines)],
        ], ['temperature' => 0.1, 'max_tokens' => 200]);

        // Strip an optional Markdown code fence around the JSON reply.
        $cleaned = preg_replace('/^```(?:json)?\s*|\s*```$/m', '', trim((string)$text));
        $decoded = json_decode((string)$cleaned, true);
    } catch (Throwable $e) {
        // Log only the exception class; the message could echo transcript content.
        error_log('transcribe: speaker role labelling failed: ' . get_class($e));

        return [];
    }

    if (!is_array($decoded)) {
        return [];
    }

    $roles = [];
    foreach ($decoded as $speaker => $role) {
        $speaker = (string)$speaker;
        if (!isset($knownSpeakers[$speaker]) || !is_string($role)) {
            continue;
        }
        $role = trim($role);
        if ($role !== '') {
            $roles[$speaker] = $role;
        }
    }

    return $roles;
}
|
||||
@@ -1103,3 +1103,112 @@ p {
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.03em;
|
||||
}
|
||||
|
||||
/* ── Transcribe tool ─────────────────────────────────────────────── */
|
||||
|
||||
/* Compact numeric field for the expected speaker count. */
.num-speakers-input {
  width: 4.5rem;
  padding: 0.25rem 0.5rem;
  font-size: 0.85rem;
  color: var(--ink);
  background: #fff;
  border: 1px solid var(--line);
  border-radius: 6px;
}

/* Row of speaker/role chips shown above the transcript. */
.transcript-roles {
  display: flex;
  flex-wrap: wrap;
  gap: 0.4rem;
  margin-bottom: 0.75rem;
}

/* One speaker chip; colour comes from the --0 … --5 modifiers below. */
.speaker-tag {
  display: inline-flex;
  align-items: center;
  gap: 0.3rem;
  padding: 0.2rem 0.55rem;
  border-radius: 4px;
  font-size: 0.72rem;
  font-weight: 600;
}

.speaker-tag small {
  font-weight: 400;
  opacity: 0.75;
}

/* Six-colour palette; the script picks a modifier with `index % 6`. */
.speaker-tag--0 { color: #1d4ed8; background: #dbeafe; }
.speaker-tag--1 { color: #6d28d9; background: #ede9fe; }
.speaker-tag--2 { color: #166534; background: #dcfce7; }
.speaker-tag--3 { color: #854d0e; background: #fef9c3; }
.speaker-tag--4 { color: #991b1b; background: #fee2e2; }
.speaker-tag--5 { color: #0f766e; background: #e7f5f2; }

/* Scrollable container for the full transcript text. */
.transcript-box {
  max-height: 400px;
  overflow-y: auto;
  margin-bottom: 0.75rem;
  padding: 1rem;
  background: var(--bg);
  border: 1px solid var(--line);
  border-radius: 8px;
}

.transcript-text {
  margin: 0;
  color: var(--ink);
  font-family: inherit;
  font-size: 0.875rem;
  line-height: 1.65;
  white-space: pre-wrap;
  word-break: break-word;
}

/* Collapsible per-segment listing. */
.segment-details {
  margin-bottom: 0.75rem;
  border: 1px solid var(--line);
  border-radius: 8px;
}

.segment-summary {
  padding: 0.6rem 1rem;
  color: var(--muted);
  font-size: 0.8rem;
  cursor: pointer;
  user-select: none;
}

.segment-list {
  max-height: 280px;
  overflow-y: auto;
  padding: 0.25rem 0.75rem 0.75rem;
}

.segment-row {
  display: flex;
  align-items: baseline;
  gap: 0.6rem;
  padding: 0.2rem 0;
  font-size: 0.78rem;
  border-bottom: 1px solid var(--bg);
}

.segment-time {
  flex-shrink: 0;
  min-width: 7rem;
  color: var(--muted);
  font-family: ui-monospace, monospace;
}

.segment-text {
  color: var(--ink);
  line-height: 1.4;
}

/* Download buttons (TXT / SRT / VTT). */
.transcript-downloads {
  display: flex;
  flex-wrap: wrap;
  gap: 0.5rem;
  margin-top: 0.75rem;
}
|
||||
|
||||
+264
-1
@@ -4,6 +4,8 @@ const state = {
|
||||
};
|
||||
|
||||
let lastTimelineEvents = [];
|
||||
let lastAudioFile = null;
|
||||
let lastTranscriptData = null;
|
||||
|
||||
const tools = {
|
||||
ask: {
|
||||
@@ -56,6 +58,16 @@ const tools = {
|
||||
usesLanguage: false,
|
||||
badge: 'deterministic first',
|
||||
},
|
||||
transcribe: {
|
||||
kind: 'Audio Transcription',
|
||||
title: 'Transcribe audio',
|
||||
label: 'Audio file',
|
||||
endpoint: 'api/transcribe.php',
|
||||
payloadKey: null,
|
||||
placeholder: '',
|
||||
usesLanguage: false,
|
||||
badge: 'Whisper / GPU',
|
||||
},
|
||||
};
|
||||
|
||||
const els = {};
|
||||
@@ -91,6 +103,17 @@ document.addEventListener('DOMContentLoaded', () => {
|
||||
aliasSection: document.querySelector('#aliasSection'),
|
||||
addAliasRow: document.querySelector('#addAliasRow'),
|
||||
aliasRows: document.querySelector('#aliasRows'),
|
||||
audioZone: document.querySelector('#audioZone'),
|
||||
audioInput: document.querySelector('#audioInput'),
|
||||
audioPrompt: document.querySelector('#audioPrompt'),
|
||||
audioFileInfo: document.querySelector('#audioFileInfo'),
|
||||
audioFileName: document.querySelector('#audioFileName'),
|
||||
audioFileSize: document.querySelector('#audioFileSize'),
|
||||
audioClear: document.querySelector('#audioClear'),
|
||||
diarizeControl: document.querySelector('#diarizeControl'),
|
||||
diarizeCheck: document.querySelector('#diarizeCheck'),
|
||||
numSpeakersInput: document.querySelector('#numSpeakersInput'),
|
||||
transcribeLangControl: document.querySelector('#transcribeLangControl'),
|
||||
});
|
||||
|
||||
els.tabs.forEach((button) => {
|
||||
@@ -101,8 +124,12 @@ document.addEventListener('DOMContentLoaded', () => {
|
||||
els.healthButton.addEventListener('click', checkHealth);
|
||||
setupUpload();
|
||||
setupAliases();
|
||||
setupAudio();
|
||||
els.results.addEventListener('click', (e) => {
|
||||
if (e.target.closest('#exportCsvBtn')) exportTimelineCSV(lastTimelineEvents);
|
||||
if (e.target.closest('#dlTxt')) downloadTranscriptTxt();
|
||||
if (e.target.closest('#dlSrt')) downloadTranscriptSrt();
|
||||
if (e.target.closest('#dlVtt')) downloadTranscriptVtt();
|
||||
});
|
||||
setTool(state.activeTool);
|
||||
|
||||
@@ -132,8 +159,14 @@ function setTool(toolName) {
|
||||
els.redactionControl.classList.toggle('is-hidden', toolName !== 'redact');
|
||||
els.uploadZone.classList.toggle('is-hidden', toolName !== 'redact' && toolName !== 'timeline');
|
||||
els.aliasSection.classList.toggle('is-hidden', toolName !== 'redact');
|
||||
els.audioZone.classList.toggle('is-hidden', toolName !== 'transcribe');
|
||||
els.diarizeControl.classList.toggle('is-hidden', toolName !== 'transcribe');
|
||||
els.transcribeLangControl.classList.toggle('is-hidden', toolName !== 'transcribe');
|
||||
els.input.classList.toggle('is-hidden', toolName === 'transcribe');
|
||||
els.inputLabel.classList.toggle('is-hidden', toolName === 'transcribe');
|
||||
resetUpload();
|
||||
resetAliases();
|
||||
resetAudio();
|
||||
els.status.textContent = '';
|
||||
renderTrace([]);
|
||||
}
|
||||
@@ -163,6 +196,12 @@ async function submitPasscode(event) {
|
||||
|
||||
async function runTool(event) {
|
||||
event.preventDefault();
|
||||
|
||||
if (state.activeTool === 'transcribe') {
|
||||
await runTranscribe();
|
||||
return;
|
||||
}
|
||||
|
||||
const tool = tools[state.activeTool];
|
||||
const text = els.input.value.trim();
|
||||
if (!text) {
|
||||
@@ -389,7 +428,9 @@ async function postJson(url, payload) {
|
||||
/**
 * Toggle the run button between its idle and busy states.
 *
 * While busy the button is disabled and shows a tool-specific progress label;
 * when idle it is re-enabled and shows "Run Tool".
 *
 * @param {boolean} isBusy true while a request is in flight.
 */
function setBusy(isBusy) {
  const button = document.querySelector('#runButton');
  button.disabled = isBusy;

  // Set the label once; the earlier unconditional "Running..." assignment was
  // immediately overwritten and has been removed.
  const busyLabel = state.activeTool === 'transcribe' ? 'Transcribing...' : 'Running...';
  button.textContent = isBusy ? busyLabel : 'Run Tool';
}
|
||||
|
||||
function currentLanguage() {
|
||||
@@ -447,6 +488,10 @@ function renderMainFinding(data) {
|
||||
return `<p>${escapeHtml(data.what_we_found || '')}</p>`;
|
||||
}
|
||||
|
||||
/**
 * Read the language selected in the transcribe language radio group.
 *
 * @returns {string} The checked radio's value, or 'auto' when none is checked
 *   or its value is empty.
 */
function currentTranscribeLang() {
  const checked = document.querySelector('input[name="transcribeLang"]:checked');
  const value = checked ? checked.value : undefined;
  return value || 'auto';
}
|
||||
|
||||
function renderEvidence(data) {
|
||||
const items = data.evidence_trail || data.sources || data.hits || [];
|
||||
if (!items.length) {
|
||||
@@ -513,6 +558,224 @@ function exportTimelineCSV(events) {
|
||||
URL.revokeObjectURL(url);
|
||||
}
|
||||
|
||||
/**
 * Upload the selected audio file to the transcription endpoint and render the
 * result.
 *
 * Does nothing except show a hint when no audio file has been chosen. On
 * failure, the error message is shown in the status line and the trace panel.
 * The busy state is always cleared at the end.
 */
async function runTranscribe() {
  if (!lastAudioFile) {
    els.status.textContent = 'Choose an audio file before transcribing.';
    return;
  }

  setBusy(true);
  renderTrace([{ label: 'Sending to Whisper', detail: 'Uploading audio to cuttlefish GPU…', status: 'running' }]);

  try {
    const payload = new FormData();
    payload.append('audio', lastAudioFile);
    payload.append('language', currentTranscribeLang());

    const wantsDiarization = Boolean(els.diarizeCheck?.checked);
    if (wantsDiarization) {
      payload.append('diarize', '1');
      // The speaker count is only sent when the field holds a number >= 2.
      const speakerCount = parseInt(els.numSpeakersInput?.value || '', 10);
      if (speakerCount >= 2) {
        payload.append('num_speakers', String(speakerCount));
      }
    }

    const response = await fetch('api/transcribe.php', {
      method: 'POST',
      credentials: 'same-origin',
      body: payload,
    });

    // A non-JSON body is treated as an empty object so the error path below applies.
    const result = await response.json().catch(() => ({}));
    const succeeded = response.ok && result.ok;
    if (!succeeded) {
      throw new Error(result.error?.message || `Transcription failed (HTTP ${response.status}).`);
    }

    lastTranscriptData = result;
    renderTranscriptResults(result);

    const audioNote = result.duration_sec ? ` · Audio: ${Math.round(result.duration_sec)}s` : '';
    els.status.textContent = `Done in ${result.latency_ms || 0} ms${audioNote}.`;
  } catch (error) {
    els.status.textContent = error.message;
    renderTrace([{ label: 'Transcription error', detail: error.message, status: 'warning' }]);
  } finally {
    setBusy(false);
  }
}
|
||||
|
||||
/**
 * Render a transcription response into the results panel and the trace list.
 *
 * Shows role chips (when segments carry speaker labels), the full transcript,
 * a collapsible per-segment list (only when speakers are present) and the
 * download buttons. SRT/VTT buttons appear only when there are segments.
 *
 * @param {object} data Response payload from the transcribe endpoint.
 */
function renderTranscriptResults(data) {
  const roleBySpeaker = data.speaker_roles || {};
  const allSegments = data.segments || [];

  // Distinct speaker ids in order of first appearance; the position selects the chip colour.
  const speakerIds = [...new Set(allSegments.map((s) => s.speaker).filter(Boolean))];
  const anySpeaker = speakerIds.length > 0;

  let rolesHtml = '';
  if (anySpeaker) {
    const chips = speakerIds.map((id, position) => {
      const role = roleBySpeaker[id] || id;
      return `<span class="speaker-tag speaker-tag--${position % 6}">${escapeHtml(role)}<small>${escapeHtml(id)}</small></span>`;
    });
    rolesHtml = `<p class="transcript-roles">${chips.join('')}</p>`;
  }

  let segmentsHtml = '';
  if (anySpeaker) {
    const rows = allSegments.map((seg) => {
      const position = speakerIds.indexOf(seg.speaker);
      let label = seg.speaker || '';
      if (seg.speaker && roleBySpeaker[seg.speaker]) {
        label = `${roleBySpeaker[seg.speaker]} (${seg.speaker})`;
      }
      const chip = seg.speaker
        ? `<span class="speaker-tag speaker-tag--${position >= 0 ? position % 6 : 0}">${escapeHtml(label)}</span>`
        : '';
      return `<div class="segment-row">
<span class="segment-time">${fmtTime(seg.start)}–${fmtTime(seg.end)}</span>
${chip}
<span class="segment-text">${escapeHtml(seg.text)}</span>
</div>`;
    });
    segmentsHtml = `<details class="segment-details"><summary class="segment-summary">Segments (${allSegments.length})</summary>
<div class="segment-list">${rows.join('')}</div></details>`;
  }

  let subtitleButtons = '';
  if (allSegments.length) {
    subtitleButtons = `<button type="button" class="export-csv-btn" id="dlSrt">Download SRT</button>
<button type="button" class="export-csv-btn" id="dlVtt">Download VTT</button>`;
  }

  els.results.innerHTML = `
<section class="result-section">
<h3>Transcript</h3>
${rolesHtml}
<div class="transcript-box"><pre class="transcript-text">${escapeHtml(data.transcript)}</pre></div>
${segmentsHtml}
<div class="transcript-downloads">
<button type="button" class="export-csv-btn" id="dlTxt">Download TXT</button>
${subtitleButtons}
</div>
</section>`;

  // Summarise the response metadata in the trace panel.
  const done = (label, detail = '') => ({ label, detail, status: 'complete' });
  const trace = [];
  if (data.duration_sec) {
    trace.push(done(`Duration: ${Math.round(data.duration_sec)}s`));
  }
  if (data.language) {
    trace.push(done(`Language: ${data.language}`));
  }
  if (data.num_speakers > 1) {
    const roleSummary = Object.entries(roleBySpeaker).map(([id, r]) => `${id}: ${r}`).join(', ') || '';
    trace.push(done(`Speakers detected: ${data.num_speakers}`, roleSummary));
  }
  if (data.model) {
    trace.push(done(`Model: ${data.model}`));
  }
  renderTrace(trace.length ? trace : [done('Transcribed')]);
}
|
||||
|
||||
/**
 * Format a position in seconds as `MM:SS`, or `HH:MM:SS` from one hour upward.
 * Fractions are truncated. Missing, non-numeric or negative input is shown as 00:00.
 *
 * @param {number} secs Position in seconds.
 * @returns {string}
 */
function fmtTime(secs) {
  const total = Math.max(0, Math.floor(Number(secs) || 0));
  const h = Math.floor(total / 3600);
  const m = Math.floor((total % 3600) / 60);
  const s = total % 60;
  const parts = h > 0 ? [h, m, s] : [m, s];
  return parts.map((part) => pad2(part)).join(':');
}

/** Left-pad a number with zeros to at least two digits. */
function pad2(n) { return String(n).padStart(2, '0'); }

/**
 * Format a position in seconds as an SRT timestamp, `HH:MM:SS,mmm`.
 *
 * The value is rounded to whole milliseconds first and every field is derived
 * from that integer. Rounding only the fractional part could produce a
 * four-digit `1000` millisecond field (for example 1.9996 s) without carrying
 * into the seconds.
 *
 * @param {number} secs Position in seconds; non-numeric or negative input is treated as 0.
 * @returns {string}
 */
function toSrtTime(secs) {
  const totalMs = Math.max(0, Math.round((Number(secs) || 0) * 1000));
  const h = Math.floor(totalMs / 3600000);
  const m = Math.floor((totalMs % 3600000) / 60000);
  const s = Math.floor((totalMs % 60000) / 1000);
  const ms = totalMs % 1000;
  return `${pad2(h)}:${pad2(m)}:${pad2(s)},${String(ms).padStart(3, '0')}`;
}

/**
 * Format a position in seconds as a WebVTT timestamp, `HH:MM:SS.mmm`.
 *
 * @param {number} secs Position in seconds.
 * @returns {string}
 */
function toVttTime(secs) {
  return toSrtTime(secs).replace(',', '.');
}
|
||||
|
||||
/**
 * Trigger a browser download of `blob` under the given file name.
 *
 * Creates a temporary object URL, clicks a detached anchor pointing at it, and
 * revokes the URL right afterwards.
 *
 * @param {Blob} blob Content to download.
 * @param {string} filename Suggested file name.
 */
function downloadBlob(blob, filename) {
  const objectUrl = URL.createObjectURL(blob);
  const anchor = document.createElement('a');
  anchor.href = objectUrl;
  anchor.download = filename;
  anchor.click();
  URL.revokeObjectURL(objectUrl);
}
|
||||
|
||||
/**
 * Download the most recent transcript as plain text (`transcript.txt`).
 * Does nothing when no transcription result is held.
 */
function downloadTranscriptTxt() {
  if (lastTranscriptData) {
    const textBlob = new Blob([lastTranscriptData.transcript], { type: 'text/plain' });
    downloadBlob(textBlob, 'transcript.txt');
  }
}
|
||||
|
||||
/**
 * Download the most recent transcript as SubRip subtitles (`transcript.srt`).
 *
 * Each segment becomes one numbered cue. When the segment has a speaker, the
 * text is prefixed with `[role]`, falling back to the raw speaker id when no
 * role is known. Does nothing when there are no segments.
 */
function downloadTranscriptSrt() {
  const segments = lastTranscriptData?.segments;
  if (!segments?.length) return;

  // The server sends `speaker_roles: null` when role labelling was skipped. A
  // destructuring default only covers `undefined`, so `null` is handled here.
  const roles = lastTranscriptData.speaker_roles || {};

  // Trim each segment and collapse line breaks: a blank line inside the text
  // would end the cue early.
  const cueText = (value) => String(value ?? '').replace(/\s*\n\s*/g, '\n').trim();

  const cues = segments.map((seg, i) => {
    const spk = seg.speaker ? `[${roles[seg.speaker] || seg.speaker}] ` : '';
    return `${i + 1}\n${toSrtTime(seg.start)} --> ${toSrtTime(seg.end)}\n${spk}${cueText(seg.text)}\n`;
  });

  downloadBlob(new Blob([cues.join('\n')], { type: 'text/srt' }), 'transcript.srt');
}
|
||||
|
||||
/**
 * Download the most recent transcript as WebVTT (`transcript.vtt`).
 *
 * Each segment becomes one cue. When the segment has a speaker, the text is
 * wrapped in a `<v name>` voice tag using the role, falling back to the raw
 * speaker id. Does nothing when there are no segments.
 */
function downloadTranscriptVtt() {
  const segments = lastTranscriptData?.segments;
  if (!segments?.length) return;

  // The server sends `speaker_roles: null` when role labelling was skipped. A
  // destructuring default only covers `undefined`, so `null` is handled here.
  const roles = lastTranscriptData.speaker_roles || {};

  // `&`, `<` and `>` are markup in WebVTT cue text and must be escaped. Escaping
  // `>` also rules out the forbidden `-->` sequence.
  const esc = (value) => String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
  // A blank line would end the cue, so line breaks are collapsed and the text trimmed.
  const cueText = (value) => esc(value).replace(/\s*\n\s*/g, '\n').trim();
  // Keep the voice name on a single line.
  const voiceName = (value) => esc(value).replace(/\s+/g, ' ').trim();

  const lines = ['WEBVTT\n'];
  segments.forEach((seg) => {
    const spk = seg.speaker ? `<v ${voiceName(roles[seg.speaker] || seg.speaker)}>` : '';
    lines.push(`${toVttTime(seg.start)} --> ${toVttTime(seg.end)}\n${spk}${cueText(seg.text)}\n`);
  });

  downloadBlob(new Blob([lines.join('\n')], { type: 'text/vtt' }), 'transcript.vtt');
}
|
||||
|
||||
/**
 * Forget the selected audio file and restore the upload zone to its empty
 * state: prompt visible, file info hidden, name and size labels cleared.
 * Only the stored file is cleared when the audio input element is missing.
 */
function resetAudio() {
  lastAudioFile = null;

  const { audioInput, audioPrompt, audioFileInfo, audioFileName, audioFileSize } = els;
  if (!audioInput) {
    return;
  }

  audioInput.value = '';
  audioPrompt?.classList.remove('is-hidden');
  audioFileInfo?.classList.add('is-hidden');
  if (audioFileName) {
    audioFileName.textContent = '';
  }
  if (audioFileSize) {
    audioFileSize.textContent = '';
  }
}
|
||||
|
||||
/**
 * Wire up the audio upload zone: drag-and-drop, click-to-browse, the file
 * input's change event and the clear button. Does nothing when the zone
 * element is not present.
 */
function setupAudio() {
  const zone = els.audioZone;
  if (!zone) return;

  const DRAG_CLASS = 'is-drag-over';

  // Take the first file from a FileList-like object, if there is one.
  const acceptFirst = (files) => {
    const first = files?.[0];
    if (first) handleAudio(first);
  };

  zone.addEventListener('dragover', (event) => {
    event.preventDefault();
    zone.classList.add(DRAG_CLASS);
  });

  zone.addEventListener('dragleave', (event) => {
    // Drop the highlight only when the pointer actually leaves the zone,
    // not when it moves onto one of its children.
    const stillInside = zone.contains(event.relatedTarget);
    if (!stillInside) {
      zone.classList.remove(DRAG_CLASS);
    }
  });

  zone.addEventListener('drop', (event) => {
    event.preventDefault();
    zone.classList.remove(DRAG_CLASS);
    acceptFirst(event.dataTransfer?.files);
  });

  zone.addEventListener('click', (event) => {
    const target = event.target;
    // Clicks on the clear button must not reopen the file dialog.
    const onClearButton = target === els.audioClear || els.audioClear?.contains(target);
    if (onClearButton) return;
    // A <label> already forwards its click to the input.
    if (target.tagName === 'LABEL') return;
    els.audioInput.click();
  });

  els.audioInput.addEventListener('change', () => {
    acceptFirst(els.audioInput.files);
  });

  els.audioClear.addEventListener('click', () => {
    resetAudio();
    els.status.textContent = '';
  });
}
|
||||
|
||||
/**
 * Validate a chosen audio file and, when it is acceptable, remember it for
 * upload and show its name and size in the upload zone.
 *
 * A file is rejected, with a message in the status line, when its extension
 * is not in the supported list or when it is larger than 200 MB.
 *
 * @param {File} file File picked through the input or dropped on the zone.
 */
function handleAudio(file) {
  const supported = ['mp3', 'wav', 'ogg', 'oga', 'm4a', 'mp4', 'flac', 'webm', 'aac'];
  const nameParts = file.name.split('.');
  const extension = nameParts[nameParts.length - 1].toLowerCase();

  if (supported.indexOf(extension) === -1) {
    els.status.textContent = `Unsupported format: .${extension}. Use MP3, WAV, OGG, M4A, FLAC, or WebM.`;
    return;
  }

  const megabytes = file.size / 1024 / 1024;
  const sizeLabel = `${megabytes.toFixed(1)} MB`;

  if (megabytes > 200) {
    els.status.textContent = `File too large (${sizeLabel}). Maximum 200 MB.`;
    return;
  }

  lastAudioFile = file;

  const { audioFileName, audioFileSize, audioPrompt, audioFileInfo } = els;
  if (audioFileName) audioFileName.textContent = file.name;
  if (audioFileSize) audioFileSize.textContent = sizeLabel;
  audioPrompt?.classList.add('is-hidden');
  audioFileInfo?.classList.remove('is-hidden');

  els.status.textContent = `Ready: ${file.name} (${sizeLabel})`;
}
|
||||
|
||||
function renderEntityCounts(counts = {}) {
|
||||
const entries = Object.entries(counts).filter(([, count]) => Number(count) > 0);
|
||||
if (!entries.length) {
|
||||
|
||||
@@ -61,7 +61,7 @@ $authenticated = dbnToolsIsAuthenticated();
|
||||
|
||||
<section class="cap-section">
|
||||
<div class="section-inner">
|
||||
<h2 class="section-heading">Five tools, one corpus</h2>
|
||||
<h2 class="section-heading">Six tools, one suite</h2>
|
||||
<div class="cap-grid">
|
||||
<div class="cap-card">
|
||||
<span class="cap-label">Ask</span>
|
||||
@@ -88,6 +88,11 @@ $authenticated = dbnToolsIsAuthenticated();
|
||||
<h3>Redact</h3>
|
||||
<p>Remove sensitive personal data with configurable Nordic / ECHR / Global profiles.</p>
|
||||
</div>
|
||||
<div class="cap-card">
|
||||
<span class="cap-label">Transcribe</span>
|
||||
<h3>Transcribe</h3>
|
||||
<p>Convert audio recordings to text with optional speaker separation and Norwegian role labelling.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
@@ -189,6 +194,10 @@ $authenticated = dbnToolsIsAuthenticated();
|
||||
<span>Redact</span>
|
||||
<small>Privacy</small>
|
||||
</button>
|
||||
<button type="button" class="tool-tab" data-tool="transcribe" aria-pressed="false">
|
||||
<span>Transcribe</span>
|
||||
<small>Audio</small>
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
<section class="tool-panel" aria-labelledby="toolTitle">
|
||||
@@ -207,6 +216,20 @@ $authenticated = dbnToolsIsAuthenticated();
|
||||
<label><input type="radio" name="language" value="no"> Norsk</label>
|
||||
</div>
|
||||
|
||||
<div class="control-row is-hidden" id="transcribeLangControl">
|
||||
<span class="control-label">Language</span>
|
||||
<label><input type="radio" name="transcribeLang" value="auto" checked> Auto-detect</label>
|
||||
<label><input type="radio" name="transcribeLang" value="no"> Norsk</label>
|
||||
<label><input type="radio" name="transcribeLang" value="en"> English</label>
|
||||
</div>
|
||||
|
||||
<div class="control-row is-hidden" id="diarizeControl">
|
||||
<span class="control-label">Speakers</span>
|
||||
<label><input type="checkbox" id="diarizeCheck" name="diarize"> Separate speakers</label>
|
||||
<span class="control-label" style="margin-left:1.25rem">Count</span>
|
||||
<input type="number" id="numSpeakersInput" name="num_speakers" min="2" max="10" placeholder="auto" class="num-speakers-input" aria-label="Expected speaker count">
|
||||
</div>
|
||||
|
||||
<div class="control-row is-hidden" id="redactionControl">
|
||||
<span class="control-label">Mode</span>
|
||||
<label><input type="radio" name="redactionMode" value="standard" checked> Standard</label>
|
||||
@@ -218,6 +241,19 @@ $authenticated = dbnToolsIsAuthenticated();
|
||||
<label><input type="radio" name="redactionRegion" value="global"> Global</label>
|
||||
</div>
|
||||
|
||||
<div class="upload-zone is-hidden" id="audioZone" role="region" aria-label="Audio upload">
|
||||
<input type="file" id="audioInput" accept="audio/*,video/mp4,video/webm" aria-label="Choose audio file">
|
||||
<div id="audioPrompt" class="upload-prompt">
|
||||
<span class="upload-icon" aria-hidden="true">▶</span>
|
||||
<p>Drop audio file here, or <label for="audioInput" class="upload-browse">browse</label></p>
|
||||
<p class="upload-hint"><strong>MP3</strong>, <strong>WAV</strong>, <strong>OGG</strong>, <strong>M4A</strong>, <strong>FLAC</strong>, <strong>WEBM</strong> — max 200 MB</p>
|
||||
</div>
|
||||
<div id="audioFileInfo" class="upload-file is-hidden">
|
||||
<ul class="upload-file-list"><li id="audioFileLine"><span id="audioFileName" class="upload-filename"></span><span id="audioFileSize" class="upload-chars"></span></li></ul>
|
||||
<button type="button" id="audioClear" class="upload-clear" aria-label="Clear audio file">×</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="upload-zone is-hidden" id="uploadZone" role="region" aria-label="File upload">
|
||||
<input type="file" id="uploadInput" multiple accept=".pdf,.docx,.txt" aria-label="Choose files">
|
||||
<div id="uploadPrompt" class="upload-prompt">
|
||||
|
||||
Reference in New Issue
Block a user