diff --git a/api/transcribe.php b/api/transcribe.php new file mode 100644 index 0000000..42cf177 --- /dev/null +++ b/api/transcribe.php @@ -0,0 +1,190 @@ + 'File exceeds server upload limit.', + UPLOAD_ERR_FORM_SIZE => 'File exceeds form size limit.', + UPLOAD_ERR_PARTIAL => 'File was only partially uploaded.', + UPLOAD_ERR_NO_FILE => 'No audio file received.', + ]; + dbnToolsError($map[$code] ?? "Upload error (code {$code}).", 400, 'upload_error'); +} + +$file = $_FILES['audio']; +$maxBytes = 200 * 1024 * 1024; + +if ($file['size'] > $maxBytes) { + dbnToolsError('File too large. Maximum 200 MB.', 413, 'file_too_large'); +} + +$allowedExts = ['mp3', 'wav', 'ogg', 'oga', 'm4a', 'mp4', 'flac', 'webm', 'aac']; +$ext = strtolower(pathinfo($file['name'], PATHINFO_EXTENSION)); +if (!in_array($ext, $allowedExts, true)) { + dbnToolsError("Unsupported format: .{$ext}. Use MP3, WAV, OGG, M4A, FLAC, or WebM.", 415, 'unsupported_format'); +} + +// ── Build Whisper request ───────────────────────────────────────────────────── + +$whisperBase = 'http://194.93.49.14:20019'; +$endpoint = $diarize ? $whisperBase . '/transcribe/diarize' : $whisperBase . '/transcribe'; + +$boundary = '----DBN' . bin2hex(random_bytes(8)); +$body = "--{$boundary}\r\n"; +$body .= 'Content-Disposition: form-data; name="file"; filename="' . addslashes(basename($file['name'])) . '"' . "\r\n"; +$body .= "Content-Type: application/octet-stream\r\n\r\n"; + +$fileContents = file_get_contents($file['tmp_name']); +if ($fileContents === false) { + dbnToolsError('Could not read uploaded file.', 500, 'file_read_error'); +} +$body .= $fileContents . "\r\n"; + +if ($language !== 'auto') { + $body .= "--{$boundary}\r\n"; + $body .= "Content-Disposition: form-data; name=\"language\"\r\n\r\n"; + $body .= $language . "\r\n"; +} + +if ($diarize && $numSpeakers > 1) { + $body .= "--{$boundary}\r\n"; + $body .= "Content-Disposition: form-data; name=\"num_speakers\"\r\n\r\n"; + $body .= $numSpeakers . "\r\n"; +} + +$body .= "--{$boundary}--\r\n"; + +// ── Call Whisper ────────────────────────────────────────────────────────────── + +$t0 = microtime(true); + +if (function_exists('curl_init')) { + $ch = curl_init($endpoint); + curl_setopt_array($ch, [ + CURLOPT_RETURNTRANSFER => true, + CURLOPT_POST => true, + CURLOPT_POSTFIELDS => $body, + CURLOPT_HTTPHEADER => [ + "Content-Type: multipart/form-data; boundary={$boundary}", + 'Accept: application/json', + ], + CURLOPT_TIMEOUT => 600, + ]); + $whisperBody = curl_exec($ch); + $httpCode = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE); + $curlErr = curl_error($ch); + curl_close($ch); + + if ($whisperBody === false || $httpCode !== 200) { + dbnToolsError('Whisper service error (HTTP ' . $httpCode . '): ' . $curlErr, 502, 'whisper_error'); + } +} else { + $ctx = stream_context_create([ + 'http' => [ + 'method' => 'POST', + 'timeout' => 600, + 'header' => "Content-Type: multipart/form-data; boundary={$boundary}\r\nAccept: application/json\r\n", + 'content' => $body, + 'ignore_errors' => true, + ], + ]); + $whisperBody = @file_get_contents($endpoint, false, $ctx); + + if ($whisperBody === false) { + dbnToolsError('Whisper service unreachable. The GPU may be offline.', 502, 'whisper_unreachable'); + } +} + +$latencyMs = (int)round((microtime(true) - $t0) * 1000); + +$whisper = json_decode($whisperBody, true); +if (!is_array($whisper) || empty($whisper['text'])) { + dbnToolsError('Empty or invalid response from Whisper.', 502, 'whisper_empty'); +} + +// ── Speaker role labelling ──────────────────────────────────────────────────── + +$segments = is_array($whisper['segments'] ?? null) ? $whisper['segments'] : []; +$numDetected = (int)($whisper['num_speakers'] ?? 1); + +if ($numDetected < 2 && $segments) { + $uniqueSpeakers = array_filter(array_unique(array_column($segments, 'speaker'))); + if (count($uniqueSpeakers) > 1) $numDetected = count($uniqueSpeakers); +} + +$speakerRoles = null; +if ($diarize && $numDetected > 1 && $segments) { + $speakerRoles = dbnLabelSpeakerRoles($segments); +} + +// ── Respond ─────────────────────────────────────────────────────────────────── + +dbnToolsLogMetadata([ + 'tool' => 'transcribe', + 'language' => $language, + 'ok' => true, + 'latency_ms' => $latencyMs, +]); + +dbnToolsRespond([ + 'ok' => true, + 'tool' => 'transcribe', + 'transcript' => (string)$whisper['text'], + 'segments' => $segments, + 'speaker_roles' => $speakerRoles, + 'num_speakers' => $numDetected, + 'language' => (string)($whisper['language'] ?? $language), + 'duration_sec' => round((float)($whisper['duration_seconds'] ?? 0), 2), + 'model' => (string)($whisper['model'] ?? 'whisper'), + 'latency_ms' => $latencyMs, +]); + +// ── Speaker role labelling helper ───────────────────────────────────────────── + +function dbnLabelSpeakerRoles(array $segments): array +{ + $sample = array_slice( + array_values(array_filter($segments, fn($s) => isset($s['speaker']))), + 0, 20 + ); + if (!$sample) return []; + + $lines = array_map(fn($s) => "[{$s['speaker']}] " . trim((string)($s['text'] ?? '')), $sample); + $azure = new DbnAzureOpenAiGateway(); + $system = 'You are analyzing a legal proceeding transcript. ' + . 'Based on the first segments, identify the role of each speaker. ' + . 'Common roles in Norwegian legal proceedings: dommer (judge), advokat (lawyer), ' + . 'forelder (parent), barn (child), sakkyndig (expert witness), ' + . 'saksbehandler (caseworker), tolk (interpreter), vitne (witness), ' + . 'prosessfullmektig (counsel). ' + . 'Return ONLY valid JSON: {"SPEAKER_00":"dommer","SPEAKER_01":"forelder"}. ' + . 'Use Norwegian role names. Use "ukjent" if role cannot be determined. ' + . 'Only include speakers present in the input.'; + + try { + $text = $azure->chatText([ + ['role' => 'system', 'content' => $system], + ['role' => 'user', 'content' => implode("\n", $lines)], + ], ['temperature' => 0.1, 'max_tokens' => 200]); + $cleaned = preg_replace('/^```(?:json)?\s*|\s*```$/m', '', trim($text)); + $json = json_decode($cleaned, true); + return is_array($json) ? $json : []; + } catch (Throwable) { + return []; + } +} diff --git a/assets/css/tools.css b/assets/css/tools.css index 2f8c4fc..3b1f3a6 100644 --- a/assets/css/tools.css +++ b/assets/css/tools.css @@ -1103,3 +1103,112 @@ p { text-transform: uppercase; letter-spacing: 0.03em; } + +/* ── Transcribe tool ─────────────────────────────────────────────── */ + +.num-speakers-input { + width: 4.5rem; + padding: 0.25rem 0.5rem; + border: 1px solid var(--line); + border-radius: 6px; + background: #fff; + color: var(--ink); + font-size: 0.85rem; +} + +.transcript-roles { + display: flex; + flex-wrap: wrap; + gap: 0.4rem; + margin-bottom: 0.75rem; +} + +.speaker-tag { + display: inline-flex; + align-items: center; + gap: 0.3rem; + font-size: 0.72rem; + font-weight: 600; + padding: 0.2rem 0.55rem; + border-radius: 4px; +} + +.speaker-tag small { + font-weight: 400; + opacity: 0.75; +} + +.speaker-tag--0 { background: #dbeafe; color: #1d4ed8; } +.speaker-tag--1 { background: #ede9fe; color: #6d28d9; } +.speaker-tag--2 { background: #dcfce7; color: #166534; } +.speaker-tag--3 { background: #fef9c3; color: #854d0e; } +.speaker-tag--4 { background: #fee2e2; color: #991b1b; } +.speaker-tag--5 { background: #e7f5f2; color: #0f766e; } + +.transcript-box { + background: var(--bg); + border: 1px solid var(--line); + border-radius: 8px; + padding: 1rem; + max-height: 400px; + overflow-y: auto; + margin-bottom: 0.75rem; +} + +.transcript-text { + white-space: pre-wrap; + word-break: break-word; + font-size: 0.875rem; + line-height: 1.65; + font-family: inherit; + margin: 0; + color: var(--ink); +} + +.segment-details { + border: 1px solid var(--line); + border-radius: 8px; + margin-bottom: 0.75rem; +} + +.segment-summary { + font-size: 0.8rem; + color: var(--muted); + padding: 0.6rem 1rem; + cursor: pointer; + user-select: none; +} + +.segment-list { + padding: 0.25rem 0.75rem 0.75rem; + max-height: 280px; + overflow-y: auto; +} + +.segment-row { + display: flex; + gap: 0.6rem; + align-items: baseline; + padding: 0.2rem 0; + font-size: 0.78rem; + border-bottom: 1px solid var(--bg); +} + +.segment-time { + color: var(--muted); + font-family: ui-monospace, monospace; + min-width: 7rem; + flex-shrink: 0; +} + +.segment-text { + color: var(--ink); + line-height: 1.4; +} + +.transcript-downloads { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; + margin-top: 0.75rem; +} diff --git a/assets/js/tools.js b/assets/js/tools.js index a3adbaf..2adb355 100644 --- a/assets/js/tools.js +++ b/assets/js/tools.js @@ -4,6 +4,8 @@ const state = { }; let lastTimelineEvents = []; +let lastAudioFile = null; +let lastTranscriptData = null; const tools = { ask: { @@ -56,6 +58,16 @@ const tools = { usesLanguage: false, badge: 'deterministic first', }, + transcribe: { + kind: 'Audio Transcription', + title: 'Transcribe audio', + label: 'Audio file', + endpoint: 'api/transcribe.php', + payloadKey: null, + placeholder: '', + usesLanguage: false, + badge: 'Whisper / GPU', + }, }; const els = {}; @@ -91,6 +103,17 @@ document.addEventListener('DOMContentLoaded', () => { aliasSection: document.querySelector('#aliasSection'), addAliasRow: document.querySelector('#addAliasRow'), aliasRows: document.querySelector('#aliasRows'), + audioZone: document.querySelector('#audioZone'), + audioInput: document.querySelector('#audioInput'), + audioPrompt: document.querySelector('#audioPrompt'), + audioFileInfo: document.querySelector('#audioFileInfo'), + audioFileName: document.querySelector('#audioFileName'), + audioFileSize: document.querySelector('#audioFileSize'), + audioClear: document.querySelector('#audioClear'), + diarizeControl: document.querySelector('#diarizeControl'), + diarizeCheck: document.querySelector('#diarizeCheck'), + numSpeakersInput: document.querySelector('#numSpeakersInput'), + transcribeLangControl: document.querySelector('#transcribeLangControl'), }); els.tabs.forEach((button) => { @@ -101,8 +124,12 @@ document.addEventListener('DOMContentLoaded', () => { els.healthButton.addEventListener('click', checkHealth); setupUpload(); setupAliases(); + setupAudio(); els.results.addEventListener('click', (e) => { if (e.target.closest('#exportCsvBtn')) exportTimelineCSV(lastTimelineEvents); + if (e.target.closest('#dlTxt')) downloadTranscriptTxt(); + if (e.target.closest('#dlSrt')) downloadTranscriptSrt(); + if (e.target.closest('#dlVtt')) downloadTranscriptVtt(); }); setTool(state.activeTool); @@ -132,8 +159,14 @@ function setTool(toolName) { els.redactionControl.classList.toggle('is-hidden', toolName !== 'redact'); els.uploadZone.classList.toggle('is-hidden', toolName !== 'redact' && toolName !== 'timeline'); els.aliasSection.classList.toggle('is-hidden', toolName !== 'redact'); + els.audioZone.classList.toggle('is-hidden', toolName !== 'transcribe'); + els.diarizeControl.classList.toggle('is-hidden', toolName !== 'transcribe'); + els.transcribeLangControl.classList.toggle('is-hidden', toolName !== 'transcribe'); + els.input.classList.toggle('is-hidden', toolName === 'transcribe'); + els.inputLabel.classList.toggle('is-hidden', toolName === 'transcribe'); resetUpload(); resetAliases(); + resetAudio(); els.status.textContent = ''; renderTrace([]); } @@ -163,6 +196,12 @@ async function submitPasscode(event) { async function runTool(event) { event.preventDefault(); + + if (state.activeTool === 'transcribe') { + await runTranscribe(); + return; + } + const tool = tools[state.activeTool]; const text = els.input.value.trim(); if (!text) { @@ -389,7 +428,9 @@ async function postJson(url, payload) { function setBusy(isBusy) { const button = document.querySelector('#runButton'); button.disabled = isBusy; - button.textContent = isBusy ? 'Running...' : 'Run Tool'; + button.textContent = isBusy + ? (state.activeTool === 'transcribe' ? 'Transcribing...' : 'Running...') + : 'Run Tool'; } function currentLanguage() { @@ -447,6 +488,10 @@ function renderMainFinding(data) { return `

${escapeHtml(data.what_we_found || '')}

`; } +function currentTranscribeLang() { + return document.querySelector('input[name="transcribeLang"]:checked')?.value || 'auto'; +} + function renderEvidence(data) { const items = data.evidence_trail || data.sources || data.hits || []; if (!items.length) { @@ -513,6 +558,224 @@ function exportTimelineCSV(events) { URL.revokeObjectURL(url); } +async function runTranscribe() { + if (!lastAudioFile) { + els.status.textContent = 'Choose an audio file before transcribing.'; + return; + } + setBusy(true); + renderTrace([{ label: 'Sending to Whisper', detail: 'Uploading audio to cuttlefish GPU…', status: 'running' }]); + + try { + const formData = new FormData(); + formData.append('audio', lastAudioFile); + formData.append('language', currentTranscribeLang()); + if (els.diarizeCheck?.checked) { + formData.append('diarize', '1'); + const n = parseInt(els.numSpeakersInput?.value || '', 10); + if (n >= 2) formData.append('num_speakers', String(n)); + } + + const resp = await fetch('api/transcribe.php', { + method: 'POST', + credentials: 'same-origin', + body: formData, + }); + const data = await resp.json().catch(() => ({})); + if (!resp.ok || !data.ok) { + throw new Error(data.error?.message || `Transcription failed (HTTP ${resp.status}).`); + } + + lastTranscriptData = data; + renderTranscriptResults(data); + + const dur = data.duration_sec ? ` · Audio: ${Math.round(data.duration_sec)}s` : ''; + els.status.textContent = `Done in ${data.latency_ms || 0} ms${dur}.`; + } catch (error) { + els.status.textContent = error.message; + renderTrace([{ label: 'Transcription error', detail: error.message, status: 'warning' }]); + } finally { + setBusy(false); + } +} + +function renderTranscriptResults(data) { + const speakerRoles = data.speaker_roles || {}; + const segments = data.segments || []; + const hasSpeakers = segments.some((s) => s.speaker); + + const speakerOrder = [...new Set(segments.filter((s) => s.speaker).map((s) => s.speaker))]; + + const rolesHtml = speakerOrder.length + ? `

${speakerOrder.map((id, i) => { + const role = speakerRoles[id] || id; + return `${escapeHtml(role)}${escapeHtml(id)}`; + }).join('')}

` + : ''; + + const segmentsHtml = hasSpeakers + ? `
Segments (${segments.length}) +
${segments.map((seg) => { + const idx = speakerOrder.indexOf(seg.speaker); + const roleLabel = seg.speaker && speakerRoles[seg.speaker] + ? `${speakerRoles[seg.speaker]} (${seg.speaker})` + : (seg.speaker || ''); + return `
+ ${fmtTime(seg.start)}–${fmtTime(seg.end)} + ${seg.speaker ? `${escapeHtml(roleLabel)}` : ''} + ${escapeHtml(seg.text)} +
`; + }).join('')}
` + : ''; + + const dlSrtVtt = segments.length + ? ` + ` + : ''; + + els.results.innerHTML = ` +
+

Transcript

+ ${rolesHtml} +
${escapeHtml(data.transcript)}
+ ${segmentsHtml} +
+ + ${dlSrtVtt} +
+
`; + + const traceMeta = []; + if (data.duration_sec) traceMeta.push({ label: `Duration: ${Math.round(data.duration_sec)}s`, detail: '', status: 'complete' }); + if (data.language) traceMeta.push({ label: `Language: ${data.language}`, detail: '', status: 'complete' }); + if (data.num_speakers > 1) traceMeta.push({ label: `Speakers detected: ${data.num_speakers}`, detail: Object.entries(speakerRoles).map(([id, r]) => `${id}: ${r}`).join(', ') || '', status: 'complete' }); + if (data.model) traceMeta.push({ label: `Model: ${data.model}`, detail: '', status: 'complete' }); + renderTrace(traceMeta.length ? traceMeta : [{ label: 'Transcribed', detail: '', status: 'complete' }]); +} + +function fmtTime(secs) { + const h = Math.floor(secs / 3600); + const m = Math.floor((secs % 3600) / 60); + const s = Math.floor(secs % 60); + const parts = h > 0 ? [pad2(h), pad2(m), pad2(s)] : [pad2(m), pad2(s)]; + return parts.join(':'); +} + +function pad2(n) { return String(n).padStart(2, '0'); } + +function toSrtTime(secs) { + const h = Math.floor(secs / 3600); + const m = Math.floor((secs % 3600) / 60); + const s = Math.floor(secs % 60); + const ms = Math.round((secs % 1) * 1000); + return `${pad2(h)}:${pad2(m)}:${pad2(s)},${String(ms).padStart(3, '0')}`; +} + +function toVttTime(secs) { + return toSrtTime(secs).replace(',', '.'); +} + +function downloadBlob(blob, filename) { + const url = URL.createObjectURL(blob); + const a = Object.assign(document.createElement('a'), { href: url, download: filename }); + a.click(); + URL.revokeObjectURL(url); +} + +function downloadTranscriptTxt() { + if (!lastTranscriptData) return; + downloadBlob(new Blob([lastTranscriptData.transcript], { type: 'text/plain' }), 'transcript.txt'); +} + +function downloadTranscriptSrt() { + if (!lastTranscriptData?.segments?.length) return; + const { segments, speaker_roles: roles = {} } = lastTranscriptData; + const lines = segments.map((seg, i) => { + const spk = seg.speaker ? `[${roles[seg.speaker] || seg.speaker}] ` : ''; + return `${i + 1}\n${toSrtTime(seg.start)} --> ${toSrtTime(seg.end)}\n${spk}${seg.text}\n`; + }); + downloadBlob(new Blob([lines.join('\n')], { type: 'text/srt' }), 'transcript.srt'); +} + +function downloadTranscriptVtt() { + if (!lastTranscriptData?.segments?.length) return; + const { segments, speaker_roles: roles = {} } = lastTranscriptData; + const lines = ['WEBVTT\n']; + segments.forEach((seg) => { + const spk = seg.speaker ? `` : ''; + lines.push(`${toVttTime(seg.start)} --> ${toVttTime(seg.end)}\n${spk}${seg.text}\n`); + }); + downloadBlob(new Blob([lines.join('\n')], { type: 'text/vtt' }), 'transcript.vtt'); +} + +function resetAudio() { + lastAudioFile = null; + if (!els.audioInput) return; + els.audioInput.value = ''; + if (els.audioPrompt) els.audioPrompt.classList.remove('is-hidden'); + if (els.audioFileInfo) els.audioFileInfo.classList.add('is-hidden'); + if (els.audioFileName) els.audioFileName.textContent = ''; + if (els.audioFileSize) els.audioFileSize.textContent = ''; +} + +function setupAudio() { + if (!els.audioZone) return; + + els.audioZone.addEventListener('dragover', (e) => { + e.preventDefault(); + els.audioZone.classList.add('is-drag-over'); + }); + + els.audioZone.addEventListener('dragleave', (e) => { + if (!els.audioZone.contains(e.relatedTarget)) { + els.audioZone.classList.remove('is-drag-over'); + } + }); + + els.audioZone.addEventListener('drop', (e) => { + e.preventDefault(); + els.audioZone.classList.remove('is-drag-over'); + const f = e.dataTransfer?.files?.[0]; + if (f) handleAudio(f); + }); + + els.audioZone.addEventListener('click', (e) => { + if (e.target === els.audioClear || els.audioClear?.contains(e.target)) return; + if (e.target.tagName === 'LABEL') return; + els.audioInput.click(); + }); + + els.audioInput.addEventListener('change', () => { + const f = els.audioInput.files?.[0]; + if (f) handleAudio(f); + }); + + els.audioClear.addEventListener('click', () => { + resetAudio(); + els.status.textContent = ''; + }); +} + +function handleAudio(file) { + const allowedExts = ['mp3', 'wav', 'ogg', 'oga', 'm4a', 'mp4', 'flac', 'webm', 'aac']; + const ext = file.name.split('.').pop().toLowerCase(); + if (!allowedExts.includes(ext)) { + els.status.textContent = `Unsupported format: .${ext}. Use MP3, WAV, OGG, M4A, FLAC, or WebM.`; + return; + } + const sizeMB = file.size / 1024 / 1024; + if (sizeMB > 200) { + els.status.textContent = `File too large (${sizeMB.toFixed(1)} MB). Maximum 200 MB.`; + return; + } + lastAudioFile = file; + if (els.audioFileName) els.audioFileName.textContent = file.name; + if (els.audioFileSize) els.audioFileSize.textContent = `${sizeMB.toFixed(1)} MB`; + if (els.audioPrompt) els.audioPrompt.classList.add('is-hidden'); + if (els.audioFileInfo) els.audioFileInfo.classList.remove('is-hidden'); + els.status.textContent = `Ready: ${file.name} (${sizeMB.toFixed(1)} MB)`; +} + function renderEntityCounts(counts = {}) { const entries = Object.entries(counts).filter(([, count]) => Number(count) > 0); if (!entries.length) { diff --git a/index.php b/index.php index b0ca1a0..a713be0 100644 --- a/index.php +++ b/index.php @@ -61,7 +61,7 @@ $authenticated = dbnToolsIsAuthenticated();
-

Five tools, one corpus

+

Six tools, one suite

Ask @@ -88,6 +88,11 @@ $authenticated = dbnToolsIsAuthenticated();

Redact

Remove sensitive personal data with configurable Nordic / ECHR / Global profiles.

+
+ Transcribe +

Transcribe

+

Convert audio recordings to text with optional speaker separation and Norwegian role labelling.

+
@@ -189,6 +194,10 @@ $authenticated = dbnToolsIsAuthenticated(); Redact Privacy +
@@ -207,6 +216,20 @@ $authenticated = dbnToolsIsAuthenticated(); + + + + + +