From 26f4e2231b508e1ff19c6a5cf2a731150b28f383 Mon Sep 17 00:00:00 2001 From: davegilligan Date: Thu, 14 May 2026 22:20:11 +0200 Subject: [PATCH] feat(transcribe): Norwegian defaults, vocabulary presets, multi-file court day queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Default language → nb (Bokmål); auto-detect demoted with warning note - Default model → large-v3; VAD filter on by default - Vocabulary prompt promoted to main form with 4 preset buttons (Barnerett/CPS, Rettssak/tingrett, Generell norsk, Egendefinert) - Multi-file upload queue: drop/select multiple clips, numbered list UI - Sequential queue processing with cumulative time_offset per clip - Backend shifts segment timestamps so SRT/VTT covers full court day - Merged transcript + segments across all clips for single download Co-Authored-By: Claude Sonnet 4.6 --- api/transcribe.php | 11 ++ assets/css/tools.css | 96 ++++++++++++- assets/js/tools.js | 317 +++++++++++++++++++++++++++++-------------- transcribe.php | 70 ++++++---- 4 files changed, 356 insertions(+), 138 deletions(-) diff --git a/api/transcribe.php b/api/transcribe.php index ea1457e..e9787f5 100644 --- a/api/transcribe.php +++ b/api/transcribe.php @@ -53,6 +53,7 @@ if ($engine === 'openai' && $file['size'] > 25 * 1024 * 1024) { dbnToolsError('OpenAI Whisper API has a 25 MB file limit. Use the GPU engine for larger files.', 413, 'openai_file_too_large'); } +$timeOffset = max(0.0, (float)($_POST['time_offset'] ?? 0)); $t0 = microtime(true); // ── Route to engine ─────────────────────────────────────────────────────────── @@ -79,6 +80,16 @@ if ($engine === 'openai') { $latencyMs = (int)round((microtime(true) - $t0) * 1000); +// ── Shift segment timestamps for multi-clip sessions ───────────────────────── + +if ($timeOffset > 0.0 && !empty($result['segments'])) { + foreach ($result['segments'] as &$seg) { + $seg['start'] = round(($seg['start'] ?? 0) + $timeOffset, 3); + $seg['end'] = round(($seg['end'] ?? 0) + $timeOffset, 3); + } + unset($seg); +} + // ── Speaker role labelling (GPU + diarize only) ─────────────────────────────── $segments = $result['segments'] ?? []; diff --git a/assets/css/tools.css b/assets/css/tools.css index aef4768..644ee6b 100644 --- a/assets/css/tools.css +++ b/assets/css/tools.css @@ -940,10 +940,12 @@ p { .upload-file { display: flex; - align-items: center; - justify-content: center; - gap: 10px; + flex-direction: column; + align-items: flex-start; + gap: 6px; min-height: 48px; + width: 100%; + padding: 0.4rem 0; } .upload-filename { @@ -1288,4 +1290,92 @@ p { } .prompt-textarea:focus { outline: 2px solid var(--teal); outline-offset: 1px; } +/* ─── Vocabulary presets ──────────────────────────────────────────────────── */ + +.vocab-presets { + display: flex; + flex-wrap: wrap; + align-items: center; + gap: 0.35rem; + margin-bottom: 0.35rem; +} + +.vocab-btn { + font-size: 0.78rem; + padding: 0.2rem 0.6rem; + border: 1px solid var(--line); + border-radius: 20px; + background: var(--bg); + color: var(--ink); + cursor: pointer; + transition: background 0.15s, border-color 0.15s, color 0.15s; +} + +.vocab-btn:hover { + background: var(--soft-teal, #e8f7f6); + border-color: var(--teal); + color: var(--teal); +} + +.vocab-btn.is-active { + background: var(--teal); + border-color: var(--teal); + color: #fff; + font-weight: 600; +} + +/* ─── Audio queue list ────────────────────────────────────────────────────── */ + +.audio-queue-list { + list-style: none; + padding: 0; + margin: 0; + width: 100%; + text-align: left; +} + +.queue-item { + display: flex; + align-items: baseline; + gap: 0.5rem; + padding: 3px 0; + font-size: 0.84rem; +} + +.queue-num { + flex-shrink: 0; + min-width: 1.4rem; + font-variant-numeric: tabular-nums; + color: var(--muted); + font-size: 0.78rem; +} + +.queue-item--processing .queue-num { color: var(--teal); } +.queue-item--done .queue-num { color: #22a06b; } +.queue-item--error .queue-num { color: var(--coral, #e05); } + +.queue-name { + flex: 1; + font-weight: 500; + word-break: break-all; + color: var(--ink); +} + +.queue-item--done .queue-name { color: var(--muted); } +.queue-item--error .queue-name { color: var(--coral, #e05); } + +.queue-size { + flex-shrink: 0; + font-size: 0.76rem; + color: var(--muted); +} + +.audio-queue-actions { + display: flex; + align-items: center; + gap: 1rem; + margin-top: 0.5rem; + font-size: 0.82rem; +} + .control-hint { font-size: 0.74rem; color: var(--muted); font-weight: 400; } diff --git a/assets/js/tools.js b/assets/js/tools.js index 43d69b8..99a1b3a 100644 --- a/assets/js/tools.js +++ b/assets/js/tools.js @@ -4,9 +4,16 @@ const state = { }; let lastTimelineEvents = []; -let lastAudioFile = null; +let audioQueue = []; // [{file, status: 'pending'|'processing'|'done'|'error', result}] let lastTranscriptData = null; +const VOCAB_PRESETS = { + barnerett: 'Barnevernet, Fylkesnemnda, barnevernloven, barneloven, barnets beste, samvær, foreldreansvar, omsorgsovertakelse, sakkyndig, advokat, prosessfullmektig, dommer, vitne, tolk, bistandsadvokat, fosterforeldre, fosterhjem, akuttvedtak, statsforvalter, Bufetat, saksbehandler, rettslig medhold, begjæring, samtykke, tilsynsfører', + rettssak: 'Tingretten, lagmannsretten, Høyesterett, statsadvokat, aktor, forsvarer, tiltalte, fornærmede, stevning, tilsvar, prosesskriv, rettsbok, bevisføring, anke, dom, kjennelse, rettsmekling, forlik, saksøker, saksøkte, vitne, ed, prosessfullmektig', + generell: 'bokmål, nynorsk, statsforvalter, kommunen, forvaltning, klage, vedtak, rettigheter, plikter, protokoll, referat, rapport, dokumentasjon, velferd', + custom: '', +}; + const tools = { ask: { kind: 'Source-grounded Legal Ask', @@ -107,13 +114,14 @@ document.addEventListener('DOMContentLoaded', () => { audioInput: document.querySelector('#audioInput'), audioPrompt: document.querySelector('#audioPrompt'), audioFileInfo: document.querySelector('#audioFileInfo'), - audioFileName: document.querySelector('#audioFileName'), - audioFileSize: document.querySelector('#audioFileSize'), + audioQueueList: document.querySelector('#audioQueueList'), audioClear: document.querySelector('#audioClear'), diarizeControl: document.querySelector('#diarizeControl'), diarizeCheck: document.querySelector('#diarizeCheck'), numSpeakersInput: document.querySelector('#numSpeakersInput'), transcribeLangControl: document.querySelector('#transcribeLangControl'), + initPromptInput: document.querySelector('#initPromptInput'), + vocabPresets: document.querySelector('#vocabPresets'), }); els.tabs.forEach((tab) => { @@ -128,6 +136,7 @@ document.addEventListener('DOMContentLoaded', () => { setupAliases(); setupAudio(); setupTranscribeControls(); + setupVocabPresets(); els.results.addEventListener('click', (e) => { if (e.target.closest('#exportCsvBtn')) exportTimelineCSV(lastTimelineEvents); if (e.target.closest('#dlTxt')) downloadTranscriptTxt(); @@ -434,8 +443,8 @@ function setBusy(isBusy) { const button = document.querySelector('#runButton'); button.disabled = isBusy; button.textContent = isBusy - ? (state.activeTool === 'transcribe' ? 'Transcribing...' : 'Running...') - : 'Run Tool'; + ? (state.activeTool === 'transcribe' ? 'Transkriberer...' : 'Kjører...') + : 'Kjør'; } function currentLanguage() { @@ -581,123 +590,171 @@ function currentTask() { } async function runTranscribe() { - if (!lastAudioFile) { - els.status.textContent = 'Choose an audio file before transcribing.'; + if (!audioQueue.length) { + els.status.textContent = 'Velg minst én lydfil før transkripsjon.'; return; } const engine = currentTranscribeEngine(); - // BYOK key validation before starting the upload if (engine === 'openai') { const key = document.getElementById('openaiKeyInput')?.value?.trim(); if (!key || !key.startsWith('sk-')) { - els.status.textContent = 'Enter a valid OpenAI API key (sk-…) before running.'; + els.status.textContent = 'Legg inn en gyldig OpenAI API-nøkkel (sk-…) før du kjører.'; return; } - if (lastAudioFile.size > 25 * 1024 * 1024) { - els.status.textContent = 'OpenAI Whisper has a 25 MB file limit. Switch to GPU engine for this file.'; + const oversized = audioQueue.find((item) => item.file.size > 25 * 1024 * 1024); + if (oversized) { + els.status.textContent = `OpenAI Whisper har 25 MB-grense. Bruk GPU-motor for ${oversized.file.name}.`; return; } } if (engine === 'azure') { const key = document.getElementById('azureKeyInput')?.value?.trim(); if (!key) { - els.status.textContent = 'Enter an Azure Speech API key before running.'; + els.status.textContent = 'Legg inn Azure Speech API-nøkkel før du kjører.'; return; } } setBusy(true); - const startTime = Date.now(); - let elapsed = 0; - updateTranscribeTrace(0, engine); - els.status.textContent = 'Transcribing…'; + const initPrompt = els.initPromptInput?.value?.trim() || ''; + const diarize = els.diarizeCheck?.checked ?? false; + const numSpeakers = parseInt(els.numSpeakersInput?.value || '', 10); + const vadFilter = document.getElementById('vadFilterCheck')?.checked ?? false; + const total = audioQueue.length; - const timer = setInterval(() => { - elapsed = Math.floor((Date.now() - startTime) / 1000); - const m = Math.floor(elapsed / 60); - const s = elapsed % 60; - els.status.textContent = m > 0 ? `Transcribing… ${m}:${pad2(s)}` : `Transcribing… ${s}s`; - updateTranscribeTrace(elapsed, engine); - }, 1000); + // Reset all items to pending before starting + audioQueue.forEach((item) => { item.status = 'pending'; item.result = null; }); + renderAudioQueue(); - try { - const formData = new FormData(); - formData.append('audio', lastAudioFile); - formData.append('engine', engine); - formData.append('language', currentTranscribeLang()); - formData.append('model', currentTranscribeModel()); - formData.append('beam_size', currentBeamSize()); - formData.append('task', currentTask()); + let cumulativeOffset = 0; + let allTranscripts = []; + let allSegments = []; + let firstSpeakerRoles = null; + let lastResult = null; - const vadCheck = document.getElementById('vadFilterCheck'); - if (vadCheck?.checked) formData.append('vad_filter', '1'); + for (let i = 0; i < audioQueue.length; i++) { + const item = audioQueue[i]; + item.status = 'processing'; + renderAudioQueue(); - const initPrompt = document.getElementById('initPromptInput')?.value?.trim(); - if (initPrompt) formData.append('initial_prompt', initPrompt); + const startTime = Date.now(); + let elapsed = 0; + const clipLabel = total > 1 ? `Klipp ${i + 1}/${total}` : 'Transkriberer'; + els.status.textContent = `${clipLabel}…`; - if (els.diarizeCheck?.checked) { - formData.append('diarize', '1'); - const n = parseInt(els.numSpeakersInput?.value || '', 10); - if (n >= 2) formData.append('num_speakers', String(n)); + const timer = setInterval(() => { + elapsed = Math.floor((Date.now() - startTime) / 1000); + const m = Math.floor(elapsed / 60); + const s = elapsed % 60; + const t = m > 0 ? `${m}:${pad2(s)}` : `${s}s`; + els.status.textContent = `${clipLabel}… ${t}`; + updateTranscribeTrace(elapsed, engine, clipLabel); + }, 1000); + + try { + const formData = new FormData(); + formData.append('audio', item.file); + formData.append('engine', engine); + formData.append('language', currentTranscribeLang()); + formData.append('model', currentTranscribeModel()); + formData.append('beam_size', currentBeamSize()); + formData.append('task', currentTask()); + formData.append('time_offset', String(cumulativeOffset)); + if (vadFilter) formData.append('vad_filter', '1'); + if (initPrompt) formData.append('initial_prompt', initPrompt); + if (diarize) { + formData.append('diarize', '1'); + if (numSpeakers >= 2) formData.append('num_speakers', String(numSpeakers)); + } + if (engine === 'openai') { + formData.append('openai_key', document.getElementById('openaiKeyInput')?.value?.trim()); + } + if (engine === 'azure') { + formData.append('azure_key', document.getElementById('azureKeyInput')?.value?.trim()); + formData.append('azure_region', document.getElementById('azureRegionInput')?.value?.trim() || 'norwayeast'); + } + + const resp = await fetch('api/transcribe.php', { + method: 'POST', + credentials: 'same-origin', + body: formData, + }); + const data = await resp.json().catch(() => ({})); + if (!resp.ok || !data.ok) { + throw new Error(data.error?.message || `Transkripsjon feilet (HTTP ${resp.status}).`); + } + + clearInterval(timer); + item.status = 'done'; + item.result = data; + lastResult = data; + + allTranscripts.push(data.transcript || ''); + allSegments.push(...(data.segments || [])); + if (!firstSpeakerRoles && data.speaker_roles && Object.keys(data.speaker_roles).length) { + firstSpeakerRoles = data.speaker_roles; + } + + // Advance offset by this clip's duration (fall back to file-size estimate at 128 kbps) + cumulativeOffset += data.duration_sec > 0 + ? data.duration_sec + : item.file.size / (128 * 1024 / 8); + + } catch (err) { + clearInterval(timer); + item.status = 'error'; + renderAudioQueue(); + els.status.textContent = `${clipLabel}: ${err.message}`; + renderTrace([{ label: `Feil – ${clipLabel}`, detail: err.message, status: 'warning' }]); + setBusy(false); + return; } - if (engine === 'openai') { - formData.append('openai_key', document.getElementById('openaiKeyInput')?.value?.trim()); - } - if (engine === 'azure') { - formData.append('azure_key', document.getElementById('azureKeyInput')?.value?.trim()); - formData.append('azure_region', document.getElementById('azureRegionInput')?.value?.trim() || 'norwayeast'); - } - - const resp = await fetch('api/transcribe.php', { - method: 'POST', - credentials: 'same-origin', - body: formData, - }); - const data = await resp.json().catch(() => ({})); - if (!resp.ok || !data.ok) { - throw new Error(data.error?.message || `Transcription failed (HTTP ${resp.status}).`); - } - - lastTranscriptData = data; - renderTranscriptResults(data); - - const dur = data.duration_sec ? ` · Audio: ${Math.round(data.duration_sec)}s` : ''; - const proc = data.processing_sec ? ` · GPU: ${data.processing_sec.toFixed(1)}s` : ''; - const rtf = (data.duration_sec && data.processing_sec) - ? ` · RTF: ${(data.processing_sec / data.duration_sec).toFixed(2)}` : ''; - els.status.textContent = `Done in ${data.latency_ms || 0} ms${dur}${proc}${rtf}.`; - } catch (error) { - els.status.textContent = error.message; - renderTrace([{ label: 'Transcription error', detail: error.message, status: 'warning' }]); - } finally { - clearInterval(timer); - setBusy(false); + renderAudioQueue(); } + + // Merge results + const merged = { + ...lastResult, + transcript: allTranscripts.join('\n\n'), + segments: allSegments, + speaker_roles: firstSpeakerRoles, + num_speakers: lastResult?.num_speakers ?? 0, + duration_sec: cumulativeOffset, + }; + + lastTranscriptData = merged; + renderTranscriptResults(merged); + + const totalSec = Math.round(cumulativeOffset); + const totalMin = Math.floor(totalSec / 60); + const remSec = totalSec % 60; + const durLabel = totalMin > 0 ? `${totalMin}m ${remSec}s` : `${totalSec}s`; + const clipCount = total > 1 ? ` · ${total} klipp` : ''; + els.status.textContent = `Ferdig${clipCount} · Total lyd: ${durLabel}`; + setBusy(false); } -function updateTranscribeTrace(elapsed, engine) { +function updateTranscribeTrace(elapsed, engine, clipLabel = 'Transkriberer') { const engineLabel = engine === 'openai' ? 'OpenAI API' : engine === 'azure' ? 'Azure Speech' : 'Whisper GPU'; let label, detail; if (elapsed < 10) { - label = `Uploading to ${engineLabel}`; - detail = engine === 'gpu' - ? 'Sending audio to cuttlefish GPU…' - : `Sending audio to ${engineLabel}…`; + label = `${clipLabel} — laster opp til ${engineLabel}`; + detail = engine === 'gpu' ? 'Sender lyd til cuttlefish GPU…' : `Sender lyd til ${engineLabel}…`; } else if (elapsed < 60) { - label = `Processing — ${engineLabel}`; + label = `${clipLabel} — ${engineLabel} transkriberer`; detail = engine === 'gpu' - ? 'Whisper is transcribing. Large files take 1–3 minutes.' - : `${engineLabel} is processing the audio.`; + ? 'Whisper transkriberer. Store filer tar 1–3 minutter.' + : `${engineLabel} behandler lyden.`; } else if (elapsed < 120) { - label = 'Still processing…'; - detail = `${Math.floor(elapsed / 60)} min elapsed — ${engineLabel} is working through the audio.`; + label = `${clipLabel} — behandler fortsatt…`; + detail = `${Math.floor(elapsed / 60)} min gått — ${engineLabel} jobber gjennom lyden.`; } else { - label = 'Still processing…'; - detail = `${Math.floor(elapsed / 60)} min ${pad2(elapsed % 60)}s — long recordings can take several minutes.`; + label = `${clipLabel} — behandler fortsatt…`; + detail = `${Math.floor(elapsed / 60)} min ${pad2(elapsed % 60)}s — lange opptak tar flere minutter.`; } renderTrace([{ label, detail, status: 'running' }]); } @@ -812,13 +869,12 @@ function downloadTranscriptVtt() { } function resetAudio() { - lastAudioFile = null; + audioQueue = []; if (!els.audioInput) return; els.audioInput.value = ''; if (els.audioPrompt) els.audioPrompt.classList.remove('is-hidden'); if (els.audioFileInfo) els.audioFileInfo.classList.add('is-hidden'); - if (els.audioFileName) els.audioFileName.textContent = ''; - if (els.audioFileSize) els.audioFileSize.textContent = ''; + if (els.audioQueueList) els.audioQueueList.innerHTML = ''; } function setupAudio() { @@ -838,20 +894,20 @@ function setupAudio() { els.audioZone.addEventListener('drop', (e) => { e.preventDefault(); els.audioZone.classList.remove('is-drag-over'); - const f = e.dataTransfer?.files?.[0]; - if (f) handleAudio(f); + if (e.dataTransfer?.files?.length) handleAudioFiles(e.dataTransfer.files); }); els.audioZone.addEventListener('click', (e) => { if (e.target === els.audioClear || els.audioClear?.contains(e.target)) return; if (e.target === els.audioInput) return; if (e.target.tagName === 'LABEL') return; + if (e.target.closest('#audioFileInfo') && e.target.tagName !== 'LABEL') return; els.audioInput.click(); }); els.audioInput.addEventListener('change', () => { - const f = els.audioInput.files?.[0]; - if (f) handleAudio(f); + if (els.audioInput.files?.length) handleAudioFiles(els.audioInput.files); + els.audioInput.value = ''; }); els.audioClear.addEventListener('click', () => { @@ -871,24 +927,75 @@ function setupTranscribeControls() { }); } -function handleAudio(file) { +function setupVocabPresets() { + if (!els.vocabPresets) return; + els.vocabPresets.addEventListener('click', (e) => { + const btn = e.target.closest('.vocab-btn'); + if (!btn) return; + const preset = btn.dataset.preset; + if (preset && els.initPromptInput) { + els.initPromptInput.value = VOCAB_PRESETS[preset] ?? ''; + els.vocabPresets.querySelectorAll('.vocab-btn').forEach((b) => b.classList.remove('is-active')); + btn.classList.add('is-active'); + if (preset !== 'custom') els.initPromptInput.focus(); + } + }); +} + +function handleAudioFiles(fileList) { const allowedExts = ['mp3', 'wav', 'ogg', 'oga', 'm4a', 'mp4', 'flac', 'webm', 'aac']; - const ext = file.name.split('.').pop().toLowerCase(); - if (!allowedExts.includes(ext)) { - els.status.textContent = `Unsupported format: .${ext}. Use MP3, WAV, OGG, M4A, FLAC, or WebM.`; + let added = 0; + let skipped = []; + + Array.from(fileList).forEach((file) => { + const ext = file.name.split('.').pop().toLowerCase(); + if (!allowedExts.includes(ext)) { + skipped.push(file.name); + return; + } + const sizeMB = file.size / 1024 / 1024; + if (sizeMB > 200) { + skipped.push(`${file.name} (${sizeMB.toFixed(1)} MB — maks 200 MB)`); + return; + } + audioQueue.push({ file, status: 'pending', result: null }); + added++; + }); + + if (skipped.length) { + els.status.textContent = `Hoppet over: ${skipped.join(', ')}`; + } else if (added > 0) { + els.status.textContent = `${audioQueue.length} fil${audioQueue.length !== 1 ? 'er' : ''} i køen.`; + } + + renderAudioQueue(); +} + +function renderAudioQueue() { + if (!els.audioQueueList) return; + + if (!audioQueue.length) { + els.audioPrompt.classList.remove('is-hidden'); + els.audioFileInfo.classList.add('is-hidden'); return; } - const sizeMB = file.size / 1024 / 1024; - if (sizeMB > 200) { - els.status.textContent = `File too large (${sizeMB.toFixed(1)} MB). Maximum 200 MB.`; - return; - } - lastAudioFile = file; - if (els.audioFileName) els.audioFileName.textContent = file.name; - if (els.audioFileSize) els.audioFileSize.textContent = `${sizeMB.toFixed(1)} MB`; - if (els.audioPrompt) els.audioPrompt.classList.add('is-hidden'); - if (els.audioFileInfo) els.audioFileInfo.classList.remove('is-hidden'); - els.status.textContent = `Ready: ${file.name} (${sizeMB.toFixed(1)} MB)`; + + els.audioPrompt.classList.add('is-hidden'); + els.audioFileInfo.classList.remove('is-hidden'); + + els.audioQueueList.innerHTML = audioQueue.map((item, i) => { + const sizeMB = (item.file.size / 1024 / 1024).toFixed(1); + const statusIcon = item.status === 'processing' ? '⏳' + : item.status === 'done' ? '✓' + : item.status === 'error' ? '✗' + : `${i + 1}.`; + const statusClass = `queue-item queue-item--${item.status}`; + return `
  • + ${statusIcon} + ${escapeHtml(item.file.name)} + ${sizeMB} MB +
  • `; + }).join(''); } function renderEntityCounts(counts = {}) { diff --git a/transcribe.php b/transcribe.php index b40ca9b..b5a0a81 100644 --- a/transcribe.php +++ b/transcribe.php @@ -30,65 +30,75 @@ require_once __DIR__ . '/includes/layout.php';
    Model - - - + + +
    - Language - - + Språk + +
    - Speakers - - Count - + Talere + + Antall + +
    + +
    +
    + Ordliste + + + + +
    + +

    Hjelper Whisper gjenkjenne fagtermer. Ikke inkludert i utskriften.

    - +
    -

    Drop audio file here, or

    -

    MP3, WAV, OGG, M4A, FLAC, WEBM — max 200 MB

    +

    Slipp lydfil(er) her, eller

    +

    MP3, WAV, OGG, M4A, FLAC, WEBM — maks 200 MB per fil

    - Expert settings + Ekspertinnstillinger
    - Task - - + Oppgave + +
    Beam size - +
    - VAD filter - -
    -
    - - -

    Helps Whisper recognise specialist terms. Not included in output.

    + VAD-filter +
    @@ -110,14 +120,14 @@ require_once __DIR__ . '/includes/layout.php';
    -

    Ready

    -

    Choose a tool, run a request, and the answer will show the evidence trail beside it.

    +

    Klar

    +

    Velg et verktøy, kjør en forespørsel, og svaret vises her.