Per-tool pages + multi-engine transcribe with expert controls

- Split monolithic index.php into per-tool pages (ask, search, summarize,
  timeline, redact, transcribe), each with its own URL and bookmarkable state
- Shared shell: includes/layout.php + layout_footer.php; shared form:
  includes/tool_form.php used by all text-tool pages
- index.php now redirects authenticated users to ask.php; unauthenticated
  users see the login gate only
- transcribe.php: engine selector (GPU/OpenAI/Azure), model size (small/
  medium/large-v3), diarize, language, expert settings (beam, VAD, task,
  initial prompt)
- api/transcribe.php: engine routing — GPU (cuttlefish), OpenAI BYOK,
  Azure AI Speech; passes model/beam/task/vad/prompt to Whisper server
- tools.js: data-active-tool body attr drives setTool() on load; <a> nav
  tabs skip click listeners; null guards on form/passcodeForm; engine radio
  toggle shows/hides BYOK key inputs and model selector; RTF shown in status
- tools.css: styles for BYOK inputs, expert settings panel, prompt textarea

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-13 22:14:20 +02:00
parent d178fbf295
commit eaff2a4d86
13 changed files with 789 additions and 257 deletions
+71
View File
@@ -1218,3 +1218,74 @@ p {
gap: 0.5rem;
margin-top: 0.75rem;
}
/* ── Transcribe extended controls ─────────────────────────────────────── */
/* Text input for a bring-your-own-key value: compact type, themed border and
   colours, 22rem wide but never wider than its container. */
.byok-input {
font-size: 0.82rem;
padding: 0.3rem 0.6rem;
border: 1px solid var(--line);
border-radius: 6px;
background: var(--bg);
color: var(--ink);
width: 22rem;
max-width: 100%;
}
/* Narrow modifier for short values; overrides only the width. */
.byok-input--short { width: 9rem; }
/* Focus ring drawn just outside the border. */
.byok-input:focus { outline: 2px solid var(--teal); outline-offset: 1px; }
/* Small muted note placed to the right of a control. */
.inline-hint {
font-size: 0.75rem;
color: var(--muted);
margin-left: 0.4rem;
}
/* Bordered container for the expert settings. The [open] selector further down
   suggests this is a <details> element — confirm against the markup. Padding is
   zero because the summary and body carry their own. */
.expert-settings {
border: 1px solid var(--line);
border-radius: 8px;
padding: 0;
margin-top: 0.75rem;
}
/* Clickable header row. list-style: none, together with the WebKit marker rule
   below, removes the built-in disclosure triangle so the custom glyph from
   ::before is the only indicator. */
.expert-summary {
font-size: 0.82rem;
font-weight: 600;
color: var(--muted);
cursor: pointer;
padding: 0.55rem 0.9rem;
list-style: none;
user-select: none;
}
.expert-summary::-webkit-details-marker { display: none; }
/* Custom disclosure glyph for the closed state.
   NOTE(review): the transition targets `transform`, but no rule in this section
   sets a transform on this pseudo-element (the open state swaps `content`
   instead), so the transition appears to have no effect — confirm. */
.expert-summary::before {
content: '▶ ';
font-size: 0.65rem;
transition: transform 0.15s;
}
/* Open state: swap the glyph to a down-pointing triangle. */
.expert-settings[open] .expert-summary::before { content: '▼ '; }
/* Panel body: vertical stack of fields, separated from the summary by a rule. */
.expert-body {
padding: 0.6rem 0.9rem 0.9rem;
border-top: 1px solid var(--line);
display: flex;
flex-direction: column;
gap: 0.4rem;
}
/* A single field laid out as a column; its margin-top adds to the parent's gap. */
.expert-field {
display: flex;
flex-direction: column;
gap: 0.25rem;
margin-top: 0.4rem;
}
/* Multi-line prompt input: full width with border-box sizing so padding and
   border do not overflow the container; the user may resize it vertically only. */
.prompt-textarea {
font-size: 0.82rem;
padding: 0.4rem 0.6rem;
border: 1px solid var(--line);
border-radius: 6px;
background: var(--bg);
color: var(--ink);
resize: vertical;
width: 100%;
box-sizing: border-box;
}
/* Same focus ring as .byok-input. */
.prompt-textarea:focus { outline: 2px solid var(--teal); outline-offset: 1px; }
/* Muted, regular-weight helper text. */
.control-hint { font-size: 0.74rem; color: var(--muted); font-weight: 400; }
+99 -17
View File
@@ -116,22 +116,26 @@ document.addEventListener('DOMContentLoaded', () => {
transcribeLangControl: document.querySelector('#transcribeLangControl'),
});
els.tabs.forEach((button) => {
button.addEventListener('click', () => setTool(button.dataset.tool));
els.tabs.forEach((tab) => {
if (tab.tagName !== 'A') {
tab.addEventListener('click', () => setTool(tab.dataset.tool));
}
});
els.form.addEventListener('submit', runTool);
els.passcodeForm.addEventListener('submit', submitPasscode);
els.form?.addEventListener('submit', runTool);
els.passcodeForm?.addEventListener('submit', submitPasscode);
els.healthButton.addEventListener('click', checkHealth);
setupUpload();
setupAliases();
setupAudio();
setupTranscribeControls();
els.results.addEventListener('click', (e) => {
if (e.target.closest('#exportCsvBtn')) exportTimelineCSV(lastTimelineEvents);
if (e.target.closest('#dlTxt')) downloadTranscriptTxt();
if (e.target.closest('#dlSrt')) downloadTranscriptSrt();
if (e.target.closest('#dlVtt')) downloadTranscriptVtt();
});
setTool(state.activeTool);
const activeTool = document.body.dataset.activeTool || state.activeTool;
setTool(activeTool);
if (state.authenticated) {
checkHealth();
@@ -559,16 +563,56 @@ function exportTimelineCSV(events) {
URL.revokeObjectURL(url);
}
/**
 * Reads the currently checked `engine` radio button.
 *
 * @returns {string} The checked radio's value, or 'gpu' when no engine radio
 *   is checked (or none exists on the page).
 */
function currentTranscribeEngine() {
  const checkedRadio = document.querySelector('input[name="engine"]:checked');
  if (!checkedRadio) {
    return 'gpu';
  }
  return checkedRadio.value;
}
/**
 * Reads the currently checked `model` radio button.
 *
 * @returns {string} The checked radio's value, or 'small' when no model radio
 *   is checked (or none exists on the page).
 */
function currentTranscribeModel() {
  // Substitute a stand-in object so the fallback and the real element are read the same way.
  const { value } = document.querySelector('input[name="model"]:checked') || { value: 'small' };
  return value;
}
/**
 * Reads the currently checked `beam_size` radio button.
 *
 * @returns {string} The checked radio's value (a string, as stored in the
 *   DOM), or '5' when no beam-size radio is checked.
 */
function currentBeamSize() {
  let beam = '5';
  const checkedRadio = document.querySelector('input[name="beam_size"]:checked');
  if (checkedRadio) {
    beam = checkedRadio.value;
  }
  return beam;
}
/**
 * Reads the currently checked `task` radio button.
 *
 * @returns {string} The checked radio's value, or 'transcribe' when no task
 *   radio is checked.
 */
function currentTask() {
  const checkedRadio = document.querySelector('input[name="task"]:checked');
  return (checkedRadio || { value: 'transcribe' }).value;
}
async function runTranscribe() {
if (!lastAudioFile) {
els.status.textContent = 'Choose an audio file before transcribing.';
return;
}
const engine = currentTranscribeEngine();
// BYOK key validation before starting the upload
if (engine === 'openai') {
const key = document.getElementById('openaiKeyInput')?.value?.trim();
if (!key || !key.startsWith('sk-')) {
els.status.textContent = 'Enter a valid OpenAI API key (sk-…) before running.';
return;
}
if (lastAudioFile.size > 25 * 1024 * 1024) {
els.status.textContent = 'OpenAI Whisper has a 25 MB file limit. Switch to GPU engine for this file.';
return;
}
}
if (engine === 'azure') {
const key = document.getElementById('azureKeyInput')?.value?.trim();
if (!key) {
els.status.textContent = 'Enter an Azure Speech API key before running.';
return;
}
}
setBusy(true);
const startTime = Date.now();
let elapsed = 0;
updateTranscribeTrace(0);
updateTranscribeTrace(0, engine);
els.status.textContent = 'Transcribing…';
const timer = setInterval(() => {
@@ -576,19 +620,38 @@ async function runTranscribe() {
const m = Math.floor(elapsed / 60);
const s = elapsed % 60;
els.status.textContent = m > 0 ? `Transcribing… ${m}:${pad2(s)}` : `Transcribing… ${s}s`;
updateTranscribeTrace(elapsed);
updateTranscribeTrace(elapsed, engine);
}, 1000);
try {
const formData = new FormData();
formData.append('audio', lastAudioFile);
formData.append('engine', engine);
formData.append('language', currentTranscribeLang());
formData.append('model', currentTranscribeModel());
formData.append('beam_size', currentBeamSize());
formData.append('task', currentTask());
const vadCheck = document.getElementById('vadFilterCheck');
if (vadCheck?.checked) formData.append('vad_filter', '1');
const initPrompt = document.getElementById('initPromptInput')?.value?.trim();
if (initPrompt) formData.append('initial_prompt', initPrompt);
if (els.diarizeCheck?.checked) {
formData.append('diarize', '1');
const n = parseInt(els.numSpeakersInput?.value || '', 10);
if (n >= 2) formData.append('num_speakers', String(n));
}
if (engine === 'openai') {
formData.append('openai_key', document.getElementById('openaiKeyInput')?.value?.trim());
}
if (engine === 'azure') {
formData.append('azure_key', document.getElementById('azureKeyInput')?.value?.trim());
formData.append('azure_region', document.getElementById('azureRegionInput')?.value?.trim() || 'norwayeast');
}
const resp = await fetch('api/transcribe.php', {
method: 'POST',
credentials: 'same-origin',
@@ -602,8 +665,11 @@ async function runTranscribe() {
lastTranscriptData = data;
renderTranscriptResults(data);
const dur = data.duration_sec ? ` · Audio: ${Math.round(data.duration_sec)}s` : '';
els.status.textContent = `Done in ${data.latency_ms || 0} ms${dur}.`;
const dur = data.duration_sec ? ` · Audio: ${Math.round(data.duration_sec)}s` : '';
const proc = data.processing_sec ? ` · GPU: ${data.processing_sec.toFixed(1)}s` : '';
const rtf = (data.duration_sec && data.processing_sec)
? ` · RTF: ${(data.processing_sec / data.duration_sec).toFixed(2)}` : '';
els.status.textContent = `Done in ${data.latency_ms || 0} ms${dur}${proc}${rtf}.`;
} catch (error) {
els.status.textContent = error.message;
renderTrace([{ label: 'Transcription error', detail: error.message, status: 'warning' }]);
@@ -613,19 +679,24 @@ async function runTranscribe() {
}
}
function updateTranscribeTrace(elapsed) {
function updateTranscribeTrace(elapsed, engine) {
const engineLabel = engine === 'openai' ? 'OpenAI API' : engine === 'azure' ? 'Azure Speech' : 'Whisper GPU';
let label, detail;
if (elapsed < 10) {
label = 'Uploading to Whisper';
detail = 'Sending audio to cuttlefish GPU…';
label = `Uploading to ${engineLabel}`;
detail = engine === 'gpu'
? 'Sending audio to cuttlefish GPU…'
: `Sending audio to ${engineLabel}`;
} else if (elapsed < 60) {
label = 'Processing on GPU';
detail = 'Whisper is transcribing. Large files take 13 minutes.';
label = `Processing ${engineLabel}`;
detail = engine === 'gpu'
? 'Whisper is transcribing. Large files take 13 minutes.'
: `${engineLabel} is processing the audio.`;
} else if (elapsed < 120) {
label = 'Still processing…';
detail = `${Math.floor(elapsed / 60)} min elapsed — Whisper is working through the audio.`;
label = 'Still processing…';
detail = `${Math.floor(elapsed / 60)} min elapsed — ${engineLabel} is working through the audio.`;
} else {
label = 'Still processing…';
label = 'Still processing…';
detail = `${Math.floor(elapsed / 60)} min ${pad2(elapsed % 60)}s — long recordings can take several minutes.`;
}
renderTrace([{ label, detail, status: 'running' }]);
@@ -789,6 +860,17 @@ function setupAudio() {
});
}
/**
 * Keeps the engine-dependent transcribe controls in step with the selected
 * engine radio button.
 *
 * The `is-hidden` class is toggled on three optional elements:
 *   - #openaiKeyControl: hidden unless the engine is 'openai'
 *   - #azureKeyControl:  hidden unless the engine is 'azure'
 *   - #modelControl:     hidden when the engine is 'openai' or 'azure'
 *
 * The sync runs on every `change` of an `engine` radio and once at setup.
 * The setup-time run matters because a radio that is already checked when the
 * page loads (pre-selected in the markup, or restored by the browser) does
 * not fire `change`, which would leave the controls showing the wrong state.
 *
 * Does nothing when the page has no `engine` radios. Each target element is
 * looked up with a null guard, so missing elements are ignored.
 *
 * @returns {void}
 */
function setupTranscribeControls() {
  const engineRadios = document.querySelectorAll('input[name="engine"]');
  // No engine selector on this page: nothing to wire up or sync.
  if (engineRadios.length === 0) return;

  const syncEngineControls = () => {
    const engine = currentTranscribeEngine();
    const usesHostedApi = engine === 'openai' || engine === 'azure';
    document.getElementById('openaiKeyControl')?.classList.toggle('is-hidden', engine !== 'openai');
    document.getElementById('azureKeyControl')?.classList.toggle('is-hidden', engine !== 'azure');
    document.getElementById('modelControl')?.classList.toggle('is-hidden', usesHostedApi);
  };

  engineRadios.forEach((radio) => {
    radio.addEventListener('change', syncEngineControls);
  });

  // Apply the current selection immediately; see the header comment for why.
  syncEngineControls();
}
function handleAudio(file) {
const allowedExts = ['mp3', 'wav', 'ogg', 'oga', 'm4a', 'mp4', 'flac', 'webm', 'aac'];
const ext = file.name.split('.').pop().toLowerCase();