feat: auto-select STT engine (Azure → Google Cloud → Whisper) and show provider in results
Removes user-facing engine/model/key/beam controls. The server now picks the best available engine automatically:
1. Microsoft Azure Speech — short clips (≤1MB, no diarization, audio/*)
2. Google Cloud Speech v2 — long audio, diarization, all languages
3. OpenAI Whisper GPU — local fallback
Results display which provider was used (e.g. "Transcribed with Google Cloud Speech") via transcript-engine-badge and traceMeta.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+1
-48
@@ -3,11 +3,9 @@ declare(strict_types=1);
|
||||
$toolName = 'transcribe';
|
||||
$toolTitle = 'Transcribe audio';
|
||||
$toolKind = 'Audio Transcription';
|
||||
$toolBadge = 'Whisper / GPU';
|
||||
$toolBadge = 'Azure · Google · Whisper';
|
||||
require_once __DIR__ . '/includes/layout.php';
|
||||
$azureConfigured = !empty(dbnToolsEnv('DBN_AZURE_SPEECH_KEY'));
|
||||
?>
|
||||
<script>window.DBN_AZURE_SPEECH_CONFIGURED = <?= $azureConfigured ? 'true' : 'false' ?>;</script>
|
||||
<form id="toolForm" class="tool-form">
|
||||
|
||||
<div class="lang-switcher" id="uiLangSwitcher" role="group" aria-label="UI language">
|
||||
@@ -17,33 +15,6 @@ $azureConfigured = !empty(dbnToolsEnv('DBN_AZURE_SPEECH_KEY'));
|
||||
<button type="button" class="lang-btn" data-lang="pl">🇵🇱 PL</button>
|
||||
</div>
|
||||
|
||||
<div class="control-row" id="engineControl">
|
||||
<span class="control-label" data-i18n="engine">Engine</span>
|
||||
<label><input type="radio" name="engine" value="gpu" checked id="engineGpu"> <span data-i18n="engineGpuLabel">GPU (cuttlefish RTX 3060)</span></label>
|
||||
<label><input type="radio" name="engine" value="openai" id="engineOpenai"> <span data-i18n="engineOpenaiLabel">OpenAI Whisper API</span></label>
|
||||
<label><input type="radio" name="engine" value="azure" id="engineAzure"> <span data-i18n="engineAzureLabel">Azure AI Speech (nb-NO)</span></label>
|
||||
</div>
|
||||
|
||||
<div class="control-row is-hidden" id="openaiKeyControl">
|
||||
<span class="control-label" data-i18n="apiKey">API Key</span>
|
||||
<input type="password" id="openaiKeyInput" name="openai_key" placeholder="sk-…" class="byok-input" autocomplete="off">
|
||||
<small class="control-hint inline-hint" data-i18n="apiKeyHint">Used for this request only, never stored. Max 25 MB.</small>
|
||||
</div>
|
||||
|
||||
<div class="control-row is-hidden" id="azureKeyControl">
|
||||
<span class="control-label" data-i18n="apiKey">API Key</span>
|
||||
<input type="password" id="azureKeyInput" name="azure_key" placeholder="Azure Speech key" class="byok-input" autocomplete="off">
|
||||
<span class="control-label" style="margin-left:1.25rem" data-i18n="region">Region</span>
|
||||
<input type="text" id="azureRegionInput" name="azure_region" placeholder="norwayeast" class="byok-input byok-input--short" value="norwayeast">
|
||||
</div>
|
||||
|
||||
<div class="control-row" id="modelControl">
|
||||
<span class="control-label" data-i18n="model">Model</span>
|
||||
<label><input type="radio" name="model" value="small"> <span data-i18n="modelFastest">Fastest</span> <small class="control-hint">(small)</small></label>
|
||||
<label><input type="radio" name="model" value="medium"> <span data-i18n="modelBalanced">Balanced</span> <small class="control-hint">(medium)</small></label>
|
||||
<label><input type="radio" name="model" value="large-v3" checked> <span data-i18n="modelBest">Best quality</span> ★ <small class="control-hint">(large-v3)</small></label>
|
||||
</div>
|
||||
|
||||
<div class="control-row" id="transcribeLangControl">
|
||||
<span class="control-label" data-i18n="transcribeLang">Audio language</span>
|
||||
<label><input type="radio" name="transcribeLang" value="no" checked> Norsk (nb)</label>
|
||||
@@ -93,24 +64,6 @@ $azureConfigured = !empty(dbnToolsEnv('DBN_AZURE_SPEECH_KEY'));
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<details class="expert-settings" id="expertSettings">
|
||||
<summary class="expert-summary" data-i18n="expertSettings">Advanced settings</summary>
|
||||
<div class="expert-body">
|
||||
<div class="control-row">
|
||||
<span class="control-label" data-i18n="beamSize">Beam size</span>
|
||||
<label><input type="radio" name="beam_size" value="1"> 1 <small class="control-hint" data-i18n="beamFastest">(fastest)</small></label>
|
||||
<label><input type="radio" name="beam_size" value="3"> 3</label>
|
||||
<label><input type="radio" name="beam_size" value="5" checked> 5 <small class="control-hint" data-i18n="beamBest">(best)</small></label>
|
||||
</div>
|
||||
<p class="upload-hint" data-i18n="beamSizeHint">Controls search breadth — higher values improve accuracy but take longer. 5 is recommended for legal recordings.</p>
|
||||
<div class="control-row">
|
||||
<span class="control-label" data-i18n="vadFilter">VAD filter</span>
|
||||
<label><input type="checkbox" name="vad_filter" id="vadFilterCheck" value="1" checked> <span data-i18n="vadFilterLabel">Remove silence</span></label>
|
||||
</div>
|
||||
<p class="upload-hint" data-i18n="vadFilterHint">Voice Activity Detection — skips silent passages before transcribing. Speeds up processing and prevents the model hallucinating on silence.</p>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<!-- Hidden stubs so tools.js refs don't crash on this page -->
|
||||
<div class="is-hidden" id="languageControl" aria-hidden="true">
|
||||
<input type="radio" name="language" value="en" checked>
|
||||
|
||||
Reference in New Issue
Block a user