feat(transcribe): Azure Speech server-side key, remove translate option, add beam/VAD hints

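- api/transcribe.php falls back to the DBN_AZURE_SPEECH_KEY and DBN_AZURE_SPEECH_REGION env vars when the request sends no azure_key / azure_region, so bring-your-own-key (BYOK) is no longer required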
- JS hides the Azure key input and skips the client-side missing-key check when window.DBN_AZURE_SPEECH_CONFIGURED is true
- Remove the Task control row (Transcribe / Translate to English radio buttons) from Advanced settings
- Add explanatory hint text for Beam size and VAD filter in all 4 UI languages (English, Norwegian, Ukrainian, Polish)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-14 23:23:33 +02:00
parent ff031d7a5b
commit e3d8daf6ca
3 changed files with 19 additions and 8 deletions
+3 -1
View File
@@ -70,7 +70,9 @@ if ($engine === 'openai') {
} elseif ($engine === 'azure') { } elseif ($engine === 'azure') {
$apiKey = trim((string)($_POST['azure_key'] ?? '')); $apiKey = trim((string)($_POST['azure_key'] ?? ''));
$region = preg_replace('/[^a-z0-9]/', '', strtolower(trim((string)($_POST['azure_region'] ?? 'norwayeast')))); if ($apiKey === '') $apiKey = (string)(dbnToolsEnv('DBN_AZURE_SPEECH_KEY') ?? '');
$region = preg_replace('/[^a-z0-9]/', '', strtolower(trim((string)($_POST['azure_region'] ?? ''))));
if ($region === '') $region = preg_replace('/[^a-z0-9]/', '', strtolower((string)(dbnToolsEnv('DBN_AZURE_SPEECH_REGION') ?? 'norwayeast')));
if (!$apiKey) { if (!$apiKey) {
dbnToolsError('An Azure Speech API key is required for the Azure engine.', 400, 'missing_azure_key'); dbnToolsError('An Azure Speech API key is required for the Azure engine.', 400, 'missing_azure_key');
} }
+12 -2
View File
@@ -56,8 +56,10 @@ const TRANSCRIBE_I18N = {
beamSize: 'Beam size', beamSize: 'Beam size',
beamFastest: '(fastest)', beamFastest: '(fastest)',
beamBest: '(best)', beamBest: '(best)',
beamSizeHint: 'Controls search breadth — higher values improve accuracy but take longer. 5 is recommended for legal recordings.',
vadFilter: 'VAD filter', vadFilter: 'VAD filter',
vadFilterLabel: 'Remove silence', vadFilterLabel: 'Remove silence',
vadFilterHint: 'Voice Activity Detection — skips silent passages before transcribing. Speeds up processing and prevents the model hallucinating on silence.',
run: 'Run', run: 'Run',
running: 'Transcribing…', running: 'Transcribing…',
runningOther: 'Running…', runningOther: 'Running…',
@@ -120,8 +122,10 @@ const TRANSCRIBE_I18N = {
beamSize: 'Beam size', beamSize: 'Beam size',
beamFastest: '(raskest)', beamFastest: '(raskest)',
beamBest: '(best)', beamBest: '(best)',
beamSizeHint: 'Styrer søkebredde — høyere verdier gir bedre nøyaktighet men tar lengre tid. 5 anbefales for juridiske opptak.',
vadFilter: 'VAD-filter', vadFilter: 'VAD-filter',
vadFilterLabel: 'Fjern stillhet', vadFilterLabel: 'Fjern stillhet',
vadFilterHint: 'Taleaktivitetsdeteksjon — hopper over stille partier før transkripsjon. Raskere behandling og forhindrer hallusinasjon på stillhet.',
run: 'Kjør', run: 'Kjør',
running: 'Transkriberer…', running: 'Transkriberer…',
runningOther: 'Kjører…', runningOther: 'Kjører…',
@@ -184,8 +188,10 @@ const TRANSCRIBE_I18N = {
beamSize: 'Розмір пучка', beamSize: 'Розмір пучка',
beamFastest: '(найшвидший)', beamFastest: '(найшвидший)',
beamBest: '(найкращий)', beamBest: '(найкращий)',
beamSizeHint: 'Ширина пошуку — більше значення підвищує точність, але займає більше часу. 5 рекомендовано для юридичних записів.',
vadFilter: 'VAD-фільтр', vadFilter: 'VAD-фільтр',
vadFilterLabel: 'Видалити тишу', vadFilterLabel: 'Видалити тишу',
vadFilterHint: 'Виявлення мовної активності — пропускає тихі ділянки перед транскрипцією. Прискорює обробку та запобігає галюцинаціям на тиші.',
run: 'Запустити', run: 'Запустити',
running: 'Транскрибування…', running: 'Транскрибування…',
runningOther: 'Виконання…', runningOther: 'Виконання…',
@@ -248,8 +254,10 @@ const TRANSCRIBE_I18N = {
beamSize: 'Rozmiar wiązki', beamSize: 'Rozmiar wiązki',
beamFastest: '(najszybszy)', beamFastest: '(najszybszy)',
beamBest: '(najlepszy)', beamBest: '(najlepszy)',
beamSizeHint: 'Kontroluje szerokość wyszukiwania — wyższe wartości poprawiają dokładność, ale wydłużają czas. 5 zalecane dla nagrań prawnych.',
vadFilter: 'Filtr VAD', vadFilter: 'Filtr VAD',
vadFilterLabel: 'Usuń ciszę', vadFilterLabel: 'Usuń ciszę',
vadFilterHint: 'Wykrywanie aktywności głosowej — pomija ciche fragmenty przed transkrypcją. Przyspiesza przetwarzanie i zapobiega halucynacjom na ciszy.',
run: 'Uruchom', run: 'Uruchom',
running: 'Transkrybowanie…', running: 'Transkrybowanie…',
runningOther: 'Uruchamianie…', runningOther: 'Uruchamianie…',
@@ -901,7 +909,7 @@ async function runTranscribe() {
return; return;
} }
} }
if (engine === 'azure') { if (engine === 'azure' && !window.DBN_AZURE_SPEECH_CONFIGURED) {
const key = document.getElementById('azureKeyInput')?.value?.trim(); const key = document.getElementById('azureKeyInput')?.value?.trim();
if (!key) { if (!key) {
els.status.textContent = currentUiT('missingAzureKey'); els.status.textContent = currentUiT('missingAzureKey');
@@ -1208,7 +1216,9 @@ function setupTranscribeControls() {
radio.addEventListener('change', () => { radio.addEventListener('change', () => {
const engine = currentTranscribeEngine(); const engine = currentTranscribeEngine();
document.getElementById('openaiKeyControl')?.classList.toggle('is-hidden', engine !== 'openai'); document.getElementById('openaiKeyControl')?.classList.toggle('is-hidden', engine !== 'openai');
document.getElementById('azureKeyControl')?.classList.toggle('is-hidden', engine !== 'azure'); // Hide azure key row if server has a pre-configured key
const azureNeedsKey = engine === 'azure' && !window.DBN_AZURE_SPEECH_CONFIGURED;
document.getElementById('azureKeyControl')?.classList.toggle('is-hidden', !azureNeedsKey);
document.getElementById('modelControl')?.classList.toggle('is-hidden', engine === 'openai' || engine === 'azure'); document.getElementById('modelControl')?.classList.toggle('is-hidden', engine === 'openai' || engine === 'azure');
}); });
}); });
+4 -5
View File
@@ -5,7 +5,9 @@ $toolTitle = 'Transcribe audio';
$toolKind = 'Audio Transcription'; $toolKind = 'Audio Transcription';
$toolBadge = 'Whisper / GPU'; $toolBadge = 'Whisper / GPU';
require_once __DIR__ . '/includes/layout.php'; require_once __DIR__ . '/includes/layout.php';
$azureConfigured = !empty(dbnToolsEnv('DBN_AZURE_SPEECH_KEY'));
?> ?>
<script>window.DBN_AZURE_SPEECH_CONFIGURED = <?= $azureConfigured ? 'true' : 'false' ?>;</script>
<form id="toolForm" class="tool-form"> <form id="toolForm" class="tool-form">
<div class="lang-switcher" id="uiLangSwitcher" role="group" aria-label="UI language"> <div class="lang-switcher" id="uiLangSwitcher" role="group" aria-label="UI language">
@@ -94,21 +96,18 @@ require_once __DIR__ . '/includes/layout.php';
<details class="expert-settings" id="expertSettings"> <details class="expert-settings" id="expertSettings">
<summary class="expert-summary" data-i18n="expertSettings">Advanced settings</summary> <summary class="expert-summary" data-i18n="expertSettings">Advanced settings</summary>
<div class="expert-body"> <div class="expert-body">
<div class="control-row">
<span class="control-label" data-i18n="task">Task</span>
<label><input type="radio" name="task" value="transcribe" checked> <span data-i18n="taskTranscribe">Transcribe</span></label>
<label><input type="radio" name="task" value="translate"> <span data-i18n="taskTranslate">Translate to English</span></label>
</div>
<div class="control-row"> <div class="control-row">
<span class="control-label" data-i18n="beamSize">Beam size</span> <span class="control-label" data-i18n="beamSize">Beam size</span>
<label><input type="radio" name="beam_size" value="1"> 1 <small class="control-hint" data-i18n="beamFastest">(fastest)</small></label> <label><input type="radio" name="beam_size" value="1"> 1 <small class="control-hint" data-i18n="beamFastest">(fastest)</small></label>
<label><input type="radio" name="beam_size" value="3"> 3</label> <label><input type="radio" name="beam_size" value="3"> 3</label>
<label><input type="radio" name="beam_size" value="5" checked> 5 <small class="control-hint" data-i18n="beamBest">(best)</small></label> <label><input type="radio" name="beam_size" value="5" checked> 5 <small class="control-hint" data-i18n="beamBest">(best)</small></label>
</div> </div>
<p class="upload-hint" data-i18n="beamSizeHint">Controls search breadth — higher values improve accuracy but take longer. 5 is recommended for legal recordings.</p>
<div class="control-row"> <div class="control-row">
<span class="control-label" data-i18n="vadFilter">VAD filter</span> <span class="control-label" data-i18n="vadFilter">VAD filter</span>
<label><input type="checkbox" name="vad_filter" id="vadFilterCheck" value="1" checked> <span data-i18n="vadFilterLabel">Remove silence</span></label> <label><input type="checkbox" name="vad_filter" id="vadFilterCheck" value="1" checked> <span data-i18n="vadFilterLabel">Remove silence</span></label>
</div> </div>
<p class="upload-hint" data-i18n="vadFilterHint">Voice Activity Detection — skips silent passages before transcribing. Speeds up processing and prevents the model hallucinating on silence.</p>
</div> </div>
</details> </details>