feat(transcribe): Azure Speech server-side key, remove translate option, add beam/VAD hints
- api/transcribe.php falls back to the DBN_AZURE_SPEECH_KEY / DBN_AZURE_SPEECH_REGION env vars, so BYOK is not required
- JS hides the Azure key input when DBN_AZURE_SPEECH_CONFIGURED is true
- Remove the "Translate to English" task option from Advanced settings
- Add explanatory hint text for Beam size and VAD filter in all 4 languages

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+3
-1
@@ -70,7 +70,9 @@ if ($engine === 'openai') {
|
|||||||
|
|
||||||
} elseif ($engine === 'azure') {
|
} elseif ($engine === 'azure') {
|
||||||
$apiKey = trim((string)($_POST['azure_key'] ?? ''));
|
$apiKey = trim((string)($_POST['azure_key'] ?? ''));
|
||||||
$region = preg_replace('/[^a-z0-9]/', '', strtolower(trim((string)($_POST['azure_region'] ?? 'norwayeast'))));
|
if ($apiKey === '') $apiKey = (string)(dbnToolsEnv('DBN_AZURE_SPEECH_KEY') ?? '');
|
||||||
|
$region = preg_replace('/[^a-z0-9]/', '', strtolower(trim((string)($_POST['azure_region'] ?? ''))));
|
||||||
|
if ($region === '') $region = preg_replace('/[^a-z0-9]/', '', strtolower((string)(dbnToolsEnv('DBN_AZURE_SPEECH_REGION') ?? 'norwayeast')));
|
||||||
if (!$apiKey) {
|
if (!$apiKey) {
|
||||||
dbnToolsError('An Azure Speech API key is required for the Azure engine.', 400, 'missing_azure_key');
|
dbnToolsError('An Azure Speech API key is required for the Azure engine.', 400, 'missing_azure_key');
|
||||||
}
|
}
|
||||||
|
|||||||
+12
-2
@@ -56,8 +56,10 @@ const TRANSCRIBE_I18N = {
|
|||||||
beamSize: 'Beam size',
|
beamSize: 'Beam size',
|
||||||
beamFastest: '(fastest)',
|
beamFastest: '(fastest)',
|
||||||
beamBest: '(best)',
|
beamBest: '(best)',
|
||||||
|
beamSizeHint: 'Controls search breadth — higher values improve accuracy but take longer. 5 is recommended for legal recordings.',
|
||||||
vadFilter: 'VAD filter',
|
vadFilter: 'VAD filter',
|
||||||
vadFilterLabel: 'Remove silence',
|
vadFilterLabel: 'Remove silence',
|
||||||
|
vadFilterHint: 'Voice Activity Detection — skips silent passages before transcribing. Speeds up processing and prevents the model hallucinating on silence.',
|
||||||
run: 'Run',
|
run: 'Run',
|
||||||
running: 'Transcribing…',
|
running: 'Transcribing…',
|
||||||
runningOther: 'Running…',
|
runningOther: 'Running…',
|
||||||
@@ -120,8 +122,10 @@ const TRANSCRIBE_I18N = {
|
|||||||
beamSize: 'Beam size',
|
beamSize: 'Beam size',
|
||||||
beamFastest: '(raskest)',
|
beamFastest: '(raskest)',
|
||||||
beamBest: '(best)',
|
beamBest: '(best)',
|
||||||
|
beamSizeHint: 'Styrer søkebredde — høyere verdier gir bedre nøyaktighet men tar lengre tid. 5 anbefales for juridiske opptak.',
|
||||||
vadFilter: 'VAD-filter',
|
vadFilter: 'VAD-filter',
|
||||||
vadFilterLabel: 'Fjern stillhet',
|
vadFilterLabel: 'Fjern stillhet',
|
||||||
|
vadFilterHint: 'Taleaktivitetsdeteksjon — hopper over stille partier før transkripsjon. Raskere behandling og forhindrer hallusinasjon på stillhet.',
|
||||||
run: 'Kjør',
|
run: 'Kjør',
|
||||||
running: 'Transkriberer…',
|
running: 'Transkriberer…',
|
||||||
runningOther: 'Kjører…',
|
runningOther: 'Kjører…',
|
||||||
@@ -184,8 +188,10 @@ const TRANSCRIBE_I18N = {
|
|||||||
beamSize: 'Розмір пучка',
|
beamSize: 'Розмір пучка',
|
||||||
beamFastest: '(найшвидший)',
|
beamFastest: '(найшвидший)',
|
||||||
beamBest: '(найкращий)',
|
beamBest: '(найкращий)',
|
||||||
|
beamSizeHint: 'Ширина пошуку — більше значення підвищує точність, але займає більше часу. 5 рекомендовано для юридичних записів.',
|
||||||
vadFilter: 'VAD-фільтр',
|
vadFilter: 'VAD-фільтр',
|
||||||
vadFilterLabel: 'Видалити тишу',
|
vadFilterLabel: 'Видалити тишу',
|
||||||
|
vadFilterHint: 'Виявлення мовної активності — пропускає тихі ділянки перед транскрипцією. Прискорює обробку та запобігає галюцинаціям на тиші.',
|
||||||
run: 'Запустити',
|
run: 'Запустити',
|
||||||
running: 'Транскрибування…',
|
running: 'Транскрибування…',
|
||||||
runningOther: 'Виконання…',
|
runningOther: 'Виконання…',
|
||||||
@@ -248,8 +254,10 @@ const TRANSCRIBE_I18N = {
|
|||||||
beamSize: 'Rozmiar wiązki',
|
beamSize: 'Rozmiar wiązki',
|
||||||
beamFastest: '(najszybszy)',
|
beamFastest: '(najszybszy)',
|
||||||
beamBest: '(najlepszy)',
|
beamBest: '(najlepszy)',
|
||||||
|
beamSizeHint: 'Kontroluje szerokość wyszukiwania — wyższe wartości poprawiają dokładność, ale wydłużają czas. 5 zalecane dla nagrań prawnych.',
|
||||||
vadFilter: 'Filtr VAD',
|
vadFilter: 'Filtr VAD',
|
||||||
vadFilterLabel: 'Usuń ciszę',
|
vadFilterLabel: 'Usuń ciszę',
|
||||||
|
vadFilterHint: 'Wykrywanie aktywności głosowej — pomija ciche fragmenty przed transkrypcją. Przyspiesza przetwarzanie i zapobiega halucynacjom na ciszy.',
|
||||||
run: 'Uruchom',
|
run: 'Uruchom',
|
||||||
running: 'Transkrybowanie…',
|
running: 'Transkrybowanie…',
|
||||||
runningOther: 'Uruchamianie…',
|
runningOther: 'Uruchamianie…',
|
||||||
@@ -901,7 +909,7 @@ async function runTranscribe() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (engine === 'azure') {
|
if (engine === 'azure' && !window.DBN_AZURE_SPEECH_CONFIGURED) {
|
||||||
const key = document.getElementById('azureKeyInput')?.value?.trim();
|
const key = document.getElementById('azureKeyInput')?.value?.trim();
|
||||||
if (!key) {
|
if (!key) {
|
||||||
els.status.textContent = currentUiT('missingAzureKey');
|
els.status.textContent = currentUiT('missingAzureKey');
|
||||||
@@ -1208,7 +1216,9 @@ function setupTranscribeControls() {
|
|||||||
radio.addEventListener('change', () => {
|
radio.addEventListener('change', () => {
|
||||||
const engine = currentTranscribeEngine();
|
const engine = currentTranscribeEngine();
|
||||||
document.getElementById('openaiKeyControl')?.classList.toggle('is-hidden', engine !== 'openai');
|
document.getElementById('openaiKeyControl')?.classList.toggle('is-hidden', engine !== 'openai');
|
||||||
document.getElementById('azureKeyControl')?.classList.toggle('is-hidden', engine !== 'azure');
|
// Hide azure key row if server has a pre-configured key
|
||||||
|
const azureNeedsKey = engine === 'azure' && !window.DBN_AZURE_SPEECH_CONFIGURED;
|
||||||
|
document.getElementById('azureKeyControl')?.classList.toggle('is-hidden', !azureNeedsKey);
|
||||||
document.getElementById('modelControl')?.classList.toggle('is-hidden', engine === 'openai' || engine === 'azure');
|
document.getElementById('modelControl')?.classList.toggle('is-hidden', engine === 'openai' || engine === 'azure');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
+4
-5
@@ -5,7 +5,9 @@ $toolTitle = 'Transcribe audio';
|
|||||||
$toolKind = 'Audio Transcription';
|
$toolKind = 'Audio Transcription';
|
||||||
$toolBadge = 'Whisper / GPU';
|
$toolBadge = 'Whisper / GPU';
|
||||||
require_once __DIR__ . '/includes/layout.php';
|
require_once __DIR__ . '/includes/layout.php';
|
||||||
|
$azureConfigured = !empty(dbnToolsEnv('DBN_AZURE_SPEECH_KEY'));
|
||||||
?>
|
?>
|
||||||
|
<script>window.DBN_AZURE_SPEECH_CONFIGURED = <?= $azureConfigured ? 'true' : 'false' ?>;</script>
|
||||||
<form id="toolForm" class="tool-form">
|
<form id="toolForm" class="tool-form">
|
||||||
|
|
||||||
<div class="lang-switcher" id="uiLangSwitcher" role="group" aria-label="UI language">
|
<div class="lang-switcher" id="uiLangSwitcher" role="group" aria-label="UI language">
|
||||||
@@ -94,21 +96,18 @@ require_once __DIR__ . '/includes/layout.php';
|
|||||||
<details class="expert-settings" id="expertSettings">
|
<details class="expert-settings" id="expertSettings">
|
||||||
<summary class="expert-summary" data-i18n="expertSettings">Advanced settings</summary>
|
<summary class="expert-summary" data-i18n="expertSettings">Advanced settings</summary>
|
||||||
<div class="expert-body">
|
<div class="expert-body">
|
||||||
<div class="control-row">
|
|
||||||
<span class="control-label" data-i18n="task">Task</span>
|
|
||||||
<label><input type="radio" name="task" value="transcribe" checked> <span data-i18n="taskTranscribe">Transcribe</span></label>
|
|
||||||
<label><input type="radio" name="task" value="translate"> <span data-i18n="taskTranslate">Translate to English</span></label>
|
|
||||||
</div>
|
|
||||||
<div class="control-row">
|
<div class="control-row">
|
||||||
<span class="control-label" data-i18n="beamSize">Beam size</span>
|
<span class="control-label" data-i18n="beamSize">Beam size</span>
|
||||||
<label><input type="radio" name="beam_size" value="1"> 1 <small class="control-hint" data-i18n="beamFastest">(fastest)</small></label>
|
<label><input type="radio" name="beam_size" value="1"> 1 <small class="control-hint" data-i18n="beamFastest">(fastest)</small></label>
|
||||||
<label><input type="radio" name="beam_size" value="3"> 3</label>
|
<label><input type="radio" name="beam_size" value="3"> 3</label>
|
||||||
<label><input type="radio" name="beam_size" value="5" checked> 5 <small class="control-hint" data-i18n="beamBest">(best)</small></label>
|
<label><input type="radio" name="beam_size" value="5" checked> 5 <small class="control-hint" data-i18n="beamBest">(best)</small></label>
|
||||||
</div>
|
</div>
|
||||||
|
<p class="upload-hint" data-i18n="beamSizeHint">Controls search breadth — higher values improve accuracy but take longer. 5 is recommended for legal recordings.</p>
|
||||||
<div class="control-row">
|
<div class="control-row">
|
||||||
<span class="control-label" data-i18n="vadFilter">VAD filter</span>
|
<span class="control-label" data-i18n="vadFilter">VAD filter</span>
|
||||||
<label><input type="checkbox" name="vad_filter" id="vadFilterCheck" value="1" checked> <span data-i18n="vadFilterLabel">Remove silence</span></label>
|
<label><input type="checkbox" name="vad_filter" id="vadFilterCheck" value="1" checked> <span data-i18n="vadFilterLabel">Remove silence</span></label>
|
||||||
</div>
|
</div>
|
||||||
|
<p class="upload-hint" data-i18n="vadFilterHint">Voice Activity Detection — skips silent passages before transcribing. Speeds up processing and prevents the model hallucinating on silence.</p>
|
||||||
</div>
|
</div>
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user