'File exceeds server upload limit.',
        UPLOAD_ERR_FORM_SIZE => 'File exceeds form size limit.',
        UPLOAD_ERR_PARTIAL   => 'File was only partially uploaded.',
        UPLOAD_ERR_NO_FILE   => 'No audio file received.',
    ];
    dbnToolsError($map[$code] ?? "Upload error (code {$code}).", 400, 'upload_error');
}

// ── Validate the uploaded file ────────────────────────────────────────────────
// NOTE(review): every check below relies on dbnToolsError() ending the request
// (execution must not continue past it) — confirm against its definition.
$file = $_FILES['audio'];

$maxBytes = 200 * 1024 * 1024;
if ($file['size'] > $maxBytes) {
    dbnToolsError('File too large. Maximum 200 MB.', 413, 'file_too_large');
}

// The extension is taken from the client-supplied name and matched against an allow-list.
$allowedExts = ['mp3', 'wav', 'ogg', 'oga', 'm4a', 'mp4', 'flac', 'webm', 'aac'];
$ext = strtolower(pathinfo($file['name'], PATHINFO_EXTENSION));
if (!in_array($ext, $allowedExts, true)) {
    dbnToolsError(
        "Unsupported format: .{$ext}. Use MP3, WAV, OGG, M4A, FLAC, or WebM.",
        415,
        'unsupported_format'
    );
}

// ── Build Whisper request ─────────────────────────────────────────────────────
$whisperBase = 'http://194.93.49.14:20019';
$endpoint = $whisperBase . ($diarize ? '/transcribe/diarize' : '/transcribe');

// The file name comes from the client and is written into a multipart part header.
// addslashes() does not neutralise CR/LF or other control bytes, so a crafted name
// could inject extra part headers. Replace control bytes, double quotes and
// backslashes instead; the (already allow-listed) extension is left untouched.
$safeName = preg_replace('/[\x00-\x1F\x7F"\\\\]/', '_', basename($file['name']));
if ($safeName === null || $safeName === '') {
    $safeName = 'audio.' . $ext;
}

$fileContents = file_get_contents($file['tmp_name']);
if ($fileContents === false) {
    dbnToolsError('Could not read uploaded file.', 500, 'file_read_error');
}

$boundary = '----DBN' . bin2hex(random_bytes(8));

$body = "--{$boundary}\r\n";
$body .= 'Content-Disposition: form-data; name="file"; filename="' . $safeName . '"' . "\r\n";
$body .= "Content-Type: application/octet-stream\r\n\r\n";
$body .= $fileContents;
$body .= "\r\n";
// The upload may be up to 200 MB; drop the raw copy now that it lives in $body so
// both buffers are not held for the whole duration of the upstream call.
unset($fileContents);

// NOTE(review): $language is written into the body verbatim — it is assumed to be
// validated where it is read from the request (outside this section); confirm.
if ($language !== 'auto') {
    $body .= "--{$boundary}\r\n";
    $body .= "Content-Disposition: form-data; name=\"language\"\r\n\r\n";
    $body .= $language . "\r\n";
}

// A speaker-count hint is only sent for diarized requests with more than one speaker.
if ($diarize && $numSpeakers > 1) {
    $body .= "--{$boundary}\r\n";
    $body .= "Content-Disposition: form-data; name=\"num_speakers\"\r\n\r\n";
    $body .= $numSpeakers . "\r\n";
}

$body .= "--{$boundary}--\r\n";

// ── Call Whisper ──────────────────────────────────────────────────────────────
$t0 = microtime(true);

if (function_exists('curl_init')) {
    $ch = curl_init($endpoint);
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $body,
        CURLOPT_HTTPHEADER     => [
            "Content-Type: multipart/form-data; boundary={$boundary}",
            'Accept: application/json',
        ],
        CURLOPT_TIMEOUT        => 600,
    ]);
    $whisperBody = curl_exec($ch);
    $httpCode = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
    $curlErr = curl_error($ch);
    curl_close($ch);

    if ($whisperBody === false || $httpCode !== 200) {
        dbnToolsError('Whisper service error (HTTP ' . $httpCode . '): ' . $curlErr, 502, 'whisper_error');
    }
} else {
    // Fallback when the cURL extension is not available: PHP's http stream wrapper.
    $ctx = stream_context_create([
        'http' => [
            'method'        => 'POST',
            'timeout'       => 600,
            'header'        => "Content-Type: multipart/form-data; boundary={$boundary}\r\nAccept: application/json\r\n",
            'content'       => $body,
            'ignore_errors' => true,
        ],
    ]);
    $whisperBody = @file_get_contents($endpoint, false, $ctx);
    if ($whisperBody === false) {
        dbnToolsError('Whisper service unreachable. The GPU may be offline.', 502, 'whisper_unreachable');
    }

    // With ignore_errors the wrapper hands back the body of 4xx/5xx responses as if
    // they had succeeded, so check the status line explicitly to match the cURL
    // branch. The last status line wins in case redirects were followed. If no
    // status line is available at all, fall through to the JSON validation below.
    $httpCode = 0;
    foreach ($http_response_header ?? [] as $headerLine) {
        if (preg_match('#^HTTP/\S+\s+(\d{3})#', (string)$headerLine, $statusMatch) === 1) {
            $httpCode = (int)$statusMatch[1];
        }
    }
    if ($httpCode !== 0 && $httpCode !== 200) {
        dbnToolsError('Whisper service error (HTTP ' . $httpCode . ').', 502, 'whisper_error');
    }
}

$latencyMs = (int)round((microtime(true) - $t0) * 1000);

// The response must decode to an array carrying a non-empty 'text' entry.
$whisper = json_decode($whisperBody, true);
if (!is_array($whisper) || empty($whisper['text'])) {
    dbnToolsError('Empty or invalid response from Whisper.', 502, 'whisper_empty');
}

// ── Speaker role labelling ────────────────────────────────────────────────────
$segments = is_array($whisper['segments'] ?? null) ? $whisper['segments'] : [];
$numDetected = (int)($whisper['num_speakers'] ??
1);

// When the service reported fewer than two speakers, fall back to counting the
// distinct non-empty 'speaker' labels found on the segments themselves.
if ($numDetected < 2 && $segments) {
    $uniqueSpeakers = array_filter(array_unique(array_column($segments, 'speaker')));
    if (count($uniqueSpeakers) > 1) {
        $numDetected = count($uniqueSpeakers);
    }
}

// Role labelling is only attempted for diarized requests with several speakers;
// otherwise 'speaker_roles' stays null in the response.
$speakerRoles = null;
if ($diarize && $numDetected > 1 && $segments) {
    $speakerRoles = dbnLabelSpeakerRoles($segments);
}

// ── Respond ───────────────────────────────────────────────────────────────────
dbnToolsLogMetadata([
    'tool'       => 'transcribe',
    'language'   => $language,
    'ok'         => true,
    'latency_ms' => $latencyMs,
]);

dbnToolsRespond([
    'ok'            => true,
    'tool'          => 'transcribe',
    'transcript'    => (string)$whisper['text'],
    'segments'      => $segments,
    'speaker_roles' => $speakerRoles,
    'num_speakers'  => $numDetected,
    'language'      => (string)($whisper['language'] ?? $language),
    'duration_sec'  => round((float)($whisper['duration_seconds'] ?? 0), 2),
    'model'         => (string)($whisper['model'] ?? 'whisper'),
    'latency_ms'    => $latencyMs,
]);

// ── Speaker role labelling helper ─────────────────────────────────────────────

/**
 * Ask the Azure OpenAI gateway to guess the role of each diarized speaker.
 *
 * Up to the first 20 segments that carry a usable 'speaker' label are rendered as
 * "[label] text" lines and sent together with a system prompt requesting a JSON
 * object that maps speaker labels to Norwegian role names.
 *
 * This is best-effort: any failure (gateway construction, the chat call, a reply
 * that is not a JSON object/array) yields an empty array instead of an error, so
 * a finished transcription is never lost because labelling failed.
 *
 * @param array $segments Transcript segments; entries are expected to be arrays
 *                        with 'speaker' and optionally 'text'. Entries of any
 *                        other shape are ignored.
 *
 * @return array The decoded JSON reply (as requested by the prompt: speaker
 *               label => role), or [] when nothing usable was produced.
 */
function dbnLabelSpeakerRoles(array $segments): array
{
    // Only segments with a scalar speaker label can be rendered into a prompt line;
    // skipping the rest avoids "Array to string conversion" on malformed input.
    $labelled = array_values(array_filter(
        $segments,
        static fn($s) => is_array($s) && isset($s['speaker']) && is_scalar($s['speaker'])
    ));
    $sample = array_slice($labelled, 0, 20);
    if (!$sample) {
        return [];
    }

    $lines = array_map(
        static fn($s) => "[{$s['speaker']}] " . (is_scalar($s['text'] ?? null) ? trim((string)$s['text']) : ''),
        $sample
    );

    $system = 'You are analyzing a legal proceeding transcript. '
        . 'Based on the first segments, identify the role of each speaker. '
        . 'Common roles in Norwegian legal proceedings: dommer (judge), advokat (lawyer), '
        . 'forelder (parent), barn (child), sakkyndig (expert witness), '
        . 'saksbehandler (caseworker), tolk (interpreter), vitne (witness), '
        . 'prosessfullmektig (counsel). '
        . 'Return ONLY valid JSON: {"SPEAKER_00":"dommer","SPEAKER_01":"forelder"}. '
        . 'Use Norwegian role names. Use "ukjent" if role cannot be determined. '
        . 'Only include speakers present in the input.';

    try {
        // Constructed inside the try block so that a failing gateway constructor is
        // treated like any other labelling failure rather than aborting the request.
        $azure = new DbnAzureOpenAiGateway();

        $text = $azure->chatText([
            ['role' => 'system', 'content' => $system],
            ['role' => 'user', 'content' => implode("\n", $lines)],
        ], ['temperature' => 0.1, 'max_tokens' => 200]);

        if (!is_string($text)) {
            return [];
        }

        // Strip a Markdown code fence (``` or ```json) the model may wrap the JSON in.
        $cleaned = preg_replace('/^```(?:json)?\s*|\s*```$/m', '', trim($text));
        if ($cleaned === null) {
            return [];
        }

        $json = json_decode($cleaned, true);

        return is_array($json) ? $json : [];
    } catch (Throwable) {
        // Deliberately swallowed: role labelling is optional.
        return [];
    }
}