'nb-NO', 'nb' => 'nb-NO', 'nn' => 'nn-NO', 'en' => 'en-US', 'sv' => 'sv-SE', 'da' => 'da-DK', 'de' => 'de-DE', 'fr' => 'fr-FR', ];

    /**
     * @param string $projectId GCP project id; inserted into the recognizer URL path.
     * @param string $apiKey    API key; sent as the `key` query parameter on every request.
     */
    public function __construct(string $projectId, string $apiKey)
    {
        $this->projectId = $projectId;
        $this->apiKey = $apiKey;
    }

    /**
     * Build a client from the environment, falling back to a config file.
     *
     * Reads GCP_PROJECT_ID / GCP_API_KEY from the environment. Whichever of the
     * two is missing is then looked up in /etc/bnl/gcp.php, which is expected to
     * return an array with 'project_id' and/or 'api_key' entries.
     *
     * @return self|null Null when either value is still missing afterwards.
     */
    public static function fromConfig(): ?self
    {
        $projectId = getenv('GCP_PROJECT_ID') ?: null;
        $apiKey = getenv('GCP_API_KEY') ?: null;

        if (!$projectId || !$apiKey) {
            // The config file is optional: test for it explicitly instead of
            // silencing include warnings with the @ operator.
            $configFile = '/etc/bnl/gcp.php';
            $cfg = (is_file($configFile) && is_readable($configFile)) ? include $configFile : null;

            if (is_array($cfg)) {
                if (!$projectId && !empty($cfg['project_id'])) {
                    $projectId = (string)$cfg['project_id'];
                }
                if (!$apiKey && !empty($cfg['api_key'])) {
                    $apiKey = (string)$cfg['api_key'];
                }
            }
        }

        if (!$projectId || !$apiKey) {
            return null;
        }

        return new self($projectId, $apiKey);
    }

    /**
     * Transcribe an audio file using GCP Speech-to-Text v2.
     *
     * Returns a Whisper-compatible array on success:
     *   ['text', 'language', 'duration', 'segments', 'num_speakers']
     * Returns null on any failure (caller should fall back to Whisper).
     *
     * @param string $audioPath   Path of the audio file; its whole content is sent inline (base64).
     * @param string $mimeType    Currently unused: the request asks GCP to auto-detect the encoding.
     * @param string $language    Language code; mapped to a GCP locale via resolveLocale().
     * @param bool   $diarize     Whether to request speaker diarization.
     * @param int    $minSpeakers Lower speaker bound for diarization (raised to at least 2).
     * @param int    $maxSpeakers Upper speaker bound for diarization (raised to at least the lower bound).
     * @param int    $timeoutSec  Time budget for polling when the API answers with an operation name.
     *
     * @return array{text:string, language:string, duration:float, segments:array, num_speakers:int}|null
     */
    public function transcribe(
        string $audioPath,
        string $mimeType,
        string $language,
        bool $diarize,
        int $minSpeakers = 2,
        int $maxSpeakers = 6,
        int $timeoutSec = 270
    ): ?array {
        $locale = $this->resolveLocale($language);

        // Word offsets are requested explicitly because normalizeResults() derives
        // segments and the duration from each word's startOffset/endOffset.
        $features = [
            'enableAutomaticPunctuation' => true,
            'enableWordTimeOffsets' => true,
        ];
        if ($diarize) {
            $minCount = max(2, $minSpeakers);
            $features['diarizationConfig'] = [
                'minSpeakerCount' => $minCount,
                'maxSpeakerCount' => max($minCount, $maxSpeakers),
            ];
        }

        $langCodes = [$locale];
        // Add Nynorsk as secondary when processing Norwegian content
        if ($locale === 'nb-NO') {
            $langCodes[] = 'nn-NO';
        }

        $config = [
            // Must serialize as the JSON object {} rather than the list [].
            'autoDecodingConfig' => (object)[],
            'languageCodes' => $langCodes,
            'model' => 'long',
            'features' => $features,
        ];

        $rawAudio = file_get_contents($audioPath);
        if ($rawAudio === false || $rawAudio === '') {
            error_log("GcpSpeechClient: failed to read audio file");
            return null;
        }
        $audioContent = base64_encode($rawAudio);
        unset($rawAudio); // do not keep the raw bytes alongside the base64 copy

        $body = json_encode(['config' => $config, 'content' => $audioContent]);
        unset($audioContent);
        if ($body === false) {
            // e.g. a caller-supplied language string that is not valid UTF-8
            error_log('GcpSpeechClient: failed to encode request: ' . json_last_error_msg());
            return null;
        }

        $url = sprintf(
            'https://speech.googleapis.com/v2/projects/%s/locations/global/recognizers/_:recognize?key=%s',
            rawurlencode($this->projectId),
            rawurlencode($this->apiKey)
        );

        $ch = curl_init($url);
        if ($ch === false) {
            error_log('GcpSpeechClient: curl_init failed');
            return null;
        }
        curl_setopt_array($ch, [
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 60,
        ]);
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $curlErr = curl_error($ch);
        curl_close($ch);

        if ($curlErr || !is_string($response)) {
            error_log("GcpSpeechClient: curl error: {$curlErr}");
            return null;
        }
        if ($httpCode !== 200) {
            error_log("GcpSpeechClient: HTTP {$httpCode}: " . substr($response, 0, 300));
            return null;
        }

        $data = json_decode($response, true);
        if (!is_array($data)) {
            return null;
        }

        // Long audio: GCP returns an operation name to poll
        if (isset($data['name']) && str_contains((string)$data['name'], '/operations/')) {
            $data = $this->pollOperation((string)$data['name'], $timeoutSec);
            if ($data === null) {
                return null;
            }
        }

        if (empty($data['results']) || !is_array($data['results'])) {
            error_log("GcpSpeechClient: no results in response");
            return null;
        }

        return $this->normalizeResults($data['results'], $locale);
    }

    /**
     * Poll a long-running operation until it reports done or the time budget is spent.
     *
     * Waits 5s before the first poll, then 10s, then 15s between later polls; each
     * wait is capped to the time remaining so the budget is not overshot by sleeping.
     * Polls that fail (non-200 status, unreadable or non-JSON body) are retried.
     *
     * @param string $operationName Operation resource name, appended to the v2 base URL.
     * @param int    $timeoutSec    Polling budget in seconds.
     *
     * @return array|null The operation's 'response' payload, or null on timeout, on a
     *                    finished operation without a usable response, or on setup failure.
     */
    private function pollOperation(string $operationName, int $timeoutSec): ?array
    {
        $url = "https://speech.googleapis.com/v2/{$operationName}?key=" . rawurlencode($this->apiKey);
        $deadline = time() + $timeoutSec;
        $interval = 5;

        while (($remaining = $deadline - time()) > 0) {
            sleep(min($interval, $remaining));
            $interval = min($interval + 5, 15);

            $ch = curl_init($url);
            if ($ch === false) {
                error_log('GcpSpeechClient: curl_init failed');
                return null;
            }
            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_TIMEOUT => 15,
            ]);
            $response = curl_exec($ch);
            $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            if ($httpCode !== 200 || !is_string($response)) {
                continue;
            }

            $data = json_decode($response, true);
            if (!is_array($data) || empty($data['done'])) {
                continue;
            }

            $result = $data['response'] ?? null;
            if (is_array($result)) {
                return $result;
            }

            // Finished without a response payload: surface the reason instead of
            // failing silently.
            if (isset($data['error'])) {
                error_log('GcpSpeechClient: operation failed: ' . substr((string)json_encode($data['error']), 0, 300));
            } else {
                error_log('GcpSpeechClient: operation finished without a response');
            }
            return null;
        }

        error_log("GcpSpeechClient: operation timed out after {$timeoutSec}s");
        return null;
    }

    /**
     * Convert GCP recognition results into the Whisper-style structure.
     *
     * Only the first alternative of each result is used. Transcript parts are
     * trimmed and joined with single spaces. Consecutive words with the same
     * speaker label are grouped into one segment; a word without a label extends
     * the current segment. Speaker labels are renamed SPEAKER_00, SPEAKER_01, ...
     * in order of first appearance.
     *
     * @param array  $gcpResults The 'results' list of the recognize response.
     * @param string $locale     Locale that was requested; its language part is reported back.
     *
     * @return array{text:string, language:string, duration:float, segments:array, num_speakers:int}|null
     *         Null when no transcript text was found.
     */
    private function normalizeResults(array $gcpResults, string $locale): ?array
    {
        $transcriptParts = [];
        $allWords = [];
        $lastResultEnd = 0.0; // furthest result-level end offset seen

        foreach ($gcpResults as $result) {
            if (!is_array($result)) {
                continue;
            }

            $resultEnd = $result['resultEndOffset'] ?? null;
            if (is_string($resultEnd)) {
                $lastResultEnd = max($lastResultEnd, $this->offsetToSec($resultEnd));
            }

            $alt = $result['alternatives'][0] ?? null;
            if (!is_array($alt)) {
                continue;
            }

            $rawText = $alt['transcript'] ?? '';
            $part = is_string($rawText) ? trim($rawText) : '';
            if ($part !== '') {
                $transcriptParts[] = $part;
            }

            // Append in place; array_merge() inside the loop re-copies the whole list each time.
            if (isset($alt['words']) && is_array($alt['words'])) {
                foreach ($alt['words'] as $word) {
                    if (is_array($word)) {
                        $allWords[] = $word;
                    }
                }
            }
        }

        $transcript = implode(' ', $transcriptParts);
        if ($transcript === '') {
            return null;
        }

        // Group consecutive same-speaker words into segments
        $segments = [];
        $speakerMap = []; // raw speakerLabel → 'SPEAKER_XX'
        $curSegment = null;

        foreach ($allWords as $word) {
            $rawLabel = (string)($word['speakerLabel'] ?? '');
            if ($rawLabel !== '' && !isset($speakerMap[$rawLabel])) {
                $speakerMap[$rawLabel] = sprintf('SPEAKER_%02d', count($speakerMap));
            }
            $speakerKey = $rawLabel !== '' ? $speakerMap[$rawLabel] : null;

            $start = isset($word['startOffset']) ? $this->offsetToSec((string)$word['startOffset']) : 0.0;
            $end = isset($word['endOffset']) ? $this->offsetToSec((string)$word['endOffset']) : $start;
            $text = (string)($word['word'] ?? '');

            $newSegment = $curSegment === null
                || ($speakerKey !== null && $speakerKey !== ($curSegment['speaker'] ?? null));

            if ($newSegment) {
                if ($curSegment !== null) {
                    $segments[] = $curSegment;
                }
                $curSegment = ['text' => $text, 'start' => round($start, 3), 'end' => round($end, 3)];
                if ($speakerKey !== null) {
                    $curSegment['speaker'] = $speakerKey;
                }
            } else {
                $curSegment['text'] .= ' ' . $text;
                $curSegment['end'] = round($end, 3);
            }
        }
        if ($curSegment !== null) {
            $segments[] = $curSegment;
        }

        // Duration from the last word. Segments exist only when words do, so they
        // cannot serve as a fallback; use the result-level end offset instead.
        $durationSec = 0.0;
        if ($allWords) {
            $last = $allWords[count($allWords) - 1];
            $durationSec = $this->offsetToSec((string)($last['endOffset'] ?? '0s'));
        }
        if ($durationSec <= 0.0) {
            $durationSec = $lastResultEnd;
        }

        return [
            'text' => $transcript,
            'language' => strtolower(explode('-', $locale)[0]),
            'duration' => $durationSec,
            'segments' => $segments,
            'num_speakers' => max(1, count($speakerMap)),
        ];
    }

    /** Convert GCP offset string like "1.200s" to float seconds. */
    private function offsetToSec(string $offset): float
    {
        return (float)rtrim($offset, 's');
    }

    /**
     * Map a caller-supplied language code to the locale sent to GCP.
     *
     * Surrounding whitespace is ignored and an empty value defaults to 'nb-NO'.
     * The map is consulted with the value as given and then lower-cased; a value
     * found in neither form is passed through (trimmed) unchanged.
     */
    private function resolveLocale(string $language): string
    {
        $language = trim($language);
        if ($language === '') {
            return 'nb-NO';
        }

        return self::LOCALE_MAP[$language]
            ?? self::LOCALE_MAP[strtolower($language)]
            ?? $language;
    }
}