From f270a32056b6fc5bd24840be8b40ccb461ce9770 Mon Sep 17 00:00:00 2001 From: davegilligan Date: Mon, 15 Jun 2026 18:50:39 +0200 Subject: [PATCH] fix(tools): tier-aware GPU cloud fallback for ask synthesis When a persona-pinned GPU fine-tune is offline, degrade to the requested quality tier's Bedrock model (Quick->Haiku, Pro->Sonnet) instead of a hardcoded gpt-4o, so Pro genuinely differs from Quick while the pod is off. Legacy/azure engines keep gpt-4o as the floor. Generalize the degraded notice/trace wording (no longer asserts gpt-4o). Co-Authored-By: Claude Opus 4.7 --- includes/LegalTools.php | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/includes/LegalTools.php b/includes/LegalTools.php index d404d02..6267d39 100644 --- a/includes/LegalTools.php +++ b/includes/LegalTools.php @@ -316,7 +316,7 @@ PROMPT; } array_unshift($uncertain, $this->degradedModelNotice($language)); $json['what_remains_uncertain'] = $uncertain; - $trace[] = $this->trace('Model routing', 'Fine-tuned legal model unavailable; answered with gpt-4o fallback (corpus + retrieval unaffected).', 'warning'); + $trace[] = $this->trace('Model routing', 'Fine-tuned legal model unavailable; answered with cloud fallback (corpus + retrieval unaffected).', 'warning'); } $trace[] = $this->trace('Synthesis', 'Azure OpenAI generated an answer using only the retrieved source excerpts.', 'complete'); @@ -1253,9 +1253,18 @@ PROMPT; return true; } - /** The always-up cloud fallback used when a GPU-backed fine-tune is offline. */ - private function cloudFallbackGateway(): array + /** + * The always-up cloud fallback used when a GPU-backed fine-tune is offline. Tier-aware: + * a requested quality tier (Quick→Haiku, Pro→Sonnet) degrades to that tier's Bedrock model + * so Pro still buys a stronger model than Quick while the pod is off. Legacy/azure engines + * keep gpt-4o as the capable general floor. + */ + private function cloudFallbackGateway(string $engine): array { + if (($engine === 'claude_haiku' || $engine === 'claude_sonnet') + && $this->azure instanceof DbnBedrockGateway) { + return [$this->azure, DbnBedrockModelRouter::deploymentForEngine($engine, true)]; + } return [$this->azure, 'gpt-4o']; } @@ -1338,7 +1347,7 @@ PROMPT; error_log('[dbn-persona] GPU model ' . $model . ' marked unavailable; using cloud fallback.'); $failedModel = $model; $degraded = true; - [$gw, $model] = $this->cloudFallbackGateway(); + [$gw, $model] = $this->cloudFallbackGateway($engine); } try { @@ -1352,7 +1361,7 @@ PROMPT; $this->writeGpuHealth($model, false); $failedModel = $model; $degraded = true; - [$gw, $model] = $this->cloudFallbackGateway(); + [$gw, $model] = $this->cloudFallbackGateway($engine); $raw = $gw->withDeployment($model)->chatText($messages, $options); } @@ -1365,14 +1374,14 @@ PROMPT; ]; } - /** Localized notice shown when the fine-tuned legal model was offline and gpt-4o answered. */ + /** Localized notice shown when the fine-tuned legal model was offline and a general cloud model answered. */ private function degradedModelNotice(string $language): string { return match (dbnToolsNormalizeUiLanguage($language)) { - 'no' => 'Den spesialiserte, finjusterte norske juridiske modellen er midlertidig utilgjengelig, så dette svaret ble generert av den generelle modellen (gpt-4o). Det juridiske korpuset, kildene og bevisene er fullt tilgjengelige – gjennomgå de siterte kildene som vanlig.', - 'uk' => 'Спеціалізована доточена норвезька юридична модель тимчасово недоступна, тому цю відповідь згенерувала загальна модель (gpt-4o). Юридичний корпус, джерела та докази повністю доступні — перегляньте цитовані джерела, як зазвичай.', - 'pl' => 'Wyspecjalizowany, dostrojony norweski model prawny jest tymczasowo niedostępny, więc tę odpowiedź wygenerował model ogólny (gpt-4o). Korpus prawny, źródła i dowody są w pełni dostępne — przejrzyj cytowane źródła jak zwykle.', - default => 'The specialized fine-tuned Norwegian legal model is temporarily offline, so this answer was generated by the general model (gpt-4o). The legal corpus, sources, and evidence are fully live — review the cited sources as usual.', + 'no' => 'Den spesialiserte, finjusterte norske juridiske modellen er midlertidig utilgjengelig, så dette svaret ble generert av en generell skymodell. Det juridiske korpuset, kildene og bevisene er fullt tilgjengelige – gjennomgå de siterte kildene som vanlig.', + 'uk' => 'Спеціалізована доточена норвезька юридична модель тимчасово недоступна, тому цю відповідь згенерувала загальна хмарна модель. Юридичний корпус, джерела та докази повністю доступні — перегляньте цитовані джерела, як зазвичай.', + 'pl' => 'Wyspecjalizowany, dostrojony norweski model prawny jest tymczasowo niedostępny, więc tę odpowiedź wygenerował ogólny model w chmurze. Korpus prawny, źródła i dowody są w pełni dostępne — przejrzyj cytowane źródła jak zwykle.', + default => 'The specialized fine-tuned Norwegian legal model is temporarily offline, so this answer was generated by a general cloud model. The legal corpus, sources, and evidence are fully live — review the cited sources as usual.', }; }