fix(tools): tier-aware GPU cloud fallback for ask synthesis

When a persona-pinned GPU fine-tune is offline, degrade to the requested
quality tier's Bedrock model (Quick->Haiku, Pro->Sonnet) instead of a
hardcoded gpt-4o, so Pro genuinely differs from Quick while the pod is off.
Legacy/Azure engines, and any gateway that is not Bedrock, keep gpt-4o as
the floor. Generalize the degraded-model notice and trace wording so it no
longer names gpt-4o.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 18:50:39 +02:00
parent a4b5b6e3f2
commit f270a32056
+19 -10
View File
@@ -316,7 +316,7 @@ PROMPT;
} }
array_unshift($uncertain, $this->degradedModelNotice($language)); array_unshift($uncertain, $this->degradedModelNotice($language));
$json['what_remains_uncertain'] = $uncertain; $json['what_remains_uncertain'] = $uncertain;
$trace[] = $this->trace('Model routing', 'Fine-tuned legal model unavailable; answered with gpt-4o fallback (corpus + retrieval unaffected).', 'warning'); $trace[] = $this->trace('Model routing', 'Fine-tuned legal model unavailable; answered with cloud fallback (corpus + retrieval unaffected).', 'warning');
} }
$trace[] = $this->trace('Synthesis', 'Azure OpenAI generated an answer using only the retrieved source excerpts.', 'complete'); $trace[] = $this->trace('Synthesis', 'Azure OpenAI generated an answer using only the retrieved source excerpts.', 'complete');
@@ -1253,9 +1253,18 @@ PROMPT;
return true; return true;
} }
/** The always-up cloud fallback used when a GPU-backed fine-tune is offline. */ /**
private function cloudFallbackGateway(): array * The always-up cloud fallback used when a GPU-backed fine-tune is offline. Tier-aware:
* a requested quality tier (Quick→Haiku, Pro→Sonnet) degrades to that tier's Bedrock model
* so Pro still buys a stronger model than Quick while the pod is off. Legacy/azure engines
* keep gpt-4o as the capable general floor.
*/
private function cloudFallbackGateway(string $engine): array
{ {
if (($engine === 'claude_haiku' || $engine === 'claude_sonnet')
&& $this->azure instanceof DbnBedrockGateway) {
return [$this->azure, DbnBedrockModelRouter::deploymentForEngine($engine, true)];
}
return [$this->azure, 'gpt-4o']; return [$this->azure, 'gpt-4o'];
} }
@@ -1338,7 +1347,7 @@ PROMPT;
error_log('[dbn-persona] GPU model ' . $model . ' marked unavailable; using cloud fallback.'); error_log('[dbn-persona] GPU model ' . $model . ' marked unavailable; using cloud fallback.');
$failedModel = $model; $failedModel = $model;
$degraded = true; $degraded = true;
[$gw, $model] = $this->cloudFallbackGateway(); [$gw, $model] = $this->cloudFallbackGateway($engine);
} }
try { try {
@@ -1352,7 +1361,7 @@ PROMPT;
$this->writeGpuHealth($model, false); $this->writeGpuHealth($model, false);
$failedModel = $model; $failedModel = $model;
$degraded = true; $degraded = true;
[$gw, $model] = $this->cloudFallbackGateway(); [$gw, $model] = $this->cloudFallbackGateway($engine);
$raw = $gw->withDeployment($model)->chatText($messages, $options); $raw = $gw->withDeployment($model)->chatText($messages, $options);
} }
@@ -1365,14 +1374,14 @@ PROMPT;
]; ];
} }
/** Localized notice shown when the fine-tuned legal model was offline and gpt-4o answered. */ /** Localized notice shown when the fine-tuned legal model was offline and a general cloud model answered. */
private function degradedModelNotice(string $language): string private function degradedModelNotice(string $language): string
{ {
return match (dbnToolsNormalizeUiLanguage($language)) { return match (dbnToolsNormalizeUiLanguage($language)) {
'no' => 'Den spesialiserte, finjusterte norske juridiske modellen er midlertidig utilgjengelig, så dette svaret ble generert av den generelle modellen (gpt-4o). Det juridiske korpuset, kildene og bevisene er fullt tilgjengelige gjennomgå de siterte kildene som vanlig.', 'no' => 'Den spesialiserte, finjusterte norske juridiske modellen er midlertidig utilgjengelig, så dette svaret ble generert av en generell skymodell. Det juridiske korpuset, kildene og bevisene er fullt tilgjengelige gjennomgå de siterte kildene som vanlig.',
'uk' => 'Спеціалізована доточена норвезька юридична модель тимчасово недоступна, тому цю відповідь згенерувала загальна модель (gpt-4o). Юридичний корпус, джерела та докази повністю доступні — перегляньте цитовані джерела, як зазвичай.', 'uk' => 'Спеціалізована доточена норвезька юридична модель тимчасово недоступна, тому цю відповідь згенерувала загальна хмарна модель. Юридичний корпус, джерела та докази повністю доступні — перегляньте цитовані джерела, як зазвичай.',
'pl' => 'Wyspecjalizowany, dostrojony norweski model prawny jest tymczasowo niedostępny, więc tę odpowiedź wygenerował model ogólny (gpt-4o). Korpus prawny, źródła i dowody są w pełni dostępne — przejrzyj cytowane źródła jak zwykle.', 'pl' => 'Wyspecjalizowany, dostrojony norweski model prawny jest tymczasowo niedostępny, więc tę odpowiedź wygenerował ogólny model w chmurze. Korpus prawny, źródła i dowody są w pełni dostępne — przejrzyj cytowane źródła jak zwykle.',
default => 'The specialized fine-tuned Norwegian legal model is temporarily offline, so this answer was generated by the general model (gpt-4o). The legal corpus, sources, and evidence are fully live — review the cited sources as usual.', default => 'The specialized fine-tuned Norwegian legal model is temporarily offline, so this answer was generated by a general cloud model. The legal corpus, sources, and evidence are fully live — review the cited sources as usual.',
}; };
} }