fix(tools): tier-aware GPU cloud fallback for ask synthesis
When a persona-pinned GPU fine-tune is offline, degrade to the requested quality tier's Bedrock model (Quick->Haiku, Pro->Sonnet) instead of a hardcoded gpt-4o, so Pro genuinely differs from Quick while the pod is off. Legacy/azure engines keep gpt-4o as the floor. Generalize the degraded notice/trace wording (no longer asserts gpt-4o).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+19
-10
@@ -316,7 +316,7 @@ PROMPT;
|
||||
}
|
||||
array_unshift($uncertain, $this->degradedModelNotice($language));
|
||||
$json['what_remains_uncertain'] = $uncertain;
|
||||
$trace[] = $this->trace('Model routing', 'Fine-tuned legal model unavailable; answered with gpt-4o fallback (corpus + retrieval unaffected).', 'warning');
|
||||
$trace[] = $this->trace('Model routing', 'Fine-tuned legal model unavailable; answered with cloud fallback (corpus + retrieval unaffected).', 'warning');
|
||||
}
|
||||
|
||||
$trace[] = $this->trace('Synthesis', 'Azure OpenAI generated an answer using only the retrieved source excerpts.', 'complete');
|
||||
@@ -1253,9 +1253,18 @@ PROMPT;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** The always-up cloud fallback used when a GPU-backed fine-tune is offline. */
|
||||
private function cloudFallbackGateway(): array
|
||||
/**
|
||||
* The always-up cloud fallback used when a GPU-backed fine-tune is offline. Tier-aware:
|
||||
* a requested quality tier (Quick→Haiku, Pro→Sonnet) degrades to that tier's Bedrock model
|
||||
* so Pro still buys a stronger model than Quick while the pod is off. Legacy/azure engines
|
||||
* keep gpt-4o as the capable general floor.
|
||||
*/
|
||||
private function cloudFallbackGateway(string $engine): array
|
||||
{
|
||||
if (($engine === 'claude_haiku' || $engine === 'claude_sonnet')
|
||||
&& $this->azure instanceof DbnBedrockGateway) {
|
||||
return [$this->azure, DbnBedrockModelRouter::deploymentForEngine($engine, true)];
|
||||
}
|
||||
return [$this->azure, 'gpt-4o'];
|
||||
}
|
||||
|
||||
@@ -1338,7 +1347,7 @@ PROMPT;
|
||||
error_log('[dbn-persona] GPU model ' . $model . ' marked unavailable; using cloud fallback.');
|
||||
$failedModel = $model;
|
||||
$degraded = true;
|
||||
[$gw, $model] = $this->cloudFallbackGateway();
|
||||
[$gw, $model] = $this->cloudFallbackGateway($engine);
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -1352,7 +1361,7 @@ PROMPT;
|
||||
$this->writeGpuHealth($model, false);
|
||||
$failedModel = $model;
|
||||
$degraded = true;
|
||||
[$gw, $model] = $this->cloudFallbackGateway();
|
||||
[$gw, $model] = $this->cloudFallbackGateway($engine);
|
||||
$raw = $gw->withDeployment($model)->chatText($messages, $options);
|
||||
}
|
||||
|
||||
@@ -1365,14 +1374,14 @@ PROMPT;
|
||||
];
|
||||
}
|
||||
|
||||
/** Localized notice shown when the fine-tuned legal model was offline and gpt-4o answered. */
|
||||
/** Localized notice shown when the fine-tuned legal model was offline and a general cloud model answered. */
|
||||
private function degradedModelNotice(string $language): string
|
||||
{
|
||||
return match (dbnToolsNormalizeUiLanguage($language)) {
|
||||
'no' => 'Den spesialiserte, finjusterte norske juridiske modellen er midlertidig utilgjengelig, så dette svaret ble generert av den generelle modellen (gpt-4o). Det juridiske korpuset, kildene og bevisene er fullt tilgjengelige – gjennomgå de siterte kildene som vanlig.',
|
||||
'uk' => 'Спеціалізована доточена норвезька юридична модель тимчасово недоступна, тому цю відповідь згенерувала загальна модель (gpt-4o). Юридичний корпус, джерела та докази повністю доступні — перегляньте цитовані джерела, як зазвичай.',
|
||||
'pl' => 'Wyspecjalizowany, dostrojony norweski model prawny jest tymczasowo niedostępny, więc tę odpowiedź wygenerował model ogólny (gpt-4o). Korpus prawny, źródła i dowody są w pełni dostępne — przejrzyj cytowane źródła jak zwykle.',
|
||||
default => 'The specialized fine-tuned Norwegian legal model is temporarily offline, so this answer was generated by the general model (gpt-4o). The legal corpus, sources, and evidence are fully live — review the cited sources as usual.',
|
||||
'no' => 'Den spesialiserte, finjusterte norske juridiske modellen er midlertidig utilgjengelig, så dette svaret ble generert av en generell skymodell. Det juridiske korpuset, kildene og bevisene er fullt tilgjengelige – gjennomgå de siterte kildene som vanlig.',
|
||||
'uk' => 'Спеціалізована доточена норвезька юридична модель тимчасово недоступна, тому цю відповідь згенерувала загальна хмарна модель. Юридичний корпус, джерела та докази повністю доступні — перегляньте цитовані джерела, як зазвичай.',
|
||||
'pl' => 'Wyspecjalizowany, dostrojony norweski model prawny jest tymczasowo niedostępny, więc tę odpowiedź wygenerował ogólny model w chmurze. Korpus prawny, źródła i dowody są w pełni dostępne — przejrzyj cytowane źródła jak zwykle.',
|
||||
default => 'The specialized fine-tuned Norwegian legal model is temporarily offline, so this answer was generated by a general cloud model. The legal corpus, sources, and evidence are fully live — review the cited sources as usual.',
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user