fix(tools): route quick/pro tiers to Haiku/Sonnet on Bedrock
Tier engine strings (claude_haiku/claude_sonnet) were stripped back to azure_mini by per-method whitelists, so both tiers ran gpt-4o-mini and Pro charged 2x for the same model. Add a shared DbnBedrockModelRouter::deploymentForEngine() helper and route the cloud path through it across summarize, ask, barnevernet, discrepancy, deep-research, and korrespond. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -173,7 +173,7 @@ try {
|
||||
$ftUid = dbnToolsFreeTierCheck('korrespond');
|
||||
$engine = ToolModels::engineForUser($ftUid, 'azure_mini');
|
||||
$inputEngine = (string)($input['engine'] ?? '');
|
||||
if (in_array($inputEngine, ['azure_mini', 'claude_sonnet'], true)) {
|
||||
if (in_array($inputEngine, ['azure_mini', 'claude_haiku', 'claude_sonnet'], true)) {
|
||||
$engine = $inputEngine;
|
||||
}
|
||||
$run = ['credits' => null, 'metadata' => []];
|
||||
|
||||
@@ -75,7 +75,7 @@ final class DbnBvjAnalyzerAgent
|
||||
string $additionalNotes = '',
|
||||
?callable $emit = null
|
||||
): array {
|
||||
$engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true)
|
||||
$engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu', 'claude_haiku', 'claude_sonnet'], true)
|
||||
? $engine : 'azure_mini';
|
||||
$language = dbnToolsNormalizeUiLanguage($language);
|
||||
$controls = $this->normalizeControls($controls);
|
||||
@@ -941,11 +941,11 @@ PROMPT;
|
||||
];
|
||||
$opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 4500, 'timeout' => 240];
|
||||
|
||||
$cloudDeploy = DbnBedrockModelRouter::deploymentForEngine($engine, $this->azure instanceof DbnBedrockGateway);
|
||||
$deployLabel = match ($engine) {
|
||||
'gpu' => 'GPU (cuttlefish)',
|
||||
'dbn_legal_v3' => 'dbn-legal-agent-v3',
|
||||
'azure_full' => 'gpt-4o',
|
||||
default => $this->azure->chatDeployment(),
|
||||
default => $cloudDeploy,
|
||||
};
|
||||
|
||||
$raw = '';
|
||||
@@ -956,10 +956,8 @@ PROMPT;
|
||||
} elseif ($engine === 'gpu') {
|
||||
$response = dbnToolsCallGpuLlm($messages, $opts);
|
||||
$raw = (string)($response['choices'][0]['message']['content'] ?? '');
|
||||
} elseif ($engine === 'azure_full') {
|
||||
$raw = $this->azure->withDeployment('gpt-4o')->chatText($messages, $opts);
|
||||
} else {
|
||||
$raw = $this->azure->chatText($messages, $opts);
|
||||
$raw = $this->azure->withDeployment($cloudDeploy)->chatText($messages, $opts);
|
||||
}
|
||||
} catch (Throwable $e) {
|
||||
dbnToolsAbort('Synthesis LLM request failed: ' . $e->getMessage(), 502, 'llm_error');
|
||||
|
||||
@@ -73,6 +73,22 @@ final class DbnBedrockModelRouter
|
||||
return $route['model'] ?? self::LITELLM_SONNET;
|
||||
}
|
||||
|
||||
/**
 * Resolve a quality-tier engine string to the deployment name that is handed
 * to withDeployment().
 *
 * The Claude tiers (claude_sonnet / claude_haiku) resolve to the LiteLLM
 * Sonnet / Haiku constants only when the active gateway is Bedrock; on any
 * other gateway they degrade to gpt-4o / gpt-4o-mini respectively.
 * azure_full always yields gpt-4o. azure_mini and any unrecognised engine
 * string yield gpt-4o-mini.
 *
 * @param string $engine    Engine identifier, e.g. 'claude_sonnet' or 'azure_mini'.
 * @param bool   $isBedrock Whether the active gateway is Bedrock.
 *
 * @return string Deployment name for the requested tier.
 */
public static function deploymentForEngine(string $engine, bool $isBedrock): string
{
    return match ($engine) {
        'claude_sonnet' => $isBedrock ? self::LITELLM_SONNET : 'gpt-4o',
        'claude_haiku' => $isBedrock ? self::LITELLM_HAIKU : 'gpt-4o-mini',
        'azure_full' => 'gpt-4o',
        // azure_mini and every unknown engine share the cheapest deployment.
        default => 'gpt-4o-mini',
    };
}
|
||||
|
||||
public static function supportsThinking(string $modelName): bool
|
||||
{
|
||||
return in_array($modelName, self::THINKING_MODELS, true);
|
||||
|
||||
@@ -1167,11 +1167,11 @@ PROMPT;
|
||||
} elseif ($engine === 'azure_full') {
|
||||
$raw = $this->azure->withDeployment('gpt-4o')->chatText($messages, $opts);
|
||||
$deployLabel = 'gpt-4o';
|
||||
} elseif ($engine === 'azure_mini' && $this->azure instanceof DbnBedrockGateway) {
|
||||
// When Bedrock enabled, azure_mini → Haiku (fast, ~20-50s synthesis)
|
||||
$haiku = $this->azure->withDeployment(DbnBedrockModelRouter::LITELLM_HAIKU);
|
||||
$raw = $haiku->chatText($messages, array_merge($opts, ['timeout' => 90]));
|
||||
$deployLabel = 'Claude Haiku 4.5 (AWS Bedrock)';
|
||||
} elseif ($engine === 'claude_haiku' || ($engine === 'azure_mini' && $this->azure instanceof DbnBedrockGateway)) {
|
||||
// Quick tier (claude_haiku) and azure_mini-under-Bedrock → Haiku (fast, ~20-50s synthesis)
|
||||
$deploy = DbnBedrockModelRouter::deploymentForEngine('claude_haiku', $this->azure instanceof DbnBedrockGateway);
|
||||
$raw = $this->azure->withDeployment($deploy)->chatText($messages, array_merge($opts, ['timeout' => 90]));
|
||||
$deployLabel = $deploy;
|
||||
$thinkingTrace = null;
|
||||
} elseif ($engine === 'claude_sonnet' || ($this->azure instanceof DbnBedrockGateway)) {
|
||||
if (
|
||||
|
||||
@@ -49,7 +49,7 @@ final class DbnDiscrepancyAgent
|
||||
array $sliceSelection,
|
||||
?callable $emit = null
|
||||
): array {
|
||||
$engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true) ? $engine : 'azure_mini';
|
||||
$engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu', 'claude_haiku', 'claude_sonnet'], true) ? $engine : 'azure_mini';
|
||||
$language = dbnToolsNormalizeUiLanguage($language);
|
||||
|
||||
$textA = mb_substr((string)($fileA['text'] ?? ''), 0, self::MAX_DOC_CHARS, 'UTF-8');
|
||||
@@ -754,11 +754,11 @@ PROMPT;
|
||||
): array {
|
||||
$locale = dbnToolsLanguageName($language);
|
||||
$sourceCount = count($numberedSources);
|
||||
$cloudDeploy = DbnBedrockModelRouter::deploymentForEngine($engine, $this->azure instanceof DbnBedrockGateway);
|
||||
$deployLabel = match ($engine) {
|
||||
'gpu' => 'GPU (cuttlefish)',
|
||||
'dbn_legal_v3' => 'dbn-legal-agent-v3',
|
||||
'azure_full' => 'gpt-4o',
|
||||
default => $this->azure->chatDeployment(),
|
||||
default => $cloudDeploy,
|
||||
};
|
||||
|
||||
if (empty($numberedSources)) {
|
||||
@@ -872,10 +872,8 @@ PROMPT;
|
||||
} elseif ($engine === 'gpu') {
|
||||
$response = dbnToolsCallGpuLlm($messages, $opts);
|
||||
$raw = (string)($response['choices'][0]['message']['content'] ?? '');
|
||||
} elseif ($engine === 'azure_full') {
|
||||
$raw = $this->azure->withDeployment('gpt-4o')->chatText($messages, $opts);
|
||||
} else {
|
||||
$raw = $this->azure->chatText($messages, $opts);
|
||||
$raw = $this->azure->withDeployment($cloudDeploy)->chatText($messages, $opts);
|
||||
}
|
||||
} catch (Throwable $e) {
|
||||
dbnToolsAbort('Synthesis LLM request failed: ' . $e->getMessage(), 502, 'llm_error');
|
||||
|
||||
@@ -212,7 +212,7 @@ final class DbnLegalToolsService
|
||||
|
||||
public function ask(string $question, string $language = 'en', string $engine = 'azure_mini', ?string $persona = null): array
|
||||
{
|
||||
$engine = in_array($engine, ['azure_mini', 'azure_full'], true) ? $engine : 'azure_mini';
|
||||
$engine = in_array($engine, ['azure_mini', 'azure_full', 'claude_haiku', 'claude_sonnet'], true) ? $engine : 'azure_mini';
|
||||
$client = dbnToolsRequireClient();
|
||||
$personaResolved = dbnToolsResolvePersona((int)$client['id'], $persona);
|
||||
$search = $this->search($question, $language, 7, 'disabled', null, 'both', $personaResolved['slug']);
|
||||
@@ -1231,7 +1231,7 @@ PROMPT;
|
||||
error_log('[dbn-persona] gateway init failed for model ' . $model . ': ' . $e->getMessage());
|
||||
}
|
||||
}
|
||||
return [$this->azure, ($engine === 'azure_full') ? 'gpt-4o' : 'gpt-4o-mini'];
|
||||
return [$this->azure, DbnBedrockModelRouter::deploymentForEngine($engine, $this->azure instanceof DbnBedrockGateway)];
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -2060,7 +2060,7 @@ PROMPT;
|
||||
string $depth = 'standard'
|
||||
): array {
|
||||
$text = $this->requirePasteText($text);
|
||||
$engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true) ? $engine : 'azure_mini';
|
||||
$engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu', 'claude_haiku', 'claude_sonnet'], true) ? $engine : 'azure_mini';
|
||||
|
||||
$locale = dbnToolsLanguageName($language);
|
||||
|
||||
@@ -2114,7 +2114,7 @@ PROMPT;
|
||||
['role' => 'system', 'content' => $system],
|
||||
['role' => 'user', 'content' => $prompt],
|
||||
];
|
||||
$maxTok = ($engine === 'azure_full') ? 8000 : 4000;
|
||||
$maxTok = in_array($engine, ['azure_full', 'claude_sonnet'], true) ? 8000 : 4000;
|
||||
$chatOpts = ['json' => true, 'temperature' => 0.1, 'max_tokens' => $maxTok, 'timeout' => 120];
|
||||
|
||||
$deployLabel = $this->azure->chatDeployment();
|
||||
@@ -2122,12 +2122,10 @@ PROMPT;
|
||||
if ($engine === 'gpu') {
|
||||
$response = $this->callGpuLlm($messages, $chatOpts);
|
||||
$deployLabel = 'GPU (local)';
|
||||
} elseif ($engine === 'azure_full') {
|
||||
$response = $this->azure->withDeployment('gpt-4o')->chat($messages, $chatOpts);
|
||||
$deployLabel = 'gpt-4o';
|
||||
} else {
|
||||
$response = $this->azure->withDeployment('gpt-4o-mini')->chat($messages, $chatOpts);
|
||||
$deployLabel = 'gpt-4o-mini';
|
||||
$deploy = DbnBedrockModelRouter::deploymentForEngine($engine, $this->azure instanceof DbnBedrockGateway);
|
||||
$response = $this->azure->withDeployment($deploy)->chat($messages, $chatOpts);
|
||||
$deployLabel = $deploy;
|
||||
}
|
||||
} catch (Throwable $e) {
|
||||
dbnToolsAbort('LLM request failed: ' . $e->getMessage(), 502, 'llm_error');
|
||||
|
||||
Reference in New Issue
Block a user