From a4b5b6e3f2a3b54c26e356cf40dbfdb90b74e339 Mon Sep 17 00:00:00 2001 From: davegilligan Date: Mon, 15 Jun 2026 17:35:22 +0200 Subject: [PATCH] fix(tools): route quick/pro tiers to Haiku/Sonnet on Bedrock Tier engine strings (claude_haiku/claude_sonnet) were stripped back to azure_mini by per-method whitelists, so both tiers ran gpt-4o-mini and Pro charged 2x for the same model. Add a shared DbnBedrockModelRouter::deploymentForEngine() helper and route the cloud path through it across summarize, ask, barnevernet, discrepancy, deep-research, and korrespond. Co-Authored-By: Claude Opus 4.7 --- api/korrespond.php | 2 +- includes/BvjAnalyzerAgent.php | 10 ++++------ includes/DbnBedrockModelRouter.php | 16 ++++++++++++++++ includes/DeepResearchAgent.php | 10 +++++----- includes/DiscrepancyAgent.php | 10 ++++------ includes/LegalTools.php | 16 +++++++--------- 6 files changed, 37 insertions(+), 27 deletions(-) diff --git a/api/korrespond.php b/api/korrespond.php index 7643422..8cfc01c 100644 --- a/api/korrespond.php +++ b/api/korrespond.php @@ -173,7 +173,7 @@ try { $ftUid = dbnToolsFreeTierCheck('korrespond'); $engine = ToolModels::engineForUser($ftUid, 'azure_mini'); $inputEngine = (string)($input['engine'] ?? ''); - if (in_array($inputEngine, ['azure_mini', 'claude_sonnet'], true)) { + if (in_array($inputEngine, ['azure_mini', 'claude_haiku', 'claude_sonnet'], true)) { $engine = $inputEngine; } $run = ['credits' => null, 'metadata' => []]; diff --git a/includes/BvjAnalyzerAgent.php b/includes/BvjAnalyzerAgent.php index 65d1546..fdd18b2 100644 --- a/includes/BvjAnalyzerAgent.php +++ b/includes/BvjAnalyzerAgent.php @@ -75,7 +75,7 @@ final class DbnBvjAnalyzerAgent string $additionalNotes = '', ?callable $emit = null ): array { - $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true) + $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu', 'claude_haiku', 'claude_sonnet'], true) ? 
$engine : 'azure_mini'; $language = dbnToolsNormalizeUiLanguage($language); $controls = $this->normalizeControls($controls); @@ -941,11 +941,11 @@ PROMPT; ]; $opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 4500, 'timeout' => 240]; + $cloudDeploy = DbnBedrockModelRouter::deploymentForEngine($engine, $this->azure instanceof DbnBedrockGateway); $deployLabel = match ($engine) { 'gpu' => 'GPU (cuttlefish)', 'dbn_legal_v3' => 'dbn-legal-agent-v3', - 'azure_full' => 'gpt-4o', - default => $this->azure->chatDeployment(), + default => $cloudDeploy, }; $raw = ''; @@ -956,10 +956,8 @@ PROMPT; } elseif ($engine === 'gpu') { $response = dbnToolsCallGpuLlm($messages, $opts); $raw = (string)($response['choices'][0]['message']['content'] ?? ''); - } elseif ($engine === 'azure_full') { - $raw = $this->azure->withDeployment('gpt-4o')->chatText($messages, $opts); } else { - $raw = $this->azure->chatText($messages, $opts); + $raw = $this->azure->withDeployment($cloudDeploy)->chatText($messages, $opts); } } catch (Throwable $e) { dbnToolsAbort('Synthesis LLM request failed: ' . $e->getMessage(), 502, 'llm_error'); diff --git a/includes/DbnBedrockModelRouter.php b/includes/DbnBedrockModelRouter.php index 24d7558..4f37132 100644 --- a/includes/DbnBedrockModelRouter.php +++ b/includes/DbnBedrockModelRouter.php @@ -73,6 +73,22 @@ final class DbnBedrockModelRouter return $route['model'] ?? self::LITELLM_SONNET; } + /** + * Maps a quality-tier engine string to the LiteLLM deployment name passed to + * withDeployment(). claude_haiku/claude_sonnet route to Bedrock Claude when the + * active gateway is Bedrock; otherwise they degrade to the Azure GPT-4o family. + */ + public static function deploymentForEngine(string $engine, bool $isBedrock): string + { + switch ($engine) { + case 'claude_sonnet': return $isBedrock ? self::LITELLM_SONNET : 'gpt-4o'; + case 'claude_haiku': return $isBedrock ? 
self::LITELLM_HAIKU : 'gpt-4o-mini'; + case 'azure_full': return 'gpt-4o'; + case 'azure_mini': + default: return 'gpt-4o-mini'; + } + } + public static function supportsThinking(string $modelName): bool { return in_array($modelName, self::THINKING_MODELS, true); diff --git a/includes/DeepResearchAgent.php b/includes/DeepResearchAgent.php index b16ac37..40b12eb 100644 --- a/includes/DeepResearchAgent.php +++ b/includes/DeepResearchAgent.php @@ -1167,11 +1167,11 @@ PROMPT; } elseif ($engine === 'azure_full') { $raw = $this->azure->withDeployment('gpt-4o')->chatText($messages, $opts); $deployLabel = 'gpt-4o'; - } elseif ($engine === 'azure_mini' && $this->azure instanceof DbnBedrockGateway) { - // When Bedrock enabled, azure_mini → Haiku (fast, ~20-50s synthesis) - $haiku = $this->azure->withDeployment(DbnBedrockModelRouter::LITELLM_HAIKU); - $raw = $haiku->chatText($messages, array_merge($opts, ['timeout' => 90])); - $deployLabel = 'Claude Haiku 4.5 (AWS Bedrock)'; + } elseif ($engine === 'claude_haiku' || ($engine === 'azure_mini' && $this->azure instanceof DbnBedrockGateway)) { + // Quick tier (claude_haiku) and azure_mini-under-Bedrock → Haiku (fast, ~20-50s synthesis) + $deploy = DbnBedrockModelRouter::deploymentForEngine('claude_haiku', $this->azure instanceof DbnBedrockGateway); + $raw = $this->azure->withDeployment($deploy)->chatText($messages, array_merge($opts, ['timeout' => 90])); + $deployLabel = $deploy; $thinkingTrace = null; } elseif ($engine === 'claude_sonnet' || ($this->azure instanceof DbnBedrockGateway)) { if ( diff --git a/includes/DiscrepancyAgent.php b/includes/DiscrepancyAgent.php index d6aa3aa..e9e8924 100644 --- a/includes/DiscrepancyAgent.php +++ b/includes/DiscrepancyAgent.php @@ -49,7 +49,7 @@ final class DbnDiscrepancyAgent array $sliceSelection, ?callable $emit = null ): array { - $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true) ? 
$engine : 'azure_mini'; + $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu', 'claude_haiku', 'claude_sonnet'], true) ? $engine : 'azure_mini'; $language = dbnToolsNormalizeUiLanguage($language); $textA = mb_substr((string)($fileA['text'] ?? ''), 0, self::MAX_DOC_CHARS, 'UTF-8'); @@ -754,11 +754,11 @@ PROMPT; ): array { $locale = dbnToolsLanguageName($language); $sourceCount = count($numberedSources); + $cloudDeploy = DbnBedrockModelRouter::deploymentForEngine($engine, $this->azure instanceof DbnBedrockGateway); $deployLabel = match ($engine) { 'gpu' => 'GPU (cuttlefish)', 'dbn_legal_v3' => 'dbn-legal-agent-v3', - 'azure_full' => 'gpt-4o', - default => $this->azure->chatDeployment(), + default => $cloudDeploy, }; if (empty($numberedSources)) { @@ -872,10 +872,8 @@ PROMPT; } elseif ($engine === 'gpu') { $response = dbnToolsCallGpuLlm($messages, $opts); $raw = (string)($response['choices'][0]['message']['content'] ?? ''); - } elseif ($engine === 'azure_full') { - $raw = $this->azure->withDeployment('gpt-4o')->chatText($messages, $opts); } else { - $raw = $this->azure->chatText($messages, $opts); + $raw = $this->azure->withDeployment($cloudDeploy)->chatText($messages, $opts); } } catch (Throwable $e) { dbnToolsAbort('Synthesis LLM request failed: ' . $e->getMessage(), 502, 'llm_error'); diff --git a/includes/LegalTools.php b/includes/LegalTools.php index 2489fdb..d404d02 100644 --- a/includes/LegalTools.php +++ b/includes/LegalTools.php @@ -212,7 +212,7 @@ final class DbnLegalToolsService public function ask(string $question, string $language = 'en', string $engine = 'azure_mini', ?string $persona = null): array { - $engine = in_array($engine, ['azure_mini', 'azure_full'], true) ? $engine : 'azure_mini'; + $engine = in_array($engine, ['azure_mini', 'azure_full', 'claude_haiku', 'claude_sonnet'], true) ? 
$engine : 'azure_mini'; $client = dbnToolsRequireClient(); $personaResolved = dbnToolsResolvePersona((int)$client['id'], $persona); $search = $this->search($question, $language, 7, 'disabled', null, 'both', $personaResolved['slug']); @@ -1231,7 +1231,7 @@ PROMPT; error_log('[dbn-persona] gateway init failed for model ' . $model . ': ' . $e->getMessage()); } } - return [$this->azure, ($engine === 'azure_full') ? 'gpt-4o' : 'gpt-4o-mini']; + return [$this->azure, DbnBedrockModelRouter::deploymentForEngine($engine, $this->azure instanceof DbnBedrockGateway)]; } /** @@ -2060,7 +2060,7 @@ PROMPT; string $depth = 'standard' ): array { $text = $this->requirePasteText($text); - $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true) ? $engine : 'azure_mini'; + $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu', 'claude_haiku', 'claude_sonnet'], true) ? $engine : 'azure_mini'; $locale = dbnToolsLanguageName($language); @@ -2114,7 +2114,7 @@ PROMPT; ['role' => 'system', 'content' => $system], ['role' => 'user', 'content' => $prompt], ]; - $maxTok = ($engine === 'azure_full') ? 8000 : 4000; + $maxTok = in_array($engine, ['azure_full', 'claude_sonnet'], true) ? 
8000 : 4000; $chatOpts = ['json' => true, 'temperature' => 0.1, 'max_tokens' => $maxTok, 'timeout' => 120]; $deployLabel = $this->azure->chatDeployment(); @@ -2122,12 +2122,10 @@ PROMPT; if ($engine === 'gpu') { $response = $this->callGpuLlm($messages, $chatOpts); $deployLabel = 'GPU (local)'; - } elseif ($engine === 'azure_full') { - $response = $this->azure->withDeployment('gpt-4o')->chat($messages, $chatOpts); - $deployLabel = 'gpt-4o'; } else { - $response = $this->azure->withDeployment('gpt-4o-mini')->chat($messages, $chatOpts); - $deployLabel = 'gpt-4o-mini'; + $deploy = DbnBedrockModelRouter::deploymentForEngine($engine, $this->azure instanceof DbnBedrockGateway); + $response = $this->azure->withDeployment($deploy)->chat($messages, $chatOpts); + $deployLabel = $deploy; } } catch (Throwable $e) { dbnToolsAbort('LLM request failed: ' . $e->getMessage(), 502, 'llm_error');