Fix sub-question expansion timeout + engine routing for Bedrock advocate
- expandQueries(): truncate seedDescription to 2000 chars (full uploads were 48K+ tokens, exceeding the 35s timeout with Sonnet); switch to Haiku gateway when Bedrock is active (fast + adequate for sub-question generation); timeout → 60s
- interpretSeed(): same Haiku + 60s fix for English non-advocate path
- synthesise(): add explicit azure_mini + Bedrock → Haiku branch so the fast engine actually uses Haiku (~20-40s) instead of falling through to Sonnet (~180s)
- advocate.php: relabel azure_mini as "Claude Haiku 4.5 (fast)" with accurate timing; relabel claude_sonnet as "(thorough)" to reflect the distinction

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -501,8 +501,11 @@ PROMPT;
|
||||
]);
|
||||
$raw = (string)($resp['choices'][0]['message']['content'] ?? '');
|
||||
} else {
|
||||
$raw = $this->azure->chatText([$sysMsg, $userMsg],
|
||||
['json' => true, 'temperature' => 0.1, 'max_tokens' => 500, 'timeout' => 30]);
|
||||
$interpGateway = ($this->azure instanceof DbnBedrockGateway)
|
||||
? $this->azure->withDeployment(DbnBedrockModelRouter::LITELLM_HAIKU)
|
||||
: $this->azure;
|
||||
$raw = $interpGateway->chatText([$sysMsg, $userMsg],
|
||||
['json' => true, 'temperature' => 0.1, 'max_tokens' => 500, 'timeout' => 60]);
|
||||
}
|
||||
$json = $this->azure->decodeJsonObject($raw);
|
||||
if (is_array($json) && !empty($json['brief'])) {
|
||||
@@ -532,6 +535,10 @@ PROMPT;
|
||||
? "\nKey retrieval anchors (incorporate these terms into your sub-questions where relevant):\n" . implode(', ', $keySignals) . "\n"
|
||||
: '';
|
||||
|
||||
// Truncate seed to 2000 chars — $brief already captures the key context;
|
||||
// the full upload text (up to 192K chars) would push past the 60s timeout.
|
||||
$seedExcerpt = mb_strimwidth($seedDescription, 0, 2000, '…', 'UTF-8');
|
||||
|
||||
if ($advocateRole !== '') {
|
||||
$prompt = <<<PROMPT
|
||||
You are a Norwegian family-law research assistant building a case for: {$advocateRole}.
|
||||
@@ -546,7 +553,7 @@ Research brief:
|
||||
{$brief}
|
||||
{$anchorsLine}
|
||||
Raw input:
|
||||
{$seedDescription}
|
||||
{$seedExcerpt}
|
||||
|
||||
Return JSON only in {$locale}:
|
||||
{
|
||||
@@ -571,7 +578,7 @@ Research brief:
|
||||
{$brief}
|
||||
{$anchorsLine}
|
||||
Raw input:
|
||||
{$seedDescription}
|
||||
{$seedExcerpt}
|
||||
|
||||
Return JSON only:
|
||||
{
|
||||
@@ -600,8 +607,11 @@ PROMPT;
|
||||
]);
|
||||
$raw = (string)($resp['choices'][0]['message']['content'] ?? '');
|
||||
} else {
|
||||
$raw = $this->azure->chatText([$sysMsg, $userMsg],
|
||||
['json' => true, 'temperature' => 0.2, 'max_tokens' => 700, 'timeout' => 35]);
|
||||
$expGateway = ($this->azure instanceof DbnBedrockGateway)
|
||||
? $this->azure->withDeployment(DbnBedrockModelRouter::LITELLM_HAIKU)
|
||||
: $this->azure;
|
||||
$raw = $expGateway->chatText([$sysMsg, $userMsg],
|
||||
['json' => true, 'temperature' => 0.2, 'max_tokens' => 700, 'timeout' => 60]);
|
||||
}
|
||||
$json = $this->azure->decodeJsonObject($raw);
|
||||
$items = is_array($json['sub_questions'] ?? null) ? $json['sub_questions'] : [];
|
||||
@@ -1142,6 +1152,12 @@ PROMPT;
|
||||
} elseif ($engine === 'azure_full') {
|
||||
$raw = $this->azure->withDeployment('gpt-4o')->chatText($messages, $opts);
|
||||
$deployLabel = 'gpt-4o';
|
||||
} elseif ($engine === 'azure_mini' && $this->azure instanceof DbnBedrockGateway) {
|
||||
// When Bedrock enabled, azure_mini → Haiku (fast, ~20-40s synthesis)
|
||||
$haiku = $this->azure->withDeployment(DbnBedrockModelRouter::LITELLM_HAIKU);
|
||||
$raw = $haiku->chatText($messages, array_merge($opts, ['max_tokens' => 2500, 'timeout' => 90]));
|
||||
$deployLabel = 'Claude Haiku 4.5 (AWS Bedrock)';
|
||||
$thinkingTrace = null;
|
||||
} elseif ($engine === 'claude_sonnet' || ($this->azure instanceof DbnBedrockGateway)) {
|
||||
if (
|
||||
$this->azure instanceof DbnBedrockGateway
|
||||
|
||||
Reference in New Issue
Block a user