c84ed2ed78
The dbn-legal-agent-v3 fine-tune (Track 1 / family) emits a labelled-prose
template — duplicate `answer:` prefixes, markdown-escaped underscores (`\_`),
and a trailing raw JSON blob — rather than the strict JSON the Azure/gpt-4o
path produces via response_format. decodeJsonObject() returned null on that
invalid JSON, so ask() dumped the entire raw blob into `answer`.
Fix at the parse layer (no upstream response_format change, to avoid fighting
the fine-tune's training):
- dbnToolsRepairJsonText(): strip fences, drop only invalid `\_`/`\*` escapes,
then balanced-brace scan collecting every top-level {...} (longest first) to
recover an appended JSON object. Shared by both gateways' decodeJsonObject(),
so all JSON tools benefit.
- dbnToolsParseLabeledFields(): parse labelled-prose into real fields when no
JSON decodes, tolerating escaped key names and collapsing duplicate prefixes.
- ask() null-fallback now builds clean structured fields from the parsed prose
instead of dumping raw; what_remains_uncertain becomes a proper list.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
266 lines
10 KiB
PHP
266 lines
10 KiB
PHP
<?php
|
|
declare(strict_types=1);
|
|
|
|
require_once __DIR__ . '/bootstrap.php';
|
|
|
|
/**
 * Bedrock Claude gateway — routes through LiteLLM on Colin (10.0.1.10:4000).
 * AWS credentials live only in the LiteLLM config; this class never touches them.
 * nova-lite already works this way; the Claude models follow the same pattern.
 *
 * LiteLLM model names to add to Colin's config:
 *   claude-haiku-bedrock  → anthropic.claude-3-haiku-20240307-v1:0 (bedrock provider)
 *   claude-sonnet-bedrock → anthropic.claude-3-5-sonnet-20241022-v2:0 (bedrock provider)
 */
|
|
final class DbnBedrockGateway
|
|
{
|
|
private string $liteLlmUrl;
|
|
private string $liteLlmKey;
|
|
private string $chatModelName; // LiteLLM model name, e.g. 'claude-sonnet-bedrock'
|
|
private string $embeddingModelName;
|
|
|
|
/**
 * Build a gateway bound to the LiteLLM proxy.
 *
 * Values read through dbnToolsEnv():
 *   - LITELLM_BASE_URL            (default 'http://10.0.1.10:4000'); trailing slashes are
 *                                 stripped and '/v1/chat/completions' is appended.
 *   - LITELLM_MASTER_KEY          bearer token sent to the proxy (no default).
 *   - DBN_BEDROCK_CHAT_MODEL      (default 'claude-sonnet-bedrock').
 *   - DBN_BEDROCK_EMBEDDING_MODEL (default 'amazon.titan-embed-text-v2:0').
 *
 * @param array<string, mixed>|null $config Optional overrides; recognised keys are
 *                                          'chat_model_name' and 'embedding_model_name'.
 */
public function __construct(?array $config = null)
{
    $base = rtrim((string)dbnToolsEnv('LITELLM_BASE_URL', 'http://10.0.1.10:4000'), '/');
    $this->liteLlmUrl = $base . '/v1/chat/completions';

    // SECURITY: the bearer token must come from the environment only. A literal key
    // used to be embedded here as a fallback; anything committed to source control
    // has to be treated as leaked, so that key should be rotated on the proxy.
    // With no key configured the token is empty and the proxy decides whether to
    // accept the request.
    $this->liteLlmKey = (string)(dbnToolsEnv('LITELLM_MASTER_KEY') ?: '');

    $this->chatModelName = $config['chat_model_name']
        ?? (string)dbnToolsEnv('DBN_BEDROCK_CHAT_MODEL', 'claude-sonnet-bedrock');
    $this->embeddingModelName = $config['embedding_model_name']
        ?? (string)dbnToolsEnv('DBN_BEDROCK_EMBEDDING_MODEL', 'amazon.titan-embed-text-v2:0');
}
|
|
|
|
// ── Interface parity with DbnAzureOpenAiGateway ───────────────────────────
|
|
|
|
/**
 * List the chat settings that are blank.
 *
 * @return list<string> Zero or more of 'litellm_url', 'chat_model_name' (in that order);
 *                      empty when both values contain non-whitespace text.
 */
public function missingChatConfig(): array
{
    $required = [
        'litellm_url'     => $this->liteLlmUrl,
        'chat_model_name' => $this->chatModelName,
    ];

    $absent = [];
    foreach ($required as $label => $value) {
        if (trim($value) === '') {
            $absent[] = $label;
        }
    }

    return $absent;
}
|
|
|
|
/**
 * List the embedding settings that are blank.
 *
 * @return list<string> ['embedding_model_name'] when the model name is empty or
 *                      whitespace-only, otherwise an empty list.
 */
public function missingEmbeddingConfig(): array
{
    if (trim($this->embeddingModelName) !== '') {
        return [];
    }

    return ['embedding_model_name'];
}
|
|
|
|
/**
 * Guard for chat calls: does nothing when the chat configuration is complete.
 *
 * When missingChatConfig() reports anything, dbnToolsAbort() is invoked with
 * status 503, the code 'bedrock_config_missing' and the list of missing keys.
 * NOTE(review): dbnToolsAbort() is defined elsewhere and presumably ends the
 * request — confirm.
 */
public function requireChat(): void
{
    $absent = $this->missingChatConfig();
    if (!$absent) {
        return;
    }

    $summary = 'Bedrock gateway (LiteLLM) is missing configuration: ' . implode(', ', $absent) . '.';
    dbnToolsAbort($summary, 503, 'bedrock_config_missing', ['missing' => $absent]);
}
|
|
|
|
/**
 * Guard for embedding calls: does nothing when the embedding configuration is complete.
 *
 * When missingEmbeddingConfig() reports anything, dbnToolsAbort() is invoked with
 * status 503, the code 'bedrock_embedding_config_missing' and the list of missing keys.
 * NOTE(review): dbnToolsAbort() is defined elsewhere and presumably ends the
 * request — confirm.
 */
public function requireEmbedding(): void
{
    $absent = $this->missingEmbeddingConfig();
    if (!$absent) {
        return;
    }

    $summary = 'Bedrock embedding gateway (LiteLLM) missing: ' . implode(', ', $absent) . '.';
    dbnToolsAbort($summary, 503, 'bedrock_embedding_config_missing', ['missing' => $absent]);
}
|
|
|
|
/**
 * Return a copy of this gateway that targets a different chat model.
 *
 * The receiver is left untouched; only the clone's chat model name changes.
 *
 * @param string $modelName LiteLLM model name to use for chat calls on the copy.
 */
public function withDeployment(string $modelName): static
{
    $copy = clone $this;
    $copy->chatModelName = $modelName;

    return $copy;
}
|
|
|
|
/**
 * LiteLLM model name currently used for chat requests.
 */
public function chatDeployment(): string
{
    $activeModel = $this->chatModelName;

    return $activeModel;
}
|
|
|
|
/**
 * Model name currently used for embedding requests.
 */
public function embeddingDeployment(): string
{
    $activeModel = $this->embeddingModelName;

    return $activeModel;
}
|
|
|
|
/**
 * Send a chat-completions request to the LiteLLM proxy and return the decoded response.
 *
 * @param array $messages Chat messages, forwarded unchanged as the 'messages' field.
 * @param array $options  Recognised keys: 'temperature' (float, default 0.2),
 *                        'max_tokens' (int, default 1200), 'timeout' (seconds, default 90).
 * @return array Decoded JSON body of the proxy response.
 * @throws RuntimeException When the request cannot be encoded or sent, or the proxy
 *                          answers with non-JSON or a non-2xx status.
 */
public function chat(array $messages, array $options = []): array
{
    $this->requireChat();

    // response_format is intentionally omitted for Claude via Bedrock.
    // LiteLLM converts json_object to a tool-use constraint, routing output
    // into tool_calls instead of content. Claude follows JSON instructions
    // in the system prompt without needing response_format.
    $payload = [
        'model'       => $this->chatModelName,
        'messages'    => $messages,
        'temperature' => (float)($options['temperature'] ?? 0.2),
        // Cast like chatWithThinking() does, so a numeric string from a caller
        // is serialised as a JSON number rather than a JSON string.
        'max_tokens'  => (int)($options['max_tokens'] ?? 1200),
    ];

    return $this->postJson($this->liteLlmUrl, $payload, (int)($options['timeout'] ?? 90));
}
|
|
|
|
/**
 * Run chat() and return only the first choice's message content, trimmed.
 *
 * @param array $messages Passed through to chat().
 * @param array $options  Passed through to chat().
 * @throws RuntimeException When the content is missing, not a string, or blank
 *                          (in addition to anything chat() throws).
 */
public function chatText(array $messages, array $options = []): string
{
    $reply = $this->chat($messages, $options);
    $text = $reply['choices'][0]['message']['content'] ?? '';

    if (is_string($text)) {
        $text = trim($text);
        if ($text !== '') {
            return $text;
        }
    }

    throw new RuntimeException('Bedrock (LiteLLM) returned an empty chat response.');
}
|
|
|
|
/**
 * Request embeddings from the LiteLLM proxy's /v1/embeddings endpoint.
 *
 * The base URL is read from LITELLM_BASE_URL at call time (default
 * 'http://10.0.1.10:4000'), independently of the chat URL stored at construction.
 *
 * @param array|string $input   Forwarded unchanged as the 'input' field.
 * @param array        $options Recognised key: 'timeout' (seconds, default 30).
 * @return array Decoded JSON body of the proxy response.
 * @throws RuntimeException On transport failure, non-JSON, or a non-2xx status.
 */
public function embeddings(array|string $input, array $options = []): array
{
    $this->requireEmbedding();

    $base = rtrim((string)dbnToolsEnv('LITELLM_BASE_URL', 'http://10.0.1.10:4000'), '/');
    $request = [
        'model' => $this->embeddingModelName,
        'input' => $input,
    ];
    $timeout = (int)($options['timeout'] ?? 30);

    return $this->postJson($base . '/v1/embeddings', $request, $timeout);
}
|
|
|
|
/**
 * Health check: sends a tiny chat request and reports whether any text came back.
 *
 * Never throws; any Throwable is written to error_log() and reported as false.
 *
 * @param int $timeout Request timeout in seconds.
 */
public function ping(int $timeout = 8): bool
{
    $probe = [
        ['role' => 'system', 'content' => 'Return one word only: ok'],
        ['role' => 'user', 'content' => 'health'],
    ];
    $settings = ['temperature' => 0, 'max_tokens' => 5, 'timeout' => $timeout];

    try {
        return trim($this->chatText($probe, $settings)) !== '';
    } catch (Throwable $failure) {
        error_log('DBN Bedrock (LiteLLM) health check failed: ' . $failure->getMessage());
    }

    return false;
}
|
|
|
|
/**
 * Decode model output into an associative array.
 *
 * Thin wrapper over the shared dbnToolsRepairJsonText() helper, which is defined
 * outside this file. NOTE(review): null presumably means no JSON object could be
 * recovered — confirm against the helper.
 *
 * @param string $content Raw text returned by the model.
 */
public function decodeJsonObject(string $content): ?array
{
    $decoded = dbnToolsRepairJsonText($content);

    return $decoded;
}
|
|
|
|
// ── Bedrock-specific ──────────────────────────────────────────────────────
|
|
|
|
/**
 * Extended thinking via LiteLLM — passes thinking params through to Bedrock.
 * LiteLLM forwards additionalModelRequestFields to the Bedrock Converse API.
 *
 * @param array $messages Chat messages, forwarded unchanged.
 * @param array $options  Recognised keys:
 *                        - 'thinking_budget' (int, default 8000; raised to 1024 if lower,
 *                          the documented provider minimum),
 *                        - 'max_tokens' (int, default max(budget + 4000, 16000); forced
 *                          above the budget when it is not),
 *                        - 'timeout' (seconds, default 300).
 * @return array{text: string, thinking: mixed, usage: array}
 *               'thinking' is the reasoning text when the proxy returned any, else null.
 * @throws RuntimeException On transport failure, non-JSON, or a non-2xx status.
 */
public function chatWithThinking(array $messages, array $options = []): array
{
    $this->requireChat();

    // Budgets below the provider minimum are rejected upstream; clamp instead of failing.
    $budget = max(1024, (int)($options['thinking_budget'] ?? 8000));
    $maxTokens = (int)($options['max_tokens'] ?? max($budget + 4000, 16000));
    if ($maxTokens <= $budget) {
        // max_tokens has to exceed the thinking budget, or no room is left for the answer.
        $maxTokens = $budget + 4000;
    }

    $payload = [
        'model'       => $this->chatModelName,
        'messages'    => $messages,
        'temperature' => 1.0, // required for extended thinking
        'max_tokens'  => $maxTokens,
        'thinking'    => [ // LiteLLM passes this to Bedrock as additionalModelRequestFields
            'type'          => 'enabled',
            'budget_tokens' => $budget,
        ],
    ];

    $response = $this->postJson($this->liteLlmUrl, $payload, (int)($options['timeout'] ?? 300));

    $message = $response['choices'][0]['message'] ?? [];
    if (!is_array($message)) {
        $message = [];
    }

    // LiteLLM may surface thinking in a message-level field ('thinking', or the
    // documented 'reasoning_content') or as special content blocks.
    $content = $message['content'] ?? '';
    $thinking = $message['thinking'] ?? $message['reasoning_content'] ?? null;

    // If content is an array of blocks (pass-through of Bedrock format), extract text+thinking.
    if (is_array($content)) {
        $textParts = [];
        $thinkingParts = [];
        foreach ($content as $block) {
            if (!is_array($block)) {
                continue;
            }
            $type = $block['type'] ?? '';
            if ($type === 'thinking' && is_string($block['thinking'] ?? null)) {
                $thinkingParts[] = $block['thinking'];
            } elseif ($type === 'text' && is_string($block['text'] ?? null)) {
                $textParts[] = $block['text'];
            }
        }
        $content = implode('', $textParts);

        // Blocks win over the message-level field, but a message-level value is kept
        // when the blocks carry no thinking; several thinking blocks are all preserved.
        if ($thinkingParts !== []) {
            $thinking = implode("\n\n", $thinkingParts);
        }
    }

    return [
        'text'     => trim((string)$content),
        'thinking' => $thinking,
        'usage'    => $response['usage'] ?? [],
    ];
}
|
|
|
|
// ── Private: HTTP ─────────────────────────────────────────────────────────
|
|
|
|
/**
 * POST a JSON payload to the proxy and return the decoded JSON response.
 *
 * Uses cURL when the extension is loaded, otherwise falls back to the http stream
 * wrapper. Both paths hand the raw body and HTTP status to decodeResponse().
 *
 * @param string $url     Absolute endpoint URL.
 * @param array  $payload Request body; encoded with unescaped unicode and slashes.
 * @param int    $timeout Overall request timeout in seconds.
 * @return array Decoded JSON body.
 * @throws RuntimeException When encoding fails, the transport fails, or
 *                          decodeResponse() rejects the response.
 */
private function postJson(string $url, array $payload, int $timeout): array
{
    $body = json_encode($payload, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    if ($body === false) {
        throw new RuntimeException('Unable to encode Bedrock (LiteLLM) request.');
    }

    $headers = [
        'Content-Type: application/json',
        'Authorization: Bearer ' . $this->liteLlmKey,
    ];

    if (function_exists('curl_init')) {
        $ch = curl_init($url);
        if ($ch === false) {
            // curl_init() can fail; passing false on would raise a TypeError instead
            // of the RuntimeException callers expect from this class.
            throw new RuntimeException('Bedrock (LiteLLM) cURL failed: unable to initialise cURL handle');
        }

        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_POST           => true,
            CURLOPT_POSTFIELDS     => $body,
            CURLOPT_HTTPHEADER     => $headers,
            CURLOPT_TIMEOUT        => $timeout,
        ]);
        $response = curl_exec($ch);
        $code = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
        $error = curl_error($ch);
        // No curl_close(): it has been a no-op since PHP 8.0; the handle is released
        // when $ch goes out of scope.

        if ($response === false) {
            throw new RuntimeException('Bedrock (LiteLLM) cURL failed: ' . $error);
        }

        return $this->decodeResponse($response, $code);
    }

    $context = stream_context_create(['http' => [
        'method'        => 'POST',
        'header'        => implode("\r\n", $headers),
        'content'       => $body,
        'timeout'       => $timeout,
        'ignore_errors' => true, // keep the body of 4xx/5xx replies so the error can be decoded
    ]]);

    // Warnings are suppressed on purpose: failure is reported through the exception below.
    $response = @file_get_contents($url, false, $context);
    if ($response === false) {
        throw new RuntimeException('Bedrock (LiteLLM) request failed.');
    }

    // $http_response_header is populated by the http wrapper in this scope. Take the
    // LAST status line so a redirect chain reports the final status, and do not
    // require a reason phrase after the code ("HTTP/1.1 200" is accepted).
    $code = 0;
    foreach ($http_response_header ?? [] as $headerLine) {
        if (preg_match('#^HTTP/\S+\s+(\d{3})(?:\s|$)#', (string)$headerLine, $m) === 1) {
            $code = (int)$m[1];
        }
    }

    return $this->decodeResponse($response, $code);
}
|
|
|
|
/**
 * Decode a raw proxy response and turn error statuses into exceptions.
 *
 * The error text is taken from the first non-empty string among:
 * error.message, error (when it is itself a string), message, detail.
 * If none is present the text is 'HTTP <code>'.
 *
 * @param string $response Raw response body.
 * @param int    $code     HTTP status code (0 when it could not be determined).
 * @return array Decoded JSON body for 2xx responses.
 * @throws RuntimeException When the body is not a JSON object/array, or the status
 *                          is outside 200-299.
 */
private function decodeResponse(string $response, int $code): array
{
    $decoded = json_decode($response, true);
    if (!is_array($decoded)) {
        throw new RuntimeException('Bedrock (LiteLLM) returned non-JSON (HTTP ' . $code . ').');
    }

    if ($code >= 200 && $code < 300) {
        return $decoded;
    }

    // 'error' may be an object carrying 'message' or a bare string; some servers use
    // a top-level 'message' or 'detail' instead. Non-string values are skipped so an
    // array is never concatenated into the exception message.
    $error = $decoded['error'] ?? null;
    $candidates = [
        is_array($error) ? ($error['message'] ?? null) : $error,
        $decoded['message'] ?? null,
        $decoded['detail'] ?? null,
    ];

    $message = 'HTTP ' . $code;
    foreach ($candidates as $candidate) {
        if (is_string($candidate) && $candidate !== '') {
            $message = $candidate;
            break;
        }
    }

    throw new RuntimeException('Bedrock (LiteLLM) request failed: ' . $message);
}
|
|
}
|