fix(tools): parse-harden Do Better Legal ask against leaky fine-tune output
The dbn-legal-agent-v3 fine-tune (Track 1 / family) emits a labelled-prose
template — duplicate `answer:` prefixes, markdown-escaped underscores (`\_`),
and a trailing raw JSON blob — rather than the strict JSON the Azure/gpt-4o
path produces via response_format. decodeJsonObject() returned null on that
invalid JSON, so ask() dumped the entire raw blob into `answer`.
Fix at the parse layer (no upstream response_format change, to avoid fighting
the fine-tune's training):
- dbnToolsRepairJsonText(): strip fences, drop only the invalid `\_`/`\*` escapes,
then run a balanced-brace scan that collects every top-level {...} block and
tries them longest first to recover an appended JSON object. Shared by both
gateways' decodeJsonObject(), so all JSON tools benefit.
- dbnToolsParseLabeledFields(): parse labelled-prose into real fields when no
JSON decodes, tolerating escaped key names and collapsing duplicate prefixes.
- ask() null-fallback now builds clean structured fields from the parsed prose
instead of dumping raw; what_remains_uncertain becomes a proper list.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -148,26 +148,7 @@ final class DbnAzureOpenAiGateway
|
|||||||
|
|
||||||
public function decodeJsonObject(string $content): ?array
|
public function decodeJsonObject(string $content): ?array
|
||||||
{
|
{
|
||||||
$content = trim($content);
|
return dbnToolsRepairJsonText($content);
|
||||||
$content = (string)preg_replace('/^```(?:json)?\s*\n?/i', '', $content);
|
|
||||||
$content = (string)preg_replace('/\n?```\s*$/', '', $content);
|
|
||||||
$content = trim($content);
|
|
||||||
|
|
||||||
$decoded = json_decode($content, true);
|
|
||||||
if (is_array($decoded)) {
|
|
||||||
return $decoded;
|
|
||||||
}
|
|
||||||
|
|
||||||
$start = strpos($content, '{');
|
|
||||||
$end = strrpos($content, '}');
|
|
||||||
if ($start !== false && $end !== false && $end > $start) {
|
|
||||||
$candidate = substr($content, $start, $end - $start + 1);
|
|
||||||
$decoded = json_decode($candidate, true);
|
|
||||||
if (is_array($decoded)) {
|
|
||||||
return $decoded;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function postJson(string $url, array $payload, int $timeout): array
|
private function postJson(string $url, array $payload, int $timeout): array
|
||||||
|
|||||||
@@ -140,26 +140,7 @@ final class DbnBedrockGateway
|
|||||||
|
|
||||||
public function decodeJsonObject(string $content): ?array
|
public function decodeJsonObject(string $content): ?array
|
||||||
{
|
{
|
||||||
$content = trim($content);
|
return dbnToolsRepairJsonText($content);
|
||||||
$content = (string)preg_replace('/^```(?:json)?\s*\n?/i', '', $content);
|
|
||||||
$content = (string)preg_replace('/\n?```\s*$/', '', $content);
|
|
||||||
$content = trim($content);
|
|
||||||
|
|
||||||
$decoded = json_decode($content, true);
|
|
||||||
if (is_array($decoded)) {
|
|
||||||
return $decoded;
|
|
||||||
}
|
|
||||||
|
|
||||||
$start = strpos($content, '{');
|
|
||||||
$end = strrpos($content, '}');
|
|
||||||
if ($start !== false && $end !== false && $end > $start) {
|
|
||||||
$candidate = substr($content, $start, $end - $start + 1);
|
|
||||||
$decoded = json_decode($candidate, true);
|
|
||||||
if (is_array($decoded)) {
|
|
||||||
return $decoded;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Bedrock-specific ──────────────────────────────────────────────────────
|
// ── Bedrock-specific ──────────────────────────────────────────────────────
|
||||||
|
|||||||
+23
-4
@@ -268,12 +268,31 @@ PROMPT;
|
|||||||
|
|
||||||
$json = $gateway->decodeJsonObject($raw);
|
$json = $gateway->decodeJsonObject($raw);
|
||||||
if (!$json) {
|
if (!$json) {
|
||||||
|
// Some fine-tuned models emit a labelled-prose template instead of JSON.
|
||||||
|
// Parse those labels into the real fields rather than dumping the raw blob.
|
||||||
|
$fields = dbnToolsParseLabeledFields($raw, [
|
||||||
|
'answer', 'what_we_found', 'evidence_trail', 'what_remains_uncertain', 'next_practical_step',
|
||||||
|
]);
|
||||||
|
$uncertain = trim((string)($fields['what_remains_uncertain'] ?? ''));
|
||||||
|
$uncertainList = $uncertain !== ''
|
||||||
|
? array_values(array_filter(array_map(
|
||||||
|
static fn(string $l): string => trim(ltrim($l, "-*• \t")),
|
||||||
|
preg_split('/\r?\n/', $uncertain) ?: []
|
||||||
|
), static fn(string $l): bool => $l !== ''))
|
||||||
|
: ['The response format could not be validated as structured JSON.'];
|
||||||
|
$cleanAnswer = trim((string)($fields['answer'] ?? ''));
|
||||||
|
if ($cleanAnswer === '') {
|
||||||
|
// No usable label — strip the trailing appended JSON blob from raw.
|
||||||
|
$cleanAnswer = trim((string)preg_replace('/\s*\{[\s\S]*$/', '', (string)preg_replace('/\\\\([_*])/', '$1', $raw)));
|
||||||
|
}
|
||||||
$json = [
|
$json = [
|
||||||
'answer' => $raw,
|
'answer' => $cleanAnswer !== '' ? $cleanAnswer : $raw,
|
||||||
'what_we_found' => 'Azure returned a plain-text answer based on the retrieved excerpts.',
|
'what_we_found' => trim((string)($fields['what_we_found'] ?? ''))
|
||||||
|
?: 'The model returned a plain-text answer based on the retrieved excerpts.',
|
||||||
'evidence_trail' => [],
|
'evidence_trail' => [],
|
||||||
'what_remains_uncertain' => ['The response format could not be validated as structured JSON.'],
|
'what_remains_uncertain' => $uncertainList,
|
||||||
'next_practical_step' => 'Review the source excerpts manually before relying on the answer.',
|
'next_practical_step' => trim((string)($fields['next_practical_step'] ?? ''))
|
||||||
|
?: 'Review the source excerpts manually before relying on the answer.',
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1461,6 +1461,117 @@ function dbnToolsExtractCleanAnswer(string $text): string
|
|||||||
return trim($text);
|
return trim($text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Robustly extract a JSON object from a model reply, tolerating the artifacts the
 * fine-tuned models leak: ```fences```, markdown-escaped underscores/asterisks
 * (`\_`, `\*` — never valid JSON escapes), and prose wrapped around a real JSON
 * blob. Shared by both gateways' decodeJsonObject(), so every JSON tool benefits.
 *
 * Steps: strip a leading/trailing code fence, drop the markdown escapes, try to
 * decode the whole text, then fall back to decoding each balanced top-level
 * {...} block found in the text, longest first.
 *
 * @param string $content Raw model reply.
 * @return array|null The decoded value (associative arrays), or null if nothing parses.
 */
function dbnToolsRepairJsonText(string $content): ?array
{
    $content = trim($content);
    // preg_* returns null on a PCRE failure; keep the previous text in that case
    // instead of silently collapsing the whole reply to an empty string.
    $content = preg_replace('/^```(?:json)?\s*\n?/i', '', $content) ?? $content;
    $content = preg_replace('/\n?```\s*$/', '', $content) ?? $content;

    // Drop only invalid markdown escapes; leave legitimate \n \" \\ \/ \t intact.
    // The pattern consumes the text one "backslash + next byte" pair at a time, so
    // an escaped backslash followed by an underscore (`\\_`) is seen as the pair
    // `\\` and then a plain `_`, and valid JSON is never altered.
    $content = preg_replace_callback(
        '/\\\\(.)/s',
        static fn(array $pair): string => ($pair[1] === '_' || $pair[1] === '*') ? $pair[1] : $pair[0],
        $content
    ) ?? $content;
    $content = trim($content);

    $decoded = json_decode($content, true);
    if (is_array($decoded)) {
        return $decoded;
    }

    // Collect every balanced top-level {...} block, then try the longest first —
    // handles "prose then appended JSON".
    $candidates = [];
    $depth = 0;
    $start = -1;
    $inStr = false;
    $escaped = false;
    $len = strlen($content);
    for ($i = 0; $i < $len; $i++) {
        $ch = $content[$i];
        if ($inStr) {
            // Inside a JSON string: braces are ignored; only an unescaped quote ends it.
            if ($escaped) {
                $escaped = false;
            } elseif ($ch === '\\') {
                $escaped = true;
            } elseif ($ch === '"') {
                $inStr = false;
            }
            continue;
        }
        if ($ch === '"') {
            // Only quotes inside a {...} block delimit JSON strings. A quote in the
            // surrounding prose (`5" nails`, an unbalanced quotation) must not put
            // the scanner into string mode, or the appended object would be missed.
            if ($depth > 0) {
                $inStr = true;
            }
        } elseif ($ch === '{') {
            if ($depth === 0) {
                $start = $i;
            }
            $depth++;
        } elseif ($ch === '}') {
            // A stray closing brace at depth 0 is prose; ignore it.
            if ($depth > 0) {
                $depth--;
                if ($depth === 0 && $start >= 0) {
                    $candidates[] = substr($content, $start, $i - $start + 1);
                    $start = -1;
                }
            }
        }
    }

    usort($candidates, static fn(string $a, string $b): int => strlen($b) <=> strlen($a));
    foreach ($candidates as $candidate) {
        $decoded = json_decode($candidate, true);
        if (is_array($decoded)) {
            return $decoded;
        }
    }

    return null;
}
|
||||||
|
|
||||||
|
/**
 * Parse a labelled-prose reply (`answer: ...`, `what_we_found: ...`) into an assoc
 * array keyed by $keys, for fine-tunes that ignore the JSON contract. Tolerates
 * markdown-escaped key names (`what\_we\_found`). Each value runs until the next
 * known key label; in the last field, everything from the first `{` onward is
 * treated as a trailing JSON blob and discarded. Any number of repeated
 * `key:` prefixes at the start of a value is collapsed.
 *
 * Only the first line-start occurrence of each label is used, and text before
 * the first label is ignored.
 *
 * @param string $text Raw model reply.
 * @param array  $keys Label names to look for (matched case-insensitively).
 * @return array Map of key => trimmed value, in order of appearance; only found keys.
 */
function dbnToolsParseLabeledFields(string $text, array $keys): array
{
    // Undo markdown escaping first so `what\_we\_found:` matches its key.
    $text = (string)preg_replace('/\\\\([_*])/', '$1', trim($text));
    if ($text === '' || $keys === []) {
        return [];
    }

    // Find each "key:" label position (line start, case-insensitive).
    $labels = [];
    foreach ($keys as $key) {
        $key = (string)$key;
        if (preg_match('/^\s*' . preg_quote($key, '/') . '\s*:/im', $text, $m, PREG_OFFSET_CAPTURE)) {
            $labelStart = $m[0][1];
            $labels[] = [
                'key' => $key,
                'start' => $labelStart,
                'value_start' => $labelStart + strlen($m[0][0]),
            ];
        }
    }
    if (!$labels) {
        return [];
    }
    // Order labels by where they occur so each value ends at the following label.
    usort($labels, static fn(array $a, array $b): int => $a['start'] <=> $b['start']);

    $out = [];
    $count = count($labels);
    foreach ($labels as $i => $label) {
        $isLast = ($i + 1 === $count);
        $end = $isLast ? strlen($text) : $labels[$i + 1]['start'];
        $value = substr($text, $label['value_start'], $end - $label['value_start']);

        // Drop a trailing appended JSON blob from the last field's value.
        if ($isLast) {
            $brace = strpos($value, '{');
            if ($brace !== false) {
                $value = substr($value, 0, $brace);
            }
        }

        // Collapse every duplicated "key:" prefix the model repeats at the start of
        // the value (`answer: answer: answer: ...`), not just the first one.
        $value = (string)preg_replace(
            '/^\s*(?:' . preg_quote($label['key'], '/') . '\s*:\s*)+/i',
            '',
            trim($value)
        );
        $out[$label['key']] = trim($value);
    }

    return $out;
}
|
||||||
|
|
||||||
function dbnToolsInferCheckSeverity(string $text): string
|
function dbnToolsInferCheckSeverity(string $text): string
|
||||||
{
|
{
|
||||||
if (preg_match('/ugyldig|§\s*41|kontradiksjon|klar nødvendighet|strand lobben|biologiske bånd/i', $text)) {
|
if (preg_match('/ugyldig|§\s*41|kontradiksjon|klar nødvendighet|strand lobben|biologiske bånd/i', $text)) {
|
||||||
|
|||||||
Reference in New Issue
Block a user