refactor: remove dbn_legal engine from BVJ Analyzer
dbn-legal-agent is not suitable for structured RAG synthesis:
- Fine-tune contamination: the model appends feedback loops after the JSON output
- 7-min latency vs. 45 s for gpt-4o-mini
- The 8B base model gives weaker instruction-following on complex JSON contracts
- No improvement in citation accuracy (RAG already provides the legal content)

dbn-legal-agent is kept for open-ended, freeform Norwegian legal Q&A where citation structure isn't required. BVJ synthesis now uses azure_mini|azure_full|gpu only.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -43,7 +43,7 @@ final class DbnBvjAnalyzerAgent
|
||||
*
|
||||
* @param array $uploadedFiles [{filename, text, chars, truncated}]
|
||||
* @param string $advocateRole Party the user represents
|
||||
* @param string $engine Affects synthesis only: azure_mini|azure_full|gpu|dbn_legal
|
||||
* @param string $engine Affects synthesis only: azure_mini|azure_full|gpu
|
||||
* @param string $language 'en' or 'no'
|
||||
* @param array $sliceSelection Corpus slice toggles
|
||||
* @param array $controls sub_q_count, chunk_limit, similarity_threshold, reranker_top_k, temperature
|
||||
@@ -60,7 +60,7 @@ final class DbnBvjAnalyzerAgent
|
||||
string $additionalNotes = '',
|
||||
?callable $emit = null
|
||||
): array {
|
||||
$engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu', 'dbn_legal'], true)
|
||||
$engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true)
|
||||
? $engine : 'azure_mini';
|
||||
$language = in_array($language, ['en', 'no'], true) ? $language : 'en';
|
||||
$controls = $this->normalizeControls($controls);
|
||||
@@ -350,7 +350,6 @@ final class DbnBvjAnalyzerAgent
|
||||
$engineLabel = match ($engine) {
|
||||
'azure_full' => 'Azure gpt-4o',
|
||||
'gpu' => 'GPU qwen2.5:14b',
|
||||
'dbn_legal' => 'dbn-legal-agent',
|
||||
default => 'Azure gpt-4o-mini',
|
||||
};
|
||||
$emitRunning('synthesis', 'Synthesis',
|
||||
@@ -724,7 +723,6 @@ PROMPT;
|
||||
],
|
||||
'deploy_label' => match($engine) {
|
||||
'gpu' => 'GPU (cuttlefish)',
|
||||
'dbn_legal' => 'dbn-legal-agent',
|
||||
'azure_full' => 'gpt-4o',
|
||||
default => $this->azure->chatDeployment(),
|
||||
},
|
||||
@@ -842,13 +840,7 @@ Rules:
|
||||
- Respond in {$locale}.
|
||||
PROMPT;
|
||||
|
||||
$sysPrompt = $engine === 'dbn_legal'
|
||||
// dbn-legal-agent has a fine-tune that appends self-rating feedback loops after the
|
||||
// JSON. Explicitly forbid that pattern and stop before it can start.
|
||||
? 'You output valid JSON only. Output the JSON object, then stop immediately. '
|
||||
. 'Do not add any self-assessment, rating, "END OF MESSAGE", feedback loop, '
|
||||
. 'USER/SYSTEM turns, or any text after the closing brace of the JSON object.'
|
||||
: 'You return valid JSON only. No markdown fences.';
|
||||
$sysPrompt = 'You return valid JSON only. No markdown fences.';
|
||||
|
||||
$messages = [
|
||||
['role' => 'system', 'content' => $sysPrompt],
|
||||
@@ -858,35 +850,13 @@ PROMPT;
|
||||
|
||||
$deployLabel = match ($engine) {
|
||||
'gpu' => 'GPU (cuttlefish)',
|
||||
'dbn_legal' => 'dbn-legal-agent',
|
||||
'azure_full' => 'gpt-4o',
|
||||
default => $this->azure->chatDeployment(),
|
||||
};
|
||||
|
||||
$raw = '';
|
||||
try {
|
||||
if ($engine === 'dbn_legal') {
|
||||
// dbn-legal-agent is slow (~6 t/s on cuttlefish). Stream the response and emit
|
||||
// keepalive events every 15 s so the browser connection stays alive.
|
||||
$raw = $this->callGpuLlmStream($messages, [
|
||||
'model' => 'dbn-legal-agent',
|
||||
'temperature' => $temperature,
|
||||
'max_tokens' => 2800,
|
||||
'timeout' => 660,
|
||||
// Stop sequences cut generation the moment the feedback loop tries to start.
|
||||
'stop' => ["\nEND OF MESSAGE", "\nPlease rate", "\nUSER:", "지금 번역하기"],
|
||||
], $emit ? static function () use ($emit): void {
|
||||
$emit('progress', ['detail' => 'dbn-legal-agent generating…']);
|
||||
} : null);
|
||||
|
||||
// Belt-and-suspenders: even with stop sequences the model may still include
|
||||
// preamble or trailing junk. Extract only the first complete {...} object.
|
||||
$jsonStart = strpos($raw, '{');
|
||||
$jsonEnd = strrpos($raw, '}');
|
||||
if ($jsonStart !== false && $jsonEnd !== false && $jsonEnd > $jsonStart) {
|
||||
$raw = substr($raw, $jsonStart, $jsonEnd - $jsonStart + 1);
|
||||
}
|
||||
} elseif ($engine === 'gpu') {
|
||||
if ($engine === 'gpu') {
|
||||
$response = dbnToolsCallGpuLlm($messages, $opts);
|
||||
$raw = (string)($response['choices'][0]['message']['content'] ?? '');
|
||||
} elseif ($engine === 'azure_full') {
|
||||
|
||||
Reference in New Issue
Block a user