refactor: remove dbn_legal engine from BVJ Analyzer

dbn-legal-agent is not suitable for structured RAG synthesis:
- Fine-tune contamination makes the model append self-rating feedback loops after the JSON output
- 7-min latency vs 45s for gpt-4o-mini
- 8B base gives weaker instruction-following on complex JSON contracts
- No improvement in citation accuracy (RAG provides the legal content)

dbn-legal-agent is kept for open-ended, freeform Norwegian legal Q&A
where citation structure isn't required. BVJ synthesis now uses
azure_mini|azure_full|gpu only.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-15 22:13:16 +02:00
parent 7e0fce4167
commit ba6c197f1b
2 changed files with 6 additions and 37 deletions
+2 -3
View File
@@ -38,10 +38,9 @@ require_once __DIR__ . '/includes/layout.php';
<span class="control-label">Engine</span> <span class="control-label">Engine</span>
<label><input type="radio" name="bvjEngine" value="azure_mini" checked> Azure gpt-4o-mini &#9733; <small class="control-hint">(~30-60s)</small></label> <label><input type="radio" name="bvjEngine" value="azure_mini" checked> Azure gpt-4o-mini &#9733; <small class="control-hint">(~30-60s)</small></label>
<label><input type="radio" name="bvjEngine" value="azure_full"> Azure gpt-4o <small class="control-hint">(best · ~90-180s)</small></label> <label><input type="radio" name="bvjEngine" value="azure_full"> Azure gpt-4o <small class="control-hint">(best · ~90-180s)</small></label>
<label><input type="radio" name="bvjEngine" value="gpu"> GPU (cuttlefish) <small class="control-hint">(local · ~45-90s)</small></label> <label><input type="radio" name="bvjEngine" value="gpu"> GPU qwen2.5:14b <small class="control-hint">(local · ~45-90s)</small></label>
<label><input type="radio" name="bvjEngine" value="dbn_legal"> &#x1F1F3;&#x1F1F4; Norwegian specialist <small class="control-hint">(dbn-legal-agent · ~60-120s)</small></label>
</div> </div>
<p class="upload-hint">Engine applies to the final advocacy synthesis only. Document classification, party extraction, and timeline are always fast (azure-mini). Norwegian specialist is best for Barnevernloven, ECHR Article 8, and Bufdir analysis.</p> <p class="upload-hint">Engine applies to the final advocacy synthesis only. Document classification, party extraction, and timeline are always fast (azure-mini).</p>
<div class="dr-slice-section"> <div class="dr-slice-section">
<p class="control-label">Corpus slices</p> <p class="control-label">Corpus slices</p>
+4 -34
View File
@@ -43,7 +43,7 @@ final class DbnBvjAnalyzerAgent
* *
* @param array $uploadedFiles [{filename, text, chars, truncated}] * @param array $uploadedFiles [{filename, text, chars, truncated}]
* @param string $advocateRole Party the user represents * @param string $advocateRole Party the user represents
* @param string $engine Affects synthesis only: azure_mini|azure_full|gpu|dbn_legal * @param string $engine Affects synthesis only: azure_mini|azure_full|gpu
* @param string $language 'en' or 'no' * @param string $language 'en' or 'no'
* @param array $sliceSelection Corpus slice toggles * @param array $sliceSelection Corpus slice toggles
* @param array $controls sub_q_count, chunk_limit, similarity_threshold, reranker_top_k, temperature * @param array $controls sub_q_count, chunk_limit, similarity_threshold, reranker_top_k, temperature
@@ -60,7 +60,7 @@ final class DbnBvjAnalyzerAgent
string $additionalNotes = '', string $additionalNotes = '',
?callable $emit = null ?callable $emit = null
): array { ): array {
$engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu', 'dbn_legal'], true) $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true)
? $engine : 'azure_mini'; ? $engine : 'azure_mini';
$language = in_array($language, ['en', 'no'], true) ? $language : 'en'; $language = in_array($language, ['en', 'no'], true) ? $language : 'en';
$controls = $this->normalizeControls($controls); $controls = $this->normalizeControls($controls);
@@ -350,7 +350,6 @@ final class DbnBvjAnalyzerAgent
$engineLabel = match ($engine) { $engineLabel = match ($engine) {
'azure_full' => 'Azure gpt-4o', 'azure_full' => 'Azure gpt-4o',
'gpu' => 'GPU qwen2.5:14b', 'gpu' => 'GPU qwen2.5:14b',
'dbn_legal' => 'dbn-legal-agent',
default => 'Azure gpt-4o-mini', default => 'Azure gpt-4o-mini',
}; };
$emitRunning('synthesis', 'Synthesis', $emitRunning('synthesis', 'Synthesis',
@@ -724,7 +723,6 @@ PROMPT;
], ],
'deploy_label' => match($engine) { 'deploy_label' => match($engine) {
'gpu' => 'GPU (cuttlefish)', 'gpu' => 'GPU (cuttlefish)',
'dbn_legal' => 'dbn-legal-agent',
'azure_full' => 'gpt-4o', 'azure_full' => 'gpt-4o',
default => $this->azure->chatDeployment(), default => $this->azure->chatDeployment(),
}, },
@@ -842,13 +840,7 @@ Rules:
- Respond in {$locale}. - Respond in {$locale}.
PROMPT; PROMPT;
$sysPrompt = $engine === 'dbn_legal' $sysPrompt = 'You return valid JSON only. No markdown fences.';
// dbn-legal-agent has a fine-tune that appends self-rating feedback loops after the
// JSON. Explicitly forbid that pattern and stop before it can start.
? 'You output valid JSON only. Output the JSON object, then stop immediately. '
. 'Do not add any self-assessment, rating, "END OF MESSAGE", feedback loop, '
. 'USER/SYSTEM turns, or any text after the closing brace of the JSON object.'
: 'You return valid JSON only. No markdown fences.';
$messages = [ $messages = [
['role' => 'system', 'content' => $sysPrompt], ['role' => 'system', 'content' => $sysPrompt],
@@ -858,35 +850,13 @@ PROMPT;
$deployLabel = match ($engine) { $deployLabel = match ($engine) {
'gpu' => 'GPU (cuttlefish)', 'gpu' => 'GPU (cuttlefish)',
'dbn_legal' => 'dbn-legal-agent',
'azure_full' => 'gpt-4o', 'azure_full' => 'gpt-4o',
default => $this->azure->chatDeployment(), default => $this->azure->chatDeployment(),
}; };
$raw = ''; $raw = '';
try { try {
if ($engine === 'dbn_legal') { if ($engine === 'gpu') {
// dbn-legal-agent is slow (~6 t/s on cuttlefish). Stream the response and emit
// keepalive events every 15 s so the browser connection stays alive.
$raw = $this->callGpuLlmStream($messages, [
'model' => 'dbn-legal-agent',
'temperature' => $temperature,
'max_tokens' => 2800,
'timeout' => 660,
// Stop sequences cut generation the moment the feedback loop tries to start.
'stop' => ["\nEND OF MESSAGE", "\nPlease rate", "\nUSER:", "지금 번역하기"],
], $emit ? static function () use ($emit): void {
$emit('progress', ['detail' => 'dbn-legal-agent generating…']);
} : null);
// Belt-and-suspenders: even with stop sequences the model may still include
// preamble or trailing junk. Extract only the first complete {...} object.
$jsonStart = strpos($raw, '{');
$jsonEnd = strrpos($raw, '}');
if ($jsonStart !== false && $jsonEnd !== false && $jsonEnd > $jsonStart) {
$raw = substr($raw, $jsonStart, $jsonEnd - $jsonStart + 1);
}
} elseif ($engine === 'gpu') {
$response = dbnToolsCallGpuLlm($messages, $opts); $response = dbnToolsCallGpuLlm($messages, $opts);
$raw = (string)($response['choices'][0]['message']['content'] ?? ''); $raw = (string)($response['choices'][0]['message']['content'] ?? '');
} elseif ($engine === 'azure_full') { } elseif ($engine === 'azure_full') {