refactor: remove dbn_legal engine from BVJ Analyzer

dbn-legal-agent is not suitable for structured RAG synthesis:
- Fine-tune contamination appends feedback loops after JSON output
- 7-min latency vs 45s for gpt-4o-mini
- 8B base gives weaker instruction-following on complex JSON contracts
- No improvement in citation accuracy (RAG provides the legal content)

dbn-legal-agent is kept for open-ended freeform Norwegian legal Q&A where
citation structure isn't required. BVJ synthesis now uses
azure_mini|azure_full|gpu only.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-15 22:13:16 +02:00
parent 7e0fce4167
commit ba6c197f1b
2 changed files with 6 additions and 37 deletions
@@ -43,7 +43,7 @@ final class DbnBvjAnalyzerAgent
     *
     * @param array     $uploadedFiles   [{filename, text, chars, truncated}]
     * @param string    $advocateRole    Party the user represents
-     * @param string    $engine          Affects synthesis only: azure_mini|azure_full|gpu|dbn_legal
+     * @param string    $engine          Affects synthesis only: azure_mini|azure_full|gpu
     * @param string    $language        'en' or 'no'
     * @param array     $sliceSelection  Corpus slice toggles
     * @param array     $controls        sub_q_count, chunk_limit, similarity_threshold, reranker_top_k, temperature
@@ -60,7 +60,7 @@ final class DbnBvjAnalyzerAgent
        string    $additionalNotes = '',
        ?callable $emit = null
    ): array {
-        $engine   = in_array($engine, ['azure_mini', 'azure_full', 'gpu', 'dbn_legal'], true)
+        $engine   = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true)
                    ? $engine : 'azure_mini';
        $language = in_array($language, ['en', 'no'], true) ? $language : 'en';
        $controls = $this->normalizeControls($controls);
@@ -350,7 +350,6 @@ final class DbnBvjAnalyzerAgent
        $engineLabel = match ($engine) {
            'azure_full' => 'Azure gpt-4o',
            'gpu'        => 'GPU qwen2.5:14b',
-            'dbn_legal'  => 'dbn-legal-agent',
            default      => 'Azure gpt-4o-mini',
        };
        $emitRunning('synthesis', 'Synthesis',
@@ -724,7 +723,6 @@ PROMPT;
                ],
                'deploy_label' => match($engine) {
                    'gpu'        => 'GPU (cuttlefish)',
-                    'dbn_legal'  => 'dbn-legal-agent',
                    'azure_full' => 'gpt-4o',
                    default      => $this->azure->chatDeployment(),
                },
@@ -842,13 +840,7 @@ Rules:
 - Respond in {$locale}.
 PROMPT;

-        $sysPrompt = $engine === 'dbn_legal'
-            // dbn-legal-agent has a fine-tune that appends self-rating feedback loops after the
-            // JSON. Explicitly forbid that pattern and stop before it can start.
-            ? 'You output valid JSON only. Output the JSON object, then stop immediately. '
-              . 'Do not add any self-assessment, rating, "END OF MESSAGE", feedback loop, '
-              . 'USER/SYSTEM turns, or any text after the closing brace of the JSON object.'
-            : 'You return valid JSON only. No markdown fences.';
+        $sysPrompt = 'You return valid JSON only. No markdown fences.';

        $messages = [
            ['role' => 'system', 'content' => $sysPrompt],
@@ -858,35 +850,13 @@ PROMPT;

        $deployLabel = match ($engine) {
            'gpu'        => 'GPU (cuttlefish)',
-            'dbn_legal'  => 'dbn-legal-agent',
            'azure_full' => 'gpt-4o',
            default      => $this->azure->chatDeployment(),
        };

        $raw = '';
        try {
-            if ($engine === 'dbn_legal') {
-                // dbn-legal-agent is slow (~6 t/s on cuttlefish). Stream the response and emit
-                // keepalive events every 15 s so the browser connection stays alive.
-                $raw = $this->callGpuLlmStream($messages, [
-                    'model'       => 'dbn-legal-agent',
-                    'temperature' => $temperature,
-                    'max_tokens'  => 2800,
-                    'timeout'     => 660,
-                    // Stop sequences cut generation the moment the feedback loop tries to start.
-                    'stop'        => ["\nEND OF MESSAGE", "\nPlease rate", "\nUSER:", "지금 번역하기"],
-                ], $emit ? static function () use ($emit): void {
-                    $emit('progress', ['detail' => 'dbn-legal-agent generating…']);
-                } : null);
-
-                // Belt-and-suspenders: even with stop sequences the model may still include
-                // preamble or trailing junk. Extract only the first complete {...} object.
-                $jsonStart = strpos($raw, '{');
-                $jsonEnd   = strrpos($raw, '}');
-                if ($jsonStart !== false && $jsonEnd !== false && $jsonEnd > $jsonStart) {
-                    $raw = substr($raw, $jsonStart, $jsonEnd - $jsonStart + 1);
-                }
-            } elseif ($engine === 'gpu') {
+            if ($engine === 'gpu') {
                $response = dbnToolsCallGpuLlm($messages, $opts);
                $raw = (string)($response['choices'][0]['message']['content'] ?? '');
            } elseif ($engine === 'azure_full') {