Add Legal Analysis tool — three-pass DBN-legal pipeline

Restores the dbn-legal-agent-v3 fine-tune on ocelot (it had been silently aliased to plain qwen2.5:14b in LiteLLM since the viper retirement) and ships a new tool that uses it via a three-pass flow:

- Pass 1 (Azure 4o-mini) → extract up to 5 distinct legal issues
- Pass 2 (ocelot v3 only) → answer each issue, ≤350 tokens, with corpus context
- Pass 3 (Azure 4o-mini) → synthesise overall assessment + next steps

The 12GB-VRAM constraint motivates the split: dbn-legal-agent-v3 stays hot in VRAM through the 5 sequential per-issue calls because issue extraction and synthesis run on Azure, not on ocelot.

New surface:
- includes/LegalAnalysisAgent.php
- api/legal-analysis.php (NDJSON streaming endpoint)
- legal-analysis.php (dedicated tool page)
- assets/js/legal-analysis.js (streamed UI with per-issue cards)
- Save-result + case-result.php rendering for legal-analysis output
- Nav registration in all four UI languages

Add-on integration: a "⚖️🇳🇴 Run deep legal analysis on this text" button now appears on Summarize, Ask, and Redact result pages and streams the same pipeline inline below the existing result.

Existing tools relabelled: the misleading "🇳🇴 Norwegian specialist v3 ⭐" option on advocate/deep-research/discrepancy/barnevernet is now honestly "DBN Legal Agent" — now that the real fine-tune is actually deployed, the label finally matches reality. The advocate.php v2 option was removed since the v2 GGUF is retired.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-24 04:21:01 +02:00
parent 2013648ee0
commit 7e6463ed22
14 changed files with 1361 additions and 25 deletions
@@ -0,0 +1,309 @@
+<?php
+declare(strict_types=1);
+
+require_once __DIR__ . '/bootstrap.php';
+require_once __DIR__ . '/AzureOpenAiGateway.php';
+require_once __DIR__ . '/LegalTools.php';
+
+/**
+ * Three-pass legal analysis:
+ *   1. Extract distinct legal issues from a document (Azure GPT-4o-mini)
+ *   2. For each issue: retrieve corpus passages, ask dbn-legal-agent-v3 a single
+ *      targeted question (cap 350 tokens to avoid the documented loop bug)
+ *   3. Synthesise overall assessment + next steps (Azure GPT-4o-mini)
+ *
+ * Only step 2 touches the GPU. Steps 1 and 3 use Azure so dbn-legal-agent-v3
+ * stays hot in the 12GB RTX 3060 VRAM across all per-issue calls.
+ */
+final class DbnLegalAnalysisAgent
+{
+    private const MAX_ISSUES        = 5;
+    private const LEGAL_MAX_TOKENS  = 350;
+    private const LEGAL_TIMEOUT     = 60;
+    private const LEGAL_MODEL       = 'dbn-legal-agent-v3';
+
+    // Character caps applied to text going into, or coming back from, the models.
+    private const MAX_DOCUMENT_CHARS     = 24000; // keep prompt within 4o-mini context
+    private const MIN_QUESTION_CHARS     = 10;
+    private const MAX_QUESTION_CHARS     = 280;
+    private const MAX_CONTEXT_CHARS      = 400;
+    private const MIN_ANSWER_CHARS       = 30;
+    private const MAX_SYNTH_ANSWER_CHARS = 600;
+    private const MAX_NEXT_STEPS         = 5;
+
+    /** Severity labels accepted from the extraction model; anything else becomes 'medium'. */
+    private const SEVERITIES = ['high', 'medium', 'low'];
+
+    private DbnAzureOpenAiGateway $azureMini;
+    private DbnLegalToolsService  $legalSvc;
+
+    /**
+     * Wires the Azure gateway (pinned to the gpt-4o-mini deployment) and the
+     * legal tools service used for corpus lookups.
+     */
+    public function __construct()
+    {
+        $this->azureMini = (new DbnAzureOpenAiGateway())->withDeployment('gpt-4o-mini');
+        $this->legalSvc  = new DbnLegalToolsService();
+    }
+
+    /**
+     * Pass 1 — extract distinct legal issues. Azure-only.
+     *
+     * The document is truncated to MAX_DOCUMENT_CHARS before prompting. Model
+     * output is sanitised: non-array entries and questions shorter than
+     * MIN_QUESTION_CHARS are dropped, ids are renumbered from 1, and at most
+     * MAX_ISSUES entries are returned. Gateway exceptions are not caught here.
+     *
+     * @param string $text     Document text to analyse.
+     * @param string $language Language code, resolved via dbnToolsLanguageName().
+     * @param string $docType  Document type hint; also the fallback per-issue doc_type.
+     * @return array<int,array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string}>
+     */
+    public function extractIssues(string $text, string $language, string $docType): array
+    {
+        $locale    = dbnToolsLanguageName($language);
+        $text      = mb_substr($text, 0, self::MAX_DOCUMENT_CHARS, 'UTF-8');
+        $maxIssues = self::MAX_ISSUES; // heredocs cannot interpolate class constants directly
+
+        $prompt = <<<PROMPT
+You analyse the document below and extract up to {$maxIssues} DISTINCT legal issues that warrant
+expert Norwegian-law review (barnevernsloven, EMK/ECHR, Hague Convention, family law,
+process law). Each issue must be answerable as a SINGLE focused legal question
+(≤ 25 words), not a multi-part essay.
+
+Document type hint: {$docType}
+Document language: {$locale}
+
+Return JSON only:
+{
+  "issues": [
+    {
+      "id": 1,
+      "question": "<short Norwegian legal question, single issue>",
+      "brief_context": "<≤2 sentences from the document that triggered this question>",
+      "doc_type": "<barnevernet|adopsjon|emergency|samvær|other>",
+      "severity_hint": "<high|medium|low>"
+    }
+  ]
+}
+
+Rules:
+- Skip non-legal observations (logistics, social commentary, opinions).
+- Each question should be answerable with citations to barnevernsloven, EMK Art. X,
+  named Høyesterett/EMD cases — NOT general advice.
+- If the document has fewer than {$maxIssues} real legal issues, return fewer entries.
+- If NO real legal issue exists, return {"issues": []}.
+
+DOCUMENT:
+---
+{$text}
+---
+PROMPT;
+
+        $raw = $this->azureMini->chatText(
+            [
+                ['role' => 'system', 'content' => 'You return valid JSON only. No prose, no fences.'],
+                ['role' => 'user',   'content' => $prompt],
+            ],
+            ['json' => true, 'temperature' => 0.1, 'max_tokens' => 1500, 'timeout' => 90]
+        );
+
+        $decoded = $this->azureMini->decodeJsonObject($raw);
+        $issues  = is_array($decoded['issues'] ?? null) ? $decoded['issues'] : [];
+
+        $clean = [];
+        foreach ($issues as $issue) {
+            // The model output is untrusted: ignore entries that are not objects.
+            if (!is_array($issue)) {
+                continue;
+            }
+            $question = self::scalarText($issue['question'] ?? '');
+            if (mb_strlen($question, 'UTF-8') < self::MIN_QUESTION_CHARS) {
+                continue;
+            }
+
+            $issueDocType = self::scalarText($issue['doc_type'] ?? '');
+            $severityHint = $issue['severity_hint'] ?? '';
+
+            $clean[] = [
+                'id'            => count($clean) + 1, // renumber so ids are dense after filtering
+                'question'      => mb_substr($question, 0, self::MAX_QUESTION_CHARS, 'UTF-8'),
+                'brief_context' => mb_substr(
+                    self::scalarText($issue['brief_context'] ?? ''),
+                    0,
+                    self::MAX_CONTEXT_CHARS,
+                    'UTF-8'
+                ),
+                'doc_type'      => $issueDocType !== '' ? $issueDocType : $docType,
+                'severity_hint' => in_array($severityHint, self::SEVERITIES, true) ? $severityHint : 'medium',
+            ];
+            if (count($clean) >= self::MAX_ISSUES) {
+                break;
+            }
+        }
+        return $clean;
+    }
+
+    /**
+     * Pass 2 — single targeted question to dbn-legal-agent-v3 with corpus context.
+     * Ocelot-only. Capped at 350 tokens / 60s to avoid the documented loop bug.
+     *
+     * Never throws on model failure: the exception is logged and the answer
+     * becomes a bracketed placeholder. In that case severity falls back to the
+     * issue's severity_hint and legal_basis is empty, so nothing is inferred
+     * from the placeholder text.
+     *
+     * @param array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string} $issue
+     * @param string $corpusContext Retrieved corpus text appended to the user message; may be ''.
+     * @param string $language      Currently unused: the system prompt is fixed Norwegian.
+     * @return array{id:int,question:string,answer:string,severity:string,legal_basis:string,citations_from_corpus:array,what_to_check:string,brief_context:string}
+     */
+    public function answerIssue(array $issue, string $corpusContext, string $language): array
+    {
+        $sysMsg = 'Du er en ekspert på norsk barnevernsloven og EMD-praksis. '
+                . 'Svar alltid på norsk med korrekt juridisk terminologi. '
+                . 'Bruk terskler fra barnevernsloven 2021: § 4-25 krever «klar nødvendighet». '
+                . 'Strand Lobben mot Norge (37283/13) setter krav om rehabiliteringsplan før adopsjon. '
+                . 'Aldri oppfinn paragrafnumre, saksnumre eller dommernavn. '
+                . 'Avslutt med en «Kilder:»-seksjon som lister lovparagrafer og dommer du har sitert.';
+
+        $userMsg = $issue['question'];
+        if ($issue['brief_context'] !== '') {
+            $userMsg .= "\n\nKontekst fra saken: " . $issue['brief_context'];
+        }
+        if ($corpusContext !== '') {
+            $userMsg .= "\n\nRelevante kilder fra Do Better Norge-korpuset:\n" . $corpusContext;
+        }
+
+        $answer = '';
+        $error  = null;
+        try {
+            $response = dbnToolsCallGpuLlm(
+                [
+                    ['role' => 'system', 'content' => $sysMsg],
+                    ['role' => 'user',   'content' => $userMsg],
+                ],
+                [
+                    'model'       => self::LEGAL_MODEL,
+                    'temperature' => 0.1,
+                    'max_tokens'  => self::LEGAL_MAX_TOKENS,
+                    'timeout'     => self::LEGAL_TIMEOUT,
+                ]
+            );
+            $answer = trim((string)($response['choices'][0]['message']['content'] ?? ''));
+        } catch (Throwable $e) {
+            $error = $e->getMessage();
+            // Log like synthesise() does, so failures are visible outside the UI.
+            error_log(sprintf('legal-analysis issue %d failed: %s', (int)$issue['id'], $error));
+        }
+
+        $hasModelText = $answer !== '';
+
+        $clean = dbnToolsExtractCleanAnswer($answer);
+        if (mb_strlen($clean, 'UTF-8') < self::MIN_ANSWER_CHARS) {
+            // Cleaning left too little: fall back to the raw answer, else a placeholder.
+            $clean = $hasModelText
+                ? $answer
+                : ($error !== null ? "[Modellfeil: $error]" : '[Modellen returnerte ingen brukbar tekst.]');
+        }
+
+        if ($hasModelText) {
+            $severity   = dbnToolsInferCheckSeverity($clean);
+            $legalBasis = dbnToolsExtractCheckLegalBasis($clean);
+        } else {
+            // $clean is only a placeholder here; do not derive severity or citations from it.
+            $severity   = $issue['severity_hint'] ?? 'medium';
+            $legalBasis = '';
+        }
+
+        return [
+            'id'                    => $issue['id'],
+            'question'              => $issue['question'],
+            'brief_context'         => $issue['brief_context'],
+            'answer'                => $clean,
+            'severity'              => $severity,
+            'legal_basis'           => $legalBasis,
+            'citations_from_corpus' => [], // NOTE(review): nothing in this class populates this yet
+            // NOTE(review): "familieretsadvokat" may be a misspelling of "familierettsadvokat" — confirm.
+            'what_to_check'         => 'Verifiser med norsk familieretsadvokat før handling.',
+        ];
+    }
+
+    /**
+     * Pass 3 — synthesise overall assessment. Azure-only.
+     *
+     * Never throws on gateway failure: the error is logged and a fixed fallback
+     * result is returned. An empty issue list returns immediately without
+     * calling Azure. next_steps is normalised to a list of at most
+     * MAX_NEXT_STEPS non-empty strings.
+     *
+     * @param array<int,array<string,mixed>> $issues   Answered issues (reads severity, question, answer).
+     * @param string                         $language Language code for the assessment text.
+     * @param string                         $docType  Document type, interpolated into the prompt.
+     * @return array{overall_assessment:string,next_steps:array<int,string>,disclaimer:string}
+     */
+    public function synthesise(array $issues, string $language, string $docType): array
+    {
+        if ($issues === []) {
+            // Nothing to summarise: skip the remote call entirely.
+            return [
+                'overall_assessment' => 'No discrete legal issues identified in this document.',
+                'next_steps'         => [],
+                'disclaimer'         => 'Automated analysis — not legal advice.',
+            ];
+        }
+
+        $locale    = dbnToolsLanguageName($language);
+        $maxIssues = self::MAX_ISSUES; // heredocs cannot interpolate class constants directly
+
+        $bullets = [];
+        foreach ($issues as $i) {
+            if (!is_array($i)) {
+                continue;
+            }
+            $bullets[] = sprintf(
+                "- [%s] %s\n  Svar: %s",
+                strtoupper(self::scalarText($i['severity'] ?? '')),
+                self::scalarText($i['question'] ?? ''),
+                mb_substr(self::scalarText($i['answer'] ?? ''), 0, self::MAX_SYNTH_ANSWER_CHARS, 'UTF-8')
+            );
+        }
+        $issuesBlock = implode("\n", $bullets);
+
+        $prompt = <<<PROMPT
+Below are 1-{$maxIssues} legal questions raised about a {$docType} document, each with an answer
+from a Norwegian-law specialist model. Write a concise overall assessment in {$locale}.
+
+ISSUES + ANSWERS:
+{$issuesBlock}
+
+Return JSON only:
+{
+  "overall_assessment": "<3-5 sentences summarising the legal picture across all issues>",
+  "next_steps": ["<concrete action 1>", "<concrete action 2>", "<concrete action 3>"],
+  "disclaimer": "This is automated legal analysis, not legal advice. Verify with a qualified Norwegian lawyer before acting."
+}
+PROMPT;
+
+        try {
+            $raw = $this->azureMini->chatText(
+                [
+                    ['role' => 'system', 'content' => 'You return valid JSON only. No prose, no fences.'],
+                    ['role' => 'user',   'content' => $prompt],
+                ],
+                ['json' => true, 'temperature' => 0.2, 'max_tokens' => 700, 'timeout' => 60]
+            );
+            $decoded    = $this->azureMini->decodeJsonObject($raw);
+            $assessment = is_array($decoded) ? self::scalarText($decoded['overall_assessment'] ?? '') : '';
+
+            if ($assessment !== '') {
+                // Keep only non-empty scalar steps and reindex, so the result is always a list.
+                $steps    = [];
+                $rawSteps = is_array($decoded['next_steps'] ?? null) ? $decoded['next_steps'] : [];
+                foreach ($rawSteps as $step) {
+                    $stepText = self::scalarText($step);
+                    if ($stepText === '') {
+                        continue;
+                    }
+                    $steps[] = $stepText;
+                    if (count($steps) >= self::MAX_NEXT_STEPS) {
+                        break;
+                    }
+                }
+
+                $disclaimer = self::scalarText($decoded['disclaimer'] ?? '');
+
+                return [
+                    'overall_assessment' => $assessment,
+                    'next_steps'         => $steps,
+                    'disclaimer'         => $disclaimer !== '' ? $disclaimer : 'Automated analysis — not legal advice.',
+                ];
+            }
+        } catch (Throwable $e) {
+            error_log('legal-analysis synthesis failed: ' . $e->getMessage());
+        }
+
+        return [
+            'overall_assessment' => 'Synthesis step did not return structured output. See individual issue answers below.',
+            'next_steps'         => [],
+            'disclaimer'         => 'Automated analysis — not legal advice. Verify with a qualified Norwegian lawyer.',
+        ];
+    }
+
+    /**
+     * Full orchestrated run. Emits progress events via the $emit callable.
+     *
+     * Emits 'progress' events (steps: extracting_issues, issues_extracted,
+     * issue_searching_corpus, issue_answering, synthesising) and one
+     * 'issue_answered' event per issue. A corpus lookup failure is logged and
+     * that issue is answered without corpus context instead of aborting the run.
+     * Exceptions from issue extraction are not caught here.
+     *
+     * @param string   $text     Document text to analyse.
+     * @param string   $language Language code passed to each pass.
+     * @param string   $docType  Document type hint; echoed back as doc_type.
+     * @param callable $emit (string $event, array $payload): void
+     * @return array{ok:bool,issues:array,overall_assessment:string,next_steps:array,disclaimer:string,doc_type:string,model:string,latency_ms:int}
+     */
+    public function runFullAnalysis(string $text, string $language, string $docType, callable $emit): array
+    {
+        $startMs = self::nowMs();
+
+        // Pass 1
+        $emit('progress', ['step' => 'extracting_issues', 'detail' => 'Identifying distinct legal issues…']);
+        $issues = $this->extractIssues($text, $language, $docType);
+
+        if ($issues === []) {
+            // Same keys as the full result below, so consumers need no special case.
+            return [
+                'ok'                 => true,
+                'issues'             => [],
+                'overall_assessment' => 'No discrete legal issues identified in this document.',
+                'next_steps'         => [],
+                'disclaimer'         => 'Automated analysis — not legal advice.',
+                'doc_type'           => $docType,
+                'model'              => self::LEGAL_MODEL,
+                'latency_ms'         => self::nowMs() - $startMs,
+            ];
+        }
+
+        $emit('progress', [
+            'step'   => 'issues_extracted',
+            'detail' => sprintf('Found %d legal issue(s); asking specialist…', count($issues)),
+            'issues' => array_map(
+                static fn(array $i): array => [
+                    'id'            => $i['id'],
+                    'question'      => $i['question'],
+                    'severity_hint' => $i['severity_hint'],
+                ],
+                $issues
+            ),
+        ]);
+
+        // Pass 2 — one issue at a time
+        $answered = [];
+        foreach ($issues as $issue) {
+            $emit('progress', [
+                'step'     => 'issue_searching_corpus',
+                'detail'   => sprintf('Issue %d: searching legal corpus…', $issue['id']),
+                'issue_id' => $issue['id'],
+            ]);
+
+            $corpusQuery = $issue['question'] . "\n" . $issue['brief_context'];
+            try {
+                $corpusContext = $this->legalSvc->corpusContextForSummarize($corpusQuery, 3);
+            } catch (Throwable $e) {
+                // answerIssue() accepts an empty context, so degrade rather than abort.
+                error_log(sprintf(
+                    'legal-analysis corpus lookup failed for issue %d: %s',
+                    $issue['id'],
+                    $e->getMessage()
+                ));
+                $corpusContext = '';
+            }
+
+            $emit('progress', [
+                'step'     => 'issue_answering',
+                'detail'   => sprintf('Issue %d: asking %s…', $issue['id'], self::LEGAL_MODEL),
+                'issue_id' => $issue['id'],
+            ]);
+
+            $answer     = $this->answerIssue($issue, $corpusContext, $language);
+            $answered[] = $answer;
+
+            $emit('issue_answered', ['issue' => $answer]);
+        }
+
+        // Pass 3
+        $emit('progress', ['step' => 'synthesising', 'detail' => 'Synthesising overall assessment…']);
+        $synth = $this->synthesise($answered, $language, $docType);
+
+        return [
+            'ok'                 => true,
+            'issues'             => $answered,
+            'overall_assessment' => $synth['overall_assessment'],
+            'next_steps'         => $synth['next_steps'],
+            'disclaimer'         => $synth['disclaimer'],
+            'doc_type'           => $docType,
+            'model'              => self::LEGAL_MODEL,
+            'latency_ms'         => self::nowMs() - $startMs,
+        ];
+    }
+
+    /**
+     * Trimmed string form of a decoded-JSON value, or '' when the value is not
+     * scalar (array, object, null). Avoids "Array to string conversion" on
+     * malformed model output.
+     *
+     * @param mixed $value
+     */
+    private static function scalarText($value): string
+    {
+        return is_scalar($value) ? trim((string)$value) : '';
+    }
+
+    /** Current wall-clock time in whole milliseconds. */
+    private static function nowMs(): int
+    {
+        return (int)round(microtime(true) * 1000);
+    }
+}