Add Legal Analysis tool — two-pass DBN-legal pipeline
Restores the dbn-legal-agent-v3 fine-tune on ocelot (was silently aliased to plain qwen2.5:14b in LiteLLM since the viper retirement) and ships a new tool that uses it via a two-pass flow:

  Pass 1 (Azure 4o-mini)  → extract up to 5 distinct legal issues
  Pass 2 (ocelot v3 only) → answer each issue, ≤350 tokens, with corpus
  Pass 3 (Azure 4o-mini)  → synthesise overall assessment + next steps

The 12GB-VRAM constraint motivates the split: dbn-legal-agent-v3 stays hot in VRAM through the 5 sequential per-issue calls because issue extraction and synthesis run on Azure, not on ocelot.

New surface:
- includes/LegalAnalysisAgent.php
- api/legal-analysis.php (NDJSON streaming endpoint)
- legal-analysis.php (dedicated tool page)
- assets/js/legal-analysis.js (streamed UI with per-issue cards)
- Save-result + case-result.php rendering for legal-analysis output
- Nav registration in all four UI languages

Add-on integration: a "⚖️🇳🇴 Run deep legal analysis on this text" button now appears on Summarize, Ask, and Redact result pages and streams the same pipeline inline below the existing result.

Existing tools relabelled: the misleading "🇳🇴 Norwegian specialist v3 ⭐" option on advocate/deep-research/discrepancy/barnevernet is now honestly "DBN Legal Agent" — now that the real fine-tune is actually deployed, the label finally matches reality. The advocate.php v2 option was removed since the v2 GGUF is retired.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,309 @@
|
||||
<?php
|
||||
declare(strict_types=1);
|
||||
|
||||
require_once __DIR__ . '/bootstrap.php';
|
||||
require_once __DIR__ . '/AzureOpenAiGateway.php';
|
||||
require_once __DIR__ . '/LegalTools.php';
|
||||
|
||||
/**
 * Multi-pass legal analysis pipeline:
 *   1. Extract distinct legal issues from a document (Azure GPT-4o-mini).
 *   2. For each issue: retrieve corpus passages, then ask dbn-legal-agent-v3 a
 *      single targeted question (capped at 350 tokens to avoid the documented
 *      loop bug).
 *   3. Synthesise an overall assessment + next steps (Azure GPT-4o-mini).
 *
 * Only step 2 touches the GPU. Steps 1 and 3 use Azure so dbn-legal-agent-v3
 * stays hot in the 12GB RTX 3060 VRAM across all per-issue calls.
 */
final class DbnLegalAnalysisAgent
{
    /** Upper bound on extracted issues, and therefore on per-issue GPU calls. */
    private const MAX_ISSUES = 5;

    /** Completion cap for each per-issue specialist call. */
    private const LEGAL_MAX_TOKENS = 350;

    /** Timeout passed to each per-issue specialist call. */
    private const LEGAL_TIMEOUT = 60;

    /** Model name requested for pass 2 and reported in the result payload. */
    private const LEGAL_MODEL = 'dbn-legal-agent-v3';

    /** Upper bound on next-step entries kept from the synthesis response. */
    private const MAX_NEXT_STEPS = 5;

    /** Severity labels accepted from the extraction model. */
    private const SEVERITIES = ['high', 'medium', 'low'];

    /** System message shared by both Azure calls. */
    private const JSON_SYSTEM_PROMPT = 'You return valid JSON only. No prose, no fences.';

    private DbnAzureOpenAiGateway $azureMini;

    private DbnLegalToolsService $legalSvc;

    public function __construct()
    {
        $this->azureMini = (new DbnAzureOpenAiGateway())->withDeployment('gpt-4o-mini');
        $this->legalSvc = new DbnLegalToolsService();
    }

    /**
     * Pass 1 — extract distinct legal issues. Azure-only.
     *
     * The document is truncated to 24000 characters before prompting. Entries in
     * the model response that are not objects, or whose question is shorter than
     * 10 characters, are dropped; ids are renumbered 1..n over the kept entries.
     * Exceptions raised by the gateway call are not caught here.
     *
     * @param string $text     Document text to analyse.
     * @param string $language Language code, resolved via dbnToolsLanguageName().
     * @param string $docType  Document type hint; also the fallback per-issue doc_type.
     *
     * @return array<int,array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string}>
     */
    public function extractIssues(string $text, string $language, string $docType): array
    {
        $locale = dbnToolsLanguageName($language);
        $text = mb_substr($text, 0, 24000, 'UTF-8'); // keep prompt within 4o-mini context
        $maxIssues = self::MAX_ISSUES; // interpolated so the prompt and the cap cannot drift apart

        $prompt = <<<PROMPT
        You analyse the document below and extract up to {$maxIssues} DISTINCT legal issues that warrant
        expert Norwegian-law review (barnevernsloven, EMK/ECHR, Hague Convention, family law,
        process law). Each issue must be answerable as a SINGLE focused legal question
        (≤ 25 words), not a multi-part essay.

        Document type hint: {$docType}
        Document language: {$locale}

        Return JSON only:
        {
        "issues": [
        {
        "id": 1,
        "question": "<short Norwegian legal question, single issue>",
        "brief_context": "<≤2 sentences from the document that triggered this question>",
        "doc_type": "<barnevernet|adopsjon|emergency|samvær|other>",
        "severity_hint": "<high|medium|low>"
        }
        ]
        }

        Rules:
        - Skip non-legal observations (logistics, social commentary, opinions).
        - Each question should be answerable with citations to barnevernsloven, EMK Art. X,
        named Høyesterett/EMD cases — NOT general advice.
        - If the document has fewer than {$maxIssues} real legal issues, return fewer entries.
        - If NO real legal issue exists, return {"issues": []}.

        DOCUMENT:
        ---
        {$text}
        ---
        PROMPT;

        $raw = $this->azureMini->chatText(
            [
                ['role' => 'system', 'content' => self::JSON_SYSTEM_PROMPT],
                ['role' => 'user', 'content' => $prompt],
            ],
            ['json' => true, 'temperature' => 0.1, 'max_tokens' => 1500, 'timeout' => 90]
        );

        $decoded = $this->azureMini->decodeJsonObject($raw);
        $candidates = is_array($decoded) && is_array($decoded['issues'] ?? null)
            ? $decoded['issues']
            : [];

        $clean = [];
        foreach ($candidates as $candidate) {
            if (!is_array($candidate)) {
                continue; // model emitted a bare value instead of an issue object
            }

            $question = self::textOrEmpty($candidate['question'] ?? null);
            if (mb_strlen($question, 'UTF-8') < 10) {
                continue; // empty or too short to be a real question
            }

            $issueDocType = self::textOrEmpty($candidate['doc_type'] ?? null);
            $severity = $candidate['severity_hint'] ?? null;

            $clean[] = [
                'id' => count($clean) + 1,
                'question' => mb_substr($question, 0, 280, 'UTF-8'),
                'brief_context' => mb_substr(self::textOrEmpty($candidate['brief_context'] ?? null), 0, 400, 'UTF-8'),
                'doc_type' => $issueDocType !== '' ? $issueDocType : $docType,
                'severity_hint' => in_array($severity, self::SEVERITIES, true) ? $severity : 'medium',
            ];

            if (count($clean) >= self::MAX_ISSUES) {
                break;
            }
        }

        return $clean;
    }

    /**
     * Pass 2 — single targeted question to dbn-legal-agent-v3 with corpus context.
     * Ocelot-only. Capped at 350 tokens / 60s to avoid the documented loop bug.
     *
     * A failed or empty model call never throws: the failure is logged and the
     * returned answer is a bracketed placeholder. In that case severity falls back
     * to the issue's severity_hint and legal_basis is left empty, so nothing is
     * inferred from the placeholder text itself.
     *
     * @param array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string} $issue
     * @param string $corpusContext Retrieved corpus passages; appended to the user message when non-empty.
     * @param string $language      Currently unused: the system prompt instructs the model to always answer in Norwegian.
     *
     * @return array{id:int,question:string,answer:string,severity:string,legal_basis:string,citations_from_corpus:array,what_to_check:string,brief_context:string}
     */
    public function answerIssue(array $issue, string $corpusContext, string $language): array
    {
        $sysMsg = 'Du er en ekspert på norsk barnevernsloven og EMD-praksis. '
            . 'Svar alltid på norsk med korrekt juridisk terminologi. '
            . 'Bruk terskler fra barnevernsloven 2021: § 4-25 krever «klar nødvendighet». '
            . 'Strand Lobben mot Norge (37283/13) setter krav om rehabiliteringsplan før adopsjon. '
            . 'Aldri oppfinn paragrafnumre, saksnumre eller dommernavn. '
            . 'Avslutt med en «Kilder:»-seksjon som lister lovparagrafer og dommer du har sitert.';

        $userMsg = $issue['question'];
        if ($issue['brief_context'] !== '') {
            $userMsg .= "\n\nKontekst fra saken: " . $issue['brief_context'];
        }
        if ($corpusContext !== '') {
            $userMsg .= "\n\nRelevante kilder fra Do Better Norge-korpuset:\n" . $corpusContext;
        }

        $answer = '';
        $error = null;
        try {
            $response = dbnToolsCallGpuLlm(
                [
                    ['role' => 'system', 'content' => $sysMsg],
                    ['role' => 'user', 'content' => $userMsg],
                ],
                [
                    'model' => self::LEGAL_MODEL,
                    'temperature' => 0.1,
                    'max_tokens' => self::LEGAL_MAX_TOKENS,
                    'timeout' => self::LEGAL_TIMEOUT,
                ]
            );
            $content = $response['choices'][0]['message']['content'] ?? '';
            $answer = is_string($content) ? trim($content) : '';
        } catch (Throwable $e) {
            $error = $e->getMessage();
            // Logged the same way as synthesis failures, so a dead GPU backend is visible server-side.
            error_log('legal-analysis issue ' . $issue['id'] . ' failed: ' . $error);
        }

        $clean = dbnToolsExtractCleanAnswer($answer);
        $hasAnswer = true;
        if (mb_strlen($clean, 'UTF-8') < 30) {
            if ($answer !== '') {
                // Cleaning stripped too much; prefer the raw model output.
                $clean = $answer;
            } else {
                $hasAnswer = false;
                $clean = $error !== null
                    ? "[Modellfeil: $error]"
                    : '[Modellen returnerte ingen brukbar tekst.]';
            }
        }

        // Only infer from genuine model output; the placeholder carries no legal content.
        $severity = $hasAnswer ? dbnToolsInferCheckSeverity($clean) : $issue['severity_hint'];
        $legalBasis = $hasAnswer ? dbnToolsExtractCheckLegalBasis($clean) : '';

        return [
            'id' => $issue['id'],
            'question' => $issue['question'],
            'brief_context' => $issue['brief_context'],
            'answer' => $clean,
            'severity' => $severity,
            'legal_basis' => $legalBasis,
            'citations_from_corpus' => [], // always empty here; nothing in this class populates it
            'what_to_check' => 'Verifiser med norsk familieretsadvokat før handling.',
        ];
    }

    /**
     * Pass 3 — synthesise overall assessment. Azure-only.
     *
     * Never throws: gateway/decoding exceptions are logged, and a fixed fallback
     * payload is returned whenever no usable overall_assessment comes back.
     *
     * @param array<int,array<string,mixed>> $issues   Answered issues as produced by answerIssue().
     * @param string                         $language Language code, resolved via dbnToolsLanguageName().
     * @param string                         $docType  Document type, interpolated into the prompt.
     *
     * @return array{overall_assessment:string,next_steps:array<int,string>,disclaimer:string}
     */
    public function synthesise(array $issues, string $language, string $docType): array
    {
        $locale = dbnToolsLanguageName($language);
        $maxIssues = self::MAX_ISSUES;

        $bullets = [];
        foreach ($issues as $entry) {
            $bullets[] = sprintf(
                "- [%s] %s\n Svar: %s",
                strtoupper((string)($entry['severity'] ?? '')),
                (string)($entry['question'] ?? ''),
                mb_substr((string)($entry['answer'] ?? ''), 0, 600, 'UTF-8') // keep the prompt bounded
            );
        }
        $issuesBlock = implode("\n", $bullets);

        $prompt = <<<PROMPT
        Below are 1-{$maxIssues} legal questions raised about a {$docType} document, each with an answer
        from a Norwegian-law specialist model. Write a concise overall assessment in {$locale}.

        ISSUES + ANSWERS:
        {$issuesBlock}

        Return JSON only:
        {
        "overall_assessment": "<3-5 sentences summarising the legal picture across all issues>",
        "next_steps": ["<concrete action 1>", "<concrete action 2>", "<concrete action 3>"],
        "disclaimer": "This is automated legal analysis, not legal advice. Verify with a qualified Norwegian lawyer before acting."
        }
        PROMPT;

        try {
            $raw = $this->azureMini->chatText(
                [
                    ['role' => 'system', 'content' => self::JSON_SYSTEM_PROMPT],
                    ['role' => 'user', 'content' => $prompt],
                ],
                ['json' => true, 'temperature' => 0.2, 'max_tokens' => 700, 'timeout' => 60]
            );
            $decoded = $this->azureMini->decodeJsonObject($raw);

            $assessment = is_array($decoded) ? self::textOrEmpty($decoded['overall_assessment'] ?? null) : '';
            if ($assessment !== '') {
                $disclaimer = self::textOrEmpty($decoded['disclaimer'] ?? null);

                return [
                    'overall_assessment' => $assessment,
                    'next_steps' => self::normaliseNextSteps($decoded['next_steps'] ?? null),
                    'disclaimer' => $disclaimer !== '' ? $disclaimer : 'Automated analysis — not legal advice.',
                ];
            }
        } catch (Throwable $e) {
            error_log('legal-analysis synthesis failed: ' . $e->getMessage());
        }

        return [
            'overall_assessment' => 'Synthesis step did not return structured output. See individual issue answers below.',
            'next_steps' => [],
            'disclaimer' => 'Automated analysis — not legal advice. Verify with a qualified Norwegian lawyer.',
        ];
    }

    /**
     * Full orchestrated run. Emits progress events via the $emit callable.
     *
     * Events emitted, in order: 'progress' (extracting_issues), 'progress'
     * (issues_extracted), then per issue 'progress' (issue_searching_corpus),
     * 'progress' (issue_answering) and 'issue_answered', and finally 'progress'
     * (synthesising). When no issues are found only the first event is emitted.
     * Exceptions thrown during pass 1 or the corpus lookup are not caught here.
     *
     * @param string   $text     Document text to analyse.
     * @param string   $language Language code forwarded to each pass.
     * @param string   $docType  Document type hint forwarded to passes 1 and 3.
     * @param callable $emit     (string $event, array $payload): void
     *
     * @return array<string,mixed> Result payload with keys ok, issues, overall_assessment,
     *                             next_steps, disclaimer, doc_type, model, latency_ms.
     */
    public function runFullAnalysis(string $text, string $language, string $docType, callable $emit): array
    {
        $startMs = self::nowMs();

        // Pass 1
        $emit('progress', ['step' => 'extracting_issues', 'detail' => 'Identifying distinct legal issues…']);
        $issues = $this->extractIssues($text, $language, $docType);

        if ($issues === []) {
            return [
                'ok' => true,
                'issues' => [],
                'overall_assessment' => 'No discrete legal issues identified in this document.',
                'next_steps' => [],
                'disclaimer' => 'Automated analysis — not legal advice.',
                'doc_type' => $docType, // same key set as the full result below
                'model' => self::LEGAL_MODEL,
                'latency_ms' => self::nowMs() - $startMs,
            ];
        }

        $emit('progress', [
            'step' => 'issues_extracted',
            'detail' => sprintf('Found %d legal issue(s); asking specialist…', count($issues)),
            'issues' => array_map(
                static fn(array $i): array => [
                    'id' => $i['id'],
                    'question' => $i['question'],
                    'severity_hint' => $i['severity_hint'],
                ],
                $issues
            ),
        ]);

        // Pass 2 — one issue at a time
        $answered = [];
        foreach ($issues as $issue) {
            $emit('progress', [
                'step' => 'issue_searching_corpus',
                'detail' => sprintf('Issue %d: searching legal corpus…', $issue['id']),
                'issue_id' => $issue['id'],
            ]);

            $corpusQuery = $issue['question'] . "\n" . $issue['brief_context'];
            $corpusContext = $this->legalSvc->corpusContextForSummarize($corpusQuery, 3);

            $emit('progress', [
                'step' => 'issue_answering',
                'detail' => sprintf('Issue %d: asking dbn-legal-agent-v3…', $issue['id']),
                'issue_id' => $issue['id'],
            ]);

            $answer = $this->answerIssue($issue, $corpusContext, $language);
            $answered[] = $answer;

            $emit('issue_answered', ['issue' => $answer]);
        }

        // Pass 3
        $emit('progress', ['step' => 'synthesising', 'detail' => 'Synthesising overall assessment…']);
        $synth = $this->synthesise($answered, $language, $docType);

        return [
            'ok' => true,
            'issues' => $answered,
            'overall_assessment' => $synth['overall_assessment'],
            'next_steps' => $synth['next_steps'],
            'disclaimer' => $synth['disclaimer'],
            'doc_type' => $docType,
            'model' => self::LEGAL_MODEL,
            'latency_ms' => self::nowMs() - $startMs,
        ];
    }

    /**
     * Returns the trimmed value when it is a string, otherwise an empty string.
     * Used on decoded model output, where any field may have an unexpected type.
     *
     * @param mixed $value
     */
    private static function textOrEmpty($value): string
    {
        return is_string($value) ? trim($value) : '';
    }

    /**
     * Reduces a decoded next_steps value to a list of at most MAX_NEXT_STEPS
     * non-empty strings, discarding keys and non-string entries.
     *
     * @param mixed $steps
     *
     * @return array<int,string>
     */
    private static function normaliseNextSteps($steps): array
    {
        if (!is_array($steps)) {
            return [];
        }

        $kept = [];
        foreach ($steps as $step) {
            $text = self::textOrEmpty($step);
            if ($text === '') {
                continue;
            }
            $kept[] = $text;
            if (count($kept) >= self::MAX_NEXT_STEPS) {
                break;
            }
        }

        return $kept;
    }

    /** Current wall-clock time in whole milliseconds. */
    private static function nowMs(): int
    {
        return (int)round(microtime(true) * 1000);
    }
}
|
||||
Reference in New Issue
Block a user