Fix legal analysis issue extraction for long documents
This commit is contained in:
@@ -118,6 +118,14 @@ try {
|
|||||||
$emit('progress', ['step' => 'synthesising', 'detail' => 'Synthesising overall assessment…']);
|
$emit('progress', ['step' => 'synthesising', 'detail' => 'Synthesising overall assessment…']);
|
||||||
$synth = $agent->synthesise($answered, $language, $docType);
|
$synth = $agent->synthesise($answered, $language, $docType);
|
||||||
|
|
||||||
|
$legalCheck = [];
|
||||||
|
try {
|
||||||
|
$legalCheck = dbnToolsRunLegalCheck(
|
||||||
|
mb_strimwidth((string)($synth['overall_assessment'] ?? ''), 0, 800),
|
||||||
|
$docType
|
||||||
|
);
|
||||||
|
} catch (Throwable) {}
|
||||||
|
|
||||||
$result = [
|
$result = [
|
||||||
'ok' => true,
|
'ok' => true,
|
||||||
'issues' => $answered,
|
'issues' => $answered,
|
||||||
@@ -126,6 +134,7 @@ try {
|
|||||||
'disclaimer' => $synth['disclaimer'],
|
'disclaimer' => $synth['disclaimer'],
|
||||||
'doc_type' => $docType,
|
'doc_type' => $docType,
|
||||||
'model' => 'dbn-legal-agent-v3',
|
'model' => 'dbn-legal-agent-v3',
|
||||||
|
'legal_check' => $legalCheck,
|
||||||
'latency_ms' => (int)round((microtime(true) - $startTime) * 1000),
|
'latency_ms' => (int)round((microtime(true) - $startTime) * 1000),
|
||||||
];
|
];
|
||||||
if ($ftRemaining >= 0) {
|
if ($ftRemaining >= 0) {
|
||||||
|
|||||||
@@ -353,6 +353,9 @@
|
|||||||
if (result.disclaimer) {
|
if (result.disclaimer) {
|
||||||
topHtml += '<p class="disclaimer-note">' + esc(result.disclaimer) + '</p>';
|
topHtml += '<p class="disclaimer-note">' + esc(result.disclaimer) + '</p>';
|
||||||
}
|
}
|
||||||
|
if (Array.isArray(result.legal_check) && result.legal_check.length) {
|
||||||
|
topHtml += renderLegalCheck(result.legal_check);
|
||||||
|
}
|
||||||
topHtml += '</section>';
|
topHtml += '</section>';
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -383,6 +386,23 @@
|
|||||||
setStatus('');
|
setStatus('');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the HTML fragment for the "Legal threshold check" panel.
 *
 * Each finding is rendered as one red-flag card showing its description and
 * severity badge, plus the legal basis and a collapsible "What to verify"
 * note when those fields are present. All finding text is passed through
 * esc() before being concatenated into markup.
 *
 * @param {Array<Object>} findings Findings with optional `description`,
 *   `severity`, `legal_basis` and `what_to_check` fields.
 * @returns {string} HTML string for the whole panel.
 */
function renderLegalCheck(findings) {
    var cards = [];

    findings.forEach(function (finding) {
        // A missing/empty severity is displayed as "low".
        var level = finding.severity || 'low';
        var card = '<div class="bvj-red-flag">';

        card += '<div class="bvj-red-flag__head">';
        card += '<div class="bvj-red-flag__desc">' + esc(finding.description || '') + '</div>';
        card += '<span class="bvj-severity bvj-severity-' + esc(level) + '">' + esc(level) + '</span>';
        card += '</div>';

        if (finding.legal_basis) {
            card += '<span class="bvj-red-flag__legal">' + esc(finding.legal_basis) + '</span>';
        }
        if (finding.what_to_check) {
            card += '<details class="bvj-red-flag__details"><summary>What to verify</summary><p class="bvj-red-flag__check">'
                + esc(finding.what_to_check)
                + '</p></details>';
        }

        card += '</div>';
        cards.push(card);
    });

    var heading = '<h4 class="korr-legal-check__title">Legal threshold check <small>(dbn-legal-agent-v3)</small></h4>';

    return '<div class="korr-legal-check">' + heading + cards.join('') + '</div>';
}
|
||||||
|
|
||||||
// ── Helpers ───────────────────────────────────────────────────────────────
|
// ── Helpers ───────────────────────────────────────────────────────────────
|
||||||
function setBusy(on) {
|
function setBusy(on) {
|
||||||
if (runBtn) runBtn.disabled = on;
|
if (runBtn) runBtn.disabled = on;
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ final class DbnLegalAnalysisAgent
|
|||||||
*
|
*
|
||||||
* @return array<int,array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string}>
|
* @return array<int,array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string}>
|
||||||
*/
|
*/
|
||||||
public function extractIssues(string $text, string $language, string $docType): array
|
private function extractIssuesFromSingleChunk(string $text, string $language, string $docType): array
|
||||||
{
|
{
|
||||||
$locale = dbnToolsLanguageName($language);
|
$locale = dbnToolsLanguageName($language);
|
||||||
$text = mb_substr($text, 0, 24000, 'UTF-8'); // keep prompt within 4o-mini context
|
$text = mb_substr($text, 0, 24000, 'UTF-8'); // keep prompt within 4o-mini context
|
||||||
@@ -118,6 +118,239 @@ PROMPT;
|
|||||||
return $clean;
|
return $clean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Pass 1 - collect distinct legal issues from representative document windows.
 *
 * The text is split into windows by issueExtractionChunks(); each window is
 * sent through extractIssuesFromSingleChunk() and the results are merged with
 * near-duplicate questions removed. A failing window is logged and skipped so
 * that the remaining windows can still contribute. If nothing was found and
 * the document looks like a substantive family-law file, generic fallback
 * issues are used instead. Ids are renumbered 1..n before returning.
 *
 * @return array<int,array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string}>
 */
public function extractIssues(string $text, string $language, string $docType): array
{
    $text = trim($text);
    if ($text === '') {
        return [];
    }

    $collected = [];
    foreach ($this->issueExtractionChunks($text) as $window) {
        try {
            $found = $this->extractIssuesFromSingleChunk((string)$window['text'], $language, $docType);
        } catch (Throwable $error) {
            // Best effort: one failing window must not abort the whole extraction.
            error_log('legal-analysis issue extraction failed for ' . (string)$window['label'] . ': ' . $error->getMessage());
            $found = [];
        }

        foreach ($found as $issue) {
            $this->appendUniqueIssue($collected, $issue);
            if (count($collected) >= self::MAX_ISSUES) {
                // Cap reached: stop scanning this window and all remaining ones.
                break 2;
            }
        }
    }

    if ($collected === [] && $this->looksLikeSubstantiveFamilyLawDocument($text, $docType)) {
        $collected = $this->fallbackLegalIssues($language, $docType, $text);
    }

    // Renumber so ids are consecutive regardless of which window produced each issue.
    foreach (array_keys($collected) as $position) {
        $collected[$position]['id'] = $position + 1;
    }

    return array_slice($collected, 0, self::MAX_ISSUES);
}
|
||||||
|
|
||||||
|
/**
 * Choose up to six windows of the document to run issue extraction on.
 *
 * A text that fits in a single window is returned as one "full document"
 * chunk. Longer texts always get a window at the beginning and at the end,
 * a middle window when it is far enough from the beginning, and up to four
 * further windows taken from the positions with the most family-law keyword
 * hits. Windows whose start offsets are closer than 6000 characters to an
 * already selected window are skipped, except for the end window: it is
 * always kept so the tail of the document is never left unanalysed (the
 * per-chunk extractor only reads the first 24000 characters it is given).
 *
 * The result is ordered by score (highest first), then by offset.
 *
 * @return array<int,array{label:string,text:string,offset:int,score:int}>
 */
private function issueExtractionChunks(string $text): array
{
    $len = mb_strlen($text, 'UTF-8');
    $window = 24000;

    // Only shortcut when the whole text fits in one window; anything longer
    // would be truncated by the single-chunk extractor.
    if ($len <= $window) {
        return [[
            'label' => 'full document',
            'text' => $text,
            'offset' => 0,
            'score' => 0,
        ]];
    }

    $chunks = [];
    $add = static function (string $label, int $offset, int $score = 0, bool $force = false) use (&$chunks, $text, $len, $window): void {
        // Clamp so the window never runs past the end of the text.
        $offset = max(0, min($offset, $len - $window));

        if (!$force) {
            foreach ($chunks as $existing) {
                // Skip windows that mostly overlap one we already have.
                if (abs((int)$existing['offset'] - $offset) < 6000) {
                    return;
                }
            }
        }

        $chunks[] = [
            'label' => $label,
            'text' => mb_substr($text, $offset, $window, 'UTF-8'),
            'offset' => $offset,
            'score' => $score,
        ];
    };

    $add('beginning of document', 0, 1);
    $add('middle of document', (int)floor(($len - $window) / 2), 1);
    // Forced: for texts only slightly longer than one window the proximity
    // filter would otherwise drop this window and lose the document's tail.
    $add('end of document', $len - $window, 1, true);

    // All keywords are lower-case literals, so they can be matched directly
    // against the lower-cased window text.
    $keywords = [
        'samvaer', 'samvær', 'omsorg', 'barnevern', 'sakkyndig', 'risiko',
        'tilknytning', 'rus', 'vold', 'emk', 'barnets beste', 'foreldre',
        'bekymring', 'kontakt', 'plassering', 'fylkesnemnd', 'retten',
    ];

    // Score a sliding window every 10000 characters by keyword occurrences.
    $candidates = [];
    for ($offset = 0; $offset < $len; $offset += 10000) {
        $chunk = mb_substr($text, $offset, $window, 'UTF-8');
        if ($chunk === '') {
            break;
        }

        $lower = mb_strtolower($chunk, 'UTF-8');
        $score = 0;
        foreach ($keywords as $kw) {
            $score += substr_count($lower, $kw);
        }
        if ($score > 0) {
            $candidates[] = ['offset' => $offset, 'score' => $score];
        }

        if ($offset + $window >= $len) {
            // This window already reached the end of the text.
            break;
        }
    }

    usort($candidates, static fn(array $a, array $b): int => ($b['score'] <=> $a['score']));
    foreach (array_slice($candidates, 0, 4) as $candidate) {
        $add('keyword-heavy legal section', (int)$candidate['offset'], (int)$candidate['score']);
        if (count($chunks) >= 6) {
            break;
        }
    }

    // Highest-scoring windows first; ties keep document order.
    usort($chunks, static function (array $a, array $b): int {
        if ($a['score'] !== $b['score']) {
            return $b['score'] <=> $a['score'];
        }
        return $a['offset'] <=> $b['offset'];
    });

    return array_slice($chunks, 0, 6);
}
|
||||||
|
|
||||||
|
/**
 * Append an issue to the list unless an equivalent question is already there.
 *
 * Two questions count as equivalent when their normalised keys are identical
 * or when their word-overlap similarity is at least 0.58.
 *
 * @param array<int,array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string}> $issues
 * @param array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string} $candidate
 */
private function appendUniqueIssue(array &$issues, array $candidate): void
{
    $newKey = $this->issueDedupeKey((string)$candidate['question']);

    foreach ($issues as $known) {
        $knownKey = $this->issueDedupeKey((string)$known['question']);

        // Exact match is checked separately: keys made only of short words
        // have a similarity of 0.0 even when they are identical.
        if ($newKey === $knownKey) {
            return;
        }
        if ($this->issueSimilarity($newKey, $knownKey) >= 0.58) {
            return;
        }
    }

    $issues[] = $candidate;
}
|
||||||
|
|
||||||
|
/**
 * Normalise a question into a comparison key: lower-cased, with every run of
 * characters that are not letters, digits or whitespace replaced by a space,
 * and whitespace runs collapsed to single spaces.
 */
private function issueDedupeKey(string $question): string
{
    $lowered = mb_strtolower($question, 'UTF-8');

    // preg_replace() yields null on a PCRE error; keep the previous value then.
    $stripped = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $lowered);
    if ($stripped === null) {
        $stripped = $lowered;
    }

    $collapsed = preg_replace('/\s+/u', ' ', trim($stripped));

    return $collapsed ?? $stripped;
}
|
||||||
|
|
||||||
|
/**
 * Word-overlap similarity of two normalised questions.
 *
 * Each key is split on whitespace into its set of distinct words longer than
 * three characters; the result is the size of the intersection divided by
 * the size of the union of the two sets. Returns 0.0 when either set is empty.
 */
private function issueSimilarity(string $a, string $b): float
{
    // Builds a word => true map so that the array keys form the word set.
    $wordSet = static function (string $key): array {
        $set = [];
        foreach (preg_split('/\s+/u', $key) ?: [] as $word) {
            if (mb_strlen($word, 'UTF-8') > 3) {
                $set[$word] = true;
            }
        }
        return $set;
    };

    $left = $wordSet($a);
    $right = $wordSet($b);
    if ($left === [] || $right === []) {
        return 0.0;
    }

    $shared = count(array_intersect_key($left, $right));
    $total = count($left + $right);

    return $total > 0 ? $shared / $total : 0.0;
}
|
||||||
|
|
||||||
|
/**
 * Heuristic used to decide whether generic fallback issues are appropriate.
 *
 * Texts shorter than 8000 characters never qualify. Longer texts qualify
 * when the document type is one of the listed family-law types, or when at
 * least two of the listed keywords occur in the text (case-insensitively).
 */
private function looksLikeSubstantiveFamilyLawDocument(string $text, string $docType): bool
{
    if (mb_strlen($text, 'UTF-8') < 8000) {
        return false;
    }

    $familyLawTypes = ['barnevernet', 'adopsjon', 'emergency', 'samvær', 'fylkesnemnd'];
    if (in_array($docType, $familyLawTypes, true)) {
        return true;
    }

    $haystack = mb_strtolower($text, 'UTF-8');
    $markers = ['sakkyndig', 'barnevern', 'barnets beste', 'samvær', 'samvaer', 'omsorg', 'tilknytning', 'emk', 'fylkesnemnd'];

    $present = array_filter(
        $markers,
        static fn(string $marker): bool => str_contains($haystack, mb_strtolower($marker, 'UTF-8'))
    );

    return count($present) >= 2;
}
|
||||||
|
|
||||||
|
/**
 * Generic issues used when extraction produced nothing for a long document.
 *
 * Returns three fixed questions (Norwegian when $language is 'no', English
 * otherwise). The first issue's context embeds the first 300 characters of
 * the whitespace-normalised document text as an excerpt.
 *
 * @return array<int,array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string}>
 */
private function fallbackLegalIssues(string $language, string $docType, string $text): array
{
    $trimmed = trim($text);
    $excerpt = mb_substr(preg_replace('/\s+/u', ' ', $trimmed) ?? $trimmed, 0, 300, 'UTF-8');

    // Each row: [question, brief context, severity hint].
    $rows = $language === 'no'
        ? [
            [
                'Hvordan skal barnets beste og samvaer vurderes etter norsk rett?',
                'Langt familie- eller barnevernsdokument der modellen ikke identifiserte strukturerte spørsmål. Utdrag: ' . $excerpt,
                'high',
            ],
            [
                'Er den sakkyndige vurderingen og bevisgrunnlaget tilstrekkelig for konklusjonene?',
                'Dokumentet ser ut til å inneholde sakkyndige eller faktiske vurderinger som bør testes juridisk.',
                'medium',
            ],
            [
                'Er saksbehandling, kontradiksjon og offentlige plikter oppfylt etter norsk rett og EMK?',
                'Lang sak bør vurderes for prosessuelle rettigheter, dokumentasjonsplikt og forholdsmessighet.',
                'medium',
            ],
        ]
        : [
            [
                'How should the child best-interests and contact/visitation assessment be reviewed under Norwegian law?',
                'Long family-law or child-welfare document where the model did not return structured issues. Excerpt: ' . $excerpt,
                'high',
            ],
            [
                'Is the expert assessment and evidentiary basis sufficient for the conclusions reached?',
                'The document appears to contain expert or factual assessments that require legal testing.',
                'medium',
            ],
            [
                'Were procedural fairness, contradiction rights, and public-authority duties satisfied under Norwegian law and ECHR?',
                'A long case file should be checked for procedural rights, documentation duties, and proportionality.',
                'medium',
            ],
        ];

    $issues = [];
    foreach ($rows as $position => [$question, $briefContext, $severity]) {
        $issues[] = [
            'id' => $position + 1,
            'question' => $question,
            'brief_context' => $briefContext,
            'doc_type' => $docType,
            'severity_hint' => $severity,
        ];
    }

    return $issues;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pass 2 — single targeted question to dbn-legal-agent-v3 with corpus context.
|
* Pass 2 — single targeted question to dbn-legal-agent-v3 with corpus context.
|
||||||
* Ocelot-only. Capped at 350 tokens / 60s to avoid the documented loop bug.
|
* Ocelot-only. Capped at 350 tokens / 60s to avoid the documented loop bug.
|
||||||
|
|||||||
Reference in New Issue
Block a user