personaPrompt = $prompt !== '' ? $prompt : null; return $this; }

    /**
     * Sets the persona slug that runFullAnalysis() forwards to the post-synthesis legal check.
     *
     * A null, empty or whitespace-only slug leaves the current value untouched.
     *
     * @param string|null $slug Persona identifier; surrounding whitespace is trimmed.
     * @return self This instance, for chaining.
     */
    public function withPersonaSlug(?string $slug): self
    {
        $slug = trim($slug ?? '');
        if ($slug !== '') {
            $this->personaSlug = $slug;
        }

        return $this;
    }

    /**
     * Creates the gateway used for issue extraction and synthesis, and the legal tools service
     * used for corpus lookups.
     */
    public function __construct()
    {
        // On Azure: gpt-4o-mini for extraction/synthesis. On Bedrock: factory picks Haiku/Sonnet.
        $this->azureMini = DbnGatewayFactory::bedrockEnabled()
            ? DbnGatewayFactory::makeForTool('legal-analysis')
            : (new DbnAzureOpenAiGateway())->withDeployment('gpt-4o-mini');
        $this->legalSvc = new DbnLegalToolsService();
    }

    /**
     * Pass 1 — extract distinct legal issues from a single text window via the extraction gateway.
     *
     * The model reply is treated as untrusted: entries that are not arrays, or whose question is
     * missing or shorter than 10 characters, are dropped; text fields are length-capped; an
     * unknown severity becomes 'medium'; a missing or empty doc_type falls back to $docType.
     *
     * @param string $text     Document window; only the first 24000 characters are sent.
     * @param string $language Language code, resolved to a display name for the prompt.
     * @param string $docType  Document type used when the model does not supply one.
     * @return list<array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string}>
     */
    private function extractIssuesFromSingleChunk(string $text, string $language, string $docType): array
    {
        $locale = dbnToolsLanguageName($language);
        $text = mb_substr($text, 0, 24000, 'UTF-8'); // keep prompt within 4o-mini context

        // NOTE(review): the heredoc opener and the first part of this prompt are missing from the
        // reviewed copy of the file ("<<" is followed directly by prompt text). The statement is
        // kept verbatim — restore it from the canonical source before merging.
        $prompt = <<", "brief_context": "<≤2 sentences in {$locale} summarising what in the document triggered this question — paraphrase, do not quote in Norwegian unless quoting a statute>", "doc_type": "", "severity_hint": "" } ] } Rules: - Skip non-legal observations (logistics, social commentary, opinions). - Each question should be answerable with citations to barnevernsloven, EMK Art. X, named Høyesterett/EMD cases — NOT general advice. - If the document has fewer than 5 real legal issues, return fewer entries. - If NO real legal issue exists, return {"issues": []}. - The source document may be in Norwegian — that is fine; still write your output in {$locale}. DOCUMENT: --- {$text} --- PROMPT;

        $raw = $this->azureMini->chatText(
            [
                ['role' => 'system', 'content' => 'You return valid JSON only. No prose, no fences.'],
                ['role' => 'user', 'content' => $prompt],
            ],
            ['json' => true, 'temperature' => 0.1, 'max_tokens' => 1500, 'timeout' => 90]
        );
        $decoded = $this->azureMini->decodeJsonObject($raw);
        $issues = is_array($decoded['issues'] ?? null) ? $decoded['issues'] : [];

        $clean = [];
        $id = 1;
        foreach ($issues as $issue) {
            // The model may return strings or nulls instead of objects; ignore those entries.
            if (!is_array($issue)) {
                continue;
            }

            // Only scalars are cast: casting a nested array would yield the literal "Array".
            $question = is_scalar($issue['question'] ?? null) ? trim((string)$issue['question']) : '';
            if (mb_strlen($question, 'UTF-8') < 10) {
                continue;
            }

            $context = is_scalar($issue['brief_context'] ?? null) ? trim((string)$issue['brief_context']) : '';
            $type = is_string($issue['doc_type'] ?? null) ? trim($issue['doc_type']) : '';
            $severity = $issue['severity_hint'] ?? '';

            $clean[] = [
                'id' => $id++,
                'question' => mb_substr($question, 0, 280, 'UTF-8'),
                'brief_context' => mb_substr($context, 0, 400, 'UTF-8'),
                // An empty doc_type from the model must not override the caller's type.
                'doc_type' => $type !== '' ? $type : $docType,
                'severity_hint' => in_array($severity, ['high', 'medium', 'low'], true) ? $severity : 'medium',
            ];

            if (count($clean) >= self::MAX_ISSUES) {
                break;
            }
        }

        return $clean;
    }

    /**
     * Pass 1 - extract distinct legal issues from representative document windows.
     *
     * Each window from issueExtractionChunks() is processed independently; a failing window is
     * logged and skipped. Near-duplicate questions are merged, collection stops at
     * self::MAX_ISSUES, and ids are renumbered from 1. When nothing was extracted but the document
     * looks like a substantive family-law document, generic fallback issues are returned instead.
     *
     * @param string $text     Full document text.
     * @param string $language Language code for prompts and fallback questions.
     * @param string $docType  Document type of the uploaded document.
     * @return list<array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string}>
     */
    public function extractIssues(string $text, string $language, string $docType): array
    {
        $text = trim($text);
        if ($text === '') {
            return [];
        }

        $allIssues = [];
        foreach ($this->issueExtractionChunks($text) as $chunk) {
            try {
                $chunkIssues = $this->extractIssuesFromSingleChunk((string)$chunk['text'], $language, $docType);
            } catch (Throwable $e) {
                // Best effort: one failing window must not abort the whole extraction.
                error_log('legal-analysis issue extraction failed for ' . (string)$chunk['label'] . ': ' . $e->getMessage());
                $chunkIssues = [];
            }

            foreach ($chunkIssues as $issue) {
                $this->appendUniqueIssue($allIssues, $issue);
                if (count($allIssues) >= self::MAX_ISSUES) {
                    break 2;
                }
            }
        }

        if (!$allIssues && $this->looksLikeSubstantiveFamilyLawDocument($text, $docType)) {
            $allIssues = $this->fallbackLegalIssues($language, $docType, $text);
        }

        // Per-window ids restart at 1, so renumber the merged list.
        foreach ($allIssues as $idx => &$issue) {
            $issue['id'] = $idx + 1;
        }
        unset($issue);

        return array_slice($allIssues, 0, self::MAX_ISSUES);
    }

    /**
     * Chooses up to six 24000-character windows of the document for issue extraction.
     *
     * Documents of at most 28000 characters are returned as one window. Longer documents get a
     * beginning, middle and end window plus up to four windows with the highest keyword counts.
     * A window whose start lies within 6000 characters of an already chosen one is skipped.
     * The result is ordered by score (descending), then by offset (ascending).
     *
     * @param string $text Trimmed document text.
     * @return list<array{label:string,text:string,offset:int,score:int}>
     */
    private function issueExtractionChunks(string $text): array
    {
        $len = mb_strlen($text, 'UTF-8');
        $window = 24000;
        if ($len <= $window + 4000) {
            return [[
                'label' => 'full document',
                'text' => $text,
                'offset' => 0,
                'score' => 0,
            ]];
        }

        $chunks = [];
        $add = function (string $label, int $offset, int $score = 0) use (&$chunks, $text, $len, $window): void {
            // Clamp so that the window never runs past the end of the document.
            $offset = max(0, min($offset, max(0, $len - $window)));
            foreach ($chunks as $existing) {
                if (abs((int)$existing['offset'] - $offset) < 6000) {
                    return;
                }
            }
            $chunks[] = [
                'label' => $label,
                'text' => mb_substr($text, $offset, $window, 'UTF-8'),
                'offset' => $offset,
                'score' => $score,
            ];
        };

        $add('beginning of document', 0, 1);
        $add('middle of document', (int)floor(($len - $window) / 2), 1);
        $add('end of document', $len - $window, 1);

        $keywords = [
            'samvaer', 'samvær', 'omsorg', 'barnevern', 'sakkyndig', 'risiko',
            'tilknytning', 'rus', 'vold', 'emk', 'barnets beste', 'foreldre',
            'bekymring', 'kontakt', 'plassering', 'fylkesnemnd', 'retten',
        ];

        // Slide a window in 10000-character steps and score it by keyword occurrences.
        $candidates = [];
        for ($offset = 0; $offset < $len; $offset += 10000) {
            $chunk = mb_substr($text, $offset, $window, 'UTF-8');
            if ($chunk === '') {
                break;
            }

            $lower = mb_strtolower($chunk, 'UTF-8');
            $score = 0;
            foreach ($keywords as $kw) {
                $score += substr_count($lower, mb_strtolower($kw, 'UTF-8'));
            }
            if ($score > 0) {
                $candidates[] = ['offset' => $offset, 'score' => $score];
            }

            if ($offset + $window >= $len) {
                break;
            }
        }

        usort($candidates, static fn(array $a, array $b): int => ($b['score'] <=> $a['score']));
        foreach (array_slice($candidates, 0, 4) as $candidate) {
            $add('keyword-heavy legal section', (int)$candidate['offset'], (int)$candidate['score']);
            if (count($chunks) >= 6) {
                break;
            }
        }

        usort($chunks, static function (array $a, array $b): int {
            if ($a['score'] !== $b['score']) {
                return $b['score'] <=> $a['score'];
            }

            return $a['offset'] <=> $b['offset'];
        });

        return array_slice($chunks, 0, 6);
    }

    /**
     * Appends $candidate to $issues unless an equivalent question is already present.
     *
     * Two questions are equivalent when their normalized keys are equal or their word-set
     * similarity is at least 0.58.
     *
     * @param array $issues Issue list, modified in place.
     * @param array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string} $candidate
     */
    private function appendUniqueIssue(array &$issues, array $candidate): void
    {
        $candidateKey = $this->issueDedupeKey((string)$candidate['question']);
        foreach ($issues as $existing) {
            $existingKey = $this->issueDedupeKey((string)$existing['question']);
            if ($candidateKey === $existingKey || $this->issueSimilarity($candidateKey, $existingKey) >= 0.58) {
                return;
            }
        }

        $issues[] = $candidate;
    }

    /**
     * Normalizes a question for comparison: lower-case, every run of characters that is not a
     * letter, digit or whitespace replaced by a space, whitespace collapsed and trimmed.
     */
    private function issueDedupeKey(string $question): string
    {
        $question = mb_strtolower($question, 'UTF-8');
        // preg_replace() returns null on failure; keep the previous value in that case.
        $question = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $question) ?? $question;
        $question = preg_replace('/\s+/u', ' ', trim($question)) ?? $question;

        return $question;
    }

    /**
     * Returns |A ∩ B| / |A ∪ B| over the distinct words longer than three characters in each
     * normalized question, or 0.0 when either side has no such word.
     *
     * @param string $a Normalized question (see issueDedupeKey()).
     * @param string $b Normalized question (see issueDedupeKey()).
     */
    private function issueSimilarity(string $a, string $b): float
    {
        $aWords = array_values(array_unique(array_filter(
            preg_split('/\s+/u', $a) ?: [],
            static fn($w) => mb_strlen($w, 'UTF-8') > 3
        )));
        $bWords = array_values(array_unique(array_filter(
            preg_split('/\s+/u', $b) ?: [],
            static fn($w) => mb_strlen($w, 'UTF-8') > 3
        )));
        if (!$aWords || !$bWords) {
            return 0.0;
        }

        $intersection = count(array_intersect($aWords, $bWords));
        $union = count(array_unique(array_merge($aWords, $bWords)));

        return $union > 0 ? $intersection / $union : 0.0;
    }

    /**
     * Decides whether fallback issues are warranted when extraction returned nothing.
     *
     * True only for documents of at least 8000 characters that either have one of the listed
     * document types or contain at least two of the listed keywords.
     */
    private function looksLikeSubstantiveFamilyLawDocument(string $text, string $docType): bool
    {
        if (mb_strlen($text, 'UTF-8') < 8000) {
            return false;
        }
        if (in_array($docType, ['barnevernet', 'adopsjon', 'emergency', 'samvær', 'fylkesnemnd'], true)) {
            return true;
        }

        $lower = mb_strtolower($text, 'UTF-8');
        $hits = 0;
        foreach (['sakkyndig', 'barnevern', 'barnets beste', 'samvær', 'samvaer', 'omsorg', 'tilknytning', 'emk', 'fylkesnemnd'] as $kw) {
            if (str_contains($lower, mb_strtolower($kw, 'UTF-8'))) {
                $hits++;
            }
        }

        return $hits >= 2;
    }

    /**
     * Builds three generic issues used when the model returned no structured issues.
     *
     * Questions are Norwegian for language 'no' and English for every other language. The first
     * issue's context ends with the first 300 characters of the whitespace-collapsed document.
     *
     * @return list<array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string}>
     */
    private function fallbackLegalIssues(string $language, string $docType, string $text): array
    {
        $context = mb_substr(preg_replace('/\s+/u', ' ', trim($text)) ?? trim($text), 0, 300, 'UTF-8');

        if ($language === 'no') {
            return [
                [
                    'id' => 1,
                    'question' => 'Hvordan skal barnets beste og samvaer vurderes etter norsk rett?',
                    'brief_context' => 'Langt familie- eller barnevernsdokument der modellen ikke identifiserte strukturerte spørsmål. Utdrag: ' . $context,
                    'doc_type' => $docType,
                    'severity_hint' => 'high',
                ],
                [
                    'id' => 2,
                    'question' => 'Er den sakkyndige vurderingen og bevisgrunnlaget tilstrekkelig for konklusjonene?',
                    'brief_context' => 'Dokumentet ser ut til å inneholde sakkyndige eller faktiske vurderinger som bør testes juridisk.',
                    'doc_type' => $docType,
                    'severity_hint' => 'medium',
                ],
                [
                    'id' => 3,
                    'question' => 'Er saksbehandling, kontradiksjon og offentlige plikter oppfylt etter norsk rett og EMK?',
                    'brief_context' => 'Lang sak bør vurderes for prosessuelle rettigheter, dokumentasjonsplikt og forholdsmessighet.',
                    'doc_type' => $docType,
                    'severity_hint' => 'medium',
                ],
            ];
        }

        return [
            [
                'id' => 1,
                'question' => 'How should the child best-interests and contact/visitation assessment be reviewed under Norwegian law?',
                'brief_context' => 'Long family-law or child-welfare document where the model did not return structured issues. Excerpt: ' . $context,
                'doc_type' => $docType,
                'severity_hint' => 'high',
            ],
            [
                'id' => 2,
                'question' => 'Is the expert assessment and evidentiary basis sufficient for the conclusions reached?',
                'brief_context' => 'The document appears to contain expert or factual assessments that require legal testing.',
                'doc_type' => $docType,
                'severity_hint' => 'medium',
            ],
            [
                'id' => 3,
                'question' => 'Were procedural fairness, contradiction rights, and public-authority duties satisfied under Norwegian law and ECHR?',
                'brief_context' => 'A long case file should be checked for procedural rights, documentation duties, and proportionality.',
                'doc_type' => $docType,
                'severity_hint' => 'medium',
            ],
        ];
    }

    /**
     * Pass 2 — single targeted question to dbn-legal-agent-v3 with corpus context.
     * Ocelot-only. Token and time limits come from self::LEGAL_MAX_TOKENS / self::LEGAL_TIMEOUT
     * (350 tokens / 60s per the original note) to avoid the documented loop bug.
     *
     * Never throws for model failures: the error is logged, the answer becomes a bracketed
     * placeholder and the severity falls back to the issue's severity_hint.
     *
     * @param array{id:int,question:string,brief_context:string,doc_type:string,severity_hint:string} $issue
     * @param string $corpusContext Corpus excerpts appended to the user message; may be empty.
     * @param string $language      Language code for labels and the answer-language instruction.
     * @return array{id:int,question:string,answer:string,severity:string,legal_basis:string,citations_from_corpus:array,what_to_check:string,brief_context:string}
     */
    public function answerIssue(array $issue, string $corpusContext, string $language): array
    {
        $locale = dbnToolsLanguageName($language);

        // Base persona prompt comes from the resolved chat profile (default =
        // Child-welfare/barnevern). The fine-tune was trained primarily in
        // Norwegian; the Norwegian system prompt keeps its precision on
        // barnevernsloven / EMD. We then add a language-coercion line so the
        // prose comes back in the user's chosen language. Statute and case names
        // stay in their original Norwegian form.
        $sysMsg = ($this->personaPrompt ?? 'Du er en ekspert på norsk barnevernsloven og EMD-praksis. '
            . 'Bruk korrekt juridisk terminologi. '
            . 'Bruk terskler fra barnevernsloven 2021: § 4-25 krever «klar nødvendighet». '
            . 'Strand Lobben mot Norge (37283/13) setter krav om rehabiliteringsplan før adopsjon. '
            . 'Aldri oppfinn paragrafnumre, saksnumre eller dommernavn.')
            . ' Avslutt med en «Kilder:»-seksjon som lister lovparagrafer og dommer du har sitert. ';

        if ($language === 'no') {
            $sysMsg .= 'Svar på norsk.';
        } else {
            $sysMsg .= 'IMPORTANT: Write your answer in ' . $locale
                . '. Keep all Norwegian statute references (e.g. "barnevernsloven § 4-25", '
                . '"forvaltningsloven § 17", "EMK Art. 8") and case names (e.g. "Strand Lobben '
                . 'mot Norge 37283/13") in their original Norwegian/Latin form. The "Kilder:" '
                . 'section heading stays as "Kilder:" but its contents (the cited authorities) '
                . 'are listed in their original Norwegian form.';
        }

        $userMsg = $issue['question'];
        if ($issue['brief_context'] !== '') {
            $ctxLabel = match ($language) {
                'no' => 'Kontekst fra saken',
                'pl' => 'Kontekst sprawy',
                'uk' => 'Контекст справи',
                default => 'Case context',
            };
            $userMsg .= "\n\n" . $ctxLabel . ': ' . $issue['brief_context'];
        }
        if ($corpusContext !== '') {
            $product = dbnToolsProductName();
            $srcLabel = match ($language) {
                'no' => 'Relevante kilder fra ' . $product . '-korpuset',
                'pl' => 'Istotne źródła z korpusu ' . $product,
                'uk' => 'Релевантні джерела з корпусу ' . $product,
                default => 'Relevant sources from the ' . $product . ' corpus',
            };
            $userMsg .= "\n\n" . $srcLabel . ":\n" . $corpusContext;
        }

        $answer = '';
        $error = null;
        try {
            $response = dbnToolsCallGpuLlm(
                [
                    ['role' => 'system', 'content' => $sysMsg],
                    ['role' => 'user', 'content' => $userMsg],
                ],
                [
                    'model' => self::LEGAL_MODEL,
                    'temperature' => 0.1,
                    'max_tokens' => self::LEGAL_MAX_TOKENS,
                    'timeout' => self::LEGAL_TIMEOUT,
                ]
            );
            $answer = trim((string)($response['choices'][0]['message']['content'] ?? ''));
        } catch (Throwable $e) {
            $error = $e->getMessage();
            error_log('legal-analysis issue answer failed: ' . $error);
        }

        // Remember whether the model produced any text before $clean may become a placeholder:
        // severity must not be inferred from an error message.
        $modelAnswered = $answer !== '';

        $clean = dbnToolsExtractCleanAnswer($answer);
        if (mb_strlen($clean, 'UTF-8') < 30) {
            // Cleaning removed (almost) everything: prefer the raw answer, else a placeholder.
            $clean = $modelAnswered
                ? $answer
                : ($error !== null ? "[Modellfeil: $error]" : '[Modellen returnerte ingen brukbar tekst.]');
        }

        $severity = $modelAnswered ? dbnToolsInferCheckSeverity($clean) : $issue['severity_hint'];
        $legalBasis = dbnToolsExtractCheckLegalBasis($clean);

        $whatToCheck = match ($language) {
            'no' => 'Verifiser med norsk familieretsadvokat før handling.',
            'pl' => 'Zweryfikuj z norweskim adwokatem ds. rodzinnych przed podjęciem działań.',
            'uk' => 'Перевірте з норвезьким адвокатом із сімейного права перед діями.',
            default => 'Verify with a qualified Norwegian family-law lawyer before acting.',
        };

        return [
            'id' => $issue['id'],
            'question' => $issue['question'],
            'brief_context' => $issue['brief_context'],
            'answer' => $clean,
            'severity' => $severity,
            'legal_basis' => $legalBasis,
            'citations_from_corpus' => [],
            'what_to_check' => $whatToCheck,
        ];
    }

    /**
     * Pass 3 — synthesise overall assessment. Azure-only.
     *
     * On any gateway failure or unusable reply a static assessment is returned instead. The
     * disclaimer is always the localized text for $language unless the model supplied its own.
     *
     * @param array  $issues   Answered issues as returned by answerIssue().
     * @param string $language Language code for the prompt and disclaimer.
     * @param string $docType  Document type of the analysed document.
     * @return array{overall_assessment:string,next_steps:array,disclaimer:string}
     */
    public function synthesise(array $issues, string $language, string $docType): array
    {
        $locale = dbnToolsLanguageName($language);

        $bullets = [];
        foreach ($issues as $i) {
            $bullets[] = sprintf(
                "- [%s] %s\n Svar: %s",
                strtoupper((string)$i['severity']),
                $i['question'],
                mb_substr((string)$i['answer'], 0, 600, 'UTF-8')
            );
        }
        $issuesBlock = implode("\n", $bullets);

        $disclaimerText = match ($language) {
            'no' => 'Dette er automatisert juridisk analyse, ikke juridisk rådgivning. Verifiser med en kvalifisert norsk advokat før du handler.',
            'pl' => 'To jest zautomatyzowana analiza prawna, a nie porada prawna. Zweryfikuj z wykwalifikowanym norweskim prawnikiem przed podjęciem działań.',
            'uk' => 'Це автоматизований юридичний аналіз, а не юридична консультація. Перевірте з кваліфікованим норвезьким юристом перед діями.',
            default => 'This is automated legal analysis, not legal advice. Verify with a qualified Norwegian lawyer before acting.',
        };

        // NOTE(review): the heredoc opener and the first part of this prompt are missing from the
        // reviewed copy of the file ("<<" is followed directly by prompt text). The statement is
        // kept verbatim — restore it from the canonical source before merging.
        $prompt = <<", "next_steps": ["", "", ""], "disclaimer": "{$disclaimerText}" } PROMPT;

        try {
            $raw = $this->azureMini->chatText(
                [
                    ['role' => 'system', 'content' => 'You return valid JSON only. No prose, no fences.'],
                    ['role' => 'user', 'content' => $prompt],
                ],
                ['json' => true, 'temperature' => 0.2, 'max_tokens' => 700, 'timeout' => 60]
            );
            $decoded = $this->azureMini->decodeJsonObject($raw);

            // Model output is untrusted: only accept a scalar, non-blank assessment.
            $assessment = is_array($decoded) && is_scalar($decoded['overall_assessment'] ?? null)
                ? trim((string)$decoded['overall_assessment'])
                : '';

            if ($assessment !== '') {
                $steps = [];
                foreach ((is_array($decoded['next_steps'] ?? null) ? $decoded['next_steps'] : []) as $step) {
                    if (!is_scalar($step) || trim((string)$step) === '') {
                        continue;
                    }
                    $steps[] = trim((string)$step);
                    if (count($steps) >= 5) {
                        break;
                    }
                }

                $disclaimer = is_string($decoded['disclaimer'] ?? null) && trim($decoded['disclaimer']) !== ''
                    ? $decoded['disclaimer']
                    : $disclaimerText;

                return [
                    'overall_assessment' => $assessment,
                    'next_steps' => $steps,
                    'disclaimer' => $disclaimer,
                ];
            }
        } catch (Throwable $e) {
            error_log('legal-analysis synthesis failed: ' . $e->getMessage());
        }

        return [
            'overall_assessment' => 'Synthesis step did not return structured output. See individual issue answers below.',
            'next_steps' => [],
            // Keep the disclaimer in the user's language even when synthesis failed.
            'disclaimer' => $disclaimerText,
        ];
    }

    /**
     * Full orchestrated run. Emits progress events via the $emit callable.
     *
     * Runs issue extraction, answers each issue with corpus context, synthesises an overall
     * assessment and finally runs a best-effort legal check on that assessment. Both the empty
     * and the normal result carry the same keys.
     *
     * @param string   $text     Full document text.
     * @param string   $language Language code for prompts and user-facing strings.
     * @param string   $docType  Document type of the analysed document.
     * @param callable $emit     (string $event, array $payload): void
     * @return array{ok:bool,issues:array,overall_assessment:string,next_steps:array,disclaimer:string,doc_type:string,model:string,legal_check:mixed,latency_ms:int}
     */
    public function runFullAnalysis(string $text, string $language, string $docType, callable $emit): array
    {
        $startMs = (int)round(microtime(true) * 1000);

        // Pass 1
        $emit('progress', ['step' => 'extracting_issues', 'detail' => 'Identifying distinct legal issues…']);
        $issues = $this->extractIssues($text, $language, $docType);

        if (empty($issues)) {
            $emptyAssessment = match ($language) {
                'no' => 'Ingen distinkte juridiske spørsmål identifisert i dette dokumentet.',
                'pl' => 'Nie zidentyfikowano odrębnych kwestii prawnych w tym dokumencie.',
                'uk' => 'У цьому документі не виявлено окремих юридичних питань.',
                default => 'No discrete legal issues identified in this document.',
            };
            $emptyDisclaimer = match ($language) {
                'no' => 'Automatisert analyse — ikke juridisk rådgivning.',
                'pl' => 'Analiza zautomatyzowana — nie stanowi porady prawnej.',
                'uk' => 'Автоматизований аналіз — не є юридичною консультацією.',
                default => 'Automated analysis — not legal advice.',
            };

            return [
                'ok' => true,
                'issues' => [],
                'overall_assessment' => $emptyAssessment,
                'next_steps' => [],
                'disclaimer' => $emptyDisclaimer,
                // Same keys as the normal result so consumers need no special case.
                'doc_type' => $docType,
                'model' => self::LEGAL_MODEL,
                'legal_check' => [],
                'latency_ms' => (int)round(microtime(true) * 1000) - $startMs,
            ];
        }

        $emit('progress', [
            'step' => 'issues_extracted',
            'detail' => sprintf('Found %d legal issue(s); asking specialist…', count($issues)),
            'issues' => array_map(
                static fn($i) => ['id' => $i['id'], 'question' => $i['question'], 'severity_hint' => $i['severity_hint']],
                $issues
            ),
        ]);

        // Pass 2 — one issue at a time
        $answered = [];
        foreach ($issues as $issue) {
            $emit('progress', [
                'step' => 'issue_searching_corpus',
                'detail' => sprintf('Issue %d: searching legal corpus…', $issue['id']),
                'issue_id' => $issue['id'],
            ]);
            $corpusQuery = $issue['question'] . "\n" . $issue['brief_context'];
            $corpusContext = $this->legalSvc->corpusContextForSummarize($corpusQuery, 3);

            $emit('progress', [
                'step' => 'issue_answering',
                'detail' => sprintf('Issue %d: asking dbn-legal-agent-v3…', $issue['id']),
                'issue_id' => $issue['id'],
            ]);
            $answer = $this->answerIssue($issue, $corpusContext, $language);
            $answered[] = $answer;
            $emit('issue_answered', ['issue' => $answer]);
        }

        // Pass 3
        $emit('progress', ['step' => 'synthesising', 'detail' => 'Synthesising overall assessment…']);
        $synth = $this->synthesise($answered, $language, $docType);

        // Post-synthesis legal check — dbn-legal-agent-v3 validates the overall assessment
        $legalCheck = [];
        try {
            $legalCheck = dbnToolsRunLegalCheck(
                mb_strimwidth($synth['overall_assessment'], 0, 800, '', 'UTF-8'),
                $docType,
                $this->personaPrompt,
                $this->personaSlug
            );
        } catch (Throwable $e) {
            // Best effort: the analysis is still returned, but the failure is no longer silent.
            error_log('legal-analysis legal check failed: ' . $e->getMessage());
        }

        return [
            'ok' => true,
            'issues' => $answered,
            'overall_assessment' => $synth['overall_assessment'],
            'next_steps' => $synth['next_steps'],
            'disclaimer' => $synth['disclaimer'],
            'doc_type' => $docType,
            'model' => self::LEGAL_MODEL,
            'legal_check' => $legalCheck,
            'latency_ms' => (int)round(microtime(true) * 1000) - $startMs,
        ];
    }
}