From e977bbb6b3e4ca334efcfae8e5176220da03c632 Mon Sep 17 00:00:00 2001 From: davegilligan Date: Mon, 18 May 2026 19:30:38 +0200 Subject: [PATCH] Add Document Discrepancy Finder tool 8-step NDJSON-streaming pipeline that compares two Barnevernet documents: classifies each doc, extracts parties and timelines, cross-references both for contradictions/deletions/additions, retrieves corpus legal context, and synthesises a full discrepancy report with tabbed UI. New files: DiscrepancyAgent.php, api/discrepancy.php, discrepancy.php, discrepancy.js. Modified: FreeTier.php (cost=4), i18n.php (all 4 langs), tool-svgs.php (DC icon), tools.css (dc-* component styles). Co-Authored-By: Claude Sonnet 4.6 --- api/discrepancy.php | 161 +++++ assets/css/tools.css | 528 +++++++++++++++++ assets/js/discrepancy.js | 882 +++++++++++++++++++++++++++ discrepancy.php | 176 ++++++ includes/DiscrepancyAgent.php | 1047 +++++++++++++++++++++++++++++++++ includes/FreeTier.php | 1 + includes/i18n.php | 19 +- includes/tool-svgs.php | 2 + 8 files changed, 2809 insertions(+), 7 deletions(-) create mode 100644 api/discrepancy.php create mode 100644 assets/js/discrepancy.js create mode 100644 discrepancy.php create mode 100644 includes/DiscrepancyAgent.php diff --git a/api/discrepancy.php b/api/discrepancy.php new file mode 100644 index 0000000..4e8d04b --- /dev/null +++ b/api/discrepancy.php @@ -0,0 +1,161 @@ + 0) { @ob_end_clean(); } +ob_implicit_flush(true); + +header('Content-Type: application/x-ndjson; charset=utf-8'); +header('Cache-Control: no-store'); +header('X-Accel-Buffering: no'); +if ($ftRemaining >= 0) { header('X-Credits-Remaining: ' . $ftRemaining); } + +$language = 'en'; +$startTime = microtime(true); + +$emit = function (string $event, array $payload = []) use ($startTime): void { + $payload['event'] = $event; + $payload['t_ms'] = (int)round((microtime(true) - $startTime) * 1000); + echo json_encode($payload, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES) . 
"\n"; + @flush(); +}; + +try { + // Parse payload (always multipart — two files required) + $payloadRaw = (string)($_POST['payload'] ?? ''); + if ($payloadRaw === '') { + throw new DbnToolsHttpException('Missing payload field.', 422, 'missing_payload'); + } + $input = json_decode($payloadRaw, true); + if (!is_array($input)) { + throw new DbnToolsHttpException('Invalid payload JSON.', 422, 'invalid_payload_json'); + } + + $language = dbnToolsNormalizeLanguage($input['language'] ?? 'en'); + $engine = (string)($input['engine'] ?? 'azure_mini'); + $sliceInput = $input['slices'] ?? []; + + // Extract file A + $emit('progress', ['detail' => 'Reading Document A…']); + $fileEntryA = $_FILES['file_a'] ?? null; + if (!$fileEntryA || ($fileEntryA['error'] ?? UPLOAD_ERR_NO_FILE) !== UPLOAD_ERR_OK) { + throw new DbnToolsHttpException( + 'Document A is required. Upload a PDF, DOCX, or TXT file.', + 422, 'missing_file_a' + ); + } + $extractedA = dbnToolsExtractUploadedFile([ + 'name' => $fileEntryA['name'] ?? '', + 'type' => $fileEntryA['type'] ?? '', + 'tmp_name' => $fileEntryA['tmp_name'] ?? '', + 'error' => $fileEntryA['error'] ?? UPLOAD_ERR_NO_FILE, + 'size' => $fileEntryA['size'] ?? 0, + ]); + $fileA = [ + 'filename' => $extractedA['filename'], + 'text' => $extractedA['text'], + 'chars' => $extractedA['chars'], + 'truncated' => $extractedA['truncated'], + ]; + $emit('progress', ['detail' => sprintf('Document A extracted: %s (%d chars%s)', + $extractedA['filename'], $extractedA['chars'], + !empty($extractedA['truncated']) ? ', truncated' : '')]); + + // Extract file B + $emit('progress', ['detail' => 'Reading Document B…']); + $fileEntryB = $_FILES['file_b'] ?? null; + if (!$fileEntryB || ($fileEntryB['error'] ?? UPLOAD_ERR_NO_FILE) !== UPLOAD_ERR_OK) { + throw new DbnToolsHttpException( + 'Document B is required. Upload a PDF, DOCX, or TXT file.', + 422, 'missing_file_b' + ); + } + $extractedB = dbnToolsExtractUploadedFile([ + 'name' => $fileEntryB['name'] ?? 
'', + 'type' => $fileEntryB['type'] ?? '', + 'tmp_name' => $fileEntryB['tmp_name'] ?? '', + 'error' => $fileEntryB['error'] ?? UPLOAD_ERR_NO_FILE, + 'size' => $fileEntryB['size'] ?? 0, + ]); + $fileB = [ + 'filename' => $extractedB['filename'], + 'text' => $extractedB['text'], + 'chars' => $extractedB['chars'], + 'truncated' => $extractedB['truncated'], + ]; + $emit('progress', ['detail' => sprintf('Document B extracted: %s (%d chars%s)', + $extractedB['filename'], $extractedB['chars'], + !empty($extractedB['truncated']) ? ', truncated' : '')]); + + if (($fileA['text'] ?? '') === '') { + throw new DbnToolsHttpException('Could not extract text from Document A.', 422, 'empty_file_a'); + } + if (($fileB['text'] ?? '') === '') { + throw new DbnToolsHttpException('Could not extract text from Document B.', 422, 'empty_file_b'); + } + + $emit('start', [ + 'engine' => $engine, + 'language' => $language, + 'file_a' => $fileA['filename'], + 'file_b' => $fileB['filename'], + ]); + + $result = (new DbnDiscrepancyAgent())->run( + $fileA, + $fileB, + $engine, + $language, + is_array($sliceInput) ? $sliceInput : [], + $emit + ); + + $result['ok'] = true; + $result['latency_ms'] = (int)round((microtime(true) - $startTime) * 1000); + + dbnToolsLogMetadata([ + 'tool' => 'discrepancy', + 'language' => $language, + 'ok' => true, + 'latency_ms' => $result['latency_ms'], + 'source_count' => (int)($result['trace_metadata']['source_count'] ?? 0), + 'conflict_count' => (int)($result['trace_metadata']['conflict_count'] ?? 0), + 'deleted_count' => (int)($result['trace_metadata']['deleted_count'] ?? 0), + 'added_count' => (int)($result['trace_metadata']['added_count'] ?? 0), + 'deployment' => $result['trace_metadata']['deployment'] ?? 
null, + ]); + + $emit('final', ['result' => $result]); + +} catch (DbnToolsHttpException $e) { + $latency = (int)round((microtime(true) - $startTime) * 1000); + dbnToolsLogMetadata([ + 'tool' => 'discrepancy', + 'language' => $language, + 'ok' => false, + 'latency_ms' => $latency, + 'error_code' => $e->errorCode, + ]); + $emit('error', ['code' => $e->errorCode, 'message' => $e->getMessage(), 'status' => $e->status]); +} catch (Throwable $e) { + error_log('DBN discrepancy fatal: ' . $e->getMessage()); + $latency = (int)round((microtime(true) - $startTime) * 1000); + dbnToolsLogMetadata([ + 'tool' => 'discrepancy', + 'language' => $language, + 'ok' => false, + 'latency_ms' => $latency, + 'error_code' => 'internal_error', + ]); + $emit('error', ['code' => 'internal_error', 'message' => 'The discrepancy finder could not complete this request.']); +} diff --git a/assets/css/tools.css b/assets/css/tools.css index 692a05a..17e0e7f 100644 --- a/assets/css/tools.css +++ b/assets/css/tools.css @@ -6376,6 +6376,534 @@ body.lt-landing { } } +/* ─── Discrepancy Finder (dc-*) ──────────────────────────────────────────── */ + +/* Two upload zones side by side */ +.dc-upload-pair { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 16px; + margin-bottom: 16px; +} + +.dc-upload-slot { + display: flex; + flex-direction: column; + gap: 6px; +} + +.dc-slot-hint { + font-weight: 400; + color: var(--muted); + font-size: 0.86em; +} + +.dc-zone input[type="file"] { + position: absolute; + width: 0; + height: 0; + opacity: 0; + pointer-events: none; +} + +/* Progressive doc meta cards */ +.dc-doc-meta-pair { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 12px; + margin-bottom: 16px; +} + +.dc-doc-meta-card { + background: var(--panel); + border: 1px solid var(--line); + border-radius: 10px; + padding: 14px 16px; +} + +.dc-doc-meta-card__head { + display: flex; + align-items: center; + gap: 10px; + margin-bottom: 10px; +} + +.dc-slot-label { + font-size: 0.72rem; + 
font-weight: 800; + letter-spacing: 0.05em; + text-transform: uppercase; + color: var(--muted); + background: #f3f4f6; + border: 1px solid var(--line); + border-radius: 4px; + padding: 2px 7px; +} + +.dc-slot-label--a { background: #e8f0fe; color: #2d5fa6; border-color: #c3d4f8; } +.dc-slot-label--b { background: var(--soft-coral); color: var(--coral); border-color: #f9c6ae; } + +/* Parties preview (stream-time) */ +.dc-parties-preview { + margin-bottom: 10px; +} + +.dc-parties-chips { + display: flex; + flex-wrap: wrap; + gap: 6px; + margin-top: 6px; +} + +.dc-party-chip { + display: inline-flex; + align-items: center; + gap: 4px; + font-size: 0.78rem; + font-weight: 600; + background: var(--soft-teal); + color: var(--teal-dark); + border: 1px solid #b2dbd6; + border-radius: 999px; + padding: 3px 10px; +} + +.dc-party-chip--more { + background: #f3f4f6; + color: var(--muted); + border-color: var(--line); +} + +.dc-parties-count { + font-size: 0.8rem; + color: var(--muted); + margin: 4px 0 0; +} + +/* Timeline preview (stream-time) */ +.dc-timeline-preview { + display: flex; + align-items: center; + gap: 8px; + font-size: 0.82rem; + color: var(--muted); + margin-bottom: 6px; + padding: 6px 10px; + background: #f7f8fb; + border: 1px solid var(--line); + border-radius: 6px; +} + +.dc-timeline-preview strong { + color: var(--ink); +} + +/* ── Tabs ─────────────────────────────────────────────────────────────────── */ + +.dc-tabs { + display: grid; + gap: 0; +} + +.dc-tab-bar { + display: flex; + gap: 2px; + border-bottom: 2px solid var(--line); + margin-bottom: 16px; + overflow-x: auto; +} + +.dc-tab { + display: inline-flex; + align-items: center; + gap: 6px; + padding: 8px 14px; + font-size: 0.84rem; + font-weight: 600; + color: var(--muted); + border: none; + background: none; + cursor: pointer; + border-bottom: 2px solid transparent; + margin-bottom: -2px; + white-space: nowrap; + transition: color 100ms ease, border-color 100ms ease; +} + +.dc-tab:hover { color: 
var(--ink); } + +.dc-tab.is-active { + color: var(--teal-dark); + border-bottom-color: var(--teal); +} + +.dc-tab-count { + display: inline-flex; + align-items: center; + justify-content: center; + min-width: 18px; + height: 18px; + padding: 0 5px; + border-radius: 999px; + font-size: 0.7rem; + font-weight: 800; + background: var(--line); + color: var(--muted); +} + +.dc-tab.is-active .dc-tab-count { + background: var(--soft-teal); + color: var(--teal-dark); +} + +.dc-tab-panel { display: none; } +.dc-tab-panel.is-active { display: block; } + +/* ── Headline finding ─────────────────────────────────────────────────────── */ + +.dc-headline { + border-left: 4px solid var(--coral); + background: var(--soft-coral); + border-radius: 0 8px 8px 0; + padding: 14px 16px; + margin-bottom: 16px; +} + +.dc-headline__label { + font-size: 0.72rem; + font-weight: 800; + letter-spacing: 0.05em; + text-transform: uppercase; + color: var(--coral); + margin-bottom: 6px; +} + +.dc-headline__text { + font-size: 1.0rem; + font-weight: 700; + color: var(--ink); + line-height: 1.5; + margin: 0; +} + +/* ── Discrepancy list (Summary tab) ──────────────────────────────────────── */ + +.dc-discrepancies { + display: flex; + flex-direction: column; + gap: 10px; +} + +.dc-discrepancy { + background: var(--panel); + border: 1px solid var(--line); + border-radius: 8px; + padding: 14px 16px; +} + +.dc-discrepancy--contradiction { border-left: 3px solid var(--coral); } +.dc-discrepancy--deletion { border-left: 3px solid var(--amber); } +.dc-discrepancy--addition { border-left: 3px solid var(--teal); } +.dc-discrepancy--date_shift { border-left: 3px solid #8b5cf6; } +.dc-discrepancy--changed { border-left: 3px solid var(--amber); } + +.dc-discrepancy__head { + display: flex; + align-items: flex-start; + gap: 10px; + margin-bottom: 10px; + flex-wrap: wrap; +} + +.dc-cat-tag { + display: inline-block; + font-size: 0.68rem; + font-weight: 800; + letter-spacing: 0.05em; + text-transform: uppercase; + 
border-radius: 4px; + padding: 2px 7px; + white-space: nowrap; +} + +.dc-cat-tag--contradiction { background: var(--soft-coral); color: var(--coral); } +.dc-cat-tag--deletion { background: #fffbeb; color: var(--amber); } +.dc-cat-tag--addition { background: var(--soft-teal); color: var(--teal-dark); } +.dc-cat-tag--date_shift { background: #ede9fe; color: #6d28d9; } +.dc-cat-tag--changed { background: #fffbeb; color: var(--amber); } + +.dc-severity { + display: inline-block; + font-size: 0.68rem; + font-weight: 800; + letter-spacing: 0.04em; + text-transform: uppercase; + border-radius: 999px; + padding: 2px 9px; + white-space: nowrap; +} + +.dc-sev--high { background: var(--soft-coral); color: var(--coral); border: 1px solid #f9c6ae; } +.dc-sev--medium { background: #fffbeb; color: var(--amber); border: 1px solid #fde68a; } +.dc-sev--low { background: #f3f4f6; color: var(--muted); border: 1px solid var(--line); } + +.dc-discrepancy__compare { + display: grid; + grid-template-columns: 1fr auto 1fr; + gap: 8px; + align-items: start; + margin-bottom: 10px; +} + +.dc-compare-col { + background: #f7f8fb; + border: 1px solid var(--line); + border-radius: 6px; + padding: 10px 12px; + font-size: 0.86rem; + line-height: 1.55; + color: var(--ink); +} + +.dc-compare-col--a { + background: #f0f5ff; + border-color: #c3d4f8; +} + +.dc-compare-col--b { + background: #fff5f0; + border-color: #f9c6ae; +} + +.dc-compare-col__label { + font-size: 0.68rem; + font-weight: 800; + letter-spacing: 0.05em; + text-transform: uppercase; + margin-bottom: 5px; +} + +.dc-compare-col--a .dc-compare-col__label { color: #2d5fa6; } +.dc-compare-col--b .dc-compare-col__label { color: var(--coral); } + +.dc-compare-divider { + display: flex; + align-items: center; + justify-content: center; + font-size: 1.2rem; + color: var(--muted); + padding-top: 28px; +} + +.dc-discrepancy__legal { + font-size: 0.82rem; + color: var(--muted); + font-style: italic; + line-height: 1.5; + border-top: 1px solid 
var(--line); + padding-top: 8px; + margin-top: 4px; +} + +.dc-sig-badge { + display: inline-block; + font-size: 0.68rem; + font-weight: 700; + border-radius: 999px; + padding: 2px 8px; + margin-bottom: 6px; +} + +.dc-sig--high { background: var(--soft-coral); color: var(--coral); } +.dc-sig--medium { background: #fffbeb; color: var(--amber); } +.dc-sig--low { background: #f3f4f6; color: var(--muted); } + +/* ── Parties tab ─────────────────────────────────────────────────────────── */ + +.dc-party-list { + display: flex; + flex-direction: column; + gap: 6px; +} + +.dc-party-row { + display: grid; + grid-template-columns: 1fr 1fr auto; + gap: 8px; + align-items: center; + background: var(--panel); + border: 1px solid var(--line); + border-radius: 6px; + padding: 10px 12px; + border-left: 3px solid var(--line); +} + +.dc-party-row--removed { border-left-color: var(--coral); background: #fff5f0; } +.dc-party-row--added { border-left-color: var(--teal); background: #f0faf8; } +.dc-party-row--changed { border-left-color: var(--amber); background: #fffdf0; } + +.dc-party-row__name { + font-weight: 700; + font-size: 0.88rem; + color: var(--ink); +} + +.dc-party-row__role { + font-size: 0.82rem; + color: var(--muted); +} + +.dc-party-row__sig { + font-size: 0.78rem; + color: var(--muted); + font-style: italic; + line-height: 1.4; + grid-column: 1 / -1; + padding-top: 4px; + border-top: 1px solid var(--line); + margin-top: 4px; +} + +/* ── Timeline tab ────────────────────────────────────────────────────────── */ + +.dc-timeline-list { + display: flex; + flex-direction: column; + gap: 8px; +} + +.dc-tl-item { + background: var(--panel); + border: 1px solid var(--line); + border-radius: 8px; + padding: 12px 14px; + border-left: 3px solid var(--line); +} + +.dc-tl-item--conflict { border-left-color: var(--coral); } +.dc-tl-item--deleted { border-left-color: var(--amber); } +.dc-tl-item--added { border-left-color: var(--teal); } +.dc-tl-item--date_shift { border-left-color: 
#8b5cf6; } + +.dc-tl-item__head { + display: flex; + align-items: center; + gap: 8px; + margin-bottom: 8px; + flex-wrap: wrap; +} + +.dc-tl-date { + font-family: ui-monospace, "Cascadia Code", "Fira Code", monospace; + font-size: 0.78rem; + font-weight: 700; + color: var(--muted); + background: #f3f4f6; + border: 1px solid var(--line); + border-radius: 4px; + padding: 2px 7px; +} + +.dc-tl-actor { + font-size: 0.78rem; + font-weight: 700; + color: var(--teal-dark); + text-transform: uppercase; + letter-spacing: 0.04em; +} + +.dc-tl-desc { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 8px; + margin-bottom: 8px; +} + +.dc-tl-legal { + font-size: 0.8rem; + color: var(--muted); + font-style: italic; + line-height: 1.4; + border-top: 1px solid var(--line); + padding-top: 7px; +} + +/* ── Narrative blocks ─────────────────────────────────────────────────────── */ + +.dc-narrative-block { + background: var(--panel); + border: 1px solid var(--line); + border-radius: 8px; + padding: 12px 14px; + border-left: 3px solid var(--line); + margin-bottom: 8px; +} + +.dc-narrative-block--added { border-left-color: var(--teal); background: #f0faf8; } +.dc-narrative-block--removed { border-left-color: var(--coral); background: #fff5f0; } + +/* ── Action list ─────────────────────────────────────────────────────────── */ + +.dc-action-list { + display: flex; + flex-direction: column; + gap: 6px; + list-style: none; + margin: 0; + padding: 0; +} + +.dc-action-list li { + display: flex; + align-items: flex-start; + gap: 8px; + font-size: 0.88rem; + color: var(--ink); + line-height: 1.5; + padding: 8px 12px; + background: var(--soft-teal); + border: 1px solid #b2dbd6; + border-radius: 6px; +} + +.dc-action-list li::before { + content: '→'; + color: var(--teal); + font-weight: 800; + flex: 0 0 auto; +} + +/* ── Disclaimer ──────────────────────────────────────────────────────────── */ + +.dc-disclaimer { + font-size: 0.76rem; + color: var(--muted); + font-style: italic; + 
line-height: 1.5; + padding: 10px 12px; + background: #f7f8fb; + border: 1px solid var(--line); + border-radius: 6px; + margin-top: 16px; +} + +/* ── Responsive ──────────────────────────────────────────────────────────── */ + +@media (max-width: 780px) { + .dc-upload-pair { grid-template-columns: 1fr; } + .dc-doc-meta-pair { grid-template-columns: 1fr; } + .dc-discrepancy__compare { grid-template-columns: 1fr; } + .dc-compare-divider { padding-top: 0; } + .dc-party-row { grid-template-columns: 1fr 1fr; } + .dc-tl-desc { grid-template-columns: 1fr; } +} + +@media (max-width: 520px) { + .dc-tab-bar { gap: 0; } + .dc-tab { padding: 8px 10px; font-size: 0.78rem; } + .dc-party-row { grid-template-columns: 1fr; } +} + /* Print styles */ @media print { .tool-rail, .reasoning-panel, .topbar, .tool-form, diff --git a/assets/js/discrepancy.js b/assets/js/discrepancy.js new file mode 100644 index 0000000..4543e3a --- /dev/null +++ b/assets/js/discrepancy.js @@ -0,0 +1,882 @@ +/* discrepancy.js — page-scoped UI for /discrepancy.php */ +(function () { + 'use strict'; + + const els = {}; + let lang = window.DBN_TOOLS_LANG || localStorage.getItem('dbn-ui-lang') || 'en'; + let fileA = null; + let fileB = null; + let lastResult = null; + + const SLICE_DEFS = [ + { id: 'child_welfare', label: 'Child Welfare' }, + { id: 'echr', label: 'ECHR' }, + { id: 'family_core', label: 'Family Law Core' }, + { id: 'bufdir_guidance', label: 'Bufdir Guidance' }, + { id: 'norwegian_courts', label: 'Norwegian Courts' }, + { id: 'broader_legal', label: 'Broader Legal' }, + ]; + + const STEP_LABELS = [ + 'Classify documents', + 'Extract parties', + 'Build timelines', + 'Cross-reference parties', + 'Cross-reference timelines', + 'Research questions', + 'Retrieve legal context', + 'Synthesize report', + ]; + + const stepKeyToIndex = { + doc_classify: 0, + party_extract: 1, + timeline_extract: 2, + cross_parties: 3, + cross_timelines: 4, + sub_question_gen: 5, + retrieval: 6, + synthesis: 7, + }; + + 
document.addEventListener('DOMContentLoaded', () => {
  // Page guard: only wire things up on the discrepancy tool page. A missing or
  // empty data-active-tool attribute also fails this comparison.
  if (document.body.dataset.activeTool !== 'discrepancy') return;

  const byId = (id) => document.getElementById(id);
  const queryAll = (selector) => Array.from(document.querySelectorAll(selector));

  // Element references for one upload slot ('A' or 'B'); keys are suffixed
  // with the slot letter (zoneA, inputA, …) because bindUploadZone looks them
  // up as els['zone' + slot] etc.
  const uploadSlotRefs = (slot) => ({
    [`zone${slot}`]: byId(`dcZone${slot}`),
    [`input${slot}`]: byId(`dcInput${slot}`),
    [`prompt${slot}`]: byId(`dcPrompt${slot}`),
    [`fileInfo${slot}`]: byId(`dcFileInfo${slot}`),
    [`fileName${slot}`]: byId(`dcFileName${slot}`),
    [`clear${slot}`]: byId(`dcClear${slot}`),
  });

  // Populate the shared `els` lookup in place so the other functions in this
  // script can reach the page elements through it.
  Object.assign(
    els,
    {
      form: byId('dcForm'),
      status: byId('dcStatus'),
      runButton: byId('dcRunButton'),
      results: byId('dcResults'),
      traceList: byId('traceList'),
      langButtons: queryAll('#dcLangSwitcher .lang-btn'),
      engineRadios: queryAll('input[name="dcEngine"]'),
      slices: queryAll('.adv-slice'),
    },
    uploadSlotRefs('A'),
    uploadSlotRefs('B'),
    {
      // Source modal
      modal: byId('dcSourceModal'),
      modalClose: byId('dcSourceModalClose'),
      modalTitle: byId('dcSourceModalTitle'),
      modalEyebrow: byId('dcSourceModalEyebrow'),
      modalMeta: byId('dcSourceModalMeta'),
      modalText: byId('dcSourceModalText'),
    },
  );

  // Without the form there is nothing to bind.
  if (!els.form) return;

  bindLang();
  bindSlices();
  ['A', 'B'].forEach((slot) => bindUploadZone(slot));
  bindModal();
  els.form.addEventListener('submit', onSubmit);

  // Show every pipeline step in its idle state until a run starts.
  const idleSteps = STEP_LABELS.map((label) => ({ label, detail: 'Waiting…', status: 'idle' }));
  renderTrace(idleSteps);
});

// ── Language ───────────────────────────────────────────────────────────────

function bindLang() { + els.langButtons.forEach((b) => { + b.classList.toggle('is-active', b.dataset.lang === lang); + b.addEventListener('click', () => { + els.langButtons.forEach((x) => x.classList.remove('is-active')); + b.classList.add('is-active'); + lang = b.dataset.lang || 'en'; + localStorage.setItem('dbn-ui-lang', lang); + }); + }); + } + + // ── Corpus slice toggles ─────────────────────────────────────────────────── + + function bindSlices() { + els.slices.forEach((btn) => { + btn.addEventListener('click', () => { + const isOn = btn.classList.toggle('is-on'); + btn.setAttribute('aria-pressed', isOn ? 'true' : 'false'); + const badge = btn.querySelector('.dr-slice__badge'); + if (badge) badge.textContent = isOn ? 'on' : 'off'; + }); + }); + } + + function getSelectedSlices() { + const out = {}; + SLICE_DEFS.forEach((s) => { + const btn = els.slices.find((b) => b.dataset.slice === s.id); + out[s.id] = !!(btn && btn.classList.contains('is-on')); + }); + return out; + } + + // ── File upload zones ────────────────────────────────────────────────────── + + function bindUploadZone(slot) { + const zone = els['zone' + slot]; + const input = els['input' + slot]; + const prompt = els['prompt' + slot]; + const info = els['fileInfo' + slot]; + const nameEl = els['fileName' + slot]; + const clearEl = els['clear' + slot]; + + if (!zone) return; + + const accept = (file) => { + if (!file) return; + if (file.size > 8 * 1024 * 1024) { + setStatus(`${file.name} exceeds the 8 MB limit.`, 'error'); + return; + } + const ext = (file.name.split('.').pop() || '').toLowerCase(); + if (!['pdf', 'docx', 'txt'].includes(ext)) { + setStatus(`${file.name} is not a supported file type (PDF, DOCX, TXT).`, 'error'); + return; + } + if (slot === 'A') fileA = file; + else fileB = file; + nameEl.textContent = file.name; + prompt.classList.add('is-hidden'); + info.classList.remove('is-hidden'); + zone.classList.remove('is-drop'); + setStatus('', ''); + }; + + 
input.addEventListener('change', (e) => { + if (e.target.files && e.target.files[0]) accept(e.target.files[0]); + }); + zone.addEventListener('dragover', (e) => { e.preventDefault(); zone.classList.add('is-drop'); }); + zone.addEventListener('dragleave', () => zone.classList.remove('is-drop')); + zone.addEventListener('drop', (e) => { + e.preventDefault(); + zone.classList.remove('is-drop'); + const f = e.dataTransfer?.files?.[0]; + if (f) accept(f); + }); + clearEl?.addEventListener('click', () => { + if (slot === 'A') fileA = null; + else fileB = null; + input.value = ''; + info.classList.add('is-hidden'); + prompt.classList.remove('is-hidden'); + }); + } + + // ── Form submission ──────────────────────────────────────────────────────── + + async function onSubmit(e) { + e.preventDefault(); + + if (!fileA) { + setStatus('Upload Document A (the earlier/original document) before running.', 'error'); + return; + } + if (!fileB) { + setStatus('Upload Document B (the later/comparison document) before running.', 'error'); + return; + } + + const engine = (els.engineRadios.find((r) => r.checked) || {}).value || 'azure_mini'; + const slices = getSelectedSlices(); + + const expectedDuration = engine === 'azure_full' ? '2-3 minutes' + : engine === 'gpu' ? '~90 seconds' + : '60-90 seconds'; + + setStatus(`Comparing documents… (${expectedDuration})`, 'busy'); + els.runButton.disabled = true; + els.results.innerHTML = `

Analysing…

Classifying both documents, extracting parties and timelines, then cross-referencing for discrepancies. Expect ${expectedDuration}.

`; + + const stepState = STEP_LABELS.map((label) => ({ label, detail: 'Queued', status: 'idle' })); + renderTrace(stepState); + + const payload = { engine, language: lang, slices }; + const form = new FormData(); + form.append('payload', JSON.stringify(payload)); + form.append('file_a', fileA); + form.append('file_b', fileB); + + let response; + try { + response = await fetch('api/discrepancy.php', { method: 'POST', body: form, credentials: 'same-origin' }); + } catch (err) { + setStatus(`Network error: ${err.message || err}`, 'error'); + els.runButton.disabled = false; + return; + } + + if (!response.ok || !response.body) { + if (response.status === 402 || response.status === 429) { + const d = await response.json().catch(() => ({})); + if (typeof window.dbnFreeTierError === 'function') window.dbnFreeTierError(response.status, d); + } else { + setStatus(`Request failed (${response.status}).`, 'error'); + } + els.runButton.disabled = false; + return; + } + const creditsRemaining = response.headers.get('X-Credits-Remaining'); + if (creditsRemaining !== null && typeof window.dbnUpdateCredits === 'function') { + window.dbnUpdateCredits(parseInt(creditsRemaining, 10)); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder('utf-8'); + let buffer = ''; + let finalResult = null; + let errorEvent = null; + + // State for progressive rendering + let metaARendered = false; + let metaBRendered = false; + let partiesARendered = false; + let partiesBRendered = false; + let tlARendered = false; + let tlBRendered = false; + + function handleStreamEvent(evt) { + if (!evt || !evt.event) return; + + if (evt.event === 'progress') { + if (evt.detail) setStatus(evt.detail, 'busy'); + return; + } + if (evt.event === 'start') { + setStatus(`Comparing ${escapeHtml(evt.file_a || 'A')} ↔ ${escapeHtml(evt.file_b || 'B')}…`, 'busy'); + return; + } + if (evt.event === 'step') { + const idx = stepKeyToIndex[evt.step]; + if (idx !== undefined) { + if (evt.status === 
'running' && stepState[idx].status !== 'running') { + stepState[idx] = { label: evt.label || stepState[idx].label, detail: evt.detail || 'Running…', status: 'running' }; + } else if (evt.status !== 'running') { + stepState[idx] = { label: evt.label || stepState[idx].label, detail: evt.detail || stepState[idx].detail, status: evt.status || stepState[idx].status }; + } + renderTrace(stepState); + } + return; + } + if (evt.event === 'doc_a_meta' && !metaARendered) { + renderDocMetaCard('A', evt.result || {}); + metaARendered = true; + return; + } + if (evt.event === 'doc_b_meta' && !metaBRendered) { + renderDocMetaCard('B', evt.result || {}); + metaBRendered = true; + return; + } + if (evt.event === 'parties_a' && !partiesARendered && Array.isArray(evt.parties)) { + renderPartiesPreview('A', evt.parties); + partiesARendered = true; + return; + } + if (evt.event === 'parties_b' && !partiesBRendered && Array.isArray(evt.parties)) { + renderPartiesPreview('B', evt.parties); + partiesBRendered = true; + return; + } + if (evt.event === 'timeline_a' && !tlARendered && Array.isArray(evt.events)) { + renderTimelinePreview('A', evt.events); + tlARendered = true; + return; + } + if (evt.event === 'timeline_b' && !tlBRendered && Array.isArray(evt.events)) { + renderTimelinePreview('B', evt.events); + tlBRendered = true; + return; + } + if (evt.event === 'subq') { + setStatus(`Retrieving ${evt.index}/${evt.total}: ${String(evt.question || '').slice(0, 80)}…`, 'busy'); + return; + } + if (evt.event === 'final') { + finalResult = evt.result; + return; + } + if (evt.event === 'error') { + errorEvent = evt; + return; + } + } + + while (true) { + let chunk; + try { chunk = await reader.read(); } + catch (err) { setStatus(`Stream error: ${err.message || err}`, 'error'); els.runButton.disabled = false; return; } + const { done, value } = chunk; + if (value) { + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split('\n'); + buffer = lines.pop(); + for (const 
line of lines) { + const trimmed = line.trim(); + if (!trimmed) continue; + let evt; + try { evt = JSON.parse(trimmed); } catch (_) { continue; } + handleStreamEvent(evt); + } + } + if (done) break; + } + + if (errorEvent) { + setStatus(`${errorEvent.code}: ${errorEvent.message}`, 'error'); + els.runButton.disabled = false; + const runningIdx = stepState.findIndex((s) => s.status === 'running'); + if (runningIdx >= 0) { + stepState[runningIdx] = { ...stepState[runningIdx], status: 'error', detail: errorEvent.message }; + renderTrace(stepState); + } + return; + } + + if (!finalResult) { + setStatus('Stream ended without a final result.', 'error'); + els.runButton.disabled = false; + return; + } + + lastResult = finalResult; + const meta = finalResult.trace_metadata || {}; + setStatus( + `Done · ${meta.conflict_count || 0} contradictions · ${meta.deleted_count || 0} deletions · ${meta.added_count || 0} additions · ${meta.source_count || 0} sources`, + 'ok' + ); + els.runButton.disabled = false; + renderTrace(finalResult.trace || []); + renderFinalResults(finalResult); + } + + // ── Progressive rendering ────────────────────────────────────────────────── + + function ensureResultsReady() { + const emptyState = els.results.querySelector('.empty-state'); + if (emptyState) emptyState.remove(); + + // Ensure the doc-meta pair container exists + if (!els.results.querySelector('#dcDocMetaPair')) { + const pair = document.createElement('div'); + pair.id = 'dcDocMetaPair'; + pair.className = 'dc-doc-meta-pair'; + els.results.insertBefore(pair, els.results.firstChild); + } + } + + function renderDocMetaCard(slot, meta) { + ensureResultsReady(); + const pair = els.results.querySelector('#dcDocMetaPair'); + if (!pair) return; + + const existing = pair.querySelector(`#dcMeta${slot}`); + if (existing) existing.remove(); + + const card = document.createElement('div'); + card.id = `dcMeta${slot}`; + card.className = 'dc-doc-meta-card'; + const fields = [ + meta.doc_date ? 
['Date', meta.doc_date] : null, + meta.issuing_authority ? ['Authority', meta.issuing_authority] : null, + meta.reference_number ? ['Ref', meta.reference_number] : null, + ].filter(Boolean); + + card.innerHTML = ` +
+ Document ${slot} + ${escapeHtml(meta.doc_type || ('Document ' + slot))} +
+ ${fields.length ? `
+ ${fields.map(([k, v]) => `${escapeHtml(k)}: ${escapeHtml(String(v))}`).join('')} +
` : ''} + `; + pair.appendChild(card); + } + + function renderPartiesPreview(slot, parties) { + if (!parties.length) return; + ensureResultsReady(); + const pair = els.results.querySelector('#dcDocMetaPair'); + if (!pair) return; + const metaCard = pair.querySelector(`#dcMeta${slot}`); + if (!metaCard) return; + + const existing = metaCard.querySelector('.dc-parties-preview'); + if (existing) existing.remove(); + + const preview = document.createElement('div'); + preview.className = 'dc-parties-preview'; + preview.innerHTML = `

${parties.length} party${parties.length === 1 ? '' : 'ies'} identified

+
+ ${parties.slice(0, 6).map((p) => `${escapeHtml(p.name || p.role || '?')}`).join('')} + ${parties.length > 6 ? `+${parties.length - 6} more` : ''} +
`; + metaCard.appendChild(preview); + } + + function renderTimelinePreview(slot, events) { + if (!events.length) return; + ensureResultsReady(); + const pair = els.results.querySelector('#dcDocMetaPair'); + if (!pair) return; + const metaCard = pair.querySelector(`#dcMeta${slot}`); + if (!metaCard) return; + + const existing = metaCard.querySelector('.dc-timeline-preview'); + if (existing) existing.remove(); + + const highCount = events.filter((e) => e.significance === 'high').length; + const preview = document.createElement('div'); + preview.className = 'dc-timeline-preview'; + preview.innerHTML = `

${events.length} events · ${highCount} high-significance

`; + metaCard.appendChild(preview); + } + + // ── Final render ─────────────────────────────────────────────────────────── + + function renderFinalResults(data) { + const sources = data.sources || []; + const discrepancies = Array.isArray(data.critical_discrepancies) ? data.critical_discrepancies : []; + const actions = Array.isArray(data.recommended_actions) ? data.recommended_actions : []; + const uncertain = Array.isArray(data.what_remains_uncertain) ? data.what_remains_uncertain : []; + const partiesDiff = data.parties_diff || {}; + const tlDiff = data.timeline_diff || {}; + const headline = data.headline_finding || ''; + const nameA = data.doc_a_name || 'Document A'; + const nameB = data.doc_b_name || 'Document B'; + + // Remove progressive doc meta pair — we'll re-render from authoritative data + els.results.querySelector('#dcDocMetaPair')?.remove(); + + // Re-render doc meta pair from final data + renderDocMetaCard('A', data.doc_a_meta || {}); + renderDocMetaCard('B', data.doc_b_meta || {}); + if ((data.parties_a || []).length) renderPartiesPreview('A', data.parties_a); + if ((data.parties_b || []).length) renderPartiesPreview('B', data.parties_b); + if ((data.timeline_a || []).length) renderTimelinePreview('A', data.timeline_a); + if ((data.timeline_b || []).length) renderTimelinePreview('B', data.timeline_b); + + // Build tabs + const conflicts = tlDiff.conflicts || []; + const deletedEvents = tlDiff.in_a_only || []; + const addedEvents = tlDiff.in_b_only || []; + const procGaps = tlDiff.procedural_gaps || []; + const narrative = tlDiff.narrative_shifts || {}; + const pRemoved = partiesDiff.in_a_only || []; + const pAdded = partiesDiff.in_b_only || []; + const pChanged = partiesDiff.changed_between || []; + + const totalDiscrepancies = discrepancies.length; + const tabCountStr = (n) => n > 0 ? ` ${n}` : ''; + + const finalHtml = ` + + ${headline ? `
+

Key finding

+

${escapeHtml(headline)}

+
` : ''} + + +
+
+ + + + +
+ + +
+ ${renderDiscrepanciesTab(discrepancies, sources)} + ${actions.length ? `
+

Recommended actions

+
    + ${actions.map((a) => `
  1. ${escapeHtml(String(a))}
  2. `).join('')} +
+
` : ''} + ${narrative.summary ? `
+

Narrative shift

+

${escapeHtml(narrative.summary)}

+ ${(narrative.new_in_b || []).length ? `
+ New in ${escapeHtml(nameB)}: +
    ${(narrative.new_in_b || []).map((s) => `
  • ${escapeHtml(String(s))}
  • `).join('')}
+
` : ''} + ${(narrative.removed_from_b || []).length ? `
+ Removed from ${escapeHtml(nameB)}: +
    ${(narrative.removed_from_b || []).map((s) => `
  • ${escapeHtml(String(s))}
  • `).join('')}
+
` : ''} +
` : ''} + ${uncertain.length ? `
+

What remains uncertain

+
    + ${uncertain.map((u) => `
  • ${escapeHtml(String(u))}
  • `).join('')} +
+
` : ''} +
+ + +
+ ${renderPartiesTab(pRemoved, pAdded, pChanged, nameA, nameB)} +
+ + +
+ ${renderTimelineTab(conflicts, deletedEvents, addedEvents, procGaps, nameA, nameB)} +
+ + +
+ ${renderSourcesTab(sources)} +
+
+ +

${escapeHtml(data.disclaimer || 'For legal information and preparation only — not legal advice. Verify all findings with a qualified lawyer.')}

+ `; + + const finalContainer = document.createElement('div'); + finalContainer.innerHTML = finalHtml; + while (finalContainer.firstChild) { + els.results.appendChild(finalContainer.firstChild); + } + + // Bind tabs + els.results.querySelectorAll('.dc-tab').forEach((btn) => { + btn.addEventListener('click', () => { + const tab = btn.dataset.tab; + els.results.querySelectorAll('.dc-tab').forEach((b) => b.classList.remove('is-active')); + els.results.querySelectorAll('.dc-tab-panel').forEach((p) => p.classList.remove('is-active')); + btn.classList.add('is-active'); + const panel = els.results.querySelector(`.dc-tab-panel[data-panel="${tab}"]`); + if (panel) panel.classList.add('is-active'); + }); + }); + + // Bind source card clicks + els.results.querySelectorAll('.dr-source-card[data-source-n]').forEach((node) => { + node.addEventListener('click', (e) => { + if (e.target.closest('a')) return; + const n = parseInt(node.dataset.sourceN, 10); + const src = sources.find((s) => s.n === n); + if (src) openModal(src); + }); + }); + } + + // ── Tab content renderers ────────────────────────────────────────────────── + + function renderDiscrepanciesTab(discrepancies, sources) { + if (!discrepancies.length) { + return '

No critical discrepancies were identified in the synthesis.

'; + } + const sevClass = (s) => s === 'high' ? 'dc-sev--high' : (s === 'medium' ? 'dc-sev--medium' : 'dc-sev--low'); + const catLabel = (c) => ({ timeline_conflict: 'Timeline', narrative_shift: 'Narrative', party_discrepancy: 'Party', procedural_gap: 'Procedure' }[c] || c); + + return `
+

Critical discrepancies (${discrepancies.length})

+
+ ${discrepancies.map((d) => `
+
+
+ ${escapeHtml(catLabel(d.category || ''))} + ${escapeHtml(d.significance || 'low')} +
+
${escapeHtml(d.title || '')}
+
+
+
+ Document A +

${escapeHtml(d.document_a_says || '—')}

+
+ +
+ Document B +

${escapeHtml(d.document_b_says || '—')}

+
+
+ ${d.legal_relevance ? `` : ''} +
`).join('')} +
+
`; + } + + function renderPartiesTab(removed, added, changed, nameA, nameB) { + if (!removed.length && !added.length && !changed.length) { + return '

No party discrepancies identified between the two documents.

'; + } + let html = '
'; + if (removed.length) { + html += `

Removed from ${escapeHtml(nameB)} (${removed.length})

+
+ ${removed.map((p) => `
+
${escapeHtml(p.name || '?')}
+
${escapeHtml(p.role_in_a || '')}
+ ${p.significance ? `
${escapeHtml(p.significance)}
` : ''} +
`).join('')} +
`; + } + if (added.length) { + html += `

Added in ${escapeHtml(nameB)} (${added.length})

+
+ ${added.map((p) => `
+
${escapeHtml(p.name || '?')}
+
${escapeHtml(p.role_in_b || '')}
+ ${p.significance ? `
${escapeHtml(p.significance)}
` : ''} +
`).join('')} +
`; + } + if (changed.length) { + html += `

Changed between versions (${changed.length})

+
+ ${changed.map((p) => `
+
${escapeHtml(p.name || '?')}
+
${escapeHtml(p.in_a || '')}
+
${escapeHtml(p.in_b || '')}
+ ${p.significance ? `
${escapeHtml(p.significance)}
` : ''} +
`).join('')} +
`; + } + html += '
'; + return html; + } + + function renderTimelineTab(conflicts, deleted, added, procGaps, nameA, nameB) { + if (!conflicts.length && !deleted.length && !added.length && !procGaps.length) { + return '

No timeline discrepancies identified between the two documents.

'; + } + const sigClass = (s) => `dc-sig--${s === 'high' ? 'high' : (s === 'medium' ? 'medium' : 'low')}`; + let html = ''; + + if (conflicts.length) { + html += `
+

Contradictions (${conflicts.length})

+
+ ${conflicts.map((c) => `
+
+ ${escapeHtml(c.significance || 'low')} + ${c.date_a || c.date_b ? `${escapeHtml(c.date_a || '?')} / ${escapeHtml(c.date_b || '?')}` : ''} +
+
+
+ ${escapeHtml(nameA)} +

${escapeHtml(c.doc_a_says || '—')}

+
+ +
+ ${escapeHtml(nameB)} +

${escapeHtml(c.doc_b_says || '—')}

+
+
+ ${c.legal_significance ? `` : ''} +
`).join('')} +
+
`; + } + + if (deleted.length) { + html += `
+

Deleted from ${escapeHtml(nameB)} (${deleted.length})

+
+ ${deleted.map((ev) => `
+
+ ${escapeHtml(ev.significance || 'low')} + ${ev.date ? `${escapeHtml(ev.date)}` : ''} + ${ev.actor ? `${escapeHtml(ev.actor)}` : ''} +
+

${escapeHtml(ev.description || '')}

+ ${ev.legal_significance ? `` : ''} +
`).join('')} +
+
`; + } + + if (added.length) { + html += `
+

New in ${escapeHtml(nameB)} (${added.length})

+
+ ${added.map((ev) => `
+
+ ${escapeHtml(ev.significance || 'low')} + ${ev.date ? `${escapeHtml(ev.date)}` : ''} + ${ev.actor ? `${escapeHtml(ev.actor)}` : ''} +
+

${escapeHtml(ev.description || '')}

+ ${ev.legal_significance ? `` : ''} +
`).join('')} +
+
`; + } + + if (procGaps.length) { + html += `
+

Procedural gaps (${procGaps.length})

+
    + ${procGaps.map((g) => `
  • ${escapeHtml(g.gap || '')} ${escapeHtml(g.significance || 'low')}
  • `).join('')} +
+
`; + } + + return html; + } + + function renderSourcesTab(sources) { + if (!sources.length) { + return '

No corpus sources retrieved. Enable corpus slices and re-run.

'; + } + return `
+
+

Legal context sources (${sources.length})

+ Click a card to expand · external link opens original source +
+
+ ${sources.map((s) => renderSourceCard(s)).join('')} +
+
`; + } + + function renderSourceCard(s) { + const score = s.reranker_score != null ? s.reranker_score : s.similarity; + const link = s.deep_link || s.source_url; + const titleHtml = link + ? `${escapeHtml(s.title || 'Untitled')} ` + : `${escapeHtml(s.title || 'Untitled')}`; + return `
+ ${s.n} +
+
${titleHtml}
+ ${s.section ? `
${escapeHtml(s.section)}
` : ''} +
+ ${escapeHtml(s.package_or_corpus || 'corpus')} + ${s.authority_label ? `${escapeHtml(s.authority_label)}` : ''} + ${(s.matched_sub_questions || []).map((q) => `${escapeHtml(q)}`).join('')} +
+

${escapeHtml(truncate(s.excerpt || '', 240))}

+
+
+ score
${score != null ? Number(score).toFixed(2) : '—'}
+
+
`; + } + + // ── Source modal ─────────────────────────────────────────────────────────── + + function bindModal() { + els.modalClose?.addEventListener('click', closeModal); + els.modal?.addEventListener('click', (e) => { if (e.target === els.modal) closeModal(); }); + document.addEventListener('keydown', (e) => { + if (e.key === 'Escape' && els.modal && !els.modal.classList.contains('is-hidden')) closeModal(); + }); + } + + function closeModal() { els.modal?.classList.add('is-hidden'); } + + function openModal(source) { + if (!source) return; + els.modalEyebrow.textContent = 'Corpus source'; + els.modalTitle.textContent = source.title || 'Source'; + const metaRows = [ + ['Number', `[${source.n}]`], + source.section ? ['Section', source.section] : null, + ['Corpus', source.package_or_corpus || '—'], + source.authority_label ? ['Authority', source.authority_label] : null, + source.similarity != null ? ['Similarity', String(source.similarity)] : null, + source.reranker_score != null ? ['Rerank score', String(source.reranker_score)] : null, + ].filter(Boolean); + els.modalMeta.innerHTML = '
' + metaRows.map(([k, v]) => `
${escapeHtml(k)}
${escapeHtml(String(v))}
`).join('') + '
'; + const chunkText = source.chunk_text || source.excerpt || ''; + let html = chunkText + ? `` + : 'No excerpt available.'; + els.modalText.innerHTML = html; + const toggle = els.modalText.querySelector('.dr-modal-chunk-toggle'); + const div = els.modalText.querySelector('.dr-modal-chunk-text'); + toggle?.addEventListener('click', () => { + const isHidden = div.classList.toggle('is-hidden'); + toggle.textContent = isHidden ? 'Show matching text ▼' : 'Hide matching text ▲'; + }); + els.modal.classList.remove('is-hidden'); + } + + // ── Trace rendering ──────────────────────────────────────────────────────── + + function renderTrace(steps) { + if (!els.traceList) return; + els.traceList.classList.add('is-rich'); + els.traceList.innerHTML = steps.map((step, i) => { + const statusClass = step.status === 'running' ? 'is-running' + : step.status === 'complete' ? 'is-done' + : step.status === 'warning' ? 'is-warning' + : step.status === 'error' ? 'is-error' + : ''; + const marker = step.status === 'complete' ? '✓' + : step.status === 'warning' ? '!' + : step.status === 'error' ? '×' + : (i + 1); + return `
  • + ${marker} +
    + ${escapeHtml(step.label || '')} + ${escapeHtml(step.detail || '')} +
    +
  • `; + }).join(''); + } + + // ── Utility ──────────────────────────────────────────────────────────────── + + function setStatus(message, kind) { + if (!els.status) return; + els.status.textContent = message; + els.status.style.color = kind === 'error' ? '#b41e1e' + : kind === 'ok' ? 'var(--teal-dark)' + : 'var(--muted)'; + } + + function renderInlineCitations(escapedHtml, sources) { + return escapedHtml.replace(/\[(\d+(?:\s*[-,]\s*\d+)*)\]/g, (_, group) => { + const nums = expandCiteGroup(group); + return nums.map((n) => `${n}`).join(''); + }); + } + + function expandCiteGroup(group) { + const out = []; + group.split(',').forEach((part) => { + const range = part.trim().match(/^(\d+)\s*-\s*(\d+)$/); + if (range) { + for (let i = parseInt(range[1], 10); i <= parseInt(range[2], 10); i++) out.push(i); + } else { + const n = parseInt(part.trim(), 10); + if (!Number.isNaN(n)) out.push(n); + } + }); + return Array.from(new Set(out)); + } + + function escapeHtml(s) { + return String(s) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); + } + + function truncate(s, n) { + if (!s || s.length <= n) return s || ''; + return s.slice(0, n - 1) + '…'; + } +})(); diff --git a/discrepancy.php b/discrepancy.php new file mode 100644 index 0000000..1452a18 --- /dev/null +++ b/discrepancy.php @@ -0,0 +1,176 @@ + +
    + +
    + + + + +
    + +

    Upload two versions of the same Barnevernet document — or any two related documents — and the agent will find contradictions, deleted facts, new allegations, and party changes between them. Results include corpus-backed legal significance for each discrepancy.

    + + +
    + +
    +

    Document A — Earlier / Original

    +
    + +
    + +

    Drop here or

    +

    PDF, DOCX, TXT — max 8 MB

    +
    + +
    +
    + +
    +

    Document B — Later / Comparison

    +
    + +
    + +

    Drop here or

    +

    PDF, DOCX, TXT — max 8 MB

    +
    + +
    +
    + +
    + +
    + Engine + + + +
    +

    Engine applies to the final synthesis only. Document classification, party extraction, timelines, and cross-referencing always use azure-mini.

    + +
    + Corpus slices (used for legal significance context) +

    The corpus provides legal significance context for each discrepancy found. All four default slices cover the core Barnevernet framework.

    +
    + + + + + + +
    +
    + + +
    + +
    +
    +

    Ready

    +

    Upload two Barnevernet documents, then run. The agent will classify each document, extract parties and timelines, cross-reference them for discrepancies, and produce a corpus-backed legal significance report.

    +

    Typical use: compare the original Bekymringsmelding against the later Vedtak, or compare two versions of a Barnevernet investigation report.

    +
    +
    + + + + + + + + + + + + + + diff --git a/includes/DiscrepancyAgent.php b/includes/DiscrepancyAgent.php new file mode 100644 index 0000000..92eaa97 --- /dev/null +++ b/includes/DiscrepancyAgent.php @@ -0,0 +1,1047 @@ +azure = $azure ?: new DbnAzureOpenAiGateway(); + } + + /** + * @param array $fileA {filename, text, chars, truncated} + * @param array $fileB {filename, text, chars, truncated} + * @param string $engine 'azure_mini'|'azure_full'|'gpu' + * @param string $language 'en'|'no'|'uk'|'pl' + * @param array $sliceSelection Corpus slice toggles + * @param callable|null $emit function(string $event, array $payload): void + */ + public function run( + array $fileA, + array $fileB, + string $engine, + string $language, + array $sliceSelection, + ?callable $emit = null + ): array { + $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true) ? $engine : 'azure_mini'; + $language = dbnToolsNormalizeUiLanguage($language); + + $textA = mb_substr((string)($fileA['text'] ?? ''), 0, self::MAX_DOC_CHARS, 'UTF-8'); + $textB = mb_substr((string)($fileB['text'] ?? ''), 0, self::MAX_DOC_CHARS, 'UTF-8'); + + if ($textA === '' || $textB === '') { + dbnToolsAbort('Could not extract text from one or both uploaded files.', 422, 'empty_document'); + } + + $nameA = (string)($fileA['filename'] ?? 'Document A'); + $nameB = (string)($fileB['filename'] ?? 'Document B'); + + $client = dbnToolsRequireClient(); + $package = $this->requireFamilyPackage((int)$client['id']); + + dbnToolsBootCaveau(); + $aiPortalRoot = dbnToolsAiPortalRoot(); + require_once $aiPortalRoot . 
'/platform/includes/dbn_v6.php'; + + $this->stepTimings = []; + $trace = []; + + $emitStep = function (string $stepId, string $label, string $detail, string $status) + use (&$trace, $emit): void { + $trace[] = ['label' => $label, 'detail' => $detail, 'status' => $status]; + if ($emit) { + $emit('step', ['step' => $stepId, 'label' => $label, 'detail' => $detail, 'status' => $status]); + } + }; + $emitRunning = function (string $stepId, string $label, string $detail = 'Running…') use ($emit): void { + if ($emit) { + $emit('step', ['step' => $stepId, 'label' => $label, 'detail' => $detail, 'status' => 'running']); + } + }; + + // ── STEP 1+2: Classify both documents ───────────────────────────────── + $emitRunning('doc_classify', 'Classify documents', "Classifying {$nameA}…"); + $stepStart = microtime(true); + $metaA = $this->classifyDoc($textA, $nameA, $language); + if ($emit) $emit('doc_a_meta', ['result' => $metaA]); + if ($emit) $emit('progress', ['detail' => "Classifying {$nameB}…"]); + $metaB = $this->classifyDoc($textB, $nameB, $language); + if ($emit) $emit('doc_b_meta', ['result' => $metaB]); + $this->stepTimings['doc_classify'] = $this->elapsedMs($stepStart); + $emitStep('doc_classify', 'Classify documents', + sprintf('%s (%s) → %s (%s)', + $metaA['doc_type'] ?? 'Document A', $metaA['doc_date'] ?? '?', + $metaB['doc_type'] ?? 'Document B', $metaB['doc_date'] ?? 
'?'), + 'complete'); + + // ── STEP 3: Extract parties from both documents ───────────────────────── + $emitRunning('party_extract', 'Extract parties', "Extracting parties from {$nameA}…"); + $stepStart = microtime(true); + $partiesA = $this->extractPartiesDoc($textA, $nameA, $language); + if ($emit) $emit('parties_a', ['parties' => $partiesA]); + if ($emit) $emit('progress', ['detail' => "Extracting parties from {$nameB}…"]); + $partiesB = $this->extractPartiesDoc($textB, $nameB, $language); + if ($emit) $emit('parties_b', ['parties' => $partiesB]); + $this->stepTimings['party_extract'] = $this->elapsedMs($stepStart); + $emitStep('party_extract', 'Extract parties', + sprintf('%d in %s · %d in %s', count($partiesA), $nameA, count($partiesB), $nameB), + 'complete'); + + // ── STEP 4: Build timelines from both documents ───────────────────────── + $emitRunning('timeline_extract', 'Build timelines', "Building timeline from {$nameA}…"); + $stepStart = microtime(true); + $timelineA = $this->extractTimelineDoc($textA, $nameA, $language); + if ($emit) $emit('timeline_a', ['events' => $timelineA]); + if ($emit) $emit('progress', ['detail' => "Building timeline from {$nameB}…"]); + $timelineB = $this->extractTimelineDoc($textB, $nameB, $language); + if ($emit) $emit('timeline_b', ['events' => $timelineB]); + $this->stepTimings['timeline_extract'] = $this->elapsedMs($stepStart); + $emitStep('timeline_extract', 'Build timelines', + sprintf('%d events in %s · %d events in %s', + count($timelineA), $nameA, count($timelineB), $nameB), + 'complete'); + + // ── STEP 5: Cross-reference parties ──────────────────────────────────── + $emitRunning('cross_parties', 'Cross-reference parties', 'Comparing parties across both documents…'); + $stepStart = microtime(true); + $partiesDiff = $this->crossReferenceParties($partiesA, $partiesB, $nameA, $nameB, $language); + if ($emit) $emit('parties_diff', ['result' => $partiesDiff]); + $this->stepTimings['cross_parties'] = 
$this->elapsedMs($stepStart); + $pRemoved = count($partiesDiff['in_a_only'] ?? []); + $pAdded = count($partiesDiff['in_b_only'] ?? []); + $pChanged = count($partiesDiff['changed_between'] ?? []); + $emitStep('cross_parties', 'Cross-reference parties', + sprintf('%d removed · %d added · %d changed', $pRemoved, $pAdded, $pChanged), + 'complete'); + + // ── STEP 6: Cross-reference timelines ───────────────────────────────── + $emitRunning('cross_timelines', 'Cross-reference timelines', + 'Scanning for contradictions, deletions, and new events…'); + $stepStart = microtime(true); + $timelineDiff = $this->crossReferenceTimelines( + $timelineA, $timelineB, $textA, $textB, $nameA, $nameB, $language + ); + if ($emit) $emit('timeline_diff', ['result' => $timelineDiff]); + $this->stepTimings['cross_timelines'] = $this->elapsedMs($stepStart); + $conflictCount = count($timelineDiff['conflicts'] ?? []); + $deletedCount = count($timelineDiff['in_a_only'] ?? []); + $addedCount = count($timelineDiff['in_b_only'] ?? 
[]); + $emitStep('cross_timelines', 'Cross-reference timelines', + sprintf('%d contradictions · %d deleted events · %d new events', + $conflictCount, $deletedCount, $addedCount), + 'complete'); + + // ── STEP 7: Generate research sub-questions ──────────────────────────── + $emitRunning('sub_question_gen', 'Research questions', + 'Generating legal research questions from discrepancies…'); + $stepStart = microtime(true); + $subQuestions = $this->generateDiscrepancySubQ( + $partiesDiff, $timelineDiff, $metaA, $metaB, $language + ); + $this->stepTimings['sub_question_gen'] = $this->elapsedMs($stepStart); + $emitStep('sub_question_gen', 'Research questions', + sprintf('%d legal research question(s) generated.', count($subQuestions)), + 'complete'); + + // ── STEP 8: Corpus retrieval ──────────────────────────────────────────── + $emitRunning('retrieval', 'Retrieve legal context', + sprintf('Hybrid vector + keyword search across %d question(s)…', count($subQuestions))); + $stepStart = microtime(true); + + $sliceSelectionNormalized = dbnV6NormalizeSliceSelection($sliceSelection); + if (!array_filter($sliceSelectionNormalized)) { + $sliceSelectionNormalized = [ + 'child_welfare' => true, + 'echr' => true, + 'family_core' => true, + 'bufdir_guidance' => true, + ]; + } + + $ragDb = dbnToolsRagDb(); + try { + $sharedDocIds = dbnV6ResolveSelectedDocIds($ragDb, $sliceSelectionNormalized); + } catch (Throwable $e) { + error_log('Discrepancy slice resolve failed: ' . 
$e->getMessage()); + $sharedDocIds = []; + } + + try { + $rag = new ClientRagPipeline((int)$client['id'], 'http://10.0.1.10:4000', 60); + } catch (Throwable $e) { + dbnToolsAbort('Could not initialise the retrieval pipeline.', 503, 'rag_init_failed'); + } + + $retrievalQueries = $subQuestions ?: [[ + 'id' => 'q1', + 'question' => 'ECHR procedural requirements when Barnevernet changes facts between document versions', + 'rationale' => 'Fallback query', + ]]; + + $rawPool = []; + $retrievalWarnings = 0; + $rawCorpusCount = 0; + + foreach ($retrievalQueries as $idx => $sq) { + if ($emit) { + $emit('subq', [ + 'index' => $idx + 1, + 'total' => count($retrievalQueries), + 'id' => $sq['id'], + 'question' => $sq['question'], + ]); + } + try { + $corpusChunks = $rag->searchAll( + $sq['question'], + 6, + null, + [ + 'search_private' => false, + 'search_shared' => true, + 'package_ids' => [(int)$package['id']], + 'shared_doc_ids' => $sharedDocIds, + 'chunk_limit' => 6, + 'search_method' => 'hybrid', + 'reranker_enabled' => true, + 'include_beta_website' => false, + 'include_primary_website' => false, + ] + ); + } catch (Throwable $e) { + error_log('Discrepancy sub-Q retrieval failed: ' . $e->getMessage()); + $corpusChunks = []; + $retrievalWarnings++; + } + $rawCorpusCount += count($corpusChunks); + foreach ($corpusChunks as $chunk) { + $rawPool[] = $this->normalizeCorpusChunk($chunk, $sq['id']); + } + } + + $merged = $this->mergeAndDedupe($rawPool, self::POOL_CAP); + $this->hydrateSourceUrls($merged); + $numberedSources = $this->numberSources(array_slice($merged, 0, 12)); + $this->stepTimings['retrieval'] = $this->elapsedMs($stepStart); + $retrievalStatus = $retrievalWarnings > 0 ? 
'warning' : 'complete'; + $emitStep('retrieval', 'Retrieve legal context', + sprintf('%d sub-Q(s) → %d corpus chunks → %d unique sources.', + count($retrievalQueries), $rawCorpusCount, count($numberedSources)), + $retrievalStatus); + + // ── STEP 9: Synthesis ─────────────────────────────────────────────────── + $engineLabel = match ($engine) { + 'azure_full' => 'Azure gpt-4o', + 'gpu' => 'GPU qwen2.5:14b', + default => 'Azure gpt-4o-mini', + }; + $emitRunning('synthesis', 'Synthesize report', + sprintf('Synthesising discrepancy report with %s…', $engineLabel)); + $stepStart = microtime(true); + $synthesis = $this->synthesize( + $metaA, $metaB, $nameA, $nameB, + $partiesDiff, $timelineDiff, + $numberedSources, $engine, $language + ); + $this->stepTimings['synthesis'] = $this->elapsedMs($stepStart); + $confidence = $this->citationConfidence($numberedSources); + $emitStep('synthesis', 'Synthesize report', + sprintf('Report complete · %d source(s) · %s confidence.', + count($numberedSources), $confidence), + 'complete'); + + $synJson = $synthesis['json']; + return [ + 'tool' => 'discrepancy', + 'language' => $language, + 'doc_a_name' => $nameA, + 'doc_b_name' => $nameB, + 'doc_a_meta' => $metaA, + 'doc_b_meta' => $metaB, + 'parties_a' => $partiesA, + 'parties_b' => $partiesB, + 'timeline_a' => $timelineA, + 'timeline_b' => $timelineB, + 'parties_diff' => $partiesDiff, + 'timeline_diff' => $timelineDiff, + 'headline_finding' => (string)($synJson['headline_finding'] ?? ''), + 'critical_discrepancies' => is_array($synJson['critical_discrepancies'] ?? null) + ? $synJson['critical_discrepancies'] : [], + 'recommended_actions' => is_array($synJson['recommended_actions'] ?? null) + ? $synJson['recommended_actions'] : [], + 'what_remains_uncertain' => is_array($synJson['what_remains_uncertain'] ?? null) + ? 
$synJson['what_remains_uncertain'] : [], + 'sources' => $numberedSources, + 'sub_questions' => $subQuestions, + 'citation_confidence' => $confidence, + 'trace' => $trace, + 'trace_metadata' => [ + 'source_count' => count($numberedSources), + 'sub_question_count' => count($retrievalQueries), + 'conflict_count' => $conflictCount, + 'deleted_count' => $deletedCount, + 'added_count' => $addedCount, + 'deployment' => $synthesis['deploy_label'], + 'engine_used' => $engine, + 'citation_confidence' => $confidence, + 'elapsed_ms_per_step' => $this->stepTimings, + ], + 'disclaimer' => dbnToolsDisclaimer($language), + ]; + } + + // ── Per-document classification ──────────────────────────────────────────── + + private function classifyDoc(string $docText, string $label, string $language): array + { + $locale = dbnToolsLanguageName($language); + $excerpt = mb_substr($docText, 0, 6000, 'UTF-8'); + + $prompt = << $label, + 'doc_date' => null, + 'issuing_authority' => null, + 'reference_number' => null, + 'child_info' => null, + ]; + try { + $raw = $this->azure->chatText([ + ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'], + ['role' => 'user', 'content' => $prompt], + ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 400, 'timeout' => 30]); + $json = $this->azure->decodeJsonObject($raw); + if (is_array($json)) { + return array_merge($default, array_filter($json, fn($v) => $v !== null && $v !== '')); + } + } catch (Throwable $e) { + error_log('Discrepancy classifyDoc failed (' . $label . '): ' . $e->getMessage()); + } + return $default; + } + + // ── Per-document party extraction ────────────────────────────────────────── + + private function extractPartiesDoc(string $docText, string $label, string $language): array + { + $locale = dbnToolsLanguageName($language); + $excerpt = mb_substr($docText, 0, 20000, 'UTF-8'); + + $prompt = <<azure->chatText([ + ['role' => 'system', 'content' => 'You return valid JSON only. 
No markdown fences.'], + ['role' => 'user', 'content' => $prompt], + ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 2000, 'timeout' => 45]); + $json = $this->azure->decodeJsonObject($raw); + if (is_array($json) && is_array($json['parties'] ?? null)) { + return array_slice($json['parties'], 0, 20); + } + if (is_array($json) && isset($json[0]['name'])) { + return array_slice($json, 0, 20); + } + } catch (Throwable $e) { + error_log('Discrepancy extractPartiesDoc failed (' . $label . '): ' . $e->getMessage()); + } + return []; + } + + // ── Per-document timeline extraction ─────────────────────────────────────── + + private function extractTimelineDoc(string $docText, string $label, string $language): array + { + $locale = dbnToolsLanguageName($language); + $excerpt = mb_substr($docText, 0, 20000, 'UTF-8'); + + $prompt = <<azure->chatText([ + ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'], + ['role' => 'user', 'content' => $prompt], + ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 4000, 'timeout' => 55]); + $json = $this->azure->decodeJsonObject($raw); + if (is_array($json) && is_array($json['events'] ?? null)) { + return array_slice($json['events'], 0, 40); + } + } catch (Throwable $e) { + error_log('Discrepancy extractTimelineDoc failed (' . $label . '): ' . 
$e->getMessage()); + } + return []; + } + + // ── Cross-reference: parties ─────────────────────────────────────────────── + + private function crossReferenceParties( + array $partiesA, + array $partiesB, + string $nameA, + string $nameB, + string $language + ): array { + $locale = dbnToolsLanguageName($language); + $partiesAJson = json_encode($partiesA, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT); + $partiesBJson = json_encode($partiesB, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT); + + $prompt = << [], 'in_b_only' => [], 'changed_between' => []]; + try { + $raw = $this->azure->chatText([ + ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'], + ['role' => 'user', 'content' => $prompt], + ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 2000, 'timeout' => 50]); + $json = $this->azure->decodeJsonObject($raw); + if (is_array($json)) { + return array_merge($default, array_intersect_key($json, $default)); + } + } catch (Throwable $e) { + error_log('Discrepancy crossReferenceParties failed: ' . $e->getMessage()); + } + return $default; + } + + // ── Cross-reference: timelines ───────────────────────────────────────────── + + private function crossReferenceTimelines( + array $timelineA, + array $timelineB, + string $textA, + string $textB, + string $nameA, + string $nameB, + string $language + ): array { + $locale = dbnToolsLanguageName($language); + $tlAJson = json_encode(array_slice($timelineA, 0, 30), JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT); + $tlBJson = json_encode(array_slice($timelineB, 0, 30), JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT); + $excerptA = mb_substr($textA, 0, 3000, 'UTF-8'); + $excerptB = mb_substr($textB, 0, 3000, 'UTF-8'); + + $prompt = << [], + 'in_a_only' => [], + 'in_b_only' => [], + 'procedural_gaps' => [], + 'narrative_shifts' => ['summary' => '', 'new_in_b' => [], 'removed_from_b' => []], + ]; + try { + $raw = $this->azure->chatText([ + ['role' => 'system', 'content' => 'You return valid JSON only. 
No markdown fences.'], + ['role' => 'user', 'content' => $prompt], + ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 4000, 'timeout' => 90]); + $json = $this->azure->decodeJsonObject($raw); + if (is_array($json)) { + return array_merge($default, array_intersect_key($json, $default)); + } + } catch (Throwable $e) { + error_log('Discrepancy crossReferenceTimelines failed: ' . $e->getMessage()); + } + return $default; + } + + // ── Sub-question generation ──────────────────────────────────────────────── + + private function generateDiscrepancySubQ( + array $partiesDiff, + array $timelineDiff, + array $metaA, + array $metaB, + string $language + ): array { + $locale = dbnToolsLanguageName($language); + + $parts = []; + $pRemove = count($partiesDiff['in_a_only'] ?? []); + $pAdd = count($partiesDiff['in_b_only'] ?? []); + $pChange = count($partiesDiff['changed_between'] ?? []); + if ($pRemove) $parts[] = "{$pRemove} parties removed between versions"; + if ($pAdd) $parts[] = "{$pAdd} new parties added in later version"; + if ($pChange) $parts[] = "{$pChange} parties changed between versions"; + + $conflicts = $timelineDiff['conflicts'] ?? []; + $deleted = $timelineDiff['in_a_only'] ?? []; + $added = $timelineDiff['in_b_only'] ?? []; + $procGaps = $timelineDiff['procedural_gaps'] ?? []; + if ($conflicts) $parts[] = count($conflicts) . ' timeline contradictions'; + if ($deleted) $parts[] = count($deleted) . ' events deleted from later version'; + if ($added) $parts[] = count($added) . ' new events added in later version'; + if ($procGaps) $parts[] = count($procGaps) . ' procedural gaps identified'; + + $summary = $parts ? implode(', ', $parts) . '.' : 'Some discrepancies found.'; + $docTypeA = $metaA['doc_type'] ?? 'Document A'; + $docTypeB = $metaB['doc_type'] ?? 'Document B'; + $authA = $metaA['issuing_authority'] ?? 
'the municipality'; + + $exampleFacts = ''; + if (!empty($conflicts[0])) { + $c = $conflicts[0]; + $exampleFacts .= "- Contradiction: A says '{$c['doc_a_says']}', B says '{$c['doc_b_says']}'\n"; + } + if (!empty($deleted[0])) { + $exampleFacts .= "- Deleted from B: '{$deleted[0]['description']}'\n"; + } + if (!empty($added[0])) { + $exampleFacts .= "- New in B: '{$added[0]['description']}'\n"; + } + if (!empty($procGaps[0])) { + $exampleFacts .= "- Procedural gap: '{$procGaps[0]['gap']}'\n"; + } + if (!empty(($partiesDiff['changed_between'] ?? [])[0])) { + $pc = $partiesDiff['changed_between'][0]; + $exampleFacts .= "- Party change: {$pc['name']}: '{$pc['in_a']}' → '{$pc['in_b']}'\n"; + } + + $prompt = <<azure->chatText([ + ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'], + ['role' => 'user', 'content' => $prompt], + ], ['json' => true, 'temperature' => 0.15, 'max_tokens' => 1000, 'timeout' => 40]); + $json = $this->azure->decodeJsonObject($raw); + if (is_array($json) && is_array($json['sub_questions'] ?? null)) { + $sqs = []; + foreach (array_slice($json['sub_questions'], 0, 5) as $sq) { + if (!empty($sq['id']) && !empty($sq['question'])) { + $sqs[] = [ + 'id' => (string)$sq['id'], + 'question' => (string)$sq['question'], + 'rationale' => (string)($sq['rationale'] ?? ''), + ]; + } + } + if ($sqs) return $sqs; + } + } catch (Throwable $e) { + error_log('Discrepancy generateDiscrepancySubQ failed: ' . 
$e->getMessage()); + } + + return [ + ['id' => 'q1', 'question' => 'What does ECHR Article 8 require when Barnevernet changes the factual basis of an intervention between document versions?', 'rationale' => 'ECHR procedural fairness'], + ['id' => 'q2', 'question' => 'Under Barnevernloven, can new allegations be introduced after the initial care order application has been filed?', 'rationale' => 'New allegations validity'], + ['id' => 'q3', 'question' => 'What are Barnevernloven documentation requirements for home visits and assessments?', 'rationale' => 'Documentation obligations'], + ['id' => 'q4', 'question' => 'What Bufdir guidance exists on evidence standards and investigation quality for Barnevernet interventions?', 'rationale' => 'Evidence standards'], + ]; + } + + // ── Synthesis ────────────────────────────────────────────────────────────── + + private function synthesize( + array $metaA, + array $metaB, + string $nameA, + string $nameB, + array $partiesDiff, + array $timelineDiff, + array $numberedSources, + string $engine, + string $language + ): array { + $locale = dbnToolsLanguageName($language); + $sourceCount = count($numberedSources); + $deployLabel = match ($engine) { + 'gpu' => 'GPU (cuttlefish)', + 'azure_full' => 'gpt-4o', + default => $this->azure->chatDeployment(), + }; + + if (empty($numberedSources)) { + return [ + 'json' => [ + 'headline_finding' => 'No corpus sources retrieved. 
Discrepancies were identified but could not be cross-referenced with the legal corpus for legal significance assessment.', + 'critical_discrepancies' => [], + 'recommended_actions' => ['Enable corpus slices (Child Welfare, ECHR, Family Core, Bufdir Guidance) and re-run for legal significance mapping.'], + 'what_remains_uncertain' => ['Legal significance of each discrepancy — re-run with corpus slices enabled.'], + ], + 'deploy_label' => $deployLabel, + ]; + } + + $sourcesContext = []; + foreach ($numberedSources as $s) { + $sourcesContext[] = sprintf( + "[%d] %s%s\n Corpus: %s | Authority: %s\n Excerpt: %s", + $s['n'], + $s['title'], + !empty($s['section']) ? ' — ' . $s['section'] : '', + $s['package_or_corpus'], + $s['authority_label'] ?? ($s['authority_type'] ?? 'n/a'), + $s['excerpt'] + ); + } + $sourcesText = implode("\n\n", $sourcesContext); + + $discrepancyJson = json_encode([ + 'timeline_conflicts' => array_slice($timelineDiff['conflicts'] ?? [], 0, 10), + 'events_deleted_from_b' => array_slice($timelineDiff['in_a_only'] ?? [], 0, 8), + 'events_added_in_b' => array_slice($timelineDiff['in_b_only'] ?? [], 0, 8), + 'procedural_gaps' => array_slice($timelineDiff['procedural_gaps'] ?? [], 0, 5), + 'narrative_shifts' => $timelineDiff['narrative_shifts'] ?? [], + 'parties_removed' => $partiesDiff['in_a_only'] ?? [], + 'parties_added' => $partiesDiff['in_b_only'] ?? [], + 'parties_changed' => $partiesDiff['changed_between'] ?? [], + ], JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT); + + $docTypeA = $metaA['doc_type'] ?? $nameA; + $docDateA = $metaA['doc_date'] ?? '?'; + $docTypeB = $metaB['doc_type'] ?? $nameB; + $docDateB = $metaB['doc_date'] ?? '?'; + $authority = $metaA['issuing_authority'] ?? $metaB['issuing_authority'] ?? 
'the authority'; + + $prompt = << 'system', 'content' => $sysPrompt], + ['role' => 'user', 'content' => $prompt], + ]; + $opts = ['json' => true, 'temperature' => 0.15, 'max_tokens' => 4000, 'timeout' => 240]; + + $raw = ''; + try { + if ($engine === 'gpu') { + $response = dbnToolsCallGpuLlm($messages, $opts); + $raw = (string)($response['choices'][0]['message']['content'] ?? ''); + } elseif ($engine === 'azure_full') { + $raw = $this->azure->withDeployment('gpt-4o')->chatText($messages, $opts); + } else { + $raw = $this->azure->chatText($messages, $opts); + } + } catch (Throwable $e) { + dbnToolsAbort('Synthesis LLM request failed: ' . $e->getMessage(), 502, 'llm_error'); + } + + $json = $this->azure->decodeJsonObject($raw); + if (!is_array($json) || empty($json['headline_finding'])) { + $json = [ + 'headline_finding' => $raw, + 'critical_discrepancies' => [], + 'recommended_actions' => [], + 'what_remains_uncertain' => [], + ]; + } + return ['json' => $json, 'deploy_label' => $deployLabel]; + } + + // ── Corpus helpers ───────────────────────────────────────────────────────── + + private function normalizeCorpusChunk(array $chunk, string $subQId): array + { + return [ + 'chunk_id' => isset($chunk['id']) ? (int)$chunk['id'] : null, + 'title' => (string)($chunk['document_title'] ?? $chunk['title'] ?? 'Untitled source'), + 'section' => $chunk['section_title'] ?? null, + 'package_or_corpus' => (string)($chunk['source_name'] ?? $chunk['source_type'] ?? 'Do Better Legal'), + 'excerpt' => dbnToolsExcerpt((string)($chunk['content'] ?? ''), 620), + 'chunk_text' => (string)($chunk['content'] ?? ''), + 'similarity' => isset($chunk['similarity']) ? round((float)$chunk['similarity'], 4) : null, + 'reranker_score' => isset($chunk['reranker_score']) ? round((float)$chunk['reranker_score'], 4) : null, + 'document_id' => isset($chunk['document_id']) ? (int)$chunk['document_id'] : null, + 'source_origin' => 'corpus', + 'authority_type' => $chunk['authority_type'] ?? 
null, + 'jurisdiction' => $chunk['jurisdiction'] ?? null, + 'source_url' => null, + 'deep_link' => null, + 'authority_label' => null, + 'matched_sub_questions' => [$subQId], + ]; + } + + private function mergeAndDedupe(array $rawPool, int $cap): array + { + $byKey = []; + foreach ($rawPool as $chunk) { + $key = 'corpus:' . ($chunk['chunk_id'] ?? bin2hex(random_bytes(4))); + if (!isset($byKey[$key])) { + $byKey[$key] = $chunk; + continue; + } + $existing = $byKey[$key]; + $existing['matched_sub_questions'] = array_values(array_unique(array_merge( + $existing['matched_sub_questions'] ?? [], + $chunk['matched_sub_questions'] ?? [] + ))); + if (($chunk['reranker_score'] ?? 0) > ($existing['reranker_score'] ?? 0)) { + $existing['reranker_score'] = $chunk['reranker_score']; + } + if (($chunk['similarity'] ?? 0) > ($existing['similarity'] ?? 0)) { + $existing['similarity'] = $chunk['similarity']; + } + $byKey[$key] = $existing; + } + $merged = array_values($byKey); + usort($merged, function (array $a, array $b): int { + $aScore = $a['reranker_score'] ?? $a['similarity'] ?? 0; + $bScore = $b['reranker_score'] ?? $b['similarity'] ?? 0; + return $bScore <=> $aScore; + }); + return array_slice($merged, 0, $cap); + } + + private function numberSources(array $chunks): array + { + $out = []; + foreach ($chunks as $i => $c) { + $c['n'] = $i + 1; + $out[] = $c; + } + return $out; + } + + private function citationConfidence(array $sources): string + { + if (!$sources) return 'low'; + $scores = array_values(array_filter(array_map( + fn(array $s) => $s['reranker_score'] ?? $s['similarity'] ?? null, + $sources + ), 'is_numeric')); + $best = $scores ? max($scores) : 0; + if (count($sources) >= 5 && $best >= 0.5) return 'high'; + if (count($sources) >= 3 && $best >= 0.35) return 'medium'; + return 'low'; + } + + private function hydrateSourceUrls(array &$pool): void + { + $docIds = []; + foreach ($pool as $chunk) { + $docId = (int)($chunk['document_id'] ?? 
0); + if ($docId > 0) $docIds[$docId] = true; + } + if (empty($docIds)) return; + try { + $ragDb = dbnToolsRagDb(); + $ids = array_keys($docIds); + $ph = implode(',', array_fill(0, count($ids), '?')); + $stmt = $ragDb->prepare( + "SELECT d.id, d.source_url, d.authority_type, d.publication_date, d.source_id, d.title + FROM documents d WHERE d.id IN ({$ph})" + ); + $stmt->execute($ids); + $docMeta = []; + $sourceIds = []; + foreach ($stmt as $row) { + $dId = (int)$row['id']; + $sid = isset($row['source_id']) ? (int)$row['source_id'] : null; + if ($sid) $sourceIds[] = $sid; + $docMeta[$dId] = [ + 'source_url' => $row['source_url'] ?? null, + 'authority_label' => dbnV6AuthorityLabel($row['authority_type'] ?? null), + 'publication_date' => $row['publication_date'] ?? null, + 'source_id' => $sid, + ]; + } + if ($sourceIds) { + $uSids = array_values(array_unique($sourceIds)); + $sPh = implode(',', array_fill(0, count($uSids), '?')); + $sStmt = dbnToolsDb()->prepare("SELECT id, name FROM corpus_sources WHERE id IN ({$sPh})"); + $sStmt->execute($uSids); + $srcNames = []; + foreach ($sStmt as $row) { + $srcNames[(int)$row['id']] = dbnV6RepairText((string)($row['name'] ?? 'Do Better Legal')); + } + foreach ($docMeta as &$m) { + if ($m['source_id'] && isset($srcNames[$m['source_id']])) { + $m['corpus_source_name'] = $srcNames[$m['source_id']]; + } + } + unset($m); + } + } catch (Throwable $e) { + error_log('Discrepancy hydrateSourceUrls failed: ' . $e->getMessage()); + return; + } + foreach ($pool as &$chunk) { + $docId = (int)($chunk['document_id'] ?? 0); + if (!$docId || !isset($docMeta[$docId])) continue; + $m = $docMeta[$docId]; + $chunk['source_url'] = $m['source_url'] ?? null; + $chunk['deep_link'] = $m['source_url'] ?? null; + $chunk['authority_label'] = $m['authority_label'] ?? $chunk['authority_label']; + $chunk['corpus_source_name'] = $m['corpus_source_name'] ?? null; + $chunk['publication_date'] = $m['publication_date'] ?? 
null; + } + unset($chunk); + } + + private function requireFamilyPackage(int $clientId): array + { + $package = dbnToolsFetchPackage('family-legal'); + if (!$package || empty($package['is_active'])) { + dbnToolsAbort('The family-legal corpus package is not active.', 503, 'package_unavailable'); + } + if (!dbnToolsHasActiveSubscription($clientId, (int)$package['id'])) { + dbnToolsAbort('Do Better Norge does not have an active family-legal subscription.', 503, 'subscription_missing'); + } + return $package; + } + + private function elapsedMs(float $start): int + { + return (int)round((microtime(true) - $start) * 1000); + } +} diff --git a/includes/FreeTier.php b/includes/FreeTier.php index 1832f07..9bf38a1 100644 --- a/includes/FreeTier.php +++ b/includes/FreeTier.php @@ -25,6 +25,7 @@ final class FreeTier 'advocate' => 3, 'deep-research' => 5, 'transcribe' => 2, // flat rate; actual duration unknown upfront + 'discrepancy' => 4, // 2 docs × 4 extraction steps + cross-ref + synthesis ]; /** Credit cost for a given tool slug. Returns 1 for unknown tools. 
*/ diff --git a/includes/i18n.php b/includes/i18n.php index d08c7e1..fb03937 100644 --- a/includes/i18n.php +++ b/includes/i18n.php @@ -444,6 +444,7 @@ function dbnToolsLaunchedTools(?string $language = null): array 'barnevernet' => ['BVJ Analyzer', 'Barnevernet documents', 'Analyze child-welfare documents from your perspective with procedural red flags and citations.', 'Document + RAG'], 'advocate' => ['Advocate', 'Partisan brief', 'Choose who you represent and generate a source-grounded brief for that position.', 'ECHR + Lovdata'], 'deep-research' => ['Deep Research', 'Agent + RAG', 'Expand a question into research angles, search legal slices, and synthesize a cited brief.', 'Family-legal'], + 'discrepancy' => ['Discrepancy Finder', 'Document comparison', 'Upload two versions of a Barnevernet document and find contradictions, deleted facts, and new allegations.', 'Cross-document AI'], 'corpus' => ['Corpus', 'Legal knowledge base', 'Inspect indexed sources, corpus health, legal categories, and retrieval behavior.', '~220 K passages'], ], 'no' => [ @@ -453,6 +454,7 @@ function dbnToolsLaunchedTools(?string $language = null): array 'barnevernet' => ['BVJ-analyse', 'Barnevernsdokumenter', 'Analyser barnevernsdokumenter fra ditt perspektiv med prosessuelle røde flagg og kilder.', 'Dokument + RAG'], 'advocate' => ['Advokatmodus', 'Partsinnlegg', 'Velg hvem du representerer og lag et kildebelagt innlegg for den posisjonen.', 'EMD + Lovdata'], 'deep-research' => ['Dyp research', 'Agent + RAG', 'Utvid et spørsmål til forskningsvinkler, søk juridiske kilder og lag et kildebelagt notat.', 'Familierett'], + 'discrepancy' => ['Avviksfinner', 'Dokumentsammenligning', 'Last opp to versjoner av et barneverndokument og finn motsigelser, slettede fakta og nye påstander.', 'Kryssdokument AI'], 'corpus' => ['Korpus', 'Juridisk kunnskapsbase', 'Se indekserte kilder, korpushelse, juridiske kategorier og søkeoppsett.', '~220 K utdrag'], ], 'uk' => [ @@ -462,6 +464,7 @@ function 
dbnToolsLaunchedTools(?string $language = null): array 'barnevernet' => ['BVJ аналізатор', 'Документи Barnevernet', 'Аналізуйте документи захисту дітей з вашої позиції, з процесуальними ризиками та джерелами.', 'Документ + RAG'], 'advocate' => ['Адвокат', 'Позиційний бриф', 'Оберіть, кого представляєте, і створіть бриф із джерелами на підтримку цієї позиції.', 'ЄСПЛ + Lovdata'], 'deep-research' => ['Глибоке дослідження', 'Agent + RAG', 'Розгортає питання в дослідницькі напрями, шукає юридичні джерела та створює бриф.', 'Сімейне право'], + 'discrepancy' => ['Пошук розбіжностей', 'Порівняння документів', 'Завантажте дві версії документа Barnevernet і знайдіть суперечності, видалені факти та нові твердження.', 'Міждокументний AI'], 'corpus' => ['Корпус', 'Юридична база знань', 'Переглядайте індексовані джерела, стан корпусу, категорії та поведінку пошуку.', '~220 тис. уривків'], ], 'pl' => [ @@ -471,20 +474,22 @@ function dbnToolsLaunchedTools(?string $language = null): array 'barnevernet' => ['Analizator BVJ', 'Dokumenty Barnevernet', 'Analizuj dokumenty opieki nad dziećmi z Twojej perspektywy, z ryzykami proceduralnymi i źródłami.', 'Dokument + RAG'], 'advocate' => ['Adwokat', 'Stronniczy brief', 'Wybierz, kogo reprezentujesz, i wygeneruj brief oparty na źródłach dla tej pozycji.', 'ETPC + Lovdata'], 'deep-research' => ['Głębokie badanie', 'Agent + RAG', 'Rozwija pytanie w kierunki badawcze, przeszukuje źródła prawne i tworzy brief z cytatami.', 'Prawo rodzinne'], + 'discrepancy' => ['Wyszukiwacz rozbieżności', 'Porównanie dokumentów', 'Prześlij dwie wersje dokumentu Barnevernet i znajdź sprzeczności, usunięte fakty i nowe zarzuty.', 'AI Między-dokumentowe'], 'corpus' => ['Korpus', 'Prawna baza wiedzy', 'Sprawdzaj indeksowane źródła, stan korpusu, kategorie prawne i działanie wyszukiwania.', '~220 tys. fragmentów'], ], ]; $selected = $copy[$language] ?? 
$copy['en']; - $order = ['transcribe', 'timeline', 'redact', 'barnevernet', 'advocate', 'deep-research', 'corpus']; + $order = ['transcribe', 'timeline', 'redact', 'barnevernet', 'advocate', 'deep-research', 'discrepancy', 'corpus']; $icons = [ - 'transcribe' => 'TR', - 'timeline' => 'TL', - 'redact' => 'RX', - 'barnevernet' => 'BVJ', - 'advocate' => 'ADV', + 'transcribe' => 'TR', + 'timeline' => 'TL', + 'redact' => 'RX', + 'barnevernet' => 'BVJ', + 'advocate' => 'ADV', 'deep-research' => 'DR', - 'corpus' => 'KB', + 'discrepancy' => 'DC', + 'corpus' => 'KB', ]; $out = []; foreach ($order as $slug) { diff --git a/includes/tool-svgs.php b/includes/tool-svgs.php index 45a2c5c..54a5d02 100644 --- a/includes/tool-svgs.php +++ b/includes/tool-svgs.php @@ -16,5 +16,7 @@ $toolSvgs = [ 'deep-research' => '', + 'discrepancy' => '', + 'corpus' => '', ];