diff --git a/api/barnevernet.php b/api/barnevernet.php
new file mode 100644
index 0000000..ac1b4e0
--- /dev/null
+++ b/api/barnevernet.php
@@ -0,0 +1,161 @@
+ 0) { @ob_end_clean(); }
+ob_implicit_flush(true);
+
+// Streaming endpoint for the BVJ analyzer. The response body is NDJSON: one JSON
+// object per line, each carrying an `event` name and `t_ms` (milliseconds since
+// this script started). Failures are reported in-band as an `error` event.
+header('Content-Type: application/x-ndjson; charset=utf-8');
+header('Cache-Control: no-store');
+// Asks nginx-style reverse proxies not to buffer the stream.
+header('X-Accel-Buffering: no');
+
+// Default language; also used by the catch blocks if the request fails before
+// the client-supplied language has been read.
+$language = 'en';
+$startTime = microtime(true);
+
+/**
+ * Writes one NDJSON event line to the response and flushes it.
+ *
+ * @param string               $event   Event name, stored under the `event` key.
+ * @param array<string, mixed> $payload Extra fields sent alongside the event name.
+ */
+$emit = function (string $event, array $payload = []) use ($startTime): void {
+    $payload['event'] = $event;
+    $payload['t_ms'] = (int)round((microtime(true) - $startTime) * 1000);
+
+    // json_encode() returns false when a string is not valid UTF-8, which would
+    // turn the whole event (including `final`) into a blank line. Substitute the
+    // offending bytes instead of dropping the event.
+    $line = json_encode(
+        $payload,
+        JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
+    );
+    if ($line === false) {
+        // Still unencodable: log it and send a stub that keeps the event name so
+        // the client sees that something was emitted.
+        $reason = json_last_error_msg();
+        error_log('DBN BVJ analyzer: could not encode "' . $event . '" event: ' . $reason);
+        $line = json_encode([
+            'event' => $event,
+            't_ms' => $payload['t_ms'],
+            'encode_error' => $reason,
+        ]);
+    }
+    echo $line . "\n";
+    @flush();
+};
+
+try {
+    // --- Parse the request: multipart (JSON in a `payload` field) or raw JSON body.
+    $isMultipart = stripos((string)($_SERVER['CONTENT_TYPE'] ?? ''), 'multipart/form-data') !== false;
+    if ($isMultipart) {
+        $payloadRaw = (string)($_POST['payload'] ?? '');
+        if ($payloadRaw === '') {
+            throw new DbnToolsHttpException('Multipart request missing payload.', 422, 'missing_payload');
+        }
+        $input = json_decode($payloadRaw, true);
+        if (!is_array($input)) {
+            throw new DbnToolsHttpException('Invalid payload JSON.', 422, 'invalid_payload_json');
+        }
+    } else {
+        $raw = file_get_contents('php://input');
+        if ($raw === false || strlen($raw) > 120000) {
+            throw new DbnToolsHttpException('Request body unreadable or too large.', 413, 'body_too_large');
+        }
+        $input = json_decode((string)$raw, true);
+        if (!is_array($input)) {
+            throw new DbnToolsHttpException('Request body must be valid JSON.', 400, 'invalid_json');
+        }
+    }
+
+    // --- Read and validate the request options.
+    $language = dbnToolsNormalizeLanguage($input['language'] ?? 'en');
+    $advocateRole = trim((string)($input['advocate_role'] ?? ''));
+    $engine = (string)($input['engine'] ?? 'azure_mini');
+    $sliceInput = $input['slices'] ?? [];
+    $controls = is_array($input['controls'] ?? null) ? $input['controls'] : [];
+    // Notes are capped at 2000 characters by truncation. The length check that
+    // used to follow could never fire on the already-truncated value, so it has
+    // been removed; the effective behaviour is unchanged.
+    $additionalNotes = mb_substr(trim((string)($input['additional_notes'] ?? '')), 0, 2000, 'UTF-8');
+
+    if (mb_strlen($advocateRole, 'UTF-8') > 200) {
+        throw new DbnToolsHttpException('advocate_role is too long.', 422, 'advocate_role_too_long');
+    }
+
+    $emit('progress', ['detail' => 'Reading upload(s)…']);
+
+    // --- Extract text from each uploaded file (expects the `files[]` array form).
+    $uploadedFiles = [];
+    if (!empty($_FILES['files']) && is_array($_FILES['files']['tmp_name'] ?? null)) {
+        $count = count($_FILES['files']['tmp_name']);
+        if ($count > 5) {
+            throw new DbnToolsHttpException('At most 5 files can be uploaded per request.', 413, 'too_many_files');
+        }
+        for ($i = 0; $i < $count; $i++) {
+            // Re-assemble PHP's column-oriented $_FILES layout into one record per file.
+            $file = [
+                'name' => $_FILES['files']['name'][$i] ?? '',
+                'type' => $_FILES['files']['type'][$i] ?? '',
+                'tmp_name' => $_FILES['files']['tmp_name'][$i] ?? '',
+                'error' => $_FILES['files']['error'][$i] ?? UPLOAD_ERR_NO_FILE,
+                'size' => $_FILES['files']['size'][$i] ?? 0,
+            ];
+            $extracted = dbnToolsExtractUploadedFile($file);
+            $uploadedFiles[] = [
+                'filename' => $extracted['filename'],
+                'text' => $extracted['text'],
+                'chars' => $extracted['chars'],
+                'truncated' => $extracted['truncated'],
+            ];
+            $emit('progress', [
+                'detail' => sprintf('Extracted %s (%d chars%s)',
+                    $extracted['filename'],
+                    $extracted['chars'],
+                    !empty($extracted['truncated']) ? ', truncated' : ''
+                ),
+            ]);
+        }
+    }
+
+    if (empty($uploadedFiles)) {
+        throw new DbnToolsHttpException(
+            'Upload at least one BVJ document (PDF, DOCX, or TXT) before running the analyzer.',
+            422, 'no_uploads'
+        );
+    }
+
+    $emit('start', [
+        'engine' => $engine,
+        'language' => $language,
+        'file_count' => count($uploadedFiles),
+    ]);
+
+    // --- Run the analyzer; it receives $emit so it can stream its own events.
+    $result = (new DbnBvjAnalyzerAgent())->run(
+        $uploadedFiles,
+        $advocateRole,
+        $engine,
+        $language,
+        is_array($sliceInput) ? $sliceInput : [],
+        $controls,
+        $additionalNotes,
+        $emit
+    );
+
+    $result['ok'] = true;
+    $result['latency_ms'] = (int)round((microtime(true) - $startTime) * 1000);
+
+    dbnToolsLogMetadata([
+        'tool' => 'bvj_analyzer',
+        'language' => $language,
+        'ok' => true,
+        'latency_ms' => $result['latency_ms'],
+        'chunk_count' => (int)($result['trace_metadata']['chunk_count'] ?? 0),
+        'source_count' => (int)($result['trace_metadata']['source_count'] ?? 0),
+        'deployment' => $result['trace_metadata']['deployment'] ?? null,
+        'advocate_role' => $advocateRole !== '' ? $advocateRole : null,
+        'bvj_doc_type' => $result['doc_meta']['doc_type'] ?? null,
+    ]);
+
+    $emit('final', ['result' => $result]);
+
+} catch (DbnToolsHttpException $e) {
+    // Expected, client-facing failure: report its code, message and status in-band.
+    $latency = (int)round((microtime(true) - $startTime) * 1000);
+    dbnToolsLogMetadata([
+        'tool' => 'bvj_analyzer',
+        'language' => $language,
+        'ok' => false,
+        'latency_ms' => $latency,
+        'error_code' => $e->errorCode,
+    ]);
+    $emit('error', ['code' => $e->errorCode, 'message' => $e->getMessage(), 'status' => $e->status]);
+} catch (Throwable $e) {
+    // Unexpected failure: log the detail server-side and send only a generic
+    // message to the client.
+    error_log('DBN BVJ analyzer fatal: ' . $e->getMessage());
+    $latency = (int)round((microtime(true) - $startTime) * 1000);
+    dbnToolsLogMetadata([
+        'tool' => 'bvj_analyzer',
+        'language' => $language,
+        'ok' => false,
+        'latency_ms' => $latency,
+        'error_code' => 'internal_error',
+    ]);
+    // `status` is included so both error events share one shape.
+    $emit('error', [
+        'code' => 'internal_error',
+        'message' => 'The analyzer could not complete this request.',
+        'status' => 500,
+    ]);
+}
diff --git a/assets/css/tools.css b/assets/css/tools.css index 6e385a0..7140b01 100644 --- a/assets/css/tools.css +++ b/assets/css/tools.css @@ -3376,3 +3376,339 @@ a.dr-source-title-link:hover { color: var(--muted); white-space: pre-wrap; } + +/* ============================================================ + BVJ Analyzer — document meta, parties, timeline, red flags + ============================================================ */ + +.bvj-doc-meta { + background: var(--panel); + border: 1px solid var(--line); + border-radius: 10px; + padding: 16px 20px; + margin-bottom: 20px; +} + +.bvj-doc-meta__head { + display: flex; + align-items: center; + gap: 12px; + margin-bottom: 12px; +} + +.bvj-doc-meta__authority { + font-weight: 700; + color: var(--ink); + font-size: 1rem; +} + +.bvj-doc-type-badge { + display: inline-block; + font-size: 0.72rem; + font-weight: 700; + letter-spacing: 0.04em; + text-transform: uppercase; + background: var(--soft-teal); + color: var(--teal-dark); + border: 1px solid #b2dbd6; + border-radius: 20px; + padding: 2px 10px; +} + +.bvj-doc-meta__fields { + display: flex; + flex-wrap: wrap; + gap: 8px 20px; +} + +.bvj-doc-meta__field { + font-size: 0.84rem; + color: var(--muted); +} + +.bvj-doc-meta__field strong { + color: var(--ink); + font-weight: 600; +} + +/* --- Parties grid --- */ + +.bvj-parties-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); + gap: 12px; + margin-bottom: 20px; +} + +.bvj-party-card { + background: var(--panel); + border: 1px solid var(--line); + border-radius: 8px; + padding: 12px 14px; +} + +.bvj-party-card__name { +
font-weight: 700; + font-size: 0.92rem; + color: var(--ink); + margin: 0 0 4px; +} + +.bvj-party-card__org { + font-size: 0.8rem; + color: var(--muted); + margin: 0 0 6px; +} + +.bvj-party-card__rel { + font-size: 0.8rem; + font-style: italic; + color: var(--muted); + margin: 0 0 8px; +} + +.bvj-party-role { + display: inline-block; + font-size: 0.72rem; + font-weight: 700; + letter-spacing: 0.03em; + border-radius: 20px; + padding: 2px 9px; +} + +.bvj-party-role--bvv { + background: #e8f0fe; + color: #2d5fa6; + border: 1px solid #c3d4f8; +} + +.bvj-party-role--parent { + background: var(--soft-teal); + color: var(--teal-dark); + border: 1px solid #b2dbd6; +} + +.bvj-party-role--child { + background: #ecfdf5; + color: #166534; + border: 1px solid #bbf7d0; +} + +.bvj-party-role--third { + background: #f3f4f6; + color: #4b5563; + border: 1px solid var(--line); +} + +.bvj-party-role--other { + background: #fafafa; + color: var(--muted); + border: 1px solid var(--line); +} + +/* --- Timeline --- */ + +.bvj-timeline-wrap { + display: flex; + flex-direction: column; + gap: 4px; + margin-bottom: 20px; +} + +.bvj-timeline-event { + display: grid; + grid-template-columns: 130px 1fr; + border-left: 3px solid var(--line); + border-radius: 0 6px 6px 0; + overflow: hidden; +} + +.bvj-timeline-event--high { border-left-color: var(--coral); } +.bvj-timeline-event--medium { border-left-color: var(--amber); } +.bvj-timeline-event--low { border-left-color: var(--line); } + +.bvj-timeline-date { + font-family: ui-monospace, "Cascadia Code", "Fira Code", monospace; + font-size: 0.78rem; + color: var(--muted); + background: #f3f4f6; + border-right: 1px solid var(--line); + padding: 10px 12px; + display: flex; + flex-direction: column; + justify-content: flex-start; + gap: 2px; +} + +.bvj-timeline-time { + font-size: 0.72rem; + color: var(--muted); + opacity: 0.8; +} + +.bvj-timeline-body { + background: var(--panel); + padding: 10px 14px; + display: flex; + flex-direction: column; + 
gap: 2px; +} + +.bvj-timeline-actor { + font-size: 0.78rem; + font-weight: 700; + color: var(--teal-dark); + text-transform: uppercase; + letter-spacing: 0.04em; +} + +.bvj-timeline-action { + font-size: 0.88rem; + color: var(--ink); + line-height: 1.5; +} + +/* --- Red flags --- */ + +.bvj-red-flags { + display: flex; + flex-direction: column; + gap: 10px; + margin-bottom: 20px; +} + +.bvj-red-flag { + background: var(--panel); + border: 1px solid var(--line); + border-radius: 8px; + padding: 14px 16px; +} + +.bvj-red-flag--high { border-left: 3px solid var(--coral); } +.bvj-red-flag--medium { border-left: 3px solid var(--amber); } +.bvj-red-flag--low { border-left: 3px solid var(--line); } + +.bvj-red-flag__head { + display: flex; + align-items: flex-start; + gap: 10px; + margin-bottom: 8px; +} + +.bvj-red-flag__desc { + font-weight: 700; + font-size: 0.9rem; + color: var(--ink); + flex: 1; + margin: 0; +} + +.bvj-red-flag__legal { + display: inline-block; + font-size: 0.75rem; + font-weight: 600; + background: var(--soft-teal); + color: var(--teal-dark); + border: 1px solid #b2dbd6; + border-radius: 20px; + padding: 2px 9px; + white-space: nowrap; + margin-top: 2px; +} + +.bvj-red-flag__details { + margin-top: 6px; +} + +.bvj-red-flag__details summary { + font-size: 0.8rem; + color: var(--teal); + cursor: pointer; + user-select: none; +} + +.bvj-red-flag__check { + font-size: 0.84rem; + font-style: italic; + color: var(--muted); + margin: 6px 0 0; + line-height: 1.5; +} + +/* --- Severity badges --- */ + +.bvj-severity { + display: inline-block; + font-size: 0.72rem; + font-weight: 700; + letter-spacing: 0.04em; + text-transform: uppercase; + border-radius: 20px; + padding: 2px 9px; + white-space: nowrap; +} + +.bvj-severity-high { + background: var(--soft-coral); + color: var(--coral); + border: 1px solid #f9c6ae; +} + +.bvj-severity-medium { + background: #fffbeb; + color: var(--amber); + border: 1px solid #fde68a; +} + +.bvj-severity-low { + background: 
#f3f4f6; + color: var(--muted); + border: 1px solid var(--line); +} + +/* --- BVJ result banner (role + doc type header) --- */ + +.bvj-banner { + display: flex; + align-items: center; + gap: 12px; + padding: 10px 16px; + background: var(--soft-teal); + border: 1px solid #b2dbd6; + border-radius: 8px; + margin-bottom: 16px; +} + +.bvj-banner__label { + font-size: 0.8rem; + color: var(--muted); + text-transform: uppercase; + letter-spacing: 0.04em; + font-weight: 600; +} + +.bvj-banner__role { + font-size: 0.88rem; + font-weight: 700; + color: var(--teal-dark); +} + +/* --- [DOC] citation inline marker --- */ + +.dr-cite--doc { + display: inline-flex; + align-items: center; + justify-content: center; + font-size: 0.7rem; + font-weight: 700; + background: #e8f0fe; + color: #2d5fa6; + border: 1px solid #c3d4f8; + border-radius: 4px; + padding: 1px 5px; + margin: 0 1px; + cursor: default; + vertical-align: baseline; + position: relative; + top: -1px; +} diff --git a/assets/js/barnevernet.js b/assets/js/barnevernet.js new file mode 100644 index 0000000..4cddcda --- /dev/null +++ b/assets/js/barnevernet.js @@ -0,0 +1,1005 @@ +/* barnevernet.js — page-scoped UI for /barnevernet.php */ +(function () { + 'use strict'; + + const els = {}; + let lang = 'en'; + let uploadFiles = []; + let lastResult = null; + let branchContext = null; + + const SLICE_DEFS = [ + { id: 'child_welfare', label: 'Child Welfare' }, + { id: 'echr', label: 'ECHR' }, + { id: 'family_core', label: 'Family Law Core' }, + { id: 'bufdir_guidance', label: 'Bufdir Guidance' }, + { id: 'norwegian_courts', label: 'Norwegian Courts' }, + { id: 'hague', label: 'Hague Convention' }, + { id: 'broader_legal', label: 'Broader Legal Support' }, + { id: 'dbn_resources', label: 'DBN Resources' }, + ]; + + const STEP_LABELS = [ + 'Document classification', + 'Party extraction', + 'Timeline extraction', + 'Sub-question generation', + 'Corpus retrieval', + 'Synthesis', + 'Citation confidence', + ]; + + const 
stepKeyToIndex = { + doc_classify: 0, + party_extract: 1, + timeline_extract: 2, + sub_question_gen: 3, + slice_resolution: 3, // shown under sub-question gen phase + upload_indexing: 4, + retrieval: 4, + synthesis: 5, + confidence: 6, + }; + + document.addEventListener('DOMContentLoaded', () => { + if (!document.body.dataset.activeTool || document.body.dataset.activeTool !== 'barnevernet') return; + + Object.assign(els, { + form: document.getElementById('bvjForm'), + notes: document.getElementById('bvjNotes'), + status: document.getElementById('bvjStatus'), + runButton: document.getElementById('bvjRunButton'), + results: document.getElementById('bvjResults'), + traceList: document.getElementById('traceList'), + roleSelect: document.getElementById('bvjRoleSelect'), + roleCustom: document.getElementById('bvjRoleCustom'), + slices: Array.from(document.querySelectorAll('.adv-slice')), + langButtons: Array.from(document.querySelectorAll('#bvjLangSwitcher .lang-btn')), + engineRadios: Array.from(document.querySelectorAll('input[name="bvjEngine"]')), + subQ: document.getElementById('bvjSubQ'), + subQVal: document.getElementById('bvjSubQValue'), + chunkLimit: document.getElementById('bvjChunkLimit'), + chunkLimitVal: document.getElementById('bvjChunkLimitValue'), + sim: document.getElementById('bvjSim'), + simVal: document.getElementById('bvjSimValue'), + topK: document.getElementById('bvjTopK'), + topKVal: document.getElementById('bvjTopKValue'), + temp: document.getElementById('bvjTemp'), + tempVal: document.getElementById('bvjTempValue'), + uploadZone: document.getElementById('bvjUploadZone'), + uploadInput: document.getElementById('bvjUploadInput'), + uploadPrompt: document.getElementById('bvjUploadPrompt'), + uploadFileInfo: document.getElementById('bvjUploadFileInfo'), + uploadFileList: document.getElementById('bvjUploadFileList'), + uploadClear: document.getElementById('bvjUploadClear'), + modal: document.getElementById('bvjSourceModal'), + modalClose: 
document.getElementById('bvjSourceModalClose'), + modalTitle: document.getElementById('bvjSourceModalTitle'), + modalEyebrow: document.getElementById('bvjSourceModalEyebrow'), + modalMeta: document.getElementById('bvjSourceModalMeta'), + modalText: document.getElementById('bvjSourceModalText'), + branchPanel: document.getElementById('bvjBranchPanel'), + branchClear: document.getElementById('bvjBranchClear'), + branchOrigin: document.getElementById('bvjBranchOrigin'), + branchSummary: document.getElementById('bvjBranchSummary'), + branchNotes: document.getElementById('bvjBranchNotes'), + }); + + if (!els.form) return; + + bindRole(); + bindSlices(); + bindLang(); + bindRanges(); + bindUpload(); + bindModal(); + bindBranch(); + els.form.addEventListener('submit', onSubmit); + els.results.addEventListener('click', (e) => { + const btn = e.target.closest('.dr-branch-btn'); + if (btn) branchFromSubQ(btn.dataset.question || ''); + }); + + renderTrace(STEP_LABELS.map((label) => ({ label, detail: 'Waiting…', status: 'idle' }))); + }); + + // ── Role binding ─────────────────────────────────────────────────────────── + + function bindRole() { + if (!els.roleSelect) return; + els.roleSelect.addEventListener('change', () => { + const isOther = els.roleSelect.value === '__other__'; + els.roleCustom.classList.toggle('is-hidden', !isOther); + if (isOther) els.roleCustom.focus(); + }); + } + + function getAdvocateRole() { + if (!els.roleSelect) return ''; + if (els.roleSelect.value === '__other__') { + return (els.roleCustom ? els.roleCustom.value.trim() : ''); + } + return els.roleSelect.value; + } + + // ── Corpus slice toggles ─────────────────────────────────────────────────── + + function bindSlices() { + els.slices.forEach((btn) => { + btn.addEventListener('click', () => { + const isOn = btn.classList.toggle('is-on'); + btn.setAttribute('aria-pressed', isOn ? 'true' : 'false'); + const badge = btn.querySelector('.dr-slice__badge'); + if (badge) badge.textContent = isOn ? 
'on' : 'off'; + }); + }); + } + + function getSelectedSlices() { + const out = {}; + SLICE_DEFS.forEach((s) => { + const btn = els.slices.find((b) => b.dataset.slice === s.id); + out[s.id] = !!(btn && btn.classList.contains('is-on')); + }); + return out; + } + + // ── Language ─────────────────────────────────────────────────────────────── + + function bindLang() { + els.langButtons.forEach((b) => { + b.addEventListener('click', () => { + els.langButtons.forEach((x) => x.classList.remove('is-active')); + b.classList.add('is-active'); + lang = b.dataset.lang || 'en'; + }); + }); + } + + // ── Range controls ───────────────────────────────────────────────────────── + + function bindRanges() { + const pairs = [ + [els.subQ, els.subQVal, (v) => v], + [els.chunkLimit, els.chunkLimitVal, (v) => v], + [els.sim, els.simVal, (v) => Number(v).toFixed(2)], + [els.topK, els.topKVal, (v) => v], + [els.temp, els.tempVal, (v) => Number(v).toFixed(2)], + ]; + pairs.forEach(([range, label, fmt]) => { + if (!range || !label) return; + const sync = () => { label.textContent = fmt(range.value); }; + range.addEventListener('input', sync); + sync(); + }); + } + + function getControls() { + return { + sub_q_count: parseInt(els.subQ.value, 10), + chunk_limit: parseInt(els.chunkLimit.value, 10), + similarity_threshold: parseFloat(els.sim.value), + reranker_top_k: parseInt(els.topK.value, 10), + temperature: parseFloat(els.temp.value), + }; + } + + function getEngine() { + const checked = els.engineRadios.find((r) => r.checked); + return checked ? 
checked.value : 'azure_mini'; + } + + // ── File upload ──────────────────────────────────────────────────────────── + + function bindUpload() { + if (!els.uploadZone) return; + const onFiles = (fileList) => { + const files = Array.from(fileList || []).slice(0, 5); + if (uploadFiles.length + files.length > 5) { + setStatus('At most 5 files can be uploaded per request.', 'error'); + return; + } + files.forEach((f) => { + if (f.size > 4 * 1024 * 1024) { + setStatus(`${f.name} exceeds the 4 MB limit.`, 'error'); + return; + } + const ext = (f.name.split('.').pop() || '').toLowerCase(); + if (!['pdf', 'docx', 'txt'].includes(ext)) { + setStatus(`${f.name} is not a supported file type (PDF, DOCX, TXT).`, 'error'); + return; + } + uploadFiles.push(f); + }); + renderUploadList(); + }; + els.uploadInput.addEventListener('change', (e) => onFiles(e.target.files)); + els.uploadZone.addEventListener('dragover', (e) => { e.preventDefault(); els.uploadZone.classList.add('is-drop'); }); + els.uploadZone.addEventListener('dragleave', () => els.uploadZone.classList.remove('is-drop')); + els.uploadZone.addEventListener('drop', (e) => { + e.preventDefault(); + els.uploadZone.classList.remove('is-drop'); + onFiles(e.dataTransfer?.files); + }); + els.uploadClear?.addEventListener('click', () => { + uploadFiles = []; + els.uploadInput.value = ''; + renderUploadList(); + }); + } + + function renderUploadList() { + if (!uploadFiles.length) { + els.uploadFileInfo.classList.add('is-hidden'); + els.uploadPrompt.classList.remove('is-hidden'); + return; + } + els.uploadPrompt.classList.add('is-hidden'); + els.uploadFileInfo.classList.remove('is-hidden'); + els.uploadFileList.innerHTML = uploadFiles.map((f) => { + const kb = (f.size / 1024).toFixed(0); + return `
  • ${escapeHtml(f.name)}${kb} KB
  • `; + }).join(''); + } + + // ── Source modal ─────────────────────────────────────────────────────────── + + function bindModal() { + els.modalClose?.addEventListener('click', closeModal); + els.modal?.addEventListener('click', (e) => { + if (e.target === els.modal) closeModal(); + }); + document.addEventListener('keydown', (e) => { + if (e.key === 'Escape' && els.modal && !els.modal.classList.contains('is-hidden')) closeModal(); + }); + } + + function closeModal() { + els.modal?.classList.add('is-hidden'); + } + + function openModal(source) { + if (!source) return; + els.modalEyebrow.textContent = source.source_origin === 'upload' ? 'Uploaded file' : 'Corpus source'; + els.modalTitle.textContent = source.title || 'Source'; + const metaRows = [ + ['Number', `[${source.n}]`], + source.section ? ['Section', source.section] : null, + ['Corpus / package', source.package_or_corpus || '—'], + source.authority_type ? ['Authority', source.authority_type] : null, + source.jurisdiction ? ['Jurisdiction', source.jurisdiction] : null, + source.similarity != null ? ['Similarity', String(source.similarity)] : null, + source.reranker_score != null ? ['Rerank score', String(source.reranker_score)] : null, + source.matched_sub_questions?.length ? ['Matched sub-Q', source.matched_sub_questions.join(', ')] : null, + ].filter(Boolean); + els.modalMeta.innerHTML = '
    ' + metaRows.map(([k, v]) => `
    ${escapeHtml(k)}
    ${escapeHtml(String(v))}
    `).join('') + '
    '; + + const summary = source.summary || ''; + const chunkText = source.chunk_text || source.excerpt || ''; + const isUpload = source.source_origin === 'upload'; + const hasDocId = source.document_id != null; + + let html = summary + ? `
    ${escapeHtml(summary)}
    ` + : `
    Summary not yet generated — showing raw chunk below.
    `; + + if (chunkText) { + html += ``; + html += ``; + } + if (!isUpload && hasDocId) { + html += ``; + html += `
    `; + } + + els.modalText.innerHTML = html; + + const chunkToggle = els.modalText.querySelector('.dr-modal-chunk-toggle'); + const chunkDiv = els.modalText.querySelector('.dr-modal-chunk-text'); + chunkToggle?.addEventListener('click', () => { + const isHidden = chunkDiv.classList.toggle('is-hidden'); + chunkToggle.textContent = isHidden ? 'Show matching chunk ▼' : 'Hide matching chunk ▲'; + }); + + const allChunksBtn = els.modalText.querySelector('.dr-modal-all-chunks'); + const chunksListDiv = els.modalText.querySelector('.dr-modal-chunks-list'); + if (allChunksBtn && chunksListDiv) { + allChunksBtn.addEventListener('click', async () => { + allChunksBtn.disabled = true; + allChunksBtn.textContent = 'Loading…'; + try { + const res = await fetch(`api/document-chunks.php?document_id=${source.document_id}`, { credentials: 'same-origin' }); + const data = await res.json(); + if (data.ok && data.chunks) { + chunksListDiv.innerHTML = + `
    ${escapeHtml(data.document?.title || '')} · ${data.chunks.length} chunks
    ` + + data.chunks.map((c) => `
    + #${c.chunk_index + 1}${c.section_title ? ' · ' + escapeHtml(c.section_title) : ''} +

    ${escapeHtml(truncate(c.content, 300))}

    +
    `).join(''); + allChunksBtn.remove(); + } else { + allChunksBtn.textContent = 'Could not load chunks.'; + allChunksBtn.disabled = false; + } + } catch (_) { + allChunksBtn.textContent = 'Error loading chunks.'; + allChunksBtn.disabled = false; + } + }); + } + + els.modal.classList.remove('is-hidden'); + } + + // ── Branch context ───────────────────────────────────────────────────────── + + function bindBranch() { + if (!els.branchClear) return; + els.branchClear.addEventListener('click', clearBranch); + } + + function clearBranch() { + branchContext = null; + if (els.branchPanel) els.branchPanel.classList.add('is-hidden'); + if (els.branchNotes) els.branchNotes.value = ''; + } + + function branchFromSubQ(question) { + if (!lastResult || !question) return; + branchContext = { + original_query: lastResult.query || '', + brief_summary: (lastResult.advocacy_brief || '').slice(0, 600), + what_we_found: lastResult.what_we_found || '', + top_sources: (lastResult.sources || []).slice(0, 5).map((s) => ({ + n: s.n, title: s.title, excerpt: (s.excerpt || '').slice(0, 200), + })), + }; + // Pre-fill notes textarea (branch uses notes field, not a query textarea) + if (els.notes) els.notes.value = question; + if (els.branchOrigin) els.branchOrigin.textContent = 'Original query: ' + branchContext.original_query; + if (els.branchSummary) els.branchSummary.textContent = branchContext.brief_summary; + if (els.branchPanel) els.branchPanel.classList.remove('is-hidden'); + els.form.scrollIntoView({ behavior: 'smooth', block: 'start' }); + } + + // ── Form submission ──────────────────────────────────────────────────────── + + async function onSubmit(e) { + e.preventDefault(); + + const advocateRole = getAdvocateRole(); + if (!advocateRole) { + setStatus('Select who you are representing before running.', 'error'); + return; + } + if (!uploadFiles.length) { + setStatus('Upload at least one BVJ document before running.', 'error'); + return; + } + const slices = getSelectedSlices(); + 
if (!Object.values(slices).some(Boolean)) { + setStatus('Enable at least one corpus slice.', 'error'); + return; + } + + const engine = getEngine(); + const additionalNotes = (els.notes ? els.notes.value : '').trim(); + const expectedDuration = engine === 'azure_full' + ? '90–180 seconds with Azure gpt-4o' + : (engine === 'gpu' ? '45–90 seconds on GPU' + : (engine === 'dbn_legal' ? '60–120 seconds with Norwegian specialist' + : '30–60 seconds with Azure gpt-4o-mini')); + + setStatus(`Analysing document for ${advocateRole}… (${expectedDuration})`, 'busy'); + els.runButton.disabled = true; + // Clear results area but leave room for progressive renders + els.results.innerHTML = `

    Analysing…

    Document classification, party extraction, and timeline are running. Legal corpus retrieval and advocacy synthesis follow. Expect ${expectedDuration}.

    `; + + const stepState = STEP_LABELS.map((label) => ({ label, detail: 'Queued', status: 'idle' })); + renderTrace(stepState); + + const payload = { + advocate_role: advocateRole, + engine, + language: lang, + slices, + controls: getControls(), + additional_notes: additionalNotes, + }; + + if (branchContext) { + payload.prior_context = branchContext; + payload.branch_notes = (els.branchNotes ? els.branchNotes.value : '').trim(); + } + + // Always multipart — files are required + const form = new FormData(); + form.append('payload', JSON.stringify(payload)); + uploadFiles.forEach((f) => form.append('files[]', f)); + + let response; + try { + response = await fetch('api/barnevernet.php', { method: 'POST', body: form, credentials: 'same-origin' }); + } catch (err) { + setStatus(`Network error: ${err.message || err}`, 'error'); + els.runButton.disabled = false; + stepState[0] = { ...stepState[0], status: 'error', detail: String(err) }; + renderTrace(stepState); + return; + } + + if (!response.ok || !response.body) { + setStatus(`Request failed (${response.status}).`, 'error'); + els.runButton.disabled = false; + return; + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder('utf-8'); + let buffer = ''; + let finalResult = null; + let errorEvent = null; + // Track whether progressive sections have been rendered + let docMetaRendered = false; + let partiesRendered = false; + let timelineRendered = false; + + function handleStreamEvent(evt) { + if (!evt || !evt.event) return; + + if (evt.event === 'progress') { + if (evt.detail) setStatus(evt.detail, 'busy'); + return; + } + if (evt.event === 'start') { + setStatus(`Running… engine=${evt.engine}, files=${evt.file_count || 0}`, 'busy'); + return; + } + if (evt.event === 'step') { + const idx = stepKeyToIndex[evt.step]; + if (idx !== undefined) { + if (evt.status === 'running' && stepState[idx].status !== 'running') { + stepState[idx] = { label: evt.label || stepState[idx].label, detail: 
evt.detail || 'Running…', status: 'running' }; + } else if (evt.status !== 'running') { + stepState[idx] = { + label: evt.label || stepState[idx].label, + detail: evt.detail || stepState[idx].detail, + status: evt.status || stepState[idx].status, + }; + } + renderTrace(stepState); + } + return; + } + if (evt.event === 'doc_meta') { + if (!docMetaRendered) { + renderDocMetaIntoResults(evt.result || {}); + docMetaRendered = true; + } + return; + } + if (evt.event === 'parties') { + if (!partiesRendered && Array.isArray(evt.parties)) { + renderPartiesIntoResults(evt.parties); + partiesRendered = true; + } + return; + } + if (evt.event === 'timeline') { + if (!timelineRendered && Array.isArray(evt.events)) { + renderTimelineIntoResults(evt.events); + timelineRendered = true; + } + return; + } + if (evt.event === 'subq') { + setStatus(`Retrieving sub-question ${evt.index}/${evt.total}: ${String(evt.question || '').slice(0, 80)}${String(evt.question || '').length > 80 ? '…' : ''}`, 'busy'); + return; + } + if (evt.event === 'final') { + finalResult = evt.result; + return; + } + if (evt.event === 'error') { + errorEvent = evt; + return; + } + } + + while (true) { + let chunk; + try { + chunk = await reader.read(); + } catch (err) { + setStatus(`Stream error: ${err.message || err}`, 'error'); + els.runButton.disabled = false; + return; + } + const { done, value } = chunk; + if (value) { + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split('\n'); + buffer = lines.pop(); + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) continue; + let evt; + try { evt = JSON.parse(trimmed); } catch (_) { continue; } + handleStreamEvent(evt); + } + } + if (done) break; + } + + if (errorEvent) { + setStatus(`${errorEvent.code}: ${errorEvent.message}`, 'error'); + els.runButton.disabled = false; + const runningIdx = stepState.findIndex((s) => s.status === 'running'); + if (runningIdx >= 0) { + stepState[runningIdx] = { 
...stepState[runningIdx], status: 'error', detail: errorEvent.message }; + renderTrace(stepState); + } + return; + } + + if (!finalResult) { + setStatus('Stream ended without a final result.', 'error'); + els.runButton.disabled = false; + return; + } + + lastResult = finalResult; + const meta = finalResult.trace_metadata || {}; + setStatus( + `Done in ${Math.round((finalResult.latency_ms || 0) / 1000)} s · ${meta.source_count || 0} sources · confidence ${meta.citation_confidence || '?'}`, + 'ok' + ); + els.runButton.disabled = false; + renderTrace(finalResult.trace || []); + renderFinalResults(finalResult); + } + + // ── Progressive rendering (renders as stream events arrive) ──────────────── + + function ensureResultsReady() { + // If the empty-state is still shown, clear it for progressive inserts + const emptyState = els.results.querySelector('.empty-state'); + if (emptyState) emptyState.remove(); + } + + function renderDocMetaIntoResults(meta) { + ensureResultsReady(); + const existing = els.results.querySelector('#bvjDocMetaSection'); + if (existing) existing.remove(); + + const docType = meta.doc_type || 'BVJ Document'; + const docDate = meta.doc_date || ''; + const authority = meta.issuing_authority || ''; + const refNo = meta.reference_number || ''; + const childInfo = meta.child_info || ''; + + const fields = [ + docDate ? ['Date', docDate] : null, + authority ? ['Issuing authority', authority] : null, + refNo ? ['Reference', refNo] : null, + childInfo ? ['Child', childInfo] : null, + ].filter(Boolean); + + const section = document.createElement('div'); + section.id = 'bvjDocMetaSection'; + section.className = 'bvj-doc-meta'; + section.innerHTML = ` +
    + ${escapeHtml(authority || docType)} + ${escapeHtml(docType)} +
    + ${fields.length ? `
    + ${fields.map(([k, v]) => `${escapeHtml(k)}: ${escapeHtml(String(v))}`).join('')} +
    ` : ''} + `; + els.results.insertBefore(section, els.results.firstChild); + } + + function renderPartiesIntoResults(parties) { + ensureResultsReady(); + const existing = els.results.querySelector('#bvjPartiesSection'); + if (existing) existing.remove(); + if (!parties.length) return; + + const roleClass = (role) => { + const r = (role || '').toLowerCase(); + if (r.includes('bvv') || r.includes('barnevern') || r.includes('saksbehandler') || r.includes('casework') || r.includes('melder')) return 'bvj-party-role--bvv'; + if (r.includes('mother') || r.includes('mor') || r.includes('father') || r.includes('far') || r.includes('parent') || r.includes('foreldre') || r.includes('foster')) return 'bvj-party-role--parent'; + if (r.includes('child') || r.includes('barn')) return 'bvj-party-role--child'; + if (r.includes('third') || r.includes('tredje') || r.includes('politi') || r.includes('police')) return 'bvj-party-role--third'; + return 'bvj-party-role--other'; + }; + + const section = document.createElement('div'); + section.id = 'bvjPartiesSection'; + section.className = 'dr-result-block'; + section.innerHTML = ` +

    Parties identified (${parties.length})

    +
    + ${parties.map((p) => ` +
    + ${escapeHtml(p.role || 'Unknown')} +
    ${escapeHtml(p.name || '—')}
    + ${p.organization ? `
    ${escapeHtml(p.organization)}
    ` : ''} + ${p.relationship_to_child ? `
    ${escapeHtml(p.relationship_to_child)}
    ` : ''} +
    + `).join('')} +
    + `; + // Insert after doc meta + const docMeta = els.results.querySelector('#bvjDocMetaSection'); + if (docMeta && docMeta.nextSibling) { + els.results.insertBefore(section, docMeta.nextSibling); + } else { + els.results.appendChild(section); + } + } + + function renderTimelineIntoResults(events) { + ensureResultsReady(); + const existing = els.results.querySelector('#bvjTimelineSection'); + if (existing) existing.remove(); + if (!events.length) return; + + const sigClass = (sig) => `bvj-timeline-event--${sig === 'high' ? 'high' : (sig === 'medium' ? 'medium' : 'low')}`; + + const section = document.createElement('div'); + section.id = 'bvjTimelineSection'; + section.className = 'dr-result-block'; + section.innerHTML = ` +

    Timeline (${events.length} events)

    +
    + ${events.map((ev) => { + const sig = ev.significance || 'low'; + const timeStr = ev.time_of_day ? `
    ${escapeHtml(ev.time_of_day)}` : ''; + return `
    +
    ${escapeHtml(ev.date || '?')}${timeStr}
    +
    +
    ${escapeHtml(ev.actor || '')}
    +
    ${escapeHtml(ev.action || '')}
    +
    +
    `; + }).join('')} +
    + `; + // Insert after parties section (or doc meta if no parties) + const parties = els.results.querySelector('#bvjPartiesSection'); + const docMeta = els.results.querySelector('#bvjDocMetaSection'); + const anchor = parties || docMeta; + if (anchor && anchor.nextSibling) { + els.results.insertBefore(section, anchor.nextSibling); + } else { + els.results.appendChild(section); + } + } + + // ── Final render (after stream completes) ────────────────────────────────── + + function renderFinalResults(data) { + const sources = data.sources || []; + const subs = data.sub_questions || []; + const role = data.advocate_role || ''; + const redFlags = Array.isArray(data.procedural_red_flags) ? data.procedural_red_flags : []; + const strengths = Array.isArray(data.client_strengths) ? data.client_strengths : []; + const weaknesses = Array.isArray(data.opposing_weaknesses) ? data.opposing_weaknesses : []; + + // Remove any previously rendered progressive sections (will be re-inserted in order below) + const toRemove = ['#bvjDocMetaSection', '#bvjPartiesSection', '#bvjTimelineSection']; + toRemove.forEach((sel) => els.results.querySelector(sel)?.remove()); + + // Rebuild progressive sections from final data (authoritative) + const docMeta = data.doc_meta || {}; + const parties = data.parties || []; + const timeline = data.timeline || {}; + + // Re-render progressive sections now that we have final data + renderDocMetaIntoResults(docMeta); + if (parties.length) renderPartiesIntoResults(parties); + if ((timeline.events || []).length) renderTimelineIntoResults(timeline.events); + + // 4. Advocate banner + const bannerHtml = role ? ` +
    + Representing + ${escapeHtml(role)} +
    ` : ''; + + // 5. Client strengths + const strengthsHtml = strengths.length ? ` +
    +

    Your strongest arguments

    + +
    ` : ''; + + // 6. Advocacy brief + const briefHtml = renderBrief(data.advocacy_brief || '', sources); + + // 7. Procedural red flags + const redFlagsHtml = redFlags.length ? ` +
    +

    Procedural red flags (${redFlags.length})

    +
    + ${redFlags.map((f) => renderRedFlag(f, sources)).join('')} +
    +
    ` : ''; + + // 8. Opposing weaknesses + const weaknessesHtml = weaknesses.length ? ` +
    +

    Gaps in the opposing position

    + +
    ` : ''; + + // 9. Sub-Q cards + const subQReportsHtml = subs.length ? ` +
    +
    +

    What each sub-question researched

    + ${subs.length} sub-question${subs.length === 1 ? '' : 's'} framed for ${escapeHtml(role || 'your client')} +
    +
    + ${subs.map((sq, i) => renderSubQReport(sq, i)).join('')} +
    +
    ` : ''; + + // 10. Sources + const sourcesHtml = sources.length ? ` +
    +
    +

    All sources (${sources.length})

    + Click a card to see the full source · external link opens the original article +
    +
    + ${sources.map((s) => renderSourceCard(s)).join('')} +
    +
    ` : ''; + + // 11. Uncertainty + next step + const uncertHtml = (data.what_remains_uncertain || []).length ? ` +
    +

    What remains uncertain

    + +
    ` : ''; + + const nextHtml = data.next_practical_step ? ` +
    +

    Next practical step

    +

    ${escapeHtml(data.next_practical_step)}

    +
    ` : ''; + + // Append final sections after the progressive sections + const finalHtml = ` + ${bannerHtml} + ${strengthsHtml} +
    +

    Advocacy brief

    +
    ${briefHtml}
    +
    + ${redFlagsHtml} + ${weaknessesHtml} + ${subQReportsHtml} + ${sourcesHtml} + ${uncertHtml} + ${nextHtml} + `; + + // Append to results (after the progressive sections already in place) + const finalContainer = document.createElement('div'); + finalContainer.innerHTML = finalHtml; + while (finalContainer.firstChild) { + els.results.appendChild(finalContainer.firstChild); + } + + // Bind source card clicks + els.results.querySelectorAll('.dr-source-card[data-source-n]').forEach((node) => { + node.addEventListener('click', (e) => { + if (e.target.closest('a')) return; + const n = parseInt(node.dataset.sourceN, 10); + const src = sources.find((s) => s.n === n); + if (src) { openModal(src); flashSource(n); } + }); + }); + els.results.querySelectorAll('.dr-cite[data-source-n]').forEach((node) => { + node.addEventListener('click', (e) => { + if (e.target.closest('a')) return; + flashSource(parseInt(node.dataset.sourceN, 10)); + }); + }); + } + + // ── Component renderers ──────────────────────────────────────────────────── + + function renderRedFlag(flag, sources) { + const severity = flag.severity || 'low'; + const sevClass = `bvj-severity-${severity}`; + const legal = flag.legal_basis || ''; + const what = flag.what_to_check || ''; + return `
    +
    +
    ${renderInlineCitations(escapeHtml(flag.description || ''), sources)}
    + ${escapeHtml(severity)} +
    + ${legal ? `${escapeHtml(legal)}` : ''} + ${what ? `
    What to verify

    ${escapeHtml(what)}

    ` : ''} +
    `; + } + + function renderSubQReport(sq, idx) { + const top = sq.top_sources || []; + const sourceItems = top.length + ? top.map((s) => { + const link = s.deep_link || s.source_url; + const titleHtml = link + ? `${escapeHtml(s.title || 'Untitled')} ` + : `${escapeHtml(s.title || 'Untitled')}`; + const meta = []; + if (s.section) meta.push(escapeHtml(s.section)); + if (s.authority_label) meta.push(escapeHtml(s.authority_label)); + if (s.source_origin === 'upload') meta.push('your upload'); + return `
  • + [${s.n ?? '?'}] +
    + ${titleHtml} + ${meta.length ? `
    ${meta.join(' · ')}
    ` : ''} +
    ${escapeHtml(truncate(s.excerpt || '', 180))}
    +
    +
  • `; + }).join('') + : `
  • No sources retrieved for this sub-question.
  • `; + + return `
    +
    + ${escapeHtml(sq.id || ('q' + (idx + 1)))} +
    +
    ${escapeHtml(sq.question || '')}
    + ${sq.rationale ? `
    ${escapeHtml(sq.rationale)}
    ` : ''} +
    + +
    + +
    `; + } + + function renderSourceCard(s) { + const score = s.reranker_score != null ? s.reranker_score : s.similarity; + const originTagClass = s.source_origin === 'upload' ? 'dr-source-tag dr-source-tag--upload' : 'dr-source-tag'; + const originLabel = s.source_origin === 'upload' ? 'upload' : 'corpus'; + const link = s.deep_link || s.source_url; + const titleHtml = link + ? `${escapeHtml(s.title || 'Untitled')} ` + : `${escapeHtml(s.title || 'Untitled')}`; + return `
    + ${s.n} +
    +
    ${titleHtml}
    + ${s.section ? `
    ${escapeHtml(s.section)}
    ` : ''} +
    + ${originLabel} + ${s.authority_label ? `${escapeHtml(s.authority_label)}` : ''} + ${escapeHtml(s.package_or_corpus || '—')} + ${(s.matched_sub_questions || []).map((q) => `${escapeHtml(q)}`).join('')} +
    +

    ${escapeHtml(truncate(s.excerpt || '', 240))}

    +
    +
    + score
    ${score != null ? Number(score).toFixed(2) : '—'}
    + ${s.reranker_score != null && s.similarity != null ? `sim
    ${Number(s.similarity).toFixed(2)}
    ` : ''} +
    +
    `; + } + + // ── Trace rendering ──────────────────────────────────────────────────────── + + function renderTrace(steps) { + if (!els.traceList) return; + els.traceList.classList.add('is-rich'); + els.traceList.innerHTML = steps.map((step, i) => { + const statusClass = step.status === 'running' ? 'is-running' + : step.status === 'complete' ? 'is-done' + : step.status === 'warning' ? 'is-warning' + : step.status === 'error' ? 'is-error' + : ''; + const marker = step.status === 'complete' ? '✓' + : step.status === 'warning' ? '!' + : step.status === 'error' ? '×' + : (i + 1); + return `
  • + ${marker} +
    + ${escapeHtml(step.label || '')} + ${escapeHtml(step.detail || '')} +
    +
  • `; + }).join(''); + } + + // ── Utility ──────────────────────────────────────────────────────────────── + + function setStatus(message, kind) { + els.status.textContent = message; + els.status.style.color = kind === 'error' ? '#b41e1e' : (kind === 'ok' ? 'var(--teal-dark)' : 'var(--muted)'); + } + + function flashSource(n) { + document.querySelectorAll('.dr-source-card.is-highlight').forEach((c) => c.classList.remove('is-highlight')); + const target = document.querySelector(`.dr-source-card[data-source-n="${n}"]`); + if (target) { + target.classList.add('is-highlight'); + target.scrollIntoView({ behavior: 'smooth', block: 'center' }); + setTimeout(() => target.classList.remove('is-highlight'), 1800); + } + } + + function renderBrief(markdown, sources) { + if (!markdown) return '

    No brief was returned.

    '; + const escaped = escapeHtml(markdown); + const withCites = escaped.replace(/\[(\d+(?:\s*[-,]\s*\d+)*)\]/g, (_, group) => { + const nums = expandCiteGroup(group); + return nums.map((n) => `${n}`).join(''); + }); + // Also mark [DOC] references + const withDoc = withCites.replace(/\[DOC\]/g, 'DOC'); + const withBold = withDoc + .replace(/\*\*([^*]+)\*\*/g, '$1') + .replace(/(^|[^*])\*([^*]+)\*(?!\*)/g, '$1$2') + .replace(/`([^`]+)`/g, '$1'); + const paragraphs = withBold.split(/\n{2,}/).map((p) => { + const t = p.trim(); + if (!t) return ''; + if (/^## /.test(t)) return `

    ${t.replace(/^## /, '')}

    `; + if (/^### /.test(t)) return `

    ${t.replace(/^### /, '')}

    `; + return `

    ${t.replace(/\n/g, '
    ')}

    `; + }).join(''); + return paragraphs; + } + + function renderInlineCitations(escapedHtml, sources) { + return escapedHtml.replace(/\[(\d+(?:\s*[-,]\s*\d+)*)\]/g, (_, group) => { + const nums = expandCiteGroup(group); + return nums.map((n) => `${n}`).join(''); + }); + } + + function expandCiteGroup(group) { + const out = []; + group.split(',').forEach((part) => { + const range = part.trim().match(/^(\d+)\s*-\s*(\d+)$/); + if (range) { + const a = parseInt(range[1], 10); + const b = parseInt(range[2], 10); + for (let i = a; i <= b; i++) out.push(i); + } else { + const n = parseInt(part.trim(), 10); + if (!Number.isNaN(n)) out.push(n); + } + }); + return Array.from(new Set(out)); + } + + function escapeHtml(s) { + return String(s) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); + } + + function truncate(s, n) { + if (!s) return ''; + if (s.length <= n) return s; + return s.slice(0, n - 1) + '…'; + } +})(); diff --git a/barnevernet.php b/barnevernet.php new file mode 100644 index 0000000..361ee46 --- /dev/null +++ b/barnevernet.php @@ -0,0 +1,227 @@ + +
    + +
    + + +
    + + +
    + + + +

    The agent will analyse the document from your perspective — identifying supporting statutes, procedural red flags, and ECHR arguments for your position.

    +
    + +
    + Engine + + + + +
    +

    Engine applies to the final advocacy synthesis only. Document classification, party extraction, and timeline are always fast (azure-mini). Norwegian specialist is best for Barnevernloven, ECHR Article 8, and Bufdir analysis.

    + +
    +

    Corpus slices

    +

    Child Welfare, ECHR, Family Law Core, and Bufdir Guidance are on by default — these cover the core Barnevernet legal framework. Enable Norwegian Courts for case law.

    +
    + + + + + + + + +
    +
    + +
    + Advanced controls +
    +
    + + + Legal angles generated to search the corpus (each supports your position). +
    +
    + + + Corpus chunks retrieved per sub-question. +
    +
    + + + Minimum similarity for upload chunks to be included. +
    +
    + + + Top sources kept after dedupe + rerank for synthesis. +
    +
    + + + Keep low for grounded legal analysis. +
    +
    +
    + + +
    + +
    + +

    Drop BVJ document(s) here, or

    +

    At least 1 file required. PDF, DOCX, TXT — up to 5 files — processed in memory only, never stored.

    +
    + +
    + + + + + + + + +
    + +
    +
    +

    Ready

    +

    Upload a Barnevernet document (bekymringsmelding, vedtak, rapport), select who you represent, and run. The agent will extract the timeline and parties, search the legal corpus, and produce a partisan advocacy brief with procedural red flags.

    +
    +
    + + + + + + + + + + + + + + diff --git a/includes/BvjAnalyzerAgent.php b/includes/BvjAnalyzerAgent.php new file mode 100644 index 0000000..7eb0d61 --- /dev/null +++ b/includes/BvjAnalyzerAgent.php @@ -0,0 +1,1213 @@ +azure = $azure ?: new DbnAzureOpenAiGateway(); + } + + /** + * Main pipeline. At least 1 uploaded file is required. + * + * @param array $uploadedFiles [{filename, text, chars, truncated}] + * @param string $advocateRole Party the user represents + * @param string $engine Affects synthesis only: azure_mini|azure_full|gpu|dbn_legal + * @param string $language 'en' or 'no' + * @param array $sliceSelection Corpus slice toggles + * @param array $controls sub_q_count, chunk_limit, similarity_threshold, reranker_top_k, temperature + * @param string $additionalNotes Optional user context to supplement the document + * @param callable|null $emit function(string $event, array $payload): void + */ + public function run( + array $uploadedFiles, + string $advocateRole, + string $engine, + string $language, + array $sliceSelection, + array $controls, + string $additionalNotes = '', + ?callable $emit = null + ): array { + $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu', 'dbn_legal'], true) + ? $engine : 'azure_mini'; + $language = in_array($language, ['en', 'no'], true) ? $language : 'en'; + $controls = $this->normalizeControls($controls); + + if (empty($uploadedFiles)) { + dbnToolsAbort('Upload at least one BVJ document before running the analyzer.', 422, 'no_uploads'); + } + + $client = dbnToolsRequireClient(); + $package = $this->requireFamilyPackage((int)$client['id']); + + dbnToolsBootCaveau(); + $aiPortalRoot = dbnToolsAiPortalRoot(); + require_once $aiPortalRoot . 
'/platform/includes/dbn_v6.php'; + + $this->uploadVecs = []; + $this->stepTimings = []; + $trace = []; + + $emitStep = function (string $stepId, string $label, string $detail, string $status) + use (&$trace, $emit): void { + $trace[] = $this->trace($label, $detail, $status); + if ($emit) { + $emit('step', ['step' => $stepId, 'label' => $label, 'detail' => $detail, 'status' => $status]); + } + }; + $emitRunning = function (string $stepId, string $label, string $detail = 'Running…') use ($emit): void { + if ($emit) { + $emit('step', ['step' => $stepId, 'label' => $label, 'detail' => $detail, 'status' => 'running']); + } + }; + + // Build combined document text (first file is primary; additional files appended) + $docText = ''; + foreach ($uploadedFiles as $idx => $file) { + $text = mb_substr((string)($file['text'] ?? ''), 0, self::MAX_DOC_CHARS, 'UTF-8'); + if ($text === '') continue; + $filename = (string)($file['filename'] ?? sprintf('document-%d', $idx + 1)); + $docText .= ($docText !== '' ? "\n\n--- Document: {$filename} ---\n\n" : '') . $text; + } + if ($docText === '') { + dbnToolsAbort('Could not extract text from the uploaded file(s).', 422, 'empty_document'); + } + $docText = mb_substr($docText, 0, self::MAX_DOC_CHARS * 2, 'UTF-8'); + + // ── STEP 1: Document classification ──────────────────────────────────── + $emitRunning('doc_classify', 'Document classification', 'Classifying document and extracting metadata…'); + $stepStart = microtime(true); + $docMeta = $this->classifyDocument($docText, $language); + $this->stepTimings['doc_classify'] = $this->elapsedMs($stepStart); + if ($emit) { + $emit('doc_meta', ['result' => $docMeta]); + } + $docTypeBadge = $docMeta['doc_type'] ?? 'BVJ Document'; + $refStr = $docMeta['reference_number'] ? ' · ref ' . $docMeta['reference_number'] : ''; + $authStr = $docMeta['issuing_authority'] ? 
$docMeta['issuing_authority'] : ''; + $emitStep('doc_classify', 'Document classification', + trim("{$docTypeBadge} · {$authStr}{$refStr}"), 'complete'); + + // ── STEP 2: Party extraction ──────────────────────────────────────────── + $emitRunning('party_extract', 'Party extraction', 'Identifying all named parties and their roles…'); + $stepStart = microtime(true); + $parties = $this->extractParties($docText, $language); + $this->stepTimings['party_extract'] = $this->elapsedMs($stepStart); + if ($emit) { + $emit('parties', ['parties' => $parties]); + } + $emitStep('party_extract', 'Party extraction', + sprintf('%d %s identified.', count($parties), count($parties) === 1 ? 'party' : 'parties'), + 'complete'); + + // ── STEP 3: Timeline extraction ───────────────────────────────────────── + $emitRunning('timeline_extract', 'Timeline extraction', 'Building chronological event timeline…'); + $stepStart = microtime(true); + $timelineEvents = $this->extractTimeline($docText, $language); + $this->stepTimings['timeline_extract'] = $this->elapsedMs($stepStart); + if ($emit) { + $emit('timeline', ['events' => $timelineEvents]); + } + $highCount = count(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? 
'') === 'high')); + $emitStep('timeline_extract', 'Timeline extraction', + sprintf('%d events extracted (%d high-significance).', count($timelineEvents), $highCount), + 'complete'); + + // ── STEP 4: Sub-question generation ──────────────────────────────────── + $emitRunning('sub_question_gen', 'Sub-question generation', + sprintf('Generating %d research angles for %s…', $controls['sub_q_count'], $advocateRole ?: 'selected role')); + $stepStart = microtime(true); + $subQuestions = $this->generateSubQuestions( + $docMeta, $parties, $timelineEvents, + $advocateRole, $controls['sub_q_count'], $language + ); + $this->stepTimings['sub_question_gen'] = $this->elapsedMs($stepStart); + $emitStep('sub_question_gen', 'Sub-question generation', + sprintf('%d sub-questions generated for %s.', count($subQuestions), $advocateRole ?: 'selected role'), + 'complete'); + + // ── STEP 5: Slice resolution + upload indexing + corpus retrieval ─────── + $emitRunning('slice_resolution', 'Slice resolution', 'Resolving corpus slice toggles…'); + $stepStart = microtime(true); + $sliceSelectionNormalized = dbnV6NormalizeSliceSelection($sliceSelection); + if (!array_filter($sliceSelectionNormalized)) { + dbnToolsAbort('Enable at least one corpus slice before running the analyzer.', 422, 'no_slices'); + } + $ragDb = dbnToolsRagDb(); + try { + $sharedDocIds = dbnV6ResolveSelectedDocIds($ragDb, $sliceSelectionNormalized); + $sliceDetail = sprintf('%d slice(s) active → %d candidate documents.', + count(array_filter($sliceSelectionNormalized)), count($sharedDocIds)); + $sliceStatus = 'complete'; + } catch (Throwable $e) { + error_log('BVJ slice resolve failed: ' . 
$e->getMessage()); + $sharedDocIds = []; + $sliceDetail = 'Slice resolution failed; corpus search will run unconstrained.'; + $sliceStatus = 'warning'; + } + $this->stepTimings['slice_resolution'] = $this->elapsedMs($stepStart); + $emitStep('slice_resolution', 'Slice resolution', $sliceDetail, $sliceStatus); + + // Upload indexing + $emitRunning('upload_indexing', 'Upload indexing', + sprintf('Chunking + embedding %d file(s)…', count($uploadedFiles))); + $stepStart = microtime(true); + $uploadChunks = []; + foreach ($uploadedFiles as $idx => $file) { + $filename = (string)($file['filename'] ?? sprintf('upload-%d', $idx + 1)); + $text = mb_substr((string)($file['text'] ?? ''), 0, self::MAX_DOC_CHARS, 'UTF-8'); + $uploadChunks = array_merge($uploadChunks, $this->splitIntoChunks($text, $filename, $idx)); + } + $uploadStatus = 'complete'; + $uploadDetail = sprintf('%d file(s) → %d in-memory chunks indexed.', count($uploadedFiles), count($uploadChunks)); + if ($uploadChunks) { + try { + $texts = array_map(fn(array $c) => $c['text'], $uploadChunks); + $allVecs = []; + $batchSz = 5; + for ($b = 0; $b < count($texts); $b += $batchSz) { + $batch = array_slice($texts, $b, $batchSz); + if ($emit) { + $emit('progress', ['detail' => sprintf( + 'Embedding chunks %d–%d of %d…', + $b + 1, $b + count($batch), count($texts) + )]); + } + $allVecs = array_merge($allVecs, dbnToolsLiteLLMEmbedBatch($batch)); + } + if (count($allVecs) === count($uploadChunks)) { + foreach ($uploadChunks as $i => $chunk) { + $this->uploadVecs[] = ['meta' => $chunk, 'vec' => $allVecs[$i]]; + } + } else { + $uploadStatus = 'warning'; + $uploadDetail = 'Upload embedding count mismatch; uploaded chunks will not participate in retrieval.'; + } + } catch (Throwable $e) { + error_log('BVJ upload embed failed: ' . 
$e->getMessage()); + $uploadStatus = 'warning'; + $uploadDetail = 'Upload embedding timed out; corpus-only retrieval will run.'; + $this->uploadVecs = []; + } + } + $this->stepTimings['upload_indexing'] = $this->elapsedMs($stepStart); + $emitStep('upload_indexing', 'Upload indexing', $uploadDetail, $uploadStatus); + + // Corpus retrieval (per sub-question) + $retrievalQueries = $subQuestions ?: [[ + 'id' => 'q1', + 'question' => sprintf('%s case involving %s', $docMeta['doc_type'] ?? 'BVJ document', $advocateRole), + 'rationale' => 'Fallback query (sub-question generation returned empty).', + ]]; + $emitRunning('retrieval', 'Corpus retrieval', + sprintf('Hybrid vector + keyword across %d sub-question(s)…', count($retrievalQueries))); + $stepStart = microtime(true); + + try { + $rag = new ClientRagPipeline((int)$client['id'], 'http://10.0.1.10:4000', 60); + } catch (Throwable $e) { + dbnToolsAbort('Could not initialise the retrieval pipeline.', 503, 'rag_init_failed'); + } + + $rawPool = []; + $retrievalWarnings = 0; + $rawCorpusCount = 0; + $rawUploadCount = 0; + $filteredOutCount = 0; + + foreach ($retrievalQueries as $idx => $sq) { + if ($emit) { + $emit('subq', [ + 'index' => $idx + 1, + 'total' => count($retrievalQueries), + 'id' => $sq['id'], + 'question' => $sq['question'], + ]); + } + try { + $corpusChunks = $rag->searchAll( + $sq['question'], + $controls['chunk_limit'], + null, + [ + 'search_private' => false, + 'search_shared' => true, + 'package_ids' => [(int)$package['id']], + 'shared_doc_ids' => $sharedDocIds, + 'chunk_limit' => $controls['chunk_limit'], + 'search_method' => 'hybrid', + 'reranker_enabled' => true, + 'include_beta_website' => false, + 'include_primary_website' => false, + ] + ); + } catch (Throwable $e) { + error_log('BVJ sub-Q retrieval failed: ' . 
$e->getMessage()); + $corpusChunks = []; + $retrievalWarnings++; + } + $rawCorpusCount += count($corpusChunks); + foreach ($corpusChunks as $chunk) { + if ($this->shouldExcludeChunk($chunk, $sliceSelectionNormalized)) { + $filteredOutCount++; + continue; + } + $rawPool[] = $this->normalizeCorpusChunk($chunk, $sq['id']); + } + if (!empty($this->uploadVecs)) { + $uploadHits = $this->retrieveFromUploads( + $sq['question'], $controls['chunk_limit'], $controls['similarity_threshold'] + ); + $rawUploadCount += count($uploadHits); + foreach ($uploadHits as $hit) { + $hit['matched_sub_questions'] = [$sq['id']]; + $rawPool[] = $hit; + } + } + } + + $merged = $this->mergeAndDedupe($rawPool, self::POOL_CAP); + $this->stepTimings['retrieval'] = $this->elapsedMs($stepStart); + $retrievalStatus = $retrievalWarnings > 0 ? 'warning' : 'complete'; + $retrievalDetail = sprintf( + '%d sub-Q(s) × hybrid → %d corpus (%d filtered) + %d upload → %d unique after dedupe.', + count($retrievalQueries), $rawCorpusCount, $filteredOutCount, $rawUploadCount, count($merged) + ); + $emitStep('retrieval', 'Corpus retrieval', $retrievalDetail, $retrievalStatus); + + $synthesisPool = array_slice($merged, 0, $controls['reranker_top_k']); + $this->hydrateSourceUrls($synthesisPool); + $numberedSources = $this->numberSources($synthesisPool); + + // Generate upload summaries for sources from uploaded files + if (!empty($uploadedFiles) && !empty($numberedSources)) { + $uploadSummaries = []; + foreach ($uploadedFiles as $idx => $file) { + $text = mb_substr((string)($file['text'] ?? ''), 0, 4000, 'UTF-8'); + $filename = (string)($file['filename'] ?? "file-{$idx}"); + if ($text === '') continue; + try { + $raw = $this->azure->chatText([ + ['role' => 'system', 'content' => 'Return only a concise 3-4 sentence summary. 
No preamble.'], + ['role' => 'user', 'content' => "Summarise this BVJ document for a legal researcher.\n\nFilename: {$filename}\n\nContent:\n{$text}"], + ], ['temperature' => 0.1, 'max_tokens' => 200, 'timeout' => 25]); + $uploadSummaries[$idx] = trim($raw); + } catch (Throwable $e) { + error_log('BVJ upload summary gen failed for file ' . $idx . ': ' . $e->getMessage()); + $uploadSummaries[$idx] = null; + } + } + foreach ($numberedSources as &$src) { + if (($src['source_origin'] ?? '') !== 'upload') continue; + if (preg_match('/^upload:(\d+):/', (string)($src['chunk_id'] ?? ''), $m)) { + $src['summary'] = $uploadSummaries[(int)$m[1]] ?? null; + } + } + unset($src); + } + + $retrievalCounts = [ + 'raw_corpus' => $rawCorpusCount, + 'filtered' => $filteredOutCount, + 'raw_upload' => $rawUploadCount, + 'after_dedupe' => count($merged), + 'after_topk' => count($numberedSources), + ]; + + // ── STEP 6: Synthesis ─────────────────────────────────────────────────── + $engineLabel = match ($engine) { + 'azure_full' => 'Azure gpt-4o', + 'gpu' => 'GPU qwen2.5:14b', + 'dbn_legal' => 'dbn-legal-agent', + default => 'Azure gpt-4o-mini', + }; + $emitRunning('synthesis', 'Synthesis', + sprintf('Synthesising advocacy brief with %s…', $engineLabel)); + $stepStart = microtime(true); + $synthesis = $this->synthesiseBvj( + $docText, $docMeta, $parties, $timelineEvents, + $subQuestions, $numberedSources, + $advocateRole, $engine, $language, $controls['temperature'], $additionalNotes + ); + $this->stepTimings['synthesis'] = $this->elapsedMs($stepStart); + $emitStep('synthesis', 'Synthesis', + sprintf('%s synthesised advocacy brief using %d source(s) + document.', + $synthesis['deploy_label'], count($numberedSources)), + 'complete'); + + // ── STEP 7: Confidence ────────────────────────────────────────────────── + $confidence = $this->citationConfidence($numberedSources); + $emitStep('confidence', 'Citation confidence', + sprintf('%s confidence based on %d source(s).', 
ucfirst($confidence), count($numberedSources)), + $confidence === 'low' ? 'warning' : 'complete'); + + // Build sub-question output with top_sources + $subQOut = []; + foreach ($retrievalQueries as $sq) { + $matchedChunks = array_values(array_filter( + $numberedSources, + fn(array $s) => in_array($sq['id'], $s['matched_sub_questions'] ?? [], true) + )); + $topSources = array_slice($matchedChunks, 0, 3); + $subQOut[] = [ + 'id' => $sq['id'], + 'question' => $sq['question'], + 'rationale' => $sq['rationale'] ?? '', + 'chunk_ids' => array_values(array_map(fn(array $s) => $s['chunk_id'], $matchedChunks)), + 'top_sources' => array_map(fn(array $s) => [ + 'n' => $s['n'] ?? null, + 'title' => $s['title'] ?? '', + 'section' => $s['section'] ?? null, + 'deep_link' => $s['deep_link'] ?? $s['source_url'] ?? null, + 'source_url' => $s['source_url'] ?? null, + 'source_origin' => $s['source_origin'] ?? 'corpus', + 'authority_label' => $s['authority_label'] ?? null, + 'excerpt' => $s['excerpt'] ?? '', + ], $topSources), + ]; + } + + $synJson = $synthesis['json']; + return [ + 'tool' => 'bvj_analyzer', + 'language' => $language, + 'advocate_role' => $advocateRole, + 'doc_meta' => $docMeta, + 'parties' => $parties, + 'timeline' => ['events' => $timelineEvents], + 'advocacy_brief' => (string)($synJson['advocacy_brief'] ?? ''), + 'procedural_red_flags' => is_array($synJson['procedural_red_flags'] ?? null) + ? $synJson['procedural_red_flags'] : [], + 'client_strengths' => is_array($synJson['client_strengths'] ?? null) + ? $synJson['client_strengths'] : [], + 'opposing_weaknesses' => is_array($synJson['opposing_weaknesses'] ?? null) + ? $synJson['opposing_weaknesses'] : [], + 'sub_questions' => $subQOut, + 'sources' => $numberedSources, + 'what_we_found' => (string)($synJson['what_we_found'] ?? ''), + 'what_remains_uncertain' => $synJson['what_remains_uncertain'] ?? [], + 'next_practical_step' => (string)($synJson['next_practical_step'] ?? 
''), + 'trace' => $trace, + 'trace_metadata' => [ + 'chunk_count' => count($merged), + 'source_count' => count($numberedSources), + 'sub_question_count' => count($retrievalQueries), + 'upload_chunk_count' => count($this->uploadVecs), + 'deployment' => $synthesis['deploy_label'], + 'engine_used' => $engine, + 'citation_confidence' => $confidence, + 'elapsed_ms_per_step' => $this->stepTimings, + 'retrieval_counts' => $retrievalCounts, + 'slices_active' => array_keys(array_filter($sliceSelectionNormalized)), + ], + 'disclaimer' => dbnToolsDisclaimer($language), + ]; + } + + // ── Step 1: Document classification ────────────────────────────────────── + + private function classifyDocument(string $docText, string $language): array + { + $locale = $language === 'no' ? 'Norwegian' : 'English'; + $excerpt = mb_substr($docText, 0, 6000, 'UTF-8'); + + $prompt = << 'BVJ Document', + 'doc_date' => null, + 'issuing_authority' => null, + 'reference_number' => null, + 'child_info' => null, + ]; + + try { + $raw = $this->azure->chatText([ + ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'], + ['role' => 'user', 'content' => $prompt], + ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 400, 'timeout' => 30]); + $json = $this->azure->decodeJsonObject($raw); + if (is_array($json)) { + return array_merge($default, array_filter($json, fn($v) => $v !== null && $v !== '')); + } + } catch (Throwable $e) { + error_log('BVJ classifyDocument failed: ' . $e->getMessage()); + } + return $default; + } + + // ── Step 2: Party extraction ────────────────────────────────────────────── + + private function extractParties(string $docText, string $language): array + { + $locale = $language === 'no' ? 'Norwegian' : 'English'; + $excerpt = mb_substr($docText, 0, 8000, 'UTF-8'); + + $prompt = <<azure->chatText([ + ['role' => 'system', 'content' => 'You return valid JSON only. 
No markdown fences.'], + ['role' => 'user', 'content' => $prompt], + ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 1200, 'timeout' => 35]); + $json = $this->azure->decodeJsonObject($raw); + if (is_array($json) && is_array($json['parties'] ?? null)) { + return array_slice($json['parties'], 0, 20); + } + } catch (Throwable $e) { + error_log('BVJ extractParties failed: ' . $e->getMessage()); + } + return []; + } + + // ── Step 3: Timeline extraction ─────────────────────────────────────────── + + private function extractTimeline(string $docText, string $language): array + { + $locale = $language === 'no' ? 'Norwegian' : 'English'; + $excerpt = mb_substr($docText, 0, 12000, 'UTF-8'); + + $prompt = <<azure->chatText([ + ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'], + ['role' => 'user', 'content' => $prompt], + ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 3000, 'timeout' => 45]); + $json = $this->azure->decodeJsonObject($raw); + if (is_array($json) && is_array($json['events'] ?? null)) { + return array_slice($json['events'], 0, 30); + } + } catch (Throwable $e) { + error_log('BVJ extractTimeline failed: ' . $e->getMessage()); + } + return []; + } + + // ── Step 4: Sub-question generation ────────────────────────────────────── + + private function generateSubQuestions( + array $docMeta, + array $parties, + array $timelineEvents, + string $advocateRole, + int $count, + string $language + ): array { + $locale = $language === 'no' ? 'Norwegian' : 'English'; + $docType = $docMeta['doc_type'] ?? 'BVJ document'; + $roleStr = $advocateRole !== '' ? $advocateRole : 'the affected party'; + + // Summarise the top events to give the model context + $eventSummary = ''; + $highEvents = array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') === 'high'); + $topEvents = array_slice(array_merge(array_values($highEvents), + array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? 
'') !== 'high'))), 0, 8); + foreach ($topEvents as $ev) { + $eventSummary .= sprintf("- %s: %s (%s)\n", $ev['date'] ?? '?', $ev['action'] ?? '', $ev['actor'] ?? ''); + } + + // Summarise parties + $partyList = ''; + foreach (array_slice($parties, 0, 8) as $p) { + $partyList .= sprintf("- %s (%s)\n", $p['name'] ?? '', $p['role'] ?? ''); + } + + $prompt = <<azure->chatText([ + ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'], + ['role' => 'user', 'content' => $prompt], + ], ['json' => true, 'temperature' => 0.15, 'max_tokens' => 1000, 'timeout' => 40]); + $json = $this->azure->decodeJsonObject($raw); + if (is_array($json) && is_array($json['sub_questions'] ?? null) && count($json['sub_questions']) >= 1) { + $sqs = []; + foreach (array_slice($json['sub_questions'], 0, $count) as $sq) { + if (!empty($sq['id']) && !empty($sq['question'])) { + $sqs[] = [ + 'id' => (string)$sq['id'], + 'question' => (string)$sq['question'], + 'rationale' => (string)($sq['rationale'] ?? ''), + ]; + } + } + if ($sqs) return $sqs; + } + } catch (Throwable $e) { + error_log('BVJ generateSubQuestions failed: ' . 
$e->getMessage()); + } + + // Fallback: generic sub-questions + $role = $advocateRole ?: 'affected party'; + return [ + ['id' => 'q1', 'question' => "What procedural rights does {$role} have in Barnevernet proceedings under Barnevernloven?", 'rationale' => 'Procedural rights'], + ['id' => 'q2', 'question' => "What does ECHR Article 8 require when child welfare authorities intervene in family life?", 'rationale' => 'ECHR Article 8'], + ['id' => 'q3', 'question' => "What Bufdir guidance applies to the proportionality of Barnevernet interventions?", 'rationale' => 'Proportionality'], + ['id' => 'q4', 'question' => "What are the documentation and notice obligations of BVV before taking acute measures?", 'rationale' => 'Documentation obligations'], + ]; + } + + // ── Step 6: Synthesis ───────────────────────────────────────────────────── + + private function synthesiseBvj( + string $docText, + array $docMeta, + array $parties, + array $timelineEvents, + array $subQuestions, + array $numberedSources, + string $advocateRole, + string $engine, + string $language, + float $temperature, + string $additionalNotes + ): array { + $locale = $language === 'no' ? 'Norwegian' : 'English'; + $roleStr = $advocateRole !== '' ? $advocateRole : 'the affected party'; + $docType = $docMeta['doc_type'] ?? 'BVJ Document'; + $docDate = $docMeta['doc_date'] ?? 'unknown date'; + $authority = $docMeta['issuing_authority'] ?? 'unknown authority'; + $refNo = $docMeta['reference_number'] ? ' (ref ' . $docMeta['reference_number'] . ')' : ''; + $childInfo = $docMeta['child_info'] ?? 'not specified'; + $sourceCount = count($numberedSources); + + if (empty($numberedSources)) { + $emptyBrief = $language === 'no' + ? 'Ingen kildetreff ble funnet i korpuset for de valgte skivene og spørsmålene.' 
+ : 'No corpus sources were retrieved for the selected slices and sub-questions.'; + return [ + 'json' => [ + 'advocacy_brief' => $emptyBrief, + 'procedural_red_flags' => [], + 'client_strengths' => [], + 'opposing_weaknesses' => [], + 'what_we_found' => 'No retrieved sources passed the similarity threshold.', + 'what_remains_uncertain' => ['No corpus evidence retrieved — widen slice selection or try different sub-questions.'], + 'next_practical_step' => 'Enable more corpus slices (Norwegian Courts, Bufdir Guidance) and re-run.', + ], + 'deploy_label' => match($engine) { + 'gpu' => 'GPU (cuttlefish)', + 'dbn_legal' => 'dbn-legal-agent', + 'azure_full' => 'gpt-4o', + default => $this->azure->chatDeployment(), + }, + ]; + } + + // Build parties summary (top 8) + $partiesSummary = ''; + foreach (array_slice($parties, 0, 8) as $i => $p) { + $org = $p['organization'] ? ' (' . $p['organization'] . ')' : ''; + $rel = $p['relationship_to_child'] ? ' — rel: ' . $p['relationship_to_child'] : ''; + $partiesSummary .= sprintf("%d. %s — %s%s%s\n", $i + 1, $p['name'] ?? '', $p['role'] ?? '', $org, $rel); + } + + // Build timeline summary (top 15 most significant events) + $highEvents = array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') === 'high')); + $otherEvents = array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') !== 'high')); + $topEvents = array_slice(array_merge($highEvents, $otherEvents), 0, 15); + $timelineSummary = ''; + foreach ($topEvents as $ev) { + $time = $ev['time_of_day'] ? ' kl.' . $ev['time_of_day'] : ''; + $timelineSummary .= sprintf("- %s%s [%s] %s: %s\n", + $ev['date'] ?? '?', $time, + strtoupper($ev['significance'] ?? 'low'), + $ev['actor'] ?? '', $ev['action'] ?? 
''); + } + + // Build sources text + $sourcesContext = []; + foreach ($numberedSources as $s) { + $sourcesContext[] = sprintf( + "[%d] (%s) %s%s\n Corpus: %s\n Authority: %s | Jurisdiction: %s\n Excerpt: %s", + $s['n'], + $s['source_origin'] === 'upload' ? 'uploaded doc' : 'corpus', + $s['title'], + !empty($s['section']) ? ' — ' . $s['section'] : '', + $s['package_or_corpus'], + $s['authority_label'] ?? ($s['authority_type'] ?? 'n/a'), + $s['jurisdiction'] ?? 'n/a', + $s['excerpt'] + ); + } + $sourcesText = implode("\n\n", $sourcesContext); + + // Build sub-question text + $subQText = ''; + if ($subQuestions) { + $subQText = "\nSub-questions researched:\n"; + foreach ($subQuestions as $sq) { + $subQText .= sprintf("- %s: %s\n", $sq['id'], $sq['question']); + } + } + + $notesSection = $additionalNotes !== '' + ? "\n== ADDITIONAL CONTEXT FROM ADVOCATE ==\n{$additionalNotes}\n" + : ''; + + $docExcerpt = mb_substr($docText, 0, 3000, 'UTF-8'); + + $prompt = << 'system', 'content' => 'You return valid JSON only. No markdown fences.'], + ['role' => 'user', 'content' => $prompt], + ]; + $opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3500, 'timeout' => 200]; + + $deployLabel = match ($engine) { + 'gpu' => 'GPU (cuttlefish)', + 'dbn_legal' => 'dbn-legal-agent', + 'azure_full' => 'gpt-4o', + default => $this->azure->chatDeployment(), + }; + + try { + if ($engine === 'dbn_legal') { + $response = dbnToolsCallGpuLlm($messages, array_merge($opts, ['model' => 'dbn-legal-agent', 'timeout' => 200])); + $raw = (string)($response['choices'][0]['message']['content'] ?? ''); + } elseif ($engine === 'gpu') { + $response = dbnToolsCallGpuLlm($messages, $opts); + $raw = (string)($response['choices'][0]['message']['content'] ?? 
''); + } elseif ($engine === 'azure_full') { + $raw = $this->azure->withDeployment('gpt-4o')->chatText($messages, $opts); + } else { + $raw = $this->azure->chatText($messages, $opts); + } + } catch (Throwable $e) { + dbnToolsAbort('Synthesis LLM request failed: ' . $e->getMessage(), 502, 'llm_error'); + } + + $json = $this->azure->decodeJsonObject($raw); + if (!is_array($json) || empty($json['advocacy_brief'])) { + $json = [ + 'advocacy_brief' => $raw, + 'procedural_red_flags' => [], + 'client_strengths' => [], + 'opposing_weaknesses' => [], + 'what_we_found' => 'Synthesis returned non-structured output; rendered as raw markdown.', + 'what_remains_uncertain' => ['Response format could not be validated as structured JSON.'], + 'next_practical_step' => 'Review the brief manually before relying on it.', + ]; + } + + return ['json' => $json, 'deploy_label' => $deployLabel]; + }

    // ── Shared helpers (copied from DbnDeepResearchAgent) ────────────────────

    /**
     * Split upload text into overlapping word windows.
     *
     * Whitespace is collapsed first. Windows are self::CHUNK_WORDS words long
     * and start every (CHUNK_WORDS - CHUNK_OVERLAP_WORDS) words. A window
     * shorter than self::MIN_CHUNK_WORDS is dropped unless it is the very
     * first window, so short documents still yield one chunk.
     *
     * @return array<int, array<string, mixed>> Chunks with chunk_id
     *         ("upload:<file>:<chunk>"), file_index, chunk_index, filename, text.
     */
    private function splitIntoChunks(string $text, string $filename, int $fileIdx): array
    {
        $text = preg_replace('/\s+/u', ' ', trim($text)) ?? '';
        if ($text === '') {
            return [];
        }
        $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY) ?: [];
        if (!$words) {
            return [];
        }

        // Loop-invariant stride, clamped to 1 so an overlap >= window size
        // can never stall the loop.
        $advance = max(1, self::CHUNK_WORDS - self::CHUNK_OVERLAP_WORDS);
        $total = count($words);
        $chunks = [];
        $chunkIdx = 0;

        for ($i = 0; $i < $total; $i += $advance) {
            $slice = array_slice($words, $i, self::CHUNK_WORDS);
            if (count($slice) >= self::MIN_CHUNK_WORDS || $i === 0) {
                $chunks[] = [
                    'chunk_id' => sprintf('upload:%d:%d', $fileIdx, $chunkIdx),
                    'file_index' => $fileIdx,
                    'chunk_index' => $chunkIdx,
                    'filename' => $filename,
                    'text' => implode(' ', $slice),
                ];
                $chunkIdx++;
            }
            // A short window means the end of the text has been reached.
            if (count($slice) < self::CHUNK_WORDS) {
                break;
            }
        }
        return $chunks;
    }

    /**
     * Rank the embedded upload chunks against one sub-question.
     *
     * Embeds the question, scores every entry of $this->uploadVecs by cosine
     * similarity, drops entries below $threshold and returns the best
     * ceil($limitPerSubQ / 2) of them (at least one slot). Embedding failures
     * are logged and produce an empty result rather than an exception.
     *
     * @return array<int, array<string, mixed>> Source rows with source_origin = 'upload'.
     */
    private function retrieveFromUploads(string $question, int $limitPerSubQ, float $threshold): array
    {
        if (empty($this->uploadVecs)) {
            return [];
        }
        try {
            $qVec = dbnToolsLiteLLMEmbedBatch([$question])[0] ?? [];
        } catch (Throwable $e) {
            error_log('BVJ sub-Q embed failed: ' . $e->getMessage());
            return [];
        }
        if (empty($qVec)) {
            return [];
        }

        $scored = [];
        foreach ($this->uploadVecs as $entry) {
            $sim = $this->cosineSim($qVec, $entry['vec']);
            if ($sim < $threshold) {
                continue;
            }
            $scored[] = [
                'chunk_id' => $entry['meta']['chunk_id'],
                'title' => 'uploaded: ' . $entry['meta']['filename'],
                'section' => null,
                'package_or_corpus' => 'Your upload',
                'excerpt' => dbnToolsExcerpt($entry['meta']['text'], 620),
                'chunk_text' => $entry['meta']['text'],
                'similarity' => round($sim, 4),
                'reranker_score' => null,
                'document_id' => null,
                'source_origin' => 'upload',
                'authority_type' => null,
                'jurisdiction' => null,
            ];
        }
        usort($scored, fn(array $a, array $b) => ($b['similarity'] <=> $a['similarity']));
        $keep = (int)ceil($limitPerSubQ / 2);
        return array_slice($scored, 0, max(1, $keep));
    }

    /**
     * Cosine similarity of two numeric vectors.
     *
     * Only the first min(count($a), count($b)) components are compared.
     * Returns 0.0 for empty input or when either vector has zero magnitude.
     */
    private function cosineSim(array $a, array $b): float
    {
        $len = min(count($a), count($b));
        if ($len === 0) {
            return 0.0;
        }
        $dot = $na = $nb = 0.0;
        for ($i = 0; $i < $len; $i++) {
            $x = (float)$a[$i];
            $y = (float)$b[$i];
            $dot += $x * $y;
            $na += $x * $x;
            $nb += $y * $y;
        }
        if ($na === 0.0 || $nb === 0.0) {
            return 0.0;
        }
        return $dot / (sqrt($na) * sqrt($nb));
    }

    /**
     * Map a raw corpus retrieval row onto the common source shape.
     *
     * URL-related fields (source_url, deep_link, authority_label,
     * corpus_source_name, publication_date) start as null here and are filled
     * in by hydrateSourceUrls().
     *
     * @param string $subQId Id of the sub-question that retrieved this chunk.
     */
    private function normalizeCorpusChunk(array $chunk, string $subQId): array
    {
        return [
            'chunk_id' => isset($chunk['id']) ? (int)$chunk['id'] : null,
            'title' => (string)($chunk['document_title'] ?? $chunk['title'] ?? 'Untitled source'),
            'section' => $chunk['section_title'] ?? null,
            'package_or_corpus' => (string)($chunk['source_name'] ?? $chunk['source_type'] ?? 'Do Better Legal'),
            'excerpt' => dbnToolsExcerpt((string)($chunk['content'] ?? ''), 620),
            'chunk_text' => (string)($chunk['content'] ?? ''),
            'similarity' => isset($chunk['similarity']) ? round((float)$chunk['similarity'], 4) : null,
            'reranker_score' => isset($chunk['reranker_score']) ? round((float)$chunk['reranker_score'], 4) : null,
            'document_id' => isset($chunk['document_id']) ? (int)$chunk['document_id'] : null,
            'source_origin' => 'corpus',
            'authority_type' => $chunk['authority_type'] ?? null,
            'jurisdiction' => $chunk['jurisdiction'] ?? null,
            'publication_year' => $chunk['publication_year'] ?? null,
            'source_url' => null,
            'deep_link' => null,
            'authority_label' => null,
            'corpus_source_name' => null,
            'publication_date' => null,
            'matched_sub_questions' => [$subQId],
        ];
    }

    /**
     * Decide whether a raw corpus chunk must be dropped before ranking.
     *
     * EU AI Act material is always excluded. Chunks that look like Do Better
     * Norge website pages are excluded unless the 'dbn_resources' slice is
     * active.
     *
     * @param array $activeSlices Map of slice key => truthy when enabled.
     */
    private function shouldExcludeChunk(array $chunk, array $activeSlices): bool
    {
        // Multibyte-safe lower-casing keeps the strings valid UTF-8 for the
        // /u patterns below.
        $title = mb_strtolower((string)($chunk['document_title'] ?? $chunk['title'] ?? ''), 'UTF-8');
        $url = mb_strtolower((string)($chunk['source_url'] ?? ''), 'UTF-8');
        $name = mb_strtolower((string)($chunk['source_name'] ?? ''), 'UTF-8');

        if (preg_match('/eu\s+ai\s+act|2024[\/.]1689|regulation.*\bai\b.*act/i', $title)) {
            return true;
        }
        if (str_contains($url, 'eur-lex') && preg_match('/2024.1689|ai.act/i', $url)) {
            return true;
        }

        // The dash character classes contain a multibyte en dash, so these
        // patterns need the `u` modifier; without it the class matches single
        // bytes of unrelated characters.
        $isDbnPage = (
            str_contains($name, 'website')
            || str_contains($title, 'dobetternorge.no')
            || preg_match('/^(homepage|landing|about |contact )/i', $title)
            || str_contains($title, 'resource directory')
            || preg_match('/^flashcards?\s*[-–|]/iu', $title)
            || preg_match('/\|\s*do better norge\s*$/iu', $title)
            || preg_match('/[-–]\s*do better norge\s*$/iu', $title)
        );
        if ($isDbnPage) {
            return !($activeSlices['dbn_resources'] ?? false);
        }
        return false;
    }

    /**
     * Enrich corpus chunks in $pool with document-level metadata.
     *
     * For every corpus chunk with a document_id this looks up the document
     * row, generates and stores a short summary when the row has none,
     * resolves the corpus source name, and writes source_url, deep_link,
     * authority_label, corpus_source_name, publication_date and summary back
     * onto the chunk. Upload chunks are left untouched.
     *
     * Best effort: a failure of the document lookup is logged and leaves
     * $pool unchanged. A failure of a single summary or of the source-name
     * lookup is logged and the rest of the hydration still runs.
     *
     * @param array $pool Chunk list, modified in place.
     */
    private function hydrateSourceUrls(array &$pool): void
    {
        $docIds = [];
        foreach ($pool as $chunk) {
            if (($chunk['source_origin'] ?? 'corpus') !== 'corpus') {
                continue;
            }
            $docId = (int)($chunk['document_id'] ?? 0);
            if ($docId > 0) {
                $docIds[$docId] = true;
            }
        }
        if (empty($docIds)) {
            return;
        }

        try {
            $ragDb = dbnToolsRagDb();
            $ids = array_keys($docIds);
            $ph = implode(',', array_fill(0, count($ids), '?'));

            $stmt = $ragDb->prepare("
                SELECT d.id, d.title, d.source_url, d.authority_type,
                       d.publication_date, d.source_id, d.jurisdiction,
                       d.summary, LEFT(d.content, 4000) AS content_excerpt
                FROM documents d
                WHERE d.id IN ({$ph})
            ");
            $stmt->execute($ids);

            $docMeta = [];
            $sourceIds = [];
            foreach ($stmt as $row) {
                $dId = (int)$row['id'];
                $sid = isset($row['source_id']) ? (int)$row['source_id'] : null;
                if ($sid) {
                    $sourceIds[] = $sid;
                }
                $docMeta[$dId] = [
                    'source_url' => $row['source_url'] ?? null,
                    'authority_label' => dbnV6AuthorityLabel($row['authority_type'] ?? null),
                    'publication_date' => $row['publication_date'] ?? null,
                    'corpus_source_name' => 'Do Better Legal',
                    'source_id' => $sid,
                    'summary' => $row['summary'] ?? null,
                    'content_excerpt' => (string)($row['content_excerpt'] ?? ''),
                    'title' => (string)($row['title'] ?? ''),
                ];
            }

            // Lazily generate and persist summaries for documents without one.
            $unsummarized = array_filter(
                $docMeta,
                fn($m) => $m['summary'] === null && $m['content_excerpt'] !== ''
            );
            $updateStmt = null; // prepared once, on first use
            foreach ($unsummarized as $dId => $m) {
                try {
                    $raw = $this->azure->chatText([
                        ['role' => 'system', 'content' => 'Return only a concise 3-4 sentence summary. No preamble.'],
                        ['role' => 'user', 'content' => "Summarise this Norwegian family law document.\nFocus on: legal provisions covered, authority type, and questions it helps answer.\n\nTitle: {$m['title']}\n\nContent:\n{$m['content_excerpt']}"],
                    ], ['temperature' => 0.1, 'max_tokens' => 200, 'timeout' => 25]);
                    $summary = trim($raw);
                    if ($summary !== '') {
                        $updateStmt ??= $ragDb->prepare("UPDATE documents SET summary = ? WHERE id = ?");
                        $updateStmt->execute([$summary, $dId]);
                        $docMeta[$dId]['summary'] = $summary;
                    }
                } catch (Throwable $e) {
                    error_log('BVJ hydrateSourceUrls summary gen failed for doc ' . $dId . ': ' . $e->getMessage());
                }
            }

            if (!empty($sourceIds)) {
                // Source names are cosmetic; if this lookup fails, keep the
                // default name and still hydrate URLs from $docMeta.
                try {
                    $uSids = array_values(array_unique($sourceIds));
                    $sPh = implode(',', array_fill(0, count($uSids), '?'));
                    $sStmt = dbnToolsDb()->prepare("SELECT id, name FROM corpus_sources WHERE id IN ({$sPh})");
                    $sStmt->execute($uSids);
                    $srcNames = [];
                    foreach ($sStmt as $row) {
                        $srcNames[(int)$row['id']] = dbnV6RepairText((string)($row['name'] ?? 'Do Better Legal'));
                    }
                    foreach ($docMeta as $dId => $meta) {
                        if ($meta['source_id'] && isset($srcNames[$meta['source_id']])) {
                            $docMeta[$dId]['corpus_source_name'] = $srcNames[$meta['source_id']];
                        }
                    }
                } catch (Throwable $e) {
                    error_log('BVJ hydrateSourceUrls source-name lookup failed: ' . $e->getMessage());
                }
            }
        } catch (Throwable $e) {
            error_log('BVJ hydrateSourceUrls failed: ' . $e->getMessage());
            return;
        }

        foreach ($pool as &$chunk) {
            if (($chunk['source_origin'] ?? 'corpus') !== 'corpus') {
                continue;
            }
            $docId = (int)($chunk['document_id'] ?? 0);
            if (!$docId || !isset($docMeta[$docId])) {
                continue;
            }
            $m = $docMeta[$docId];
            $sourceUrl = $m['source_url'] ?? null;
            $chunk['source_url'] = $sourceUrl;
            $chunk['deep_link'] = $this->buildDeepLink($sourceUrl, $chunk['section'] ?? null);
            // Guard the fallback: a chunk may not carry authority_label yet.
            $chunk['authority_label'] = $m['authority_label'] ?? ($chunk['authority_label'] ?? null);
            $chunk['corpus_source_name'] = $m['corpus_source_name'] ?? null;
            $chunk['publication_date'] = $m['publication_date'] ?? null;
            $chunk['summary'] = $m['summary'] ?? null;
        }
        unset($chunk);
    }

    /**
     * Build the most specific link available for a source.
     *
     * For lovdata.no URLs whose section title names a paragraph ("§ 4-12",
     * "§ 4-4a", "§ 36") the paragraph is appended as "/§4-12". Any other URL
     * is returned trimmed and otherwise unchanged.
     *
     * @return string|null Null when no usable source URL is given.
     */
    private function buildDeepLink(?string $sourceUrl, ?string $sectionTitle): ?string
    {
        if (!$sourceUrl) {
            return null;
        }
        $sourceUrl = trim($sourceUrl);
        if ($sourceUrl === '') {
            return null;
        }
        // Capture the whole chapter-section number ("4-12", "4-4a"); the
        // previous pattern stopped after the first hyphen and produced "§4-".
        if (preg_match('~^https?://lovdata\.no/~i', $sourceUrl)
            && $sectionTitle
            && preg_match('/§\s*(\d+(?:-\d+)*[A-Za-z]?)/u', $sectionTitle, $m)) {
            return rtrim($sourceUrl, '/') . '/§' . $m[1];
        }
        return $sourceUrl;
    }

    /**
     * Collapse duplicate chunks and keep the $cap best-scoring ones.
     *
     * Chunks are keyed by "<source_origin>:<chunk_id>"; a chunk without a
     * chunk_id gets a random key and is therefore never merged. Duplicates
     * union their matched_sub_questions and keep the higher similarity and
     * reranker_score. The result is sorted descending by reranker_score,
     * falling back to similarity.
     */
    private function mergeAndDedupe(array $rawPool, int $cap): array
    {
        $byKey = [];
        foreach ($rawPool as $chunk) {
            $key = ($chunk['source_origin'] ?? 'corpus') . ':' . ($chunk['chunk_id'] ?? bin2hex(random_bytes(4)));
            if (!isset($byKey[$key])) {
                $byKey[$key] = $chunk;
                continue;
            }
            $existing = $byKey[$key];
            $existing['matched_sub_questions'] = array_values(array_unique(array_merge(
                $existing['matched_sub_questions'] ?? [],
                $chunk['matched_sub_questions'] ?? []
            )));
            if (($chunk['similarity'] ?? 0) > ($existing['similarity'] ?? 0)) {
                $existing['similarity'] = $chunk['similarity'];
            }
            if (($chunk['reranker_score'] ?? 0) > ($existing['reranker_score'] ?? 0)) {
                $existing['reranker_score'] = $chunk['reranker_score'];
            }
            $byKey[$key] = $existing;
        }
        $merged = array_values($byKey);
        usort($merged, function (array $a, array $b): int {
            $aScore = $a['reranker_score'] ?? $a['similarity'] ?? 0;
            $bScore = $b['reranker_score'] ?? $b['similarity'] ?? 0;
            return $bScore <=> $aScore;
        });
        return array_slice($merged, 0, $cap);
    }

    /**
     * Assign 1-based citation numbers ('n') to chunks in their current order.
     */
    private function numberSources(array $chunks): array
    {
        $out = [];
        foreach ($chunks as $i => $c) {
            $c['n'] = $i + 1;
            $out[] = $c;
        }
        return $out;
    }

    /**
     * Grade the retrieval result as 'high', 'medium' or 'low'.
     *
     * Uses the best reranker_score (falling back to similarity) over all
     * sources: 'high' needs at least 6 sources and a best score of at least
     * 0.5, 'medium' at least 3 sources and at least 0.35, anything else is
     * 'low'.
     */
    private function citationConfidence(array $sources): string
    {
        if (!$sources) {
            return 'low';
        }
        $scores = array_values(array_filter(array_map(
            fn(array $s) => $s['reranker_score'] ?? $s['similarity'] ?? null,
            $sources
        ), 'is_numeric'));
        $best = $scores ? max($scores) : 0;
        if (count($sources) >= 6 && $best >= 0.5) {
            return 'high';
        }
        if (count($sources) >= 3 && $best >= 0.35) {
            return 'medium';
        }
        return 'low';
    }

    /**
     * Clamp caller-supplied tuning controls to their allowed ranges.
     *
     * Missing keys fall back to the defaults written inline below.
     */
    private function normalizeControls(array $controls): array
    {
        return [
            'sub_q_count' => max(3, min(5, (int)($controls['sub_q_count'] ?? 4))),
            'chunk_limit' => max(4, min(10, (int)($controls['chunk_limit'] ?? 6))),
            'similarity_threshold' => max(0.2, min(0.6, (float)($controls['similarity_threshold'] ?? 0.30))),
            'reranker_top_k' => max(8, min(14, (int)($controls['reranker_top_k'] ?? 12))),
            'temperature' => max(0.05, min(0.4, (float)($controls['temperature'] ?? 0.15))),
        ];
    }

    /**
     * Fetch the 'family-legal' package and verify the client may use it.
     *
     * Calls dbnToolsAbort() with HTTP 503 when the package is missing or
     * inactive, or when the client has no active subscription to it.
     * NOTE(review): this assumes dbnToolsAbort() does not return; confirm,
     * since $package['id'] is read right after the first abort call.
     */
    private function requireFamilyPackage(int $clientId): array
    {
        $package = dbnToolsFetchPackage('family-legal');
        if (!$package || empty($package['is_active'])) {
            dbnToolsAbort('The family-legal corpus package is not active.', 503, 'package_unavailable');
        }
        if (!dbnToolsHasActiveSubscription($clientId, (int)$package['id'])) {
            dbnToolsAbort('Do Better Norge does not have an active family-legal subscription.', 503, 'subscription_missing');
        }
        return $package;
    }

    /**
     * Build one trace entry for the step log returned to the client.
     */
    private function trace(string $label, string $detail, string $status = 'complete'): array
    {
        return ['label' => $label, 'detail' => $detail, 'status' => $status];
    }

    /**
     * Whole milliseconds elapsed since a microtime(true) start value.
     */
    private function elapsedMs(float $start): int
    {
        return (int)round((microtime(true) - $start) * 1000);
    }
}
diff --git a/includes/layout.php b/includes/layout.php index a4207a0..920ec0c 100644 --- a/includes/layout.php +++ b/includes/layout.php @@ -13,6 +13,7 @@ $navItems = [ 'search' => ['Search', 'Legal sources'], 'deep-research' => ['Deep research', 'Agent + RAG'], 'advocate' => ['Advocate', 'Take a side'], + 'barnevernet' => ['BVJ Analyzer', 'Document'], 'summarize' => ['Summarize', 'Pasted text'], 'timeline' => ['Timeline', 'Events'], 'redact' => ['Redact', 'Privacy'],