diff --git a/advocate.php b/advocate.php index 109d7b9..c5c5924 100644 --- a/advocate.php +++ b/advocate.php @@ -150,6 +150,20 @@ require_once __DIR__ . '/includes/layout.php'; + + diff --git a/api/deep-research.php b/api/deep-research.php index edbb255..55271ab 100644 --- a/api/deep-research.php +++ b/api/deep-research.php @@ -62,6 +62,8 @@ try { if (mb_strlen($advocateRole, 'UTF-8') > 200) { throw new DbnToolsHttpException('advocate_role is too long.', 422, 'advocate_role_too_long'); } + $priorContext = is_array($input['prior_context'] ?? null) ? $input['prior_context'] : null; + $branchNotes = mb_substr(trim((string)($input['branch_notes'] ?? '')), 0, 1000, 'UTF-8'); if (mb_strlen($seedQuery, 'UTF-8') > 4000) { throw new DbnToolsHttpException('Query is too long.', 422, 'query_too_long'); @@ -118,7 +120,9 @@ try { $language, $controls, $emit, - $advocateRole + $advocateRole, + $priorContext, + $branchNotes ); $result['ok'] = true; diff --git a/api/document-chunks.php b/api/document-chunks.php new file mode 100644 index 0000000..fdc375c --- /dev/null +++ b/api/document-chunks.php @@ -0,0 +1,54 @@ + false, 'error' => 'document_id is required']); + exit; + } + + $ragDb = dbnToolsRagDb(); + + $docStmt = $ragDb->prepare("SELECT id, title FROM documents WHERE id = ? LIMIT 1"); + $docStmt->execute([$documentId]); + $doc = $docStmt->fetch(PDO::FETCH_ASSOC); + if (!$doc) { + echo json_encode(['ok' => false, 'error' => 'Document not found']); + exit; + } + + $chunkStmt = $ragDb->prepare(" + SELECT chunk_index, section_title, content + FROM chunks + WHERE document_id = ? + ORDER BY chunk_index ASC + "); + $chunkStmt->execute([$documentId]); + $chunks = []; + foreach ($chunkStmt as $row) { + $chunks[] = [ + 'chunk_index' => (int)$row['chunk_index'], + 'section_title' => $row['section_title'] ?? null, + 'content' => (string)$row['content'], + ]; + } + + echo json_encode([ + 'ok' => true, + 'document' => ['id' => (int)$doc['id'], 'title' => (string)$doc['title']], + 'chunks' => $chunks, + ], JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES); + +} catch (Throwable $e) { + error_log('DBN document-chunks error: ' . $e->getMessage()); + echo json_encode(['ok' => false, 'error' => 'Internal error']); +} diff --git a/assets/css/tools.css b/assets/css/tools.css index 21bb4d8..6e385a0 100644 --- a/assets/css/tools.css +++ b/assets/css/tools.css @@ -2573,7 +2573,7 @@ p { .dr-source-modal__meta dt:first-of-type { margin-top: 0; } .dr-source-modal__text { - white-space: pre-wrap; + white-space: normal; line-height: 1.7; color: var(--ink); } @@ -2600,7 +2600,7 @@ p { .dr-subq-report__head { display: grid; - grid-template-columns: auto 1fr; + grid-template-columns: auto 1fr auto; gap: 10px; align-items: start; margin-bottom: 10px; @@ -3213,3 +3213,166 @@ a.dr-source-title-link:hover { .adv-role-select, .adv-role-custom { max-width: 100%; } .adv-banner { flex-direction: column; align-items: flex-start; } } + +/* ── Branch-from-sub-question panel ──────────────────────────────────────── */ +.branch-panel { + background: #f0faf8; + border: 1px solid var(--teal); + border-left: 4px solid var(--teal); + border-radius: 8px; + padding: 14px 16px; + margin-bottom: 16px; +} + +.branch-panel__head { + display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: 6px; +} + +.branch-panel__label { + font-weight: 700; + font-size: 0.82rem; + text-transform: uppercase; + letter-spacing: 0.04em; + color: var(--teal-dark); +} + +.branch-panel__origin { + margin: 0 0 8px; + font-size: 0.88rem; + color: var(--ink); + line-height: 1.45; +} + +.branch-panel__prior { + margin-bottom: 10px; + font-size: 0.87rem; +} + +.branch-panel__prior summary { + cursor: pointer; + color: var(--teal-dark); + font-weight: 600; + user-select: none; +} + +.branch-panel__brief { + margin-top: 6px; + padding: 8px 10px; + background: rgba(255,255,255,0.7); + border-radius: 5px; + font-size: 0.86rem; + color: var(--muted); + line-height: 1.5; + white-space: pre-wrap; +} + +/* "Branch ↓" button on sub-Q cards */ +.dr-branch-btn { + align-self: start; + padding: 4px 10px; + font-size: 0.78rem; + font-weight: 700; + background: transparent; + border: 1px solid var(--teal); + border-radius: 5px; + color: var(--teal-dark); + white-space: nowrap; + transition: background 0.12s, color 0.12s; +} + +.dr-branch-btn:hover { + background: var(--teal); + color: #fff; +} + +/* ── Source modal: summary + chunk toggle + all-chunks ───────────────────── */ +.dr-modal-summary { + font-size: 0.93rem; + line-height: 1.65; + color: var(--ink); + margin-bottom: 14px; +} + +.dr-modal-summary--empty em { + color: var(--muted); + font-size: 0.87rem; +} + +.dr-modal-chunk-toggle, +.dr-modal-all-chunks { + display: inline-block; + margin-bottom: 8px; + padding: 4px 10px; + font-size: 0.8rem; + font-weight: 600; + background: transparent; + border: 1px solid var(--line); + border-radius: 5px; + color: var(--teal-dark); + cursor: pointer; + transition: background 0.12s, border-color 0.12s; +} + +.dr-modal-chunk-toggle:hover, +.dr-modal-all-chunks:hover { + background: var(--soft-teal); + border-color: var(--teal); +} + +.dr-modal-chunk-text { + white-space: pre-wrap; + font-size: 0.87rem; + line-height: 1.65; + color: var(--muted); + background: #f8f9fb; + border: 1px solid var(--line); + border-radius: 6px; + padding: 10px 12px; + margin-bottom: 12px; +} + +.dr-modal-all-chunks { + display: block; + width: 100%; + text-align: left; + margin-top: 4px; +} + +.dr-modal-chunks-list { + margin-top: 4px; +} + +.dr-modal-chunks-head { + font-weight: 700; + font-size: 0.82rem; + color: var(--teal-dark); + padding: 6px 0 8px; + border-bottom: 1px solid var(--line); + margin-bottom: 8px; +} + +.dr-modal-chunk-item { + padding: 8px 0; + border-bottom: 1px solid var(--line); +} + +.dr-modal-chunk-item:last-child { border-bottom: 0; } + +.dr-modal-chunk-idx { + display: block; + font-size: 0.78rem; + font-weight: 700; + color: var(--coral); + margin-bottom: 4px; +} + +.dr-modal-chunk-preview { + margin: 0; + font-size: 0.86rem; + line-height: 1.5; + color: var(--muted); + white-space: pre-wrap; +} diff --git a/assets/js/advocate.js b/assets/js/advocate.js index b22f66e..4464193 100644 --- a/assets/js/advocate.js +++ b/assets/js/advocate.js @@ -6,6 +6,7 @@ let lang = 'en'; let uploadFiles = []; let lastResult = null; + let branchContext = null; const SLICE_DEFS = [ { id: 'family_core', label: 'Family Law Core' }, @@ -65,6 +66,11 @@ modalEyebrow: document.getElementById('advSourceModalEyebrow'), modalMeta: document.getElementById('advSourceModalMeta'), modalText: document.getElementById('advSourceModalText'), + branchPanel: document.getElementById('advBranchPanel'), + branchClear: document.getElementById('advBranchClear'), + branchOrigin: document.getElementById('advBranchOrigin'), + branchSummary: document.getElementById('advBranchSummary'), + branchNotes: document.getElementById('advBranchNotes'), }); if (!els.form) return; @@ -75,7 +81,12 @@ bindRanges(); bindUpload(); bindModal(); + bindBranch(); els.form.addEventListener('submit', onSubmit); + els.results.addEventListener('click', (e) => { + const btn = e.target.closest('.dr-branch-btn'); + if (btn) branchFromSubQ(btn.dataset.question || ''); + }); renderTrace(STEP_LABELS.map((label) => ({ label, detail: 'Waiting…', status: 'idle' }))); }); @@ -214,7 +225,62 @@ source.matched_sub_questions?.length ? ['Matched sub-Q', source.matched_sub_questions.join(', ')] : null, ].filter(Boolean); els.modalMeta.innerHTML = '
' + metaRows.map(([k, v]) => `
${escapeHtml(k)}
${escapeHtml(String(v))}
`).join('') + '
'; - els.modalText.textContent = source.chunk_text || source.excerpt || ''; + + const summary = source.summary || ''; + const chunkText = source.chunk_text || source.excerpt || ''; + const isUpload = source.source_origin === 'upload'; + const hasDocId = source.document_id != null; + + let html = summary + ? `
${escapeHtml(summary)}
` + : `
Summary not yet generated — showing raw chunk below.
`; + + if (chunkText) { + html += ``; + html += ``; + } + if (!isUpload && hasDocId) { + html += ``; + html += `
`; + } + + els.modalText.innerHTML = html; + + const chunkToggle = els.modalText.querySelector('.dr-modal-chunk-toggle'); + const chunkDiv = els.modalText.querySelector('.dr-modal-chunk-text'); + chunkToggle?.addEventListener('click', () => { + const isHidden = chunkDiv.classList.toggle('is-hidden'); + chunkToggle.textContent = isHidden ? 'Show matching chunk ▼' : 'Hide matching chunk ▲'; + }); + + const allChunksBtn = els.modalText.querySelector('.dr-modal-all-chunks'); + const chunksListDiv = els.modalText.querySelector('.dr-modal-chunks-list'); + if (allChunksBtn && chunksListDiv) { + allChunksBtn.addEventListener('click', async () => { + allChunksBtn.disabled = true; + allChunksBtn.textContent = 'Loading…'; + try { + const res = await fetch(`api/document-chunks.php?document_id=${source.document_id}`, { credentials: 'same-origin' }); + const data = await res.json(); + if (data.ok && data.chunks) { + chunksListDiv.innerHTML = + `
${escapeHtml(data.document?.title || '')} · ${data.chunks.length} chunks
` + + data.chunks.map((c) => `
+ #${c.chunk_index + 1}${c.section_title ? ' · ' + escapeHtml(c.section_title) : ''} +

${escapeHtml(truncate(c.content, 300))}

+
`).join(''); + allChunksBtn.remove(); + } else { + allChunksBtn.textContent = 'Could not load chunks.'; + allChunksBtn.disabled = false; + } + } catch (_) { + allChunksBtn.textContent = 'Error loading chunks.'; + allChunksBtn.disabled = false; + } + }); + } + els.modal.classList.remove('is-hidden'); } @@ -282,6 +348,10 @@ controls: getControls(), advocate_role: advocateRole, }; + if (branchContext) { + payload.prior_context = branchContext; + payload.branch_notes = (els.branchNotes ? els.branchNotes.value : '').trim(); + } const stepKeyToIndex = { interpretation: 0, @@ -370,6 +440,7 @@ return; } + finalResult.query = query; lastResult = finalResult; const meta = finalResult.trace_metadata || {}; const rc = meta.retrieval_counts || {}; @@ -596,6 +667,7 @@
${escapeHtml(sq.question || '')}
${sq.rationale ? `
${escapeHtml(sq.rationale)}
` : ''} + `; @@ -611,6 +683,34 @@ } } + function bindBranch() { + if (!els.branchClear) return; + els.branchClear.addEventListener('click', clearBranch); + } + + function clearBranch() { + branchContext = null; + if (els.branchPanel) els.branchPanel.classList.add('is-hidden'); + if (els.branchNotes) els.branchNotes.value = ''; + } + + function branchFromSubQ(question) { + if (!lastResult || !question) return; + branchContext = { + original_query: lastResult.query || '', + brief_summary: (lastResult.brief_markdown || '').slice(0, 600), + what_we_found: lastResult.what_we_found || '', + top_sources: (lastResult.sources || []).slice(0, 5).map((s) => ({ + n: s.n, title: s.title, excerpt: (s.excerpt || '').slice(0, 200), + })), + }; + els.input.value = question; + if (els.branchOrigin) els.branchOrigin.textContent = 'Original query: ' + branchContext.original_query; + if (els.branchSummary) els.branchSummary.textContent = branchContext.brief_summary; + if (els.branchPanel) els.branchPanel.classList.remove('is-hidden'); + els.form.scrollIntoView({ behavior: 'smooth', block: 'start' }); + } + function renderSourceCard(s) { const score = s.reranker_score != null ? s.reranker_score : s.similarity; const originTagClass = s.source_origin === 'upload' ? 'dr-source-tag dr-source-tag--upload' : 'dr-source-tag'; diff --git a/assets/js/deep-research.js b/assets/js/deep-research.js index 7d8ff1b..3e54d3d 100644 --- a/assets/js/deep-research.js +++ b/assets/js/deep-research.js @@ -6,6 +6,7 @@ let lang = 'en'; let uploadFiles = []; let lastResult = null; + let branchContext = null; const SLICE_DEFS = [ { id: 'family_core', label: 'Family Law Core' }, @@ -63,6 +64,11 @@ modalEyebrow: document.getElementById('drSourceModalEyebrow'), modalMeta: document.getElementById('drSourceModalMeta'), modalText: document.getElementById('drSourceModalText'), + branchPanel: document.getElementById('drBranchPanel'), + branchClear: document.getElementById('drBranchClear'), + branchOrigin: document.getElementById('drBranchOrigin'), + branchSummary: document.getElementById('drBranchSummary'), + branchNotes: document.getElementById('drBranchNotes'), }); if (!els.form) return; @@ -72,7 +78,12 @@ bindRanges(); bindUpload(); bindModal(); + bindBranch(); els.form.addEventListener('submit', onSubmit); + els.results.addEventListener('click', (e) => { + const btn = e.target.closest('.dr-branch-btn'); + if (btn) branchFromSubQ(btn.dataset.question || ''); + }); // Pre-render placeholder trace renderTrace(STEP_LABELS.map((label) => ({ label, detail: 'Waiting…', status: 'idle' }))); @@ -195,7 +206,62 @@ source.matched_sub_questions?.length ? ['Matched sub-Q', source.matched_sub_questions.join(', ')] : null, ].filter(Boolean); els.modalMeta.innerHTML = '
' + metaRows.map(([k, v]) => `
${escapeHtml(k)}
${escapeHtml(String(v))}
`).join('') + '
'; - els.modalText.textContent = source.chunk_text || source.excerpt || ''; + + const summary = source.summary || ''; + const chunkText = source.chunk_text || source.excerpt || ''; + const isUpload = source.source_origin === 'upload'; + const hasDocId = source.document_id != null; + + let html = summary + ? `
${escapeHtml(summary)}
` + : `
Summary not yet generated — showing raw chunk below.
`; + + if (chunkText) { + html += ``; + html += ``; + } + if (!isUpload && hasDocId) { + html += ``; + html += `
`; + } + + els.modalText.innerHTML = html; + + const chunkToggle = els.modalText.querySelector('.dr-modal-chunk-toggle'); + const chunkDiv = els.modalText.querySelector('.dr-modal-chunk-text'); + chunkToggle?.addEventListener('click', () => { + const isHidden = chunkDiv.classList.toggle('is-hidden'); + chunkToggle.textContent = isHidden ? 'Show matching chunk ▼' : 'Hide matching chunk ▲'; + }); + + const allChunksBtn = els.modalText.querySelector('.dr-modal-all-chunks'); + const chunksListDiv = els.modalText.querySelector('.dr-modal-chunks-list'); + if (allChunksBtn && chunksListDiv) { + allChunksBtn.addEventListener('click', async () => { + allChunksBtn.disabled = true; + allChunksBtn.textContent = 'Loading…'; + try { + const res = await fetch(`api/document-chunks.php?document_id=${source.document_id}`, { credentials: 'same-origin' }); + const data = await res.json(); + if (data.ok && data.chunks) { + chunksListDiv.innerHTML = + `
${escapeHtml(data.document?.title || '')} · ${data.chunks.length} chunks
` + + data.chunks.map((c) => `
+ #${c.chunk_index + 1}${c.section_title ? ' · ' + escapeHtml(c.section_title) : ''} +

${escapeHtml(truncate(c.content, 300))}

+
`).join(''); + allChunksBtn.remove(); + } else { + allChunksBtn.textContent = 'Could not load chunks.'; + allChunksBtn.disabled = false; + } + } catch (_) { + allChunksBtn.textContent = 'Error loading chunks.'; + allChunksBtn.disabled = false; + } + }); + } + els.modal.classList.remove('is-hidden'); } @@ -257,6 +323,10 @@ language: lang, controls: getControls(), }; + if (branchContext) { + payload.prior_context = branchContext; + payload.branch_notes = (els.branchNotes ? els.branchNotes.value : '').trim(); + } const stepKeyToIndex = { interpretation: 0, @@ -348,6 +418,7 @@ return; } + finalResult.query = query; lastResult = finalResult; const meta = finalResult.trace_metadata || {}; const rc = meta.retrieval_counts || {}; @@ -536,11 +607,40 @@
${escapeHtml(sq.question || '')}
${sq.rationale ? `
${escapeHtml(sq.rationale)}
` : ''} + `; } + function bindBranch() { + if (!els.branchClear) return; + els.branchClear.addEventListener('click', clearBranch); + } + + function clearBranch() { + branchContext = null; + if (els.branchPanel) els.branchPanel.classList.add('is-hidden'); + if (els.branchNotes) els.branchNotes.value = ''; + } + + function branchFromSubQ(question) { + if (!lastResult || !question) return; + branchContext = { + original_query: lastResult.query || '', + brief_summary: (lastResult.brief_markdown || '').slice(0, 600), + what_we_found: lastResult.what_we_found || '', + top_sources: (lastResult.sources || []).slice(0, 5).map((s) => ({ + n: s.n, title: s.title, excerpt: (s.excerpt || '').slice(0, 200), + })), + }; + els.input.value = question; + if (els.branchOrigin) els.branchOrigin.textContent = 'Original query: ' + branchContext.original_query; + if (els.branchSummary) els.branchSummary.textContent = branchContext.brief_summary; + if (els.branchPanel) els.branchPanel.classList.remove('is-hidden'); + els.form.scrollIntoView({ behavior: 'smooth', block: 'start' }); + } + function flashSource(n) { document.querySelectorAll('.dr-source-card.is-highlight').forEach((c) => c.classList.remove('is-highlight')); const target = document.querySelector(`.dr-source-card[data-source-n="${n}"]`); diff --git a/deep-research.php b/deep-research.php index 78ffc0f..aab4f87 100644 --- a/deep-research.php +++ b/deep-research.php @@ -129,6 +129,20 @@ require_once __DIR__ . '/includes/layout.php'; + + diff --git a/includes/DeepResearchAgent.php b/includes/DeepResearchAgent.php index 1e84fa7..58ca3a1 100644 --- a/includes/DeepResearchAgent.php +++ b/includes/DeepResearchAgent.php @@ -23,15 +23,17 @@ final class DbnDeepResearchAgent } public function run( - string $seedQuery, - string $pastedText, - array $uploadedFiles, - array $sliceSelection, - string $engine, - string $language, - array $controls, + string $seedQuery, + string $pastedText, + array $uploadedFiles, + array $sliceSelection, + string $engine, + string $language, + array $controls, ?callable $emit = null, - string $advocateRole = '' + string $advocateRole = '', + ?array $priorContext = null, + string $branchNotes = '' ): array { $seedQuery = trim($seedQuery); $pastedText = trim($pastedText); @@ -82,7 +84,7 @@ final class DbnDeepResearchAgent // STEP 1: Query interpretation $emitRunning('interpretation', 'Query interpretation', 'Summarising the seed input…'); $stepStart = microtime(true); - $interpretation = $this->interpretSeed($seedDescription, $language, $advocateRole); + $interpretation = $this->interpretSeed($seedDescription, $language, $advocateRole, $priorContext, $branchNotes); $this->stepTimings['interpretation'] = $this->elapsedMs($stepStart); $emitStep('interpretation', 'Query interpretation', $interpretation['detail'], 'complete'); @@ -284,6 +286,33 @@ final class DbnDeepResearchAgent $synthesisEngineLabel = $engine === 'azure_full' ? 'Azure gpt-4o' : ($engine === 'gpu' ? 'GPU qwen2.5:14b' : 'Azure gpt-4o-mini'); $emitRunning('synthesis', 'Synthesis', sprintf('Synthesising cited brief with %s — this is the slowest step…', $synthesisEngineLabel)); $stepStart = microtime(true); + // Attach upload summaries (generated lazily) to numbered sources + if (!empty($uploadedFiles) && !empty($numberedSources)) { + $uploadSummaries = []; + foreach ($uploadedFiles as $idx => $file) { + $text = mb_substr((string)($file['text'] ?? ''), 0, 4000, 'UTF-8'); + $filename = (string)($file['filename'] ?? "file-{$idx}"); + if ($text === '') continue; + try { + $raw = $this->azure->chatText([ + ['role' => 'system', 'content' => 'Return only a concise 3-4 sentence summary. No preamble.'], + ['role' => 'user', 'content' => "Summarise this document for a legal researcher.\n\nFilename: {$filename}\n\nContent:\n{$text}"], + ], ['temperature' => 0.1, 'max_tokens' => 200, 'timeout' => 20]); + $uploadSummaries[$idx] = trim($raw); + } catch (Throwable $e) { + error_log('DBN upload summary gen failed for file ' . $idx . ': ' . $e->getMessage()); + $uploadSummaries[$idx] = null; + } + } + foreach ($numberedSources as &$src) { + if (($src['source_origin'] ?? '') !== 'upload') continue; + if (preg_match('/^upload:(\d+):/', (string)($src['chunk_id'] ?? ''), $m)) { + $src['summary'] = $uploadSummaries[(int)$m[1]] ?? null; + } + } + unset($src); + } + $synthesis = $this->synthesise( $seedDescription, $interpretation['brief'], @@ -292,7 +321,9 @@ final class DbnDeepResearchAgent $engine, $language, $controls['temperature'], - $advocateRole + $advocateRole, + $priorContext, + $branchNotes ); $this->stepTimings['synthesis'] = $this->elapsedMs($stepStart); $emitStep( @@ -411,14 +442,30 @@ final class DbnDeepResearchAgent return implode("\n\n", $parts); } - private function interpretSeed(string $seedDescription, string $language, string $advocateRole = ''): array + private function interpretSeed(string $seedDescription, string $language, string $advocateRole = '', ?array $priorContext = null, string $branchNotes = ''): array { $locale = $language === 'no' ? 'Norwegian' : 'English'; $rolePrefix = $advocateRole !== '' ? "You are preparing a case-research brief for: {$advocateRole}. Frame your interpretation to identify the strongest legal angles for this party.\n\n" : ''; + + $priorContextBlock = ''; + if (!empty($priorContext)) { + $parts = ['Prior research context:']; + if (!empty($priorContext['original_query'])) { + $parts[] = 'Original question: ' . mb_substr((string)$priorContext['original_query'], 0, 300, 'UTF-8'); + } + if (!empty($priorContext['what_we_found'])) { + $parts[] = 'Key findings: ' . mb_substr((string)$priorContext['what_we_found'], 0, 400, 'UTF-8'); + } + if ($branchNotes !== '') { + $parts[] = 'Researcher notes: ' . mb_substr($branchNotes, 0, 300, 'UTF-8'); + } + $priorContextBlock = implode("\n", $parts) . "\n\nNow investigate this branch:\n"; + } + $prompt = <<prepare(" SELECT d.id, d.title, d.source_url, d.authority_type, - d.publication_date, d.source_id, d.jurisdiction + d.publication_date, d.source_id, d.jurisdiction, + d.summary, LEFT(d.content, 4000) AS content_excerpt FROM documents d WHERE d.id IN ({$ph}) "); @@ -759,9 +807,30 @@ PROMPT; 'publication_date' => $row['publication_date'] ?? null, 'corpus_source_name' => 'Do Better Legal', 'source_id' => $sid, + 'summary' => $row['summary'] ?? null, + 'content_excerpt' => (string)($row['content_excerpt'] ?? ''), + 'title' => (string)($row['title'] ?? ''), ]; } + // Lazily generate summaries for documents that don't have one yet + $unsummarized = array_filter($docMeta, fn($m) => $m['summary'] === null && $m['content_excerpt'] !== ''); + foreach ($unsummarized as $dId => $m) { + try { + $raw = $this->azure->chatText([ + ['role' => 'system', 'content' => 'Return only a concise 3-4 sentence summary. No preamble.'], + ['role' => 'user', 'content' => "Summarise this Norwegian family law document for a legal researcher.\nFocus on: which legal provisions it covers, its authority type, and what questions it helps answer.\n\nTitle: {$m['title']}\n\nContent:\n{$m['content_excerpt']}"], + ], ['temperature' => 0.1, 'max_tokens' => 200, 'timeout' => 25]); + $summary = trim($raw); + if ($summary !== '') { + $ragDb->prepare("UPDATE documents SET summary = ? WHERE id = ?")->execute([$summary, $dId]); + $docMeta[$dId]['summary'] = $summary; + } + } catch (Throwable $e) { + error_log('DBN hydrateSourceUrls summary gen failed for doc ' . $dId . ': ' . $e->getMessage()); + } + } + // Enrich with corpus source name from bnl_admin.corpus_sources if (!empty($sourceIds)) { $uSids = array_values(array_unique($sourceIds)); @@ -795,6 +864,7 @@ PROMPT; $chunk['authority_label'] = $m['authority_label'] ?? $chunk['authority_label']; $chunk['corpus_source_name'] = $m['corpus_source_name'] ?? null; $chunk['publication_date'] = $m['publication_date'] ?? null; + $chunk['summary'] = $m['summary'] ?? null; } unset($chunk); } @@ -861,14 +931,16 @@ PROMPT; } private function synthesise( - string $seedDescription, - string $brief, - array $subQuestions, - array $numberedSources, - string $engine, - string $language, - float $temperature, - string $advocateRole = '' + string $seedDescription, + string $brief, + array $subQuestions, + array $numberedSources, + string $engine, + string $language, + float $temperature, + string $advocateRole = '', + ?array $priorContext = null, + string $branchNotes = '' ): array { $locale = $language === 'no' ? 'Norwegian' : 'English'; @@ -891,6 +963,23 @@ PROMPT; ]; } + $priorContextSection = ''; + if (!empty($priorContext)) { + $prior = []; + if (!empty($priorContext['original_query'])) { + $prior[] = 'Original research question: ' . mb_substr((string)$priorContext['original_query'], 0, 300, 'UTF-8'); + } + if (!empty($priorContext['brief_summary'])) { + $prior[] = "Key findings from prior research:\n" . mb_substr((string)$priorContext['brief_summary'], 0, 600, 'UTF-8'); + } + if ($branchNotes !== '') { + $prior[] = 'Researcher notes: ' . mb_substr($branchNotes, 0, 300, 'UTF-8'); + } + if ($prior) { + $priorContextSection = "\nBackground from prior research:\n" . implode("\n", $prior) . "\n"; + } + } + $sourcesContext = []; foreach ($numberedSources as $s) { $sourcesContext[] = sprintf( @@ -926,7 +1015,7 @@ PROMPT; $prompt = <<