Add sub-question branching + document summary modals

- Source modal now shows an LLM-generated document summary (generated
  lazily and cached in documents.summary) instead of raw chunk text; a
  toggle reveals the matched chunk, and a "View all chunks" button fetches
  every chunk of the document via the new api/document-chunks.php endpoint
- Each sub-question card gets a "Branch ↓" button that pre-fills the query
  with that sub-question and shows a context panel with the prior brief
  summary; prior_context + branch_notes are injected into interpretSeed()
  and synthesise() so the LLM knows where the research is coming from
- Summaries for uploaded documents are generated at synthesis time and
  attached to upload sources, alongside the corpus document summaries
- DB: documents.summary TEXT column added to bnl_corpus on chloe

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-15 19:44:27 +02:00
parent 0ff4eb6d31
commit 343b19d0b4
8 changed files with 566 additions and 28 deletions
+112 -23
View File
@@ -23,15 +23,17 @@ final class DbnDeepResearchAgent
}
public function run(
string $seedQuery,
string $pastedText,
array $uploadedFiles,
array $sliceSelection,
string $engine,
string $language,
array $controls,
string $seedQuery,
string $pastedText,
array $uploadedFiles,
array $sliceSelection,
string $engine,
string $language,
array $controls,
?callable $emit = null,
string $advocateRole = ''
string $advocateRole = '',
?array $priorContext = null,
string $branchNotes = ''
): array {
$seedQuery = trim($seedQuery);
$pastedText = trim($pastedText);
@@ -82,7 +84,7 @@ final class DbnDeepResearchAgent
// STEP 1: Query interpretation
$emitRunning('interpretation', 'Query interpretation', 'Summarising the seed input…');
$stepStart = microtime(true);
$interpretation = $this->interpretSeed($seedDescription, $language, $advocateRole);
$interpretation = $this->interpretSeed($seedDescription, $language, $advocateRole, $priorContext, $branchNotes);
$this->stepTimings['interpretation'] = $this->elapsedMs($stepStart);
$emitStep('interpretation', 'Query interpretation', $interpretation['detail'], 'complete');
@@ -284,6 +286,33 @@ final class DbnDeepResearchAgent
$synthesisEngineLabel = $engine === 'azure_full' ? 'Azure gpt-4o' : ($engine === 'gpu' ? 'GPU qwen2.5:14b' : 'Azure gpt-4o-mini');
$emitRunning('synthesis', 'Synthesis', sprintf('Synthesising cited brief with %s — this is the slowest step…', $synthesisEngineLabel));
$stepStart = microtime(true);
// Attach upload summaries (generated lazily) to numbered sources
if (!empty($uploadedFiles) && !empty($numberedSources)) {
$uploadSummaries = [];
foreach ($uploadedFiles as $idx => $file) {
$text = mb_substr((string)($file['text'] ?? ''), 0, 4000, 'UTF-8');
$filename = (string)($file['filename'] ?? "file-{$idx}");
if ($text === '') continue;
try {
$raw = $this->azure->chatText([
['role' => 'system', 'content' => 'Return only a concise 3-4 sentence summary. No preamble.'],
['role' => 'user', 'content' => "Summarise this document for a legal researcher.\n\nFilename: {$filename}\n\nContent:\n{$text}"],
], ['temperature' => 0.1, 'max_tokens' => 200, 'timeout' => 20]);
$uploadSummaries[$idx] = trim($raw);
} catch (Throwable $e) {
error_log('DBN upload summary gen failed for file ' . $idx . ': ' . $e->getMessage());
$uploadSummaries[$idx] = null;
}
}
foreach ($numberedSources as &$src) {
if (($src['source_origin'] ?? '') !== 'upload') continue;
if (preg_match('/^upload:(\d+):/', (string)($src['chunk_id'] ?? ''), $m)) {
$src['summary'] = $uploadSummaries[(int)$m[1]] ?? null;
}
}
unset($src);
}
$synthesis = $this->synthesise(
$seedDescription,
$interpretation['brief'],
@@ -292,7 +321,9 @@ final class DbnDeepResearchAgent
$engine,
$language,
$controls['temperature'],
$advocateRole
$advocateRole,
$priorContext,
$branchNotes
);
$this->stepTimings['synthesis'] = $this->elapsedMs($stepStart);
$emitStep(
@@ -411,14 +442,30 @@ final class DbnDeepResearchAgent
return implode("\n\n", $parts);
}
private function interpretSeed(string $seedDescription, string $language, string $advocateRole = ''): array
private function interpretSeed(string $seedDescription, string $language, string $advocateRole = '', ?array $priorContext = null, string $branchNotes = ''): array
{
$locale = $language === 'no' ? 'Norwegian' : 'English';
$rolePrefix = $advocateRole !== ''
? "You are preparing a case-research brief for: {$advocateRole}. Frame your interpretation to identify the strongest legal angles for this party.\n\n"
: '';
$priorContextBlock = '';
if (!empty($priorContext)) {
$parts = ['Prior research context:'];
if (!empty($priorContext['original_query'])) {
$parts[] = 'Original question: ' . mb_substr((string)$priorContext['original_query'], 0, 300, 'UTF-8');
}
if (!empty($priorContext['what_we_found'])) {
$parts[] = 'Key findings: ' . mb_substr((string)$priorContext['what_we_found'], 0, 400, 'UTF-8');
}
if ($branchNotes !== '') {
$parts[] = 'Researcher notes: ' . mb_substr($branchNotes, 0, 300, 'UTF-8');
}
$priorContextBlock = implode("\n", $parts) . "\n\nNow investigate this branch:\n";
}
$prompt = <<<PROMPT
{$rolePrefix}You are reviewing the input below to set up a deep legal research pass against the Do Better Norge family-law corpus.
{$rolePrefix}{$priorContextBlock}You are reviewing the input below to set up a deep legal research pass against the Do Better Norge family-law corpus.
Input:
{$seedDescription}
@@ -741,7 +788,8 @@ PROMPT;
$stmt = $ragDb->prepare("
SELECT d.id, d.title, d.source_url, d.authority_type,
d.publication_date, d.source_id, d.jurisdiction
d.publication_date, d.source_id, d.jurisdiction,
d.summary, LEFT(d.content, 4000) AS content_excerpt
FROM documents d
WHERE d.id IN ({$ph})
");
@@ -759,9 +807,30 @@ PROMPT;
'publication_date' => $row['publication_date'] ?? null,
'corpus_source_name' => 'Do Better Legal',
'source_id' => $sid,
'summary' => $row['summary'] ?? null,
'content_excerpt' => (string)($row['content_excerpt'] ?? ''),
'title' => (string)($row['title'] ?? ''),
];
}
// Lazily generate summaries for documents that don't have one yet
$unsummarized = array_filter($docMeta, fn($m) => $m['summary'] === null && $m['content_excerpt'] !== '');
foreach ($unsummarized as $dId => $m) {
try {
$raw = $this->azure->chatText([
['role' => 'system', 'content' => 'Return only a concise 3-4 sentence summary. No preamble.'],
['role' => 'user', 'content' => "Summarise this Norwegian family law document for a legal researcher.\nFocus on: which legal provisions it covers, its authority type, and what questions it helps answer.\n\nTitle: {$m['title']}\n\nContent:\n{$m['content_excerpt']}"],
], ['temperature' => 0.1, 'max_tokens' => 200, 'timeout' => 25]);
$summary = trim($raw);
if ($summary !== '') {
$ragDb->prepare("UPDATE documents SET summary = ? WHERE id = ?")->execute([$summary, $dId]);
$docMeta[$dId]['summary'] = $summary;
}
} catch (Throwable $e) {
error_log('DBN hydrateSourceUrls summary gen failed for doc ' . $dId . ': ' . $e->getMessage());
}
}
// Enrich with corpus source name from bnl_admin.corpus_sources
if (!empty($sourceIds)) {
$uSids = array_values(array_unique($sourceIds));
@@ -795,6 +864,7 @@ PROMPT;
$chunk['authority_label'] = $m['authority_label'] ?? $chunk['authority_label'];
$chunk['corpus_source_name'] = $m['corpus_source_name'] ?? null;
$chunk['publication_date'] = $m['publication_date'] ?? null;
$chunk['summary'] = $m['summary'] ?? null;
}
unset($chunk);
}
@@ -861,14 +931,16 @@ PROMPT;
}
private function synthesise(
string $seedDescription,
string $brief,
array $subQuestions,
array $numberedSources,
string $engine,
string $language,
float $temperature,
string $advocateRole = ''
string $seedDescription,
string $brief,
array $subQuestions,
array $numberedSources,
string $engine,
string $language,
float $temperature,
string $advocateRole = '',
?array $priorContext = null,
string $branchNotes = ''
): array {
$locale = $language === 'no' ? 'Norwegian' : 'English';
@@ -891,6 +963,23 @@ PROMPT;
];
}
$priorContextSection = '';
if (!empty($priorContext)) {
$prior = [];
if (!empty($priorContext['original_query'])) {
$prior[] = 'Original research question: ' . mb_substr((string)$priorContext['original_query'], 0, 300, 'UTF-8');
}
if (!empty($priorContext['brief_summary'])) {
$prior[] = "Key findings from prior research:\n" . mb_substr((string)$priorContext['brief_summary'], 0, 600, 'UTF-8');
}
if ($branchNotes !== '') {
$prior[] = 'Researcher notes: ' . mb_substr($branchNotes, 0, 300, 'UTF-8');
}
if ($prior) {
$priorContextSection = "\nBackground from prior research:\n" . implode("\n", $prior) . "\n";
}
}
$sourcesContext = [];
foreach ($numberedSources as $s) {
$sourcesContext[] = sprintf(
@@ -926,7 +1015,7 @@ PROMPT;
$prompt = <<<PROMPT
You are Do Better Norge Legal Tools producing a legal preparation brief in {$locale}.
Your client: {$advocateRole}
{$priorContextSection}
You MUST ground every claim in the numbered sources below using inline `[n]` citation markers. Do NOT invent statutes, paragraph numbers, case names, dates, or parties.
User input:
@@ -961,7 +1050,7 @@ PROMPT;
} else {
$prompt = <<<PROMPT
You are Do Better Norge Legal Tools running a deep-research synthesis. You MUST ground every claim in the numbered sources below, using inline `[n]` citation markers that map to the source list. Do NOT cite a source you did not use. Do NOT invent statutes, paragraph numbers, case names, dates, or parties.
{$priorContextSection}
User input:
{$seedDescription}