From e130db8119cf3cefeba4a6e89c813301c2f6a31e Mon Sep 17 00:00:00 2001 From: davegilligan Date: Fri, 15 May 2026 11:12:13 +0200 Subject: [PATCH] Deep Research v2: exclude marketing site, deep-link sources, per-agent reports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three user-flagged issues after the first real run with a 920KB sakkyndig PDF: 1. dobetternorge.no marketing-website chunks leaked into the retrieval pool. ClientRagPipeline::searchAll defaults include_beta_website=true; we now pass false for both website flags, AND defensively drop any returned chunk whose source_name contains "website" or title contains "dobetternorge.no" before it can pollute synthesis. 2. Brief returned was "just a paragraph". Bumped synthesis max_tokens 2200→3200, raised timeout 120→180s, and rewrote the prompt to require 400-900 words with min 4 paragraphs when source_count>=3, covering EACH sub-question in its own paragraph. Now also passes authority + jurisdiction into the sources block so the model can pinpoint statutes correctly. 3. No way to see what each "sub-question agent" researched or click through to the source articles. Restructured the results panel so per-sub-question report cards now render ABOVE the synthesised brief. Each report shows the question, the rationale, and the top 3 retrieved sources for that sub-Q with title→deep link + 1-line excerpt. Brief follows. Consolidated numbered sources list at the bottom, with titles as deep links too. Deep-link construction: source_url is hydrated via dbnV6QueryDocumentMeta in a single batched call after retrieval. For Lovdata sources with a section_title containing §, the link is path-anchored to that section (/§43). For other hosts (HUDOC, Regjeringen, Bufdir, etc.) we link to the document root URL. Telemetry: trace_metadata now carries retrieval_counts {raw_corpus, filtered_website, post_filter_corpus, raw_upload, after_dedupe, after_topk} so future regressions are diagnosable from the metadata.jsonl log alone. The completion status pill surfaces the corpus/website/upload split. --- assets/css/tools.css | 124 +++++++++++++++++++++++ assets/js/deep-research.js | 91 ++++++++++++++--- includes/DeepResearchAgent.php | 175 ++++++++++++++++++++++++++++----- 3 files changed, 351 insertions(+), 39 deletions(-) diff --git a/assets/css/tools.css b/assets/css/tools.css index cc1902f..3513758 100644 --- a/assets/css/tools.css +++ b/assets/css/tools.css @@ -2176,3 +2176,127 @@ p { .dr-source-card { grid-template-columns: 32px 1fr; } .dr-source-aside { display: none; } } + +/* Per-sub-question agent report cards (v2) */ +.dr-subq-list { + display: grid; + gap: 10px; +} + +.dr-subq-report { + border: 1px solid var(--line); + border-radius: 8px; + padding: 12px 13px; + background: #fbfcfe; +} + +.dr-subq-report__head { + display: grid; + grid-template-columns: auto 1fr; + gap: 10px; + align-items: start; + margin-bottom: 10px; +} + +.dr-subq-report__index { + display: inline-flex; + align-items: center; + justify-content: center; + min-width: 30px; + height: 24px; + padding: 0 8px; + border-radius: 999px; + background: var(--soft-teal); + color: var(--teal-dark); + font-weight: 800; + font-variant-numeric: tabular-nums; + font-size: 0.78rem; + letter-spacing: 0.04em; + text-transform: uppercase; +} + +.dr-subq-report__question { + font-weight: 700; + color: var(--ink); + line-height: 1.4; +} + +.dr-subq-report__rationale { + margin-top: 4px; + color: var(--muted); + font-size: 0.86rem; + line-height: 1.45; +} + +.dr-mini-source-list { + list-style: none; + padding: 0; + margin: 0; + display: grid; + gap: 6px; +} + +.dr-mini-source { + display: grid; + grid-template-columns: 32px 1fr; + gap: 8px; + align-items: start; + padding: 8px 10px; + background: #fff; + border: 1px solid var(--line); + border-radius: 6px; +} + +.dr-mini-source--empty { + display: block; + color: var(--muted); + padding: 8px 10px; +} + +.dr-mini-source__n { + font-variant-numeric: tabular-nums; + color: var(--coral); + font-weight: 800; + font-size: 0.85rem; +} + +.dr-mini-source__title { + display: inline-block; + font-weight: 700; + color: var(--ink); + text-decoration: none; + line-height: 1.35; +} + +a.dr-mini-source__title:hover { color: var(--teal-dark); text-decoration: underline; } + +.dr-mini-source__meta { + color: var(--muted); + font-size: 0.78rem; + margin-top: 3px; +} + +.dr-mini-source__excerpt { + color: var(--muted); + font-size: 0.86rem; + line-height: 1.45; + margin-top: 5px; +} + +.dr-external-link { + display: inline-block; + color: var(--teal); + font-size: 0.8em; + margin-left: 3px; + vertical-align: 1px; +} + +a.dr-source-title-link { + color: var(--ink); + text-decoration: none; +} + +a.dr-source-title-link:hover { + color: var(--teal-dark); + text-decoration: underline; +} diff --git a/assets/js/deep-research.js b/assets/js/deep-research.js index 39b51f6..16abde6 100644 --- a/assets/js/deep-research.js +++ b/assets/js/deep-research.js @@ -346,8 +346,12 @@ lastResult = finalResult; const meta = finalResult.trace_metadata || {}; + const rc = meta.retrieval_counts || {}; + const countSummary = (rc.post_filter_corpus != null) + ? `${rc.post_filter_corpus} corpus${rc.filtered_website ? ` (${rc.filtered_website} website filtered)` : ''}${rc.raw_upload ? ` + ${rc.raw_upload} upload` : ''}` + : `${meta.source_count || 0} sources`; setStatus( - `Done in ${Math.round((finalResult.latency_ms || 0) / 1000)} s · ${meta.source_count || 0} sources · confidence ${meta.citation_confidence || '?'}`, + `Done in ${Math.round((finalResult.latency_ms || 0) / 1000)} s · ${countSummary} · confidence ${meta.citation_confidence || '?'}`, 'ok' ); els.runButton.disabled = false; @@ -425,19 +429,23 @@ const briefHtml = renderBrief(data.brief_markdown || '', sources); - const subQHtml = subs.length ? ` + // Per-sub-question report cards — the "what each agent researched" view + const subQReportsHtml = subs.length ? `
-

Angles the agent explored

-
    - ${subs.map((sq) => `
  1. ${escapeHtml(sq.question)}${sq.rationale ? `
    ${escapeHtml(sq.rationale)}` : ''}
  2. `).join('')} -
+
+

What each sub-question agent researched

+ ${subs.length} sub-question${subs.length === 1 ? '' : 's'}, top 3 sources each +
+
+ ${subs.map((sq, i) => renderSubQReport(sq, i)).join('')} +
` : ''; const sourcesHtml = `
-

Sources (${sources.length})

- Click a card to see the full chunk + scores +

All sources (${sources.length})

+ Click a card to see the full chunk + scores · external link opens the original article
${sources.map((s) => renderSourceCard(s)).join('')} @@ -459,18 +467,20 @@
` : ''; els.results.innerHTML = ` + ${subQReportsHtml}
+

Synthesised brief

${briefHtml}
- ${subQHtml} ${sourcesHtml} ${uncertHtml} ${nextHtml} `; - // Bind source-card click handlers + citation marker click handlers - els.results.querySelectorAll('[data-source-n]').forEach((node) => { - node.addEventListener('click', () => { + // Bind source-card click handlers (open modal) — but ignore clicks on inner + els.results.querySelectorAll('.dr-source-card[data-source-n]').forEach((node) => { + node.addEventListener('click', (e) => { + if (e.target.closest('a')) return; // let anchor handle its own click const n = parseInt(node.dataset.sourceN, 10); const src = sources.find((s) => s.n === n); if (src) { @@ -479,6 +489,52 @@ } }); }); + // Bind inline citation markers in brief → flash + open modal + els.results.querySelectorAll('.dr-cite[data-source-n]').forEach((node) => { + node.addEventListener('click', (e) => { + if (e.target.closest('a')) return; + const n = parseInt(node.dataset.sourceN, 10); + const src = sources.find((s) => s.n === n); + if (src) { + flashSource(n); + } + }); + }); + } + + function renderSubQReport(sq, idx) { + const top = sq.top_sources || []; + const sourceItems = top.length + ? top.map((s) => { + const link = s.deep_link || s.source_url; + const titleHtml = link + ? `${escapeHtml(s.title || 'Untitled')} ` + : `${escapeHtml(s.title || 'Untitled')}`; + const meta = []; + if (s.section) meta.push(escapeHtml(s.section)); + if (s.authority_label) meta.push(escapeHtml(s.authority_label)); + if (s.source_origin === 'upload') meta.push('your upload'); + return `
  • + [${s.n ?? '?'}] +
    + ${titleHtml} + ${meta.length ? `
    ${meta.join(' · ')}
    ` : ''} +
    ${escapeHtml(truncate(s.excerpt || '', 180))}
    +
    +
  • `; + }).join('') + : `
  • No sources retrieved for this sub-question.
  • `; + + return `
    +
    + ${escapeHtml(sq.id || ('q' + (idx + 1)))} +
    +
    ${escapeHtml(sq.question || '')}
    + ${sq.rationale ? `
    ${escapeHtml(sq.rationale)}
    ` : ''} +
    +
    +
      ${sourceItems}
    +
    `; } function flashSource(n) { @@ -495,13 +551,18 @@ const score = s.reranker_score != null ? s.reranker_score : s.similarity; const originTagClass = s.source_origin === 'upload' ? 'dr-source-tag dr-source-tag--upload' : 'dr-source-tag'; const originLabel = s.source_origin === 'upload' ? 'upload' : 'corpus'; - return ``; +
    `; } // Markdown renderer — minimal: paragraphs, bold/italic, code, [n] citation badges diff --git a/includes/DeepResearchAgent.php b/includes/DeepResearchAgent.php index 610c50b..2a1ed99 100644 --- a/includes/DeepResearchAgent.php +++ b/includes/DeepResearchAgent.php @@ -182,6 +182,9 @@ final class DbnDeepResearchAgent $rawPool = []; $retrievalWarnings = 0; + $rawCorpusCount = 0; + $rawUploadCount = 0; + $filteredOutCount = 0; foreach ($retrievalQueries as $idx => $sq) { if ($emit) { $emit('subq', [ @@ -197,13 +200,15 @@ final class DbnDeepResearchAgent $controls['chunk_limit'], null, [ - 'search_private' => false, - 'search_shared' => true, - 'package_ids' => [(int)$package['id']], - 'shared_doc_ids' => $sharedDocIds, - 'chunk_limit' => $controls['chunk_limit'], - 'search_method' => 'hybrid', - 'reranker_enabled' => true, + 'search_private' => false, + 'search_shared' => true, + 'package_ids' => [(int)$package['id']], + 'shared_doc_ids' => $sharedDocIds, + 'chunk_limit' => $controls['chunk_limit'], + 'search_method' => 'hybrid', + 'reranker_enabled' => true, + 'include_beta_website' => false, + 'include_primary_website'=> false, ] ); } catch (Throwable $e) { @@ -211,13 +216,19 @@ final class DbnDeepResearchAgent $corpusChunks = []; $retrievalWarnings++; } + $rawCorpusCount += count($corpusChunks); foreach ($corpusChunks as $chunk) { + if ($this->isWebsiteChunk($chunk)) { + $filteredOutCount++; + continue; + } $rawPool[] = $this->normalizeCorpusChunk($chunk, $sq['id']); } // Upload chunk retrieval via cosine sim if (!empty($this->uploadVecs)) { $uploadHits = $this->retrieveFromUploads($sq['question'], $controls['chunk_limit'], $controls['similarity_threshold']); + $rawUploadCount += count($uploadHits); foreach ($uploadHits as $hit) { $hit['matched_sub_questions'] = [$sq['id']]; $rawPool[] = $hit; @@ -229,17 +240,32 @@ final class DbnDeepResearchAgent $this->stepTimings['retrieval'] = $this->elapsedMs($stepStart); $retrievalStatus = $retrievalWarnings > 0 ? 'warning' : 'complete'; $retrievalDetail = sprintf( - '%d sub-question(s) × hybrid + RRF + rerank → %d raw chunks → %d unique after dedupe.', + '%d sub-question(s) × hybrid + RRF + rerank → %d corpus chunks (%d filtered) + %d upload hits → %d unique after dedupe.', count($retrievalQueries), - count($rawPool), + $rawCorpusCount, + $filteredOutCount, + $rawUploadCount, count($merged) ); $emitStep('retrieval', 'Retrieval', $retrievalDetail, $retrievalStatus); // Cap pool to reranker top-K for synthesis $synthesisPool = array_slice($merged, 0, $controls['reranker_top_k']); + + // Hydrate corpus sources with source_url + authority_label via batched dbn_v6 query + $this->hydrateSourceUrls($synthesisPool); + $numberedSources = $this->numberSources($synthesisPool); + $retrievalCounts = [ + 'raw_corpus' => $rawCorpusCount, + 'filtered_website' => $filteredOutCount, + 'post_filter_corpus' => $rawCorpusCount - $filteredOutCount, + 'raw_upload' => $rawUploadCount, + 'after_dedupe' => count($merged), + 'after_topk' => count($numberedSources), + ]; + // STEP 6: Synthesis $synthesisEngineLabel = $engine === 'azure_full' ? 'Azure gpt-4o' : ($engine === 'gpu' ? 'GPU qwen2.5:14b' : 'Azure gpt-4o-mini'); $emitRunning('synthesis', 'Synthesis', sprintf('Synthesising cited brief with %s — this is the slowest step…', $synthesisEngineLabel)); @@ -270,18 +296,29 @@ final class DbnDeepResearchAgent $confidence === 'low' ? 'warning' : 'complete' ); - // Stitch sub-question chunk_ids + // Stitch sub-question chunk_ids + top_sources (top 3 sources matched by each sub-Q) $subQOut = []; foreach ($retrievalQueries as $sq) { $matchedChunks = array_values(array_filter( $numberedSources, fn(array $s) => in_array($sq['id'], $s['matched_sub_questions'] ?? [], true) )); + $topSources = array_slice($matchedChunks, 0, 3); $subQOut[] = [ - 'id' => $sq['id'], - 'question' => $sq['question'], - 'rationale' => $sq['rationale'] ?? '', - 'chunk_ids' => array_values(array_map(fn(array $s) => $s['chunk_id'], $matchedChunks)), + 'id' => $sq['id'], + 'question' => $sq['question'], + 'rationale' => $sq['rationale'] ?? '', + 'chunk_ids' => array_values(array_map(fn(array $s) => $s['chunk_id'], $matchedChunks)), + 'top_sources' => array_map(fn(array $s) => [ + 'n' => $s['n'] ?? null, + 'title' => $s['title'] ?? '', + 'section' => $s['section'] ?? null, + 'deep_link' => $s['deep_link'] ?? $s['source_url'] ?? null, + 'source_url' => $s['source_url'] ?? null, + 'source_origin' => $s['source_origin'] ?? 'corpus', + 'authority_label'=> $s['authority_label'] ?? null, + 'excerpt' => $s['excerpt'] ?? '', + ], $topSources), ]; } @@ -305,6 +342,7 @@ final class DbnDeepResearchAgent 'engine_used' => $engine, 'citation_confidence' => $confidence, 'elapsed_ms_per_step' => $this->stepTimings, + 'retrieval_counts' => $retrievalCounts, 'slices_active' => array_keys(array_filter($sliceSelectionNormalized)), ], 'disclaimer' => dbnToolsDisclaimer($language), @@ -553,7 +591,7 @@ PROMPT; 'chunk_id' => isset($chunk['id']) ? (int)$chunk['id'] : null, 'title' => (string)($chunk['document_title'] ?? $chunk['title'] ?? 'Untitled source'), 'section' => $chunk['section_title'] ?? null, - 'package_or_corpus' => (string)($chunk['source_name'] ?? $chunk['source_type'] ?? 'Do Better Norge'), + 'package_or_corpus' => (string)($chunk['source_name'] ?? $chunk['source_type'] ?? 'Do Better Legal'), 'excerpt' => dbnToolsExcerpt((string)($chunk['content'] ?? ''), 620), 'chunk_text' => (string)($chunk['content'] ?? ''), 'similarity' => $similarity, @@ -562,10 +600,90 @@ PROMPT; 'source_origin' => 'corpus', 'authority_type' => $chunk['authority_type'] ?? null, 'jurisdiction' => $chunk['jurisdiction'] ?? null, + 'publication_year' => $chunk['publication_year'] ?? null, + // Filled in later by hydrateSourceUrls() + 'source_url' => null, + 'deep_link' => null, + 'authority_label' => null, + 'corpus_source_name'=> null, + 'publication_date' => null, 'matched_sub_questions' => [$subQId], ]; } + /** + * Defensive post-filter: drop any chunk that smells like a marketing-website hit + * (dobetternorge.no marketing pages have source_group 'website-primary'/'website-beta' + * but the chunk payload only carries `source_name` — use a name+title regex check). + */ + private function isWebsiteChunk(array $chunk): bool + { + $name = strtolower((string)($chunk['source_name'] ?? '')); + $title = strtolower((string)($chunk['document_title'] ?? $chunk['title'] ?? '')); + if ($name === '') return false; + // Trusted shared-corpus packages do not contain the word 'website'. Marketing + // sources are explicitly labelled with source_group=website-primary/beta upstream. + if (str_contains($name, 'website')) return true; + if (str_contains($title, 'dobetternorge.no')) return true; + if (preg_match('/^(homepage|landing|about |contact )/i', $title)) return true; + return false; + } + + /** + * Hydrate the synthesisPool in place with source_url/deep_link/authority_label/etc. + * One batched dbn_v6 query for all unique document_ids. + */ + private function hydrateSourceUrls(array &$pool): void + { + $docIds = []; + foreach ($pool as $chunk) { + if (($chunk['source_origin'] ?? 'corpus') !== 'corpus') continue; + $docId = (int)($chunk['document_id'] ?? 0); + if ($docId > 0) $docIds[$docId] = true; + } + if (empty($docIds)) return; + + try { + $meta = dbnV6QueryDocumentMeta(dbnToolsDb(), dbnToolsRagDb(), array_keys($docIds)); + } catch (Throwable $e) { + error_log('DBN deep research hydrateSourceUrls failed: ' . $e->getMessage()); + return; + } + + foreach ($pool as &$chunk) { + if (($chunk['source_origin'] ?? 'corpus') !== 'corpus') continue; + $docId = (int)($chunk['document_id'] ?? 0); + if (!$docId || !isset($meta[$docId])) continue; + $m = $meta[$docId]; + $sourceUrl = $m['source_url'] ?? null; + $chunk['source_url'] = $sourceUrl; + $chunk['deep_link'] = $this->buildDeepLink($sourceUrl, $chunk['section'] ?? null); + $chunk['authority_label'] = $m['authority_label'] ?? $chunk['authority_label']; + $chunk['corpus_source_name'] = $m['corpus_source_name'] ?? null; + $chunk['publication_date'] = $m['publication_date'] ?? null; + } + unset($chunk); + } + + /** + * Construct a clickable URL into the original article. Lovdata supports + * path-style section anchors (e.g. /§43). For other hosts we return the + * document root URL. + */ + private function buildDeepLink(?string $sourceUrl, ?string $sectionTitle): ?string + { + if (!$sourceUrl) return null; + $sourceUrl = trim($sourceUrl); + if ($sourceUrl === '') return null; + + if (preg_match('~^https?://lovdata\.no/~i', $sourceUrl) + && $sectionTitle + && preg_match('/§\s?(\d+[A-Za-z\-]?)/u', $sectionTitle, $m)) { + return rtrim($sourceUrl, '/') . '/§' . $m[1]; + } + return $sourceUrl; + } + private function mergeAndDedupe(array $rawPool, int $cap): array { $byKey = []; @@ -636,12 +754,14 @@ PROMPT; $sourcesContext = []; foreach ($numberedSources as $s) { $sourcesContext[] = sprintf( - "[%d] (%s) %s%s\n Corpus: %s\n Excerpt: %s", + "[%d] (%s) %s%s\n Corpus: %s\n Authority: %s | Jurisdiction: %s\n Excerpt: %s", $s['n'], $s['source_origin'] === 'upload' ? 'uploaded doc' : 'corpus', $s['title'], !empty($s['section']) ? ' — ' . $s['section'] : '', $s['package_or_corpus'], + $s['authority_label'] ?? ($s['authority_type'] ?? 'n/a'), + $s['jurisdiction'] ?? 'n/a', $s['excerpt'] ); } @@ -657,6 +777,11 @@ PROMPT; $subQText = "\nSub-questions explored:\n" . implode("\n", $lines); } + $sourceCount = count($numberedSources); + $lengthGuidance = $sourceCount >= 3 + ? '400-900 words, minimum 4 paragraphs, with clear paragraph breaks. Cover EACH sub-question above in its own paragraph.' + : '250-450 words, 2-3 short paragraphs. Note when evidence is thin.'; + $prompt = <<= 3)"], + "next_practical_step": "one concrete next action the user can take to strengthen the case or close a gap" } Rules: - Every factual claim in `brief_markdown` must end with one or more `[n]` markers. -- If no source supports a point, omit the point. +- If no source supports a point, omit the point — DO NOT speculate. +- Prefer pinpointing statute sections (e.g. "Barneloven §43") and case names verbatim from the source excerpts. +- When multiple sources support the same point, cite all of them (e.g. `[2,4]`). - Respond in {$locale}. -- Output valid JSON only — no markdown fences around the JSON. +- Output valid JSON only — no markdown fences around the JSON object itself. PROMPT; $messages = [ ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'], ['role' => 'user', 'content' => $prompt], ]; - $opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 2200, 'timeout' => 120]; + $opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3200, 'timeout' => 180]; try { if ($engine === 'gpu') {