Deep Research v2: exclude marketing site, deep-link sources, per-agent reports

Three user-flagged issues after the first real run with a 920KB sakkyndig PDF:

1. dobetternorge.no marketing-website chunks leaked into the retrieval pool.
   ClientRagPipeline::searchAll defaults include_beta_website=true; we now
   pass false for both website flags, AND defensively drop any returned
   chunk whose source_name contains "website" or title contains
   "dobetternorge.no" before it can pollute synthesis.

2. Brief returned was "just a paragraph". Bumped synthesis max_tokens
   2200→3200, raised timeout 120→180s, and rewrote the prompt to require
   400-900 words with min 4 paragraphs when source_count>=3, covering EACH
   sub-question in its own paragraph. Now also passes authority + jurisdiction
   into the sources block so the model can pinpoint statutes correctly.

3. No way to see what each "sub-question agent" researched or click through
   to the source articles. Restructured the results panel so per-sub-question
   report cards now render ABOVE the synthesised brief. Each report shows the
   question, the rationale, and the top 3 retrieved sources for that sub-Q
   with title→deep link + 1-line excerpt. The synthesised brief follows, and a
   consolidated numbered sources list sits at the bottom, with titles as deep links too.

Deep-link construction: source_url is hydrated via dbnV6QueryDocumentMeta
in a single batched call after retrieval. For Lovdata sources with a
section_title containing §<n>, the link is path-anchored to that section
(/§43). For other hosts (HUDOC, Regjeringen, Bufdir, etc.) we link to the
document root URL.

Telemetry: trace_metadata now carries retrieval_counts {raw_corpus,
filtered_website, post_filter_corpus, raw_upload, after_dedupe, after_topk}
so future regressions are diagnosable from the metadata.jsonl log alone.
The completion status pill surfaces the corpus/website/upload split.
This commit is contained in:
2026-05-15 11:12:13 +02:00
parent a1a7f442a7
commit e130db8119
3 changed files with 351 additions and 39 deletions
+124
View File
@@ -2176,3 +2176,127 @@ p {
.dr-source-card { grid-template-columns: 32px 1fr; }
.dr-source-aside { display: none; }
}
/* Per-sub-question agent report cards (v2).
   Markup is produced by renderSubQReport(): one .dr-subq-report card per
   sub-question, all wrapped in a single .dr-subq-list container. */
/* Container: stacks the report cards vertically with a fixed gap. */
.dr-subq-list {
display: grid;
gap: 10px;
}
/* One report card: bordered, rounded box on a light background. */
.dr-subq-report {
border: 1px solid var(--line);
border-radius: 8px;
padding: 12px 13px;
background: #fbfcfe;
}
/* Card header: two columns — the index badge (auto width) and the
   question/rationale body (takes the remaining width). */
.dr-subq-report__head {
display: grid;
grid-template-columns: auto 1fr;
gap: 10px;
align-items: start;
margin-bottom: 10px;
}
/* Index badge (shows the sub-question id, e.g. "q1"): fully rounded pill
   with centred, uppercase, tabular-figure text. */
.dr-subq-report__index {
display: inline-flex;
align-items: center;
justify-content: center;
min-width: 30px;
height: 24px;
padding: 0 8px;
border-radius: 999px;
background: var(--soft-teal);
color: var(--teal-dark);
font-weight: 800;
font-variant-numeric: tabular-nums;
font-size: 0.78rem;
letter-spacing: 0.04em;
text-transform: uppercase;
}
/* Sub-question text: bold, primary ink colour. */
.dr-subq-report__question {
font-weight: 700;
color: var(--ink);
line-height: 1.4;
}
/* Optional rationale line under the question: smaller and muted. */
.dr-subq-report__rationale {
margin-top: 4px;
color: var(--muted);
font-size: 0.86rem;
line-height: 1.45;
}
/* Compact source list shown inside each sub-question report card.
   Rendered as a <ul>; default list markers and spacing are removed. */
.dr-mini-source-list {
list-style: none;
padding: 0;
margin: 0;
display: grid;
gap: 6px;
}
/* One source row: fixed 32px column for the [n] marker, remaining width
   for the title/meta/excerpt body. */
.dr-mini-source {
display: grid;
grid-template-columns: 32px 1fr;
gap: 8px;
align-items: start;
padding: 8px 10px;
background: #fff;
border: 1px solid var(--line);
border-radius: 6px;
}
/* Placeholder row used when a sub-question has no sources: applied together
   with .dr-mini-source, so display:block overrides the two-column grid. */
.dr-mini-source--empty {
display: block;
color: var(--muted);
padding: 8px 10px;
}
/* Bracketed source number, e.g. "[3]". */
.dr-mini-source__n {
font-variant-numeric: tabular-nums;
color: var(--coral);
font-weight: 800;
font-size: 0.85rem;
}
/* Source title. The same class is used on an <a> (when a link exists) and
   on a <span> (when it does not), so underline is removed by default. */
.dr-mini-source__title {
display: inline-block;
font-weight: 700;
color: var(--ink);
text-decoration: none;
line-height: 1.35;
}
/* Hover affordance only for the linked (<a>) variant of the title. */
a.dr-mini-source__title:hover { color: var(--teal-dark); text-decoration: underline; }
/* Meta line under the title (section, authority label, upload marker). */
.dr-mini-source__meta {
color: var(--muted);
font-size: 0.78rem;
margin-top: 3px;
}
/* Short excerpt of the retrieved chunk. */
.dr-mini-source__excerpt {
color: var(--muted);
font-size: 0.86rem;
line-height: 1.45;
margin-top: 5px;
}
/* Small "↗" glyph appended inside source title links to signal that the
   link opens in a new tab; sized relative to the surrounding text (em). */
.dr-external-link {
display: inline-block;
color: var(--teal);
font-size: 0.8em;
margin-left: 3px;
vertical-align: 1px;
}
/* Title link inside a full source card: looks like plain title text until
   hovered. */
a.dr-source-title-link {
color: var(--ink);
text-decoration: none;
}
/* Hover state: recolour and underline to reveal that the title is a link. */
a.dr-source-title-link:hover {
color: var(--teal-dark);
text-decoration: underline;
}
+76 -15
View File
@@ -346,8 +346,12 @@
lastResult = finalResult;
const meta = finalResult.trace_metadata || {};
const rc = meta.retrieval_counts || {};
const countSummary = (rc.post_filter_corpus != null)
? `${rc.post_filter_corpus} corpus${rc.filtered_website ? ` (${rc.filtered_website} website filtered)` : ''}${rc.raw_upload ? ` + ${rc.raw_upload} upload` : ''}`
: `${meta.source_count || 0} sources`;
setStatus(
`Done in ${Math.round((finalResult.latency_ms || 0) / 1000)} s · ${meta.source_count || 0} sources · confidence ${meta.citation_confidence || '?'}`,
`Done in ${Math.round((finalResult.latency_ms || 0) / 1000)} s · ${countSummary} · confidence ${meta.citation_confidence || '?'}`,
'ok'
);
els.runButton.disabled = false;
@@ -425,19 +429,23 @@
const briefHtml = renderBrief(data.brief_markdown || '', sources);
const subQHtml = subs.length ? `
// Per-sub-question report cards — the "what each agent researched" view
const subQReportsHtml = subs.length ? `
<div class="dr-result-block">
<h3 style="margin:0 0 8px;font-size:1rem">Angles the agent explored</h3>
<ol style="padding-left:1.2em;margin:0;color:var(--muted);line-height:1.55">
${subs.map((sq) => `<li><strong style="color:var(--ink)">${escapeHtml(sq.question)}</strong>${sq.rationale ? `<br><small>${escapeHtml(sq.rationale)}</small>` : ''}</li>`).join('')}
</ol>
<div class="dr-sources-head">
<h3>What each sub-question agent researched</h3>
<small>${subs.length} sub-question${subs.length === 1 ? '' : 's'}, top 3 sources each</small>
</div>
<div class="dr-subq-list">
${subs.map((sq, i) => renderSubQReport(sq, i)).join('')}
</div>
</div>` : '';
const sourcesHtml = `
<div class="dr-result-block">
<div class="dr-sources-head">
<h3>Sources (${sources.length})</h3>
<small>Click a card to see the full chunk + scores</small>
<h3>All sources (${sources.length})</h3>
<small>Click a card to see the full chunk + scores · external link opens the original article</small>
</div>
<div class="dr-source-list">
${sources.map((s) => renderSourceCard(s)).join('')}
@@ -459,18 +467,20 @@
</div>` : '';
els.results.innerHTML = `
${subQReportsHtml}
<div class="dr-result-block">
<h3 style="margin:0 0 10px;font-size:1rem">Synthesised brief</h3>
<div class="dr-brief">${briefHtml}</div>
</div>
${subQHtml}
${sourcesHtml}
${uncertHtml}
${nextHtml}
`;
// Bind source-card click handlers + citation marker click handlers
els.results.querySelectorAll('[data-source-n]').forEach((node) => {
node.addEventListener('click', () => {
// Bind source-card click handlers (open modal) — but ignore clicks on inner <a>
els.results.querySelectorAll('.dr-source-card[data-source-n]').forEach((node) => {
node.addEventListener('click', (e) => {
if (e.target.closest('a')) return; // let anchor handle its own click
const n = parseInt(node.dataset.sourceN, 10);
const src = sources.find((s) => s.n === n);
if (src) {
@@ -479,6 +489,52 @@
}
});
});
// Bind inline citation markers in brief → flash + open modal
els.results.querySelectorAll('.dr-cite[data-source-n]').forEach((node) => {
node.addEventListener('click', (e) => {
if (e.target.closest('a')) return;
const n = parseInt(node.dataset.sourceN, 10);
const src = sources.find((s) => s.n === n);
if (src) {
flashSource(n);
}
});
});
}
/**
 * Render one per-sub-question "agent report" card as an HTML string.
 *
 * The card shows the sub-question id (falling back to "q<idx + 1>"), the
 * question text, an optional rationale, and one compact row per entry in
 * `sq.top_sources` (number, title, meta line, truncated excerpt). When there
 * are no sources a single placeholder row is rendered instead.
 *
 * Relies on the file-level helpers `escapeHtml` and `truncate`.
 *
 * @param {object} sq - Sub-question record. Fields read: `id`, `question`,
 *   `rationale`, `top_sources`.
 * @param {number} idx - Zero-based position of the sub-question; used only to
 *   build the fallback badge label.
 * @returns {string} HTML markup for the card.
 */
function renderSubQReport(sq, idx) {
  // Return the trimmed URL when it is safe to place in an href, else ''.
  // HTML-escaping alone does not neutralise `javascript:` / `data:` URLs, and
  // these values originate from retrieved or uploaded source metadata, so only
  // http(s) targets (or relative URLs, which resolve against an https base)
  // are ever turned into links. Parsing with URL() also catches obfuscated
  // schemes such as "java\tscript:".
  const safeHref = (raw) => {
    if (typeof raw !== 'string' || raw.trim() === '') return '';
    try {
      const { protocol } = new URL(raw, 'https://placeholder.invalid/');
      return protocol === 'http:' || protocol === 'https:' ? raw.trim() : '';
    } catch {
      return '';
    }
  };

  // Tolerate a missing or malformed source list instead of throwing mid-render.
  const top = Array.isArray(sq.top_sources) ? sq.top_sources : [];

  const sourceItems = top.length
    ? top.map((s) => {
      // Prefer the section-level deep link; fall back to the document URL
      // when the deep link is absent or rejected as unsafe.
      const link = safeHref(s.deep_link) || safeHref(s.source_url);
      const title = escapeHtml(s.title || 'Untitled');
      const titleHtml = link
        ? `<a href="${escapeHtml(link)}" target="_blank" rel="noopener" class="dr-mini-source__title">${title} <span class="dr-external-link" aria-hidden="true">↗</span></a>`
        : `<span class="dr-mini-source__title">${title}</span>`;

      const meta = [];
      if (s.section) meta.push(escapeHtml(s.section));
      if (s.authority_label) meta.push(escapeHtml(s.authority_label));
      if (s.source_origin === 'upload') meta.push('your upload');

      // `n` comes from the response payload, so escape it like every other field.
      const number = escapeHtml(String(s.n ?? '?'));

      return `<li class="dr-mini-source">
<span class="dr-mini-source__n">[${number}]</span>
<div class="dr-mini-source__body">
${titleHtml}
${meta.length ? `<div class="dr-mini-source__meta">${meta.join(' · ')}</div>` : ''}
<div class="dr-mini-source__excerpt">${escapeHtml(truncate(s.excerpt || '', 180))}</div>
</div>
</li>`;
    }).join('')
    : `<li class="dr-mini-source dr-mini-source--empty"><em>No sources retrieved for this sub-question.</em></li>`;

  const badge = escapeHtml(String(sq.id || `q${idx + 1}`));

  return `<div class="dr-subq-report">
<div class="dr-subq-report__head">
<span class="dr-subq-report__index">${badge}</span>
<div class="dr-subq-report__body">
<div class="dr-subq-report__question">${escapeHtml(sq.question || '')}</div>
${sq.rationale ? `<div class="dr-subq-report__rationale">${escapeHtml(sq.rationale)}</div>` : ''}
</div>
</div>
<ul class="dr-mini-source-list">${sourceItems}</ul>
</div>`;
}
function flashSource(n) {
@@ -495,13 +551,18 @@
const score = s.reranker_score != null ? s.reranker_score : s.similarity;
const originTagClass = s.source_origin === 'upload' ? 'dr-source-tag dr-source-tag--upload' : 'dr-source-tag';
const originLabel = s.source_origin === 'upload' ? 'upload' : 'corpus';
return `<button type="button" class="dr-source-card" data-source-n="${s.n}">
const link = s.deep_link || s.source_url;
const titleHtml = link
? `<a href="${escapeHtml(link)}" target="_blank" rel="noopener" class="dr-source-title-link">${escapeHtml(s.title || 'Untitled')} <span class="dr-external-link" aria-hidden="true">↗</span></a>`
: `${escapeHtml(s.title || 'Untitled')}`;
return `<div class="dr-source-card" data-source-n="${s.n}" role="button" tabindex="0">
<span class="dr-source-number">${s.n}</span>
<div class="dr-source-body">
<div class="dr-source-title">${escapeHtml(s.title || 'Untitled')}</div>
<div class="dr-source-title">${titleHtml}</div>
${s.section ? `<div class="dr-source-meta"><span class="dr-source-tag">${escapeHtml(s.section)}</span></div>` : ''}
<div class="dr-source-meta">
<span class="${originTagClass}">${originLabel}</span>
${s.authority_label ? `<span class="dr-source-tag">${escapeHtml(s.authority_label)}</span>` : ''}
<span class="dr-source-tag dr-source-tag--score">${escapeHtml(s.package_or_corpus || '—')}</span>
${(s.matched_sub_questions || []).map((q) => `<span class="dr-source-tag">${escapeHtml(q)}</span>`).join('')}
</div>
@@ -511,7 +572,7 @@
<span>score<br><b>${score != null ? Number(score).toFixed(2) : '—'}</b></span>
${s.reranker_score != null && s.similarity != null ? `<span>sim<br><b>${Number(s.similarity).toFixed(2)}</b></span>` : ''}
</div>
</button>`;
</div>`;
}
// Markdown renderer — minimal: paragraphs, bold/italic, code, [n] citation badges