Add Deep Research tool — agent + rank/rerank RAG

New surface at /deep-research.php where the user pastes a question or
uploads PDF/DOCX/TXT case files and a LLM-orchestrated agent researches
the Do Better Norge legal corpus from 3-5 angles, with hybrid retrieval,
cross-encoder rerank, and synthesis that emits an inline-[n]-cited
markdown brief plus a numbered sources panel.

Uploaded documents are chunked + embedded in memory only (nomic-embed-text
via LiteLLM) and searched alongside the shared corpus during the same
request — never persisted to disk, DB, or Qdrant.

Reuses ClientRagPipeline::searchAll (hybrid + rerank), dbnV6 slice
helpers, and the existing extract.php text-extraction logic via a new
dbnToolsExtractUploadedFile() helper. Also adds dbnToolsCallGpuLlm()
helper in bootstrap.php — fixes a latent bug where LegalTools.php
was already calling that name with no definition.

Search.php is unchanged.
This commit is contained in:
2026-05-15 10:30:47 +02:00
parent 55e11cb649
commit 4cbe0a4ac4
10 changed files with 2119 additions and 125 deletions
+481
View File
@@ -0,0 +1,481 @@
/* deep-research.js — page-scoped UI for /deep-research.php */
(function () {
'use strict';
const els = {};
let lang = 'en';
let uploadFiles = [];
let lastResult = null;
const SLICE_DEFS = [
{ id: 'family_core', label: 'Family Law Core' },
{ id: 'child_welfare', label: 'Child Welfare' },
{ id: 'echr_hague', label: 'ECHR and Hague' },
{ id: 'broader_legal', label: 'Broader Legal Support' },
];
const STEP_LABELS = [
'Query interpretation',
'Query expansion',
'Slice resolution',
'Upload indexing',
'Retrieval',
'Synthesis',
'Citation confidence',
];
document.addEventListener('DOMContentLoaded', () => {
if (!document.body.dataset.activeTool || document.body.dataset.activeTool !== 'deep-research') return;
Object.assign(els, {
form: document.getElementById('deepResearchForm'),
input: document.getElementById('drInput'),
status: document.getElementById('drStatus'),
runButton: document.getElementById('drRunButton'),
results: document.getElementById('drResults'),
traceList: document.getElementById('traceList'),
slices: Array.from(document.querySelectorAll('.dr-slice')),
langButtons: Array.from(document.querySelectorAll('#drLangSwitcher .lang-btn')),
engineRadios: Array.from(document.querySelectorAll('input[name="drEngine"]')),
subQ: document.getElementById('drSubQ'),
subQVal: document.getElementById('drSubQValue'),
chunkLimit: document.getElementById('drChunkLimit'),
chunkLimitVal: document.getElementById('drChunkLimitValue'),
sim: document.getElementById('drSim'),
simVal: document.getElementById('drSimValue'),
topK: document.getElementById('drTopK'),
topKVal: document.getElementById('drTopKValue'),
temp: document.getElementById('drTemp'),
tempVal: document.getElementById('drTempValue'),
uploadZone: document.getElementById('drUploadZone'),
uploadInput: document.getElementById('drUploadInput'),
uploadPrompt: document.getElementById('drUploadPrompt'),
uploadFileInfo: document.getElementById('drUploadFileInfo'),
uploadFileList: document.getElementById('drUploadFileList'),
uploadClear: document.getElementById('drUploadClear'),
modal: document.getElementById('drSourceModal'),
modalClose: document.getElementById('drSourceModalClose'),
modalTitle: document.getElementById('drSourceModalTitle'),
modalEyebrow: document.getElementById('drSourceModalEyebrow'),
modalMeta: document.getElementById('drSourceModalMeta'),
modalText: document.getElementById('drSourceModalText'),
});
if (!els.form) return;
bindSlices();
bindLang();
bindRanges();
bindUpload();
bindModal();
els.form.addEventListener('submit', onSubmit);
// Pre-render placeholder trace
renderTrace(STEP_LABELS.map((label) => ({ label, detail: 'Waiting…', status: 'idle' })));
});
function bindSlices() {
els.slices.forEach((btn) => {
btn.addEventListener('click', () => {
const isOn = btn.classList.toggle('is-on');
btn.setAttribute('aria-pressed', isOn ? 'true' : 'false');
const badge = btn.querySelector('.dr-slice__badge');
if (badge) badge.textContent = isOn ? 'on' : 'off';
});
});
}
function bindLang() {
els.langButtons.forEach((b) => {
b.addEventListener('click', () => {
els.langButtons.forEach((x) => x.classList.remove('is-active'));
b.classList.add('is-active');
lang = b.dataset.lang || 'en';
});
});
}
function bindRanges() {
const pairs = [
[els.subQ, els.subQVal, (v) => v],
[els.chunkLimit, els.chunkLimitVal, (v) => v],
[els.sim, els.simVal, (v) => Number(v).toFixed(2)],
[els.topK, els.topKVal, (v) => v],
[els.temp, els.tempVal, (v) => Number(v).toFixed(2)],
];
pairs.forEach(([range, label, fmt]) => {
if (!range || !label) return;
const sync = () => { label.textContent = fmt(range.value); };
range.addEventListener('input', sync);
sync();
});
}
function bindUpload() {
if (!els.uploadZone) return;
const onFiles = (fileList) => {
const files = Array.from(fileList || []).slice(0, 5);
if (uploadFiles.length + files.length > 5) {
setStatus('At most 5 files can be uploaded per request.', 'error');
return;
}
files.forEach((f) => {
if (f.size > 4 * 1024 * 1024) {
setStatus(`${f.name} exceeds the 4 MB limit.`, 'error');
return;
}
const ext = (f.name.split('.').pop() || '').toLowerCase();
if (!['pdf', 'docx', 'txt'].includes(ext)) {
setStatus(`${f.name} is not a supported file type.`, 'error');
return;
}
uploadFiles.push(f);
});
renderUploadList();
};
els.uploadInput.addEventListener('change', (e) => onFiles(e.target.files));
els.uploadZone.addEventListener('dragover', (e) => { e.preventDefault(); els.uploadZone.classList.add('is-drop'); });
els.uploadZone.addEventListener('dragleave', () => els.uploadZone.classList.remove('is-drop'));
els.uploadZone.addEventListener('drop', (e) => {
e.preventDefault();
els.uploadZone.classList.remove('is-drop');
onFiles(e.dataTransfer?.files);
});
els.uploadClear?.addEventListener('click', () => {
uploadFiles = [];
els.uploadInput.value = '';
renderUploadList();
});
}
function renderUploadList() {
if (!uploadFiles.length) {
els.uploadFileInfo.classList.add('is-hidden');
els.uploadPrompt.classList.remove('is-hidden');
return;
}
els.uploadPrompt.classList.add('is-hidden');
els.uploadFileInfo.classList.remove('is-hidden');
els.uploadFileList.innerHTML = uploadFiles.map((f, i) => {
const kb = (f.size / 1024).toFixed(0);
return `<li><span class="upload-filename">${escapeHtml(f.name)}</span><span class="upload-chars">${kb} KB</span></li>`;
}).join('');
}
function bindModal() {
els.modalClose?.addEventListener('click', closeModal);
els.modal?.addEventListener('click', (e) => {
if (e.target === els.modal) closeModal();
});
document.addEventListener('keydown', (e) => {
if (e.key === 'Escape' && els.modal && !els.modal.classList.contains('is-hidden')) closeModal();
});
}
function closeModal() {
els.modal?.classList.add('is-hidden');
}
function openModal(source) {
if (!source) return;
els.modalEyebrow.textContent = source.source_origin === 'upload' ? 'Uploaded file' : 'Corpus source';
els.modalTitle.textContent = source.title || 'Source';
const metaRows = [
['Number', `[${source.n}]`],
source.section ? ['Section', source.section] : null,
['Corpus / package', source.package_or_corpus || '—'],
source.authority_type ? ['Authority', source.authority_type] : null,
source.jurisdiction ? ['Jurisdiction', source.jurisdiction] : null,
source.similarity != null ? ['Similarity', String(source.similarity)] : null,
source.reranker_score != null ? ['Rerank score', String(source.reranker_score)] : null,
source.matched_sub_questions?.length ? ['Matched sub-Q', source.matched_sub_questions.join(', ')] : null,
].filter(Boolean);
els.modalMeta.innerHTML = '<dl>' + metaRows.map(([k, v]) => `<dt>${escapeHtml(k)}</dt><dd>${escapeHtml(String(v))}</dd>`).join('') + '</dl>';
els.modalText.textContent = source.chunk_text || source.excerpt || '';
els.modal.classList.remove('is-hidden');
}
function getSelectedSlices() {
const out = {};
SLICE_DEFS.forEach((s) => {
const btn = els.slices.find((b) => b.dataset.slice === s.id);
out[s.id] = !!(btn && btn.classList.contains('is-on'));
});
return out;
}
function getEngine() {
const checked = els.engineRadios.find((r) => r.checked);
return checked ? checked.value : 'azure_mini';
}
function getControls() {
return {
sub_q_count: parseInt(els.subQ.value, 10),
chunk_limit: parseInt(els.chunkLimit.value, 10),
similarity_threshold: parseFloat(els.sim.value),
reranker_top_k: parseInt(els.topK.value, 10),
temperature: parseFloat(els.temp.value),
};
}
async function onSubmit(e) {
e.preventDefault();
const query = (els.input.value || '').trim();
if (!query && uploadFiles.length === 0) {
setStatus('Type a question or upload a file before running deep research.', 'error');
return;
}
const slices = getSelectedSlices();
if (!Object.values(slices).some(Boolean)) {
setStatus('Enable at least one corpus slice.', 'error');
return;
}
setStatus('Running deep research…', 'busy');
els.runButton.disabled = true;
els.results.innerHTML = `<div class="empty-state"><h3>Working…</h3><p>The agent is expanding the question, retrieving from the corpus, and synthesising the brief. This usually takes 615 seconds.</p></div>`;
// Render placeholder trace with first step running
const placeholder = STEP_LABELS.map((label, i) => ({
label,
detail: i === 0 ? 'Running…' : 'Queued',
status: i === 0 ? 'running' : 'idle',
}));
renderTrace(placeholder);
const payload = {
query,
paste_text: '',
slices,
engine: getEngine(),
language: lang,
controls: getControls(),
};
let response;
try {
if (uploadFiles.length > 0) {
const form = new FormData();
form.append('payload', JSON.stringify(payload));
uploadFiles.forEach((f) => form.append('files[]', f));
response = await fetch('api/deep-research.php', { method: 'POST', body: form, credentials: 'same-origin' });
} else {
response = await fetch('api/deep-research.php', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload),
credentials: 'same-origin',
});
}
} catch (err) {
setStatus(`Network error: ${err.message || err}`, 'error');
els.runButton.disabled = false;
return;
}
let data = null;
try { data = await response.json(); } catch (_) {}
if (!response.ok || !data || data.ok === false) {
const msg = (data && data.error && data.error.message) || `Request failed (${response.status}).`;
setStatus(msg, 'error');
els.runButton.disabled = false;
renderTrace(placeholder.map((s, i) => i === 0 ? { ...s, status: 'error', detail: msg } : s));
return;
}
lastResult = data;
setStatus(`Done in ${data.latency_ms || 0} ms · ${data.trace_metadata?.source_count || 0} sources · confidence ${data.trace_metadata?.citation_confidence || '?'}`, 'ok');
els.runButton.disabled = false;
renderTrace(data.trace || []);
renderResults(data);
}
function setStatus(message, kind) {
els.status.textContent = message;
els.status.style.color = kind === 'error' ? '#b41e1e' : (kind === 'ok' ? 'var(--teal-dark)' : 'var(--muted)');
}
function renderTrace(steps) {
if (!els.traceList) return;
els.traceList.classList.add('is-rich');
els.traceList.innerHTML = steps.map((step, i) => {
const statusClass = step.status === 'running' ? 'is-running'
: step.status === 'complete' ? 'is-done'
: step.status === 'warning' ? 'is-warning'
: step.status === 'error' ? 'is-error'
: '';
const marker = step.status === 'complete' ? '✓'
: step.status === 'warning' ? '!'
: step.status === 'error' ? '×'
: (i + 1);
return `<li class="trace-step ${statusClass}">
<span class="trace-step__marker">${marker}</span>
<div>
<span class="trace-step__label">${escapeHtml(step.label || '')}</span>
<span class="trace-step__detail">${escapeHtml(step.detail || '')}</span>
</div>
</li>`;
}).join('');
}
function renderResults(data) {
const sources = data.sources || [];
const subs = data.sub_questions || [];
const briefHtml = renderBrief(data.brief_markdown || '', sources);
const subQHtml = subs.length ? `
<div class="dr-result-block">
<h3 style="margin:0 0 8px;font-size:1rem">Angles the agent explored</h3>
<ol style="padding-left:1.2em;margin:0;color:var(--muted);line-height:1.55">
${subs.map((sq) => `<li><strong style="color:var(--ink)">${escapeHtml(sq.question)}</strong>${sq.rationale ? `<br><small>${escapeHtml(sq.rationale)}</small>` : ''}</li>`).join('')}
</ol>
</div>` : '';
const sourcesHtml = `
<div class="dr-result-block">
<div class="dr-sources-head">
<h3>Sources (${sources.length})</h3>
<small>Click a card to see the full chunk + scores</small>
</div>
<div class="dr-source-list">
${sources.map((s) => renderSourceCard(s)).join('')}
</div>
</div>`;
const uncertHtml = (data.what_remains_uncertain || []).length ? `
<div class="dr-result-block">
<h3 style="margin:0 0 8px;font-size:0.95rem;color:var(--muted)">What remains uncertain</h3>
<ul style="padding-left:1.2em;margin:0;color:var(--muted);line-height:1.55">
${(data.what_remains_uncertain || []).map((u) => `<li>${escapeHtml(String(u))}</li>`).join('')}
</ul>
</div>` : '';
const nextHtml = data.next_practical_step ? `
<div class="dr-result-block">
<h3 style="margin:0 0 6px;font-size:0.95rem">Next practical step</h3>
<p style="margin:0;color:var(--ink);line-height:1.5">${escapeHtml(data.next_practical_step)}</p>
</div>` : '';
els.results.innerHTML = `
<div class="dr-result-block">
<div class="dr-brief">${briefHtml}</div>
</div>
${subQHtml}
${sourcesHtml}
${uncertHtml}
${nextHtml}
`;
// Bind source-card click handlers + citation marker click handlers
els.results.querySelectorAll('[data-source-n]').forEach((node) => {
node.addEventListener('click', () => {
const n = parseInt(node.dataset.sourceN, 10);
const src = sources.find((s) => s.n === n);
if (src) {
openModal(src);
flashSource(n);
}
});
});
}
function flashSource(n) {
document.querySelectorAll('.dr-source-card.is-highlight').forEach((c) => c.classList.remove('is-highlight'));
const target = document.querySelector(`.dr-source-card[data-source-n="${n}"]`);
if (target) {
target.classList.add('is-highlight');
target.scrollIntoView({ behavior: 'smooth', block: 'center' });
setTimeout(() => target.classList.remove('is-highlight'), 1800);
}
}
function renderSourceCard(s) {
const score = s.reranker_score != null ? s.reranker_score : s.similarity;
const originTagClass = s.source_origin === 'upload' ? 'dr-source-tag dr-source-tag--upload' : 'dr-source-tag';
const originLabel = s.source_origin === 'upload' ? 'upload' : 'corpus';
return `<button type="button" class="dr-source-card" data-source-n="${s.n}">
<span class="dr-source-number">${s.n}</span>
<div class="dr-source-body">
<div class="dr-source-title">${escapeHtml(s.title || 'Untitled')}</div>
${s.section ? `<div class="dr-source-meta"><span class="dr-source-tag">${escapeHtml(s.section)}</span></div>` : ''}
<div class="dr-source-meta">
<span class="${originTagClass}">${originLabel}</span>
<span class="dr-source-tag dr-source-tag--score">${escapeHtml(s.package_or_corpus || '—')}</span>
${(s.matched_sub_questions || []).map((q) => `<span class="dr-source-tag">${escapeHtml(q)}</span>`).join('')}
</div>
<p class="dr-source-excerpt">${escapeHtml(truncate(s.excerpt || '', 240))}</p>
</div>
<div class="dr-source-aside">
<span>score<br><b>${score != null ? Number(score).toFixed(2) : '—'}</b></span>
${s.reranker_score != null && s.similarity != null ? `<span>sim<br><b>${Number(s.similarity).toFixed(2)}</b></span>` : ''}
</div>
</button>`;
}
// Markdown renderer — minimal: paragraphs, bold/italic, code, [n] citation badges
function renderBrief(markdown, sources) {
if (!markdown) return '<p><em>No brief was returned.</em></p>';
const sourceSet = new Set((sources || []).map((s) => s.n));
const escaped = escapeHtml(markdown);
// Citation markers [1], [1,2], [1-3]
const withCites = escaped.replace(/\[(\d+(?:\s*[-,]\s*\d+)*)\]/g, (_, group) => {
const nums = expandCiteGroup(group);
return nums.map((n) => {
const known = sourceSet.has(n);
const cls = known ? 'dr-cite' : 'dr-cite';
return `<span class="${cls}" data-source-n="${n}" role="button" tabindex="0">${n}</span>`;
}).join('');
});
// Bold/italic
const withBold = withCites
.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>')
.replace(/(^|[^*])\*([^*]+)\*(?!\*)/g, '$1<em>$2</em>')
.replace(/`([^`]+)`/g, '<code>$1</code>');
// Paragraphs
const paragraphs = withBold.split(/\n{2,}/).map((p) => {
const t = p.trim();
if (!t) return '';
if (/^### /.test(t)) return `<h4 style="margin:14px 0 6px;color:var(--ink);font-size:1rem">${t.replace(/^### /, '')}</h4>`;
return `<p>${t.replace(/\n/g, '<br>')}</p>`;
}).join('');
return paragraphs;
}
function expandCiteGroup(group) {
const out = [];
group.split(',').forEach((part) => {
const range = part.trim().match(/^(\d+)\s*-\s*(\d+)$/);
if (range) {
const a = parseInt(range[1], 10);
const b = parseInt(range[2], 10);
for (let i = a; i <= b; i++) out.push(i);
} else {
const n = parseInt(part.trim(), 10);
if (!Number.isNaN(n)) out.push(n);
}
});
return Array.from(new Set(out));
}
function escapeHtml(s) {
return String(s)
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#039;');
}
function truncate(s, n) {
if (!s) return '';
if (s.length <= n) return s;
return s.slice(0, n - 1) + '…';
}
})();