diff --git a/api/deep-research.php b/api/deep-research.php
new file mode 100644
index 0000000..b54366a
--- /dev/null
+++ b/api/deep-research.php
@@ -0,0 +1,67 @@
+ 5) {
+ dbnToolsAbort('At most 5 files can be uploaded per request.', 413, 'too_many_files');
+ }
+ for ($i = 0; $i < $count; $i++) {
+ $file = [
+ 'name' => $_FILES['files']['name'][$i] ?? '',
+ 'type' => $_FILES['files']['type'][$i] ?? '',
+ 'tmp_name' => $_FILES['files']['tmp_name'][$i] ?? '',
+ 'error' => $_FILES['files']['error'][$i] ?? UPLOAD_ERR_NO_FILE,
+ 'size' => $_FILES['files']['size'][$i] ?? 0,
+ ];
+ $extracted = dbnToolsExtractUploadedFile($file);
+ $uploadedFiles[] = [
+ 'filename' => $extracted['filename'],
+ 'text' => $extracted['text'],
+ 'chars' => $extracted['chars'],
+ 'truncated' => $extracted['truncated'],
+ ];
+ }
+ }
+
+ return (new DbnDeepResearchAgent())->run(
+ $seedQuery,
+ $pastedText,
+ $uploadedFiles,
+ is_array($sliceInput) ? $sliceInput : [],
+ $engine,
+ $language,
+ $controls
+ );
+});
diff --git a/api/extract.php b/api/extract.php
index b3cd7e7..613ffdc 100644
--- a/api/extract.php
+++ b/api/extract.php
@@ -6,132 +6,16 @@ require_once __DIR__ . '/../includes/bootstrap.php';
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();
-const EXTRACT_MAX_BYTES = 4 * 1024 * 1024;
-const EXTRACT_TEXT_LIMIT = 128000;
-const EXTRACT_ALLOWED_EXTS = ['txt', 'pdf', 'docx'];
-
try {
if (empty($_FILES['file']) || !is_array($_FILES['file'])) {
dbnToolsError('No file was uploaded.', 422, 'missing_file');
}
- $file = $_FILES['file'];
- $errCode = (int)($file['error'] ?? UPLOAD_ERR_NO_FILE);
-
- if ($errCode !== UPLOAD_ERR_OK) {
- $msg = match ($errCode) {
- UPLOAD_ERR_INI_SIZE, UPLOAD_ERR_FORM_SIZE => 'The file exceeds the allowed size limit.',
- UPLOAD_ERR_NO_TMP_DIR => 'No temporary directory is available.',
- UPLOAD_ERR_CANT_WRITE => 'Unable to save the uploaded file.',
- default => 'File upload failed.',
- };
- dbnToolsError($msg, 422, 'upload_error');
- }
-
- $originalName = basename((string)($file['name'] ?? ''));
- $tmpPath = (string)($file['tmp_name'] ?? '');
- $size = (int)($file['size'] ?? 0);
-
- if (!is_uploaded_file($tmpPath)) {
- dbnToolsError('Invalid file upload.', 400, 'invalid_upload');
- }
- if ($size === 0) {
- dbnToolsError('The uploaded file is empty.', 422, 'file_empty');
- }
- if ($size > EXTRACT_MAX_BYTES) {
- dbnToolsError('File exceeds the 4 MB limit.', 413, 'file_too_large');
- }
-
- $ext = strtolower(pathinfo($originalName, PATHINFO_EXTENSION));
- if (!in_array($ext, EXTRACT_ALLOWED_EXTS, true)) {
- dbnToolsError('Unsupported file type. Upload a .pdf, .docx, or .txt file.', 422, 'unsupported_type');
- }
-
- $text = match ($ext) {
- 'txt' => extractTxt($tmpPath),
- 'pdf' => extractPdf($tmpPath),
- 'docx' => extractDocx($tmpPath),
- };
-
- $text = trim($text);
- if ($text === '') {
- dbnToolsError('No text could be extracted from this file.', 422, 'no_text');
- }
-
- $truncated = false;
- if (mb_strlen($text, 'UTF-8') > EXTRACT_TEXT_LIMIT) {
- $text = mb_substr($text, 0, EXTRACT_TEXT_LIMIT, 'UTF-8');
- $truncated = true;
- }
-
- dbnToolsRespond([
- 'ok' => true,
- 'text' => $text,
- 'filename' => $originalName,
- 'chars' => mb_strlen($text, 'UTF-8'),
- 'truncated' => $truncated,
- ]);
+ $result = dbnToolsExtractUploadedFile($_FILES['file']);
+ dbnToolsRespond($result);
} catch (DbnToolsHttpException $e) {
dbnToolsError($e->getMessage(), $e->status, $e->errorCode, $e->extra);
} catch (Throwable $e) {
error_log('DBN extract error: ' . $e->getMessage());
dbnToolsError('Text extraction failed.', 500, 'extract_error');
}
-
-function extractTxt(string $path): string
-{
- $content = file_get_contents($path);
- if ($content === false) {
- throw new DbnToolsHttpException('Unable to read the file.', 500, 'read_error');
- }
- return mb_convert_encoding($content, 'UTF-8', 'UTF-8, ISO-8859-1, Windows-1252');
-}
-
-function extractPdf(string $path): string
-{
- $cmd = 'pdftotext ' . escapeshellarg($path) . ' - 2>/dev/null';
- $output = shell_exec($cmd);
- if ($output === null || $output === false || trim($output) === '') {
- throw new DbnToolsHttpException(
- 'PDF text extraction failed. The file may be image-only or encrypted.',
- 422,
- 'pdf_extract_failed'
- );
- }
- return $output;
-}
-
-function extractDocx(string $path): string
-{
- $zip = new ZipArchive();
- $result = $zip->open($path);
- if ($result !== true) {
- throw new DbnToolsHttpException('Unable to open the .docx file.', 422, 'docx_open_failed');
- }
-
- $xml = $zip->getFromName('word/document.xml');
- $zip->close();
-
- if ($xml === false) {
- throw new DbnToolsHttpException('No document content found in this .docx file.', 422, 'docx_no_content');
- }
-
- $doc = new DOMDocument();
- libxml_use_internal_errors(true);
- $doc->loadXML($xml);
- libxml_clear_errors();
-
- $xpath = new DOMXPath($doc);
- $xpath->registerNamespace('w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main');
-
- $paragraphs = [];
- foreach ($xpath->query('//w:p') as $para) {
- $runs = [];
- foreach ($xpath->query('.//w:t', $para) as $t) {
- $runs[] = $t->textContent;
- }
- $paragraphs[] = implode('', $runs);
- }
-
- return implode("\n", $paragraphs);
-}
diff --git a/assets/css/tools.css b/assets/css/tools.css
index e3536ac..cc1902f 100644
--- a/assets/css/tools.css
+++ b/assets/css/tools.css
@@ -1701,3 +1701,478 @@ p {
font-weight: 500;
margin: 0;
}
+
+/* =========================================================================
+ Deep Research — agent + rank/rerank RAG surface
+ ========================================================================= */
+
+.deep-research .lang-switcher {
+ display: inline-flex;
+ gap: 6px;
+}
+
+.deep-research .lang-btn {
+ padding: 6px 10px;
+ border-radius: 999px;
+ background: #fff;
+ border: 1px solid var(--line);
+ color: var(--muted);
+ font-weight: 700;
+}
+
+.deep-research .lang-btn.is-active {
+ background: var(--soft-teal);
+ color: var(--teal-dark);
+ border-color: rgba(15, 118, 110, 0.30);
+}
+
+.dr-slice-section {
+ display: grid;
+ gap: 8px;
+}
+
+.dr-slice-grid {
+ display: grid;
+ grid-template-columns: repeat(2, minmax(0, 1fr));
+ gap: 10px;
+}
+
+.dr-slice {
+ text-align: left;
+ background: #fbfcfe;
+ border: 1px solid var(--line);
+ border-radius: 8px;
+ padding: 12px 13px;
+ cursor: pointer;
+ min-height: 96px;
+ display: grid;
+ gap: 6px;
+ align-content: start;
+ transition: border-color 120ms ease, background 120ms ease;
+}
+
+.dr-slice:hover {
+ border-color: rgba(15, 118, 110, 0.30);
+}
+
+.dr-slice.is-on {
+ background: var(--soft-teal);
+ border-color: rgba(15, 118, 110, 0.45);
+}
+
+.dr-slice__head {
+ display: flex;
+ align-items: center;
+ justify-content: space-between;
+ gap: 8px;
+}
+
+.dr-slice__title {
+ font-weight: 800;
+ color: var(--ink);
+}
+
+.dr-slice__badge {
+ background: #fff;
+ border: 1px solid var(--line);
+ border-radius: 999px;
+ color: var(--muted);
+ font-size: 0.66rem;
+ font-weight: 800;
+ letter-spacing: 0.06em;
+ padding: 3px 8px;
+ text-transform: uppercase;
+}
+
+.dr-slice.is-on .dr-slice__badge {
+ background: var(--teal);
+ border-color: var(--teal);
+ color: #fff;
+}
+
+.dr-slice__tagline {
+ margin: 0;
+ color: var(--muted);
+ font-size: 0.86rem;
+ line-height: 1.4;
+}
+
+.advanced-panel .dr-control-grid {
+ display: grid;
+ grid-template-columns: repeat(5, minmax(0, 1fr));
+ gap: 8px;
+ margin-top: 10px;
+}
+
+.dr-control-card {
+ background: #fbfcfe;
+ border: 1px solid var(--line);
+ border-radius: 8px;
+ padding: 10px;
+}
+
+.dr-control-card label {
+ display: flex;
+ justify-content: space-between;
+ gap: 8px;
+ align-items: center;
+ font-weight: 800;
+ color: var(--ink);
+ font-size: 0.85rem;
+}
+
+.dr-control-card small {
+ display: block;
+ margin-top: 8px;
+ color: var(--muted);
+ font-size: 0.74rem;
+ line-height: 1.4;
+}
+
+.dr-control-card input[type="range"] {
+ width: 100%;
+ margin-top: 8px;
+ accent-color: var(--teal);
+}
+
+.dr-control-value {
+ color: var(--coral);
+ font-variant-numeric: tabular-nums;
+}
+
+@media (max-width: 980px) {
+ .advanced-panel .dr-control-grid {
+ grid-template-columns: repeat(2, minmax(0, 1fr));
+ }
+ .dr-slice-grid {
+ grid-template-columns: 1fr;
+ }
+}
+
+.deep-research-results {
+ display: grid;
+ gap: 14px;
+}
+
+.dr-result-block {
+ border: 1px solid var(--line);
+ border-radius: 8px;
+ padding: 16px;
+ background: #fff;
+}
+
+.dr-brief {
+ line-height: 1.65;
+ color: var(--ink);
+ font-size: 1.0rem;
+}
+
+.dr-brief p {
+ margin: 0 0 12px;
+}
+
+.dr-brief code {
+ font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
+ background: var(--soft-teal);
+ padding: 1px 5px;
+ border-radius: 4px;
+ font-size: 0.86em;
+}
+
+.dr-brief strong { color: var(--ink); }
+.dr-brief em { color: var(--muted); }
+
+.dr-cite {
+ display: inline-flex;
+ align-items: center;
+ justify-content: center;
+ min-width: 18px;
+ height: 18px;
+ margin: 0 1px;
+ padding: 0 5px;
+ border-radius: 999px;
+ background: var(--soft-coral);
+ color: var(--coral);
+ font-size: 0.72rem;
+ font-weight: 800;
+ font-variant-numeric: tabular-nums;
+ cursor: pointer;
+ border: 1px solid rgba(194, 65, 12, 0.25);
+ vertical-align: 1px;
+}
+
+.dr-cite:hover { background: var(--coral); color: #fff; }
+
+.dr-sources-head {
+ display: flex;
+ align-items: baseline;
+ justify-content: space-between;
+ margin-bottom: 10px;
+}
+
+.dr-sources-head h3 {
+ margin: 0;
+ font-size: 1rem;
+}
+
+.dr-sources-head small {
+ color: var(--muted);
+ font-size: 0.82rem;
+}
+
+.dr-source-list {
+ display: grid;
+ gap: 10px;
+}
+
+.dr-source-card {
+ display: grid;
+ grid-template-columns: 34px 1fr auto;
+ gap: 12px;
+ align-items: start;
+ border: 1px solid var(--line);
+ border-radius: 8px;
+ padding: 12px;
+ background: #fbfcfe;
+ cursor: pointer;
+ text-align: left;
+ width: 100%;
+}
+
+.dr-source-card:hover { border-color: rgba(15, 118, 110, 0.40); }
+
+.dr-source-card.is-highlight {
+ border-color: var(--coral);
+ background: var(--soft-coral);
+}
+
+.dr-source-number {
+ display: inline-flex;
+ align-items: center;
+ justify-content: center;
+ width: 28px;
+ height: 28px;
+ border-radius: 999px;
+ background: var(--soft-coral);
+ color: var(--coral);
+ font-weight: 900;
+ font-variant-numeric: tabular-nums;
+}
+
+.dr-source-body {
+ min-width: 0;
+}
+
+.dr-source-title {
+ font-weight: 800;
+ color: var(--ink);
+ line-height: 1.35;
+}
+
+.dr-source-meta {
+ display: flex;
+ flex-wrap: wrap;
+ gap: 6px;
+ margin-top: 6px;
+}
+
+.dr-source-tag {
+ background: var(--soft-teal);
+ color: var(--teal-dark);
+ border-radius: 999px;
+ font-size: 0.7rem;
+ font-weight: 800;
+ padding: 3px 8px;
+ text-transform: uppercase;
+}
+
+.dr-source-tag--upload { background: #fff0e8; color: #8a4524; }
+.dr-source-tag--score { background: #eef3fb; color: #314158; }
+
+.dr-source-excerpt {
+ color: var(--muted);
+ margin-top: 8px;
+ line-height: 1.5;
+ font-size: 0.92rem;
+}
+
+.dr-source-aside {
+ align-self: stretch;
+ display: grid;
+ grid-template-rows: auto auto;
+ gap: 6px;
+ font-size: 0.78rem;
+ color: var(--muted);
+ text-align: right;
+ min-width: 90px;
+}
+
+.dr-source-aside b {
+ color: var(--ink);
+ font-variant-numeric: tabular-nums;
+ font-size: 0.92rem;
+}
+
+/* Method trace — overrides for #traceList rendered in rich mode */
+.trace-list.is-rich {
+ display: grid;
+ gap: 8px;
+}
+
+.trace-list.is-rich .trace-step {
+ display: grid;
+ grid-template-columns: 28px 1fr;
+ gap: 10px;
+ align-items: start;
+ padding: 10px 12px;
+ border: 1px solid var(--line);
+ border-radius: 8px;
+ background: #fbfcfe;
+ list-style: none;
+}
+
+.trace-list.is-rich .trace-step__marker {
+ display: inline-flex;
+ align-items: center;
+ justify-content: center;
+ width: 22px;
+ height: 22px;
+ border-radius: 999px;
+ border: 1px solid var(--line);
+ background: #fff;
+ color: var(--muted);
+ font-size: 0.72rem;
+ font-weight: 900;
+ font-variant-numeric: tabular-nums;
+}
+
+.trace-list.is-rich .trace-step__label {
+ display: block;
+ font-weight: 800;
+ color: var(--ink);
+ font-size: 0.94rem;
+}
+
+.trace-list.is-rich .trace-step__detail {
+ display: block;
+ margin-top: 4px;
+ color: var(--muted);
+ font-size: 0.83rem;
+ line-height: 1.45;
+}
+
+.trace-list.is-rich .trace-step.is-running {
+ background: var(--soft-coral);
+}
+.trace-list.is-rich .trace-step.is-running .trace-step__marker {
+ background: rgba(194, 65, 12, 0.18);
+ border-color: rgba(194, 65, 12, 0.35);
+ color: var(--coral);
+ animation: drTracePulse 950ms ease-in-out infinite;
+}
+
+.trace-list.is-rich .trace-step.is-done .trace-step__marker {
+ background: var(--soft-teal);
+ border-color: rgba(15, 118, 110, 0.30);
+ color: var(--teal-dark);
+}
+
+.trace-list.is-rich .trace-step.is-warning .trace-step__marker {
+ background: #fff4dc;
+ border-color: rgba(183, 121, 31, 0.35);
+ color: var(--amber);
+}
+
+.trace-list.is-rich .trace-step.is-error {
+ background: #fff0e8;
+}
+.trace-list.is-rich .trace-step.is-error .trace-step__marker {
+ background: rgba(180, 30, 30, 0.10);
+ border-color: rgba(180, 30, 30, 0.30);
+ color: #b41e1e;
+}
+
+@keyframes drTracePulse {
+ 0%, 100% { opacity: 0.55; transform: scale(0.92); }
+ 50% { opacity: 1; transform: scale(1.04); }
+}
+
+/* Source modal */
+.dr-source-modal {
+ position: fixed;
+ inset: 0;
+ background: rgba(23, 32, 51, 0.62);
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ padding: 24px;
+ z-index: 9999;
+}
+
+.dr-source-modal__dialog {
+ width: min(960px, 100%);
+ max-height: 90vh;
+ background: #fff;
+ border-radius: 8px;
+ box-shadow: 0 28px 92px rgba(0, 0, 0, 0.34);
+ overflow: hidden;
+ display: grid;
+ grid-template-rows: auto 1fr;
+}
+
+.dr-source-modal__head {
+ display: flex;
+ align-items: start;
+ justify-content: space-between;
+ gap: 14px;
+ padding: 16px 18px;
+ border-bottom: 1px solid var(--line);
+}
+
+.dr-source-modal__head h3 {
+ margin: 0;
+ color: var(--ink);
+ line-height: 1.25;
+ font-size: 1.2rem;
+}
+
+.dr-source-modal__body {
+ display: grid;
+ grid-template-columns: 260px minmax(0, 1fr);
+ overflow: hidden;
+}
+
+.dr-source-modal__meta,
+.dr-source-modal__text {
+ padding: 16px 18px;
+ overflow: auto;
+}
+
+.dr-source-modal__meta {
+ border-right: 1px solid var(--line);
+ background: #fbfcfe;
+ color: var(--muted);
+ font-size: 0.88rem;
+ line-height: 1.55;
+}
+
+.dr-source-modal__meta dt {
+ color: var(--ink);
+ font-weight: 800;
+ margin-top: 8px;
+}
+
+.dr-source-modal__meta dt:first-of-type { margin-top: 0; }
+
+.dr-source-modal__text {
+ white-space: pre-wrap;
+ line-height: 1.7;
+ color: var(--ink);
+}
+
+@media (max-width: 720px) {
+ .dr-source-modal__body { grid-template-columns: 1fr; }
+ .dr-source-modal__meta { border-right: 0; border-bottom: 1px solid var(--line); }
+ .dr-source-card { grid-template-columns: 32px 1fr; }
+ .dr-source-aside { display: none; }
+}
diff --git a/assets/js/deep-research.js b/assets/js/deep-research.js
new file mode 100644
index 0000000..fbbb32e
--- /dev/null
+++ b/assets/js/deep-research.js
@@ -0,0 +1,481 @@
+/* deep-research.js — page-scoped UI for /deep-research.php */
+(function () {
+ 'use strict';
+
+ // Cache of DOM references; filled via Object.assign in the DOMContentLoaded handler.
+ const els = {};
+ // Language code sent as `language` in the request payload; updated by bindLang().
+ let lang = 'en';
+ // File objects accepted by bindUpload(); appended as `files[]` in onSubmit().
+ let uploadFiles = [];
+ // Last successful response, assigned in onSubmit(); not read anywhere in the visible code.
+ let lastResult = null;
+
+ // Corpus slices. Each `id` is compared with a button's data-slice attribute in
+ // getSelectedSlices(); `label` is not read in the visible code.
+ const SLICE_DEFS = [
+ { id: 'family_core', label: 'Family Law Core' },
+ { id: 'child_welfare', label: 'Child Welfare' },
+ { id: 'echr_hague', label: 'ECHR and Hague' },
+ { id: 'broader_legal', label: 'Broader Legal Support' },
+ ];
+
+ // Labels for the placeholder trace rows shown before/while a request runs.
+ // NOTE(review): presumably these mirror the step names the server returns in `trace` — confirm.
+ const STEP_LABELS = [
+ 'Query interpretation',
+ 'Query expansion',
+ 'Slice resolution',
+ 'Upload indexing',
+ 'Retrieval',
+ 'Synthesis',
+ 'Citation confidence',
+ ];
+
+ // Page bootstrap: runs only on the deep-research tool page, caches the DOM
+ // nodes the rest of this module reads from `els`, wires the event handlers,
+ // and paints an idle placeholder trace.
+ document.addEventListener('DOMContentLoaded', () => {
+   // A missing or empty activeTool also fails this comparison, so one test suffices.
+   if (document.body.dataset.activeTool !== 'deep-research') return;
+
+   const byId = (id) => document.getElementById(id);
+   const all = (selector) => Array.from(document.querySelectorAll(selector));
+
+   Object.assign(els, {
+     form: byId('deepResearchForm'),
+     input: byId('drInput'),
+     status: byId('drStatus'),
+     runButton: byId('drRunButton'),
+     results: byId('drResults'),
+     traceList: byId('traceList'),
+     slices: all('.dr-slice'),
+     langButtons: all('#drLangSwitcher .lang-btn'),
+     engineRadios: all('input[name="drEngine"]'),
+     subQ: byId('drSubQ'),
+     subQVal: byId('drSubQValue'),
+     chunkLimit: byId('drChunkLimit'),
+     chunkLimitVal: byId('drChunkLimitValue'),
+     sim: byId('drSim'),
+     simVal: byId('drSimValue'),
+     topK: byId('drTopK'),
+     topKVal: byId('drTopKValue'),
+     temp: byId('drTemp'),
+     tempVal: byId('drTempValue'),
+     uploadZone: byId('drUploadZone'),
+     uploadInput: byId('drUploadInput'),
+     uploadPrompt: byId('drUploadPrompt'),
+     uploadFileInfo: byId('drUploadFileInfo'),
+     uploadFileList: byId('drUploadFileList'),
+     uploadClear: byId('drUploadClear'),
+     modal: byId('drSourceModal'),
+     modalClose: byId('drSourceModalClose'),
+     modalTitle: byId('drSourceModalTitle'),
+     modalEyebrow: byId('drSourceModalEyebrow'),
+     modalMeta: byId('drSourceModalMeta'),
+     modalText: byId('drSourceModalText'),
+   });
+
+   // Without the form there is nothing to wire up.
+   if (!els.form) return;
+
+   bindSlices();
+   bindLang();
+   bindRanges();
+   bindUpload();
+   bindModal();
+   els.form.addEventListener('submit', onSubmit);
+
+   // Pre-render placeholder trace
+   const idleSteps = STEP_LABELS.map((label) => ({ label, detail: 'Waiting…', status: 'idle' }));
+   renderTrace(idleSteps);
+ });
+
+ /**
+  * Make every corpus-slice button a toggle: each click flips the `is-on`
+  * class, mirrors the new state into `aria-pressed`, and updates the
+  * button's badge text ("on" / "off") when a badge element is present.
+  */
+ function bindSlices() {
+   const toggleSlice = (button) => {
+     const active = button.classList.toggle('is-on');
+     button.setAttribute('aria-pressed', active ? 'true' : 'false');
+     const badgeEl = button.querySelector('.dr-slice__badge');
+     if (!badgeEl) return;
+     badgeEl.textContent = active ? 'on' : 'off';
+   };
+
+   for (const button of els.slices) {
+     button.addEventListener('click', () => toggleSlice(button));
+   }
+ }
+
+ /**
+  * Wire the language switcher: the clicked button becomes the only one
+  * carrying `is-active`, and the module-level `lang` takes the button's
+  * `data-lang` value (falling back to 'en' when the attribute is empty).
+  */
+ function bindLang() {
+   for (const button of els.langButtons) {
+     button.addEventListener('click', () => {
+       for (const other of els.langButtons) {
+         other.classList.remove('is-active');
+       }
+       button.classList.add('is-active');
+       lang = button.dataset.lang || 'en';
+     });
+   }
+ }
+
+ /**
+  * Keep each range slider's read-out label in sync with its value.
+  * Integer controls show the raw value; similarity and temperature are
+  * shown with two decimals. Pairs with a missing slider or label are skipped.
+  */
+ function bindRanges() {
+   const asIs = (raw) => raw;
+   const twoDecimals = (raw) => Number(raw).toFixed(2);
+
+   const bindings = [
+     { slider: els.subQ, readout: els.subQVal, format: asIs },
+     { slider: els.chunkLimit, readout: els.chunkLimitVal, format: asIs },
+     { slider: els.sim, readout: els.simVal, format: twoDecimals },
+     { slider: els.topK, readout: els.topKVal, format: asIs },
+     { slider: els.temp, readout: els.tempVal, format: twoDecimals },
+   ];
+
+   for (const { slider, readout, format } of bindings) {
+     if (!slider || !readout) continue;
+     const refresh = () => {
+       readout.textContent = format(slider.value);
+     };
+     slider.addEventListener('input', refresh);
+     // Paint the initial value immediately.
+     refresh();
+   }
+ }
+
+ /**
+  * Wire the upload zone: file-picker changes, drag-and-drop, and the
+  * "clear" button. Accepted files are appended to the module-level
+  * `uploadFiles` list and the list UI is re-rendered.
+  *
+  * Client-side checks: at most 5 files in total, 4 MB per file, and a
+  * .pdf / .docx / .txt extension.
+  *
+  * Changes from the previous version:
+  *  - A selection that would exceed the 5-file cap is rejected with a
+  *    message instead of being silently cut down to its first five files.
+  *  - The extension is taken from the text after the last dot, so an
+  *    extension-less file literally named "pdf" is no longer accepted.
+  *  - The file input is optional, like the other nodes here; a missing
+  *    input no longer throws and prevents the drag-and-drop wiring.
+  */
+ function bindUpload() {
+   if (!els.uploadZone) return;
+
+   const MAX_FILES = 5;
+   const MAX_BYTES = 4 * 1024 * 1024;
+   const ALLOWED_EXTS = ['pdf', 'docx', 'txt'];
+
+   // Lower-cased extension without the dot, or '' when the name has no dot.
+   const extensionOf = (name) => {
+     const dot = name.lastIndexOf('.');
+     return dot === -1 ? '' : name.slice(dot + 1).toLowerCase();
+   };
+
+   const onFiles = (fileList) => {
+     const files = Array.from(fileList || []);
+     // Check the cap against the full selection so nothing is dropped unnoticed.
+     if (uploadFiles.length + files.length > MAX_FILES) {
+       setStatus('At most 5 files can be uploaded per request.', 'error');
+       return;
+     }
+     files.forEach((f) => {
+       if (f.size > MAX_BYTES) {
+         setStatus(`${f.name} exceeds the 4 MB limit.`, 'error');
+         return;
+       }
+       if (!ALLOWED_EXTS.includes(extensionOf(f.name))) {
+         setStatus(`${f.name} is not a supported file type.`, 'error');
+         return;
+       }
+       uploadFiles.push(f);
+     });
+     renderUploadList();
+   };
+
+   els.uploadInput?.addEventListener('change', (e) => onFiles(e.target.files));
+   els.uploadZone.addEventListener('dragover', (e) => {
+     // preventDefault is required for the element to accept a drop.
+     e.preventDefault();
+     els.uploadZone.classList.add('is-drop');
+   });
+   els.uploadZone.addEventListener('dragleave', () => els.uploadZone.classList.remove('is-drop'));
+   els.uploadZone.addEventListener('drop', (e) => {
+     e.preventDefault();
+     els.uploadZone.classList.remove('is-drop');
+     onFiles(e.dataTransfer?.files);
+   });
+   els.uploadClear?.addEventListener('click', () => {
+     uploadFiles = [];
+     // Reset the input so picking the same file again fires `change`.
+     if (els.uploadInput) els.uploadInput.value = '';
+     renderUploadList();
+   });
+ }
+
+ function renderUploadList() {
+ if (!uploadFiles.length) {
+ els.uploadFileInfo.classList.add('is-hidden');
+ els.uploadPrompt.classList.remove('is-hidden');
+ return;
+ }
+ els.uploadPrompt.classList.add('is-hidden');
+ els.uploadFileInfo.classList.remove('is-hidden');
+ els.uploadFileList.innerHTML = uploadFiles.map((f, i) => {
+ const kb = (f.size / 1024).toFixed(0);
+ return `
${escapeHtml(f.name)} ${kb} KB `;
+ }).join('');
+ }
+
+ /**
+  * Wire the three ways to dismiss the source modal: the close button, a
+  * click on the modal backdrop itself (not on its content), and the
+  * Escape key while the modal is visible.
+  */
+ function bindModal() {
+   if (els.modalClose) {
+     els.modalClose.addEventListener('click', closeModal);
+   }
+
+   if (els.modal) {
+     els.modal.addEventListener('click', (event) => {
+       // Clicks that land on a child of the modal must not dismiss it.
+       if (event.target !== els.modal) return;
+       closeModal();
+     });
+   }
+
+   document.addEventListener('keydown', (event) => {
+     if (event.key !== 'Escape') return;
+     if (!els.modal || els.modal.classList.contains('is-hidden')) return;
+     closeModal();
+   });
+ }
+
+ /** Hide the source modal by adding `is-hidden`; does nothing when the modal node is absent. */
+ function closeModal() {
+   const modal = els.modal;
+   if (modal) {
+     modal.classList.add('is-hidden');
+   }
+ }
+
+ function openModal(source) {
+ if (!source) return;
+ els.modalEyebrow.textContent = source.source_origin === 'upload' ? 'Uploaded file' : 'Corpus source';
+ els.modalTitle.textContent = source.title || 'Source';
+ const metaRows = [
+ ['Number', `[${source.n}]`],
+ source.section ? ['Section', source.section] : null,
+ ['Corpus / package', source.package_or_corpus || '—'],
+ source.authority_type ? ['Authority', source.authority_type] : null,
+ source.jurisdiction ? ['Jurisdiction', source.jurisdiction] : null,
+ source.similarity != null ? ['Similarity', String(source.similarity)] : null,
+ source.reranker_score != null ? ['Rerank score', String(source.reranker_score)] : null,
+ source.matched_sub_questions?.length ? ['Matched sub-Q', source.matched_sub_questions.join(', ')] : null,
+ ].filter(Boolean);
+ els.modalMeta.innerHTML = '' + metaRows.map(([k, v]) => `${escapeHtml(k)} ${escapeHtml(String(v))} `).join('') + ' ';
+ els.modalText.textContent = source.chunk_text || source.excerpt || '';
+ els.modal.classList.remove('is-hidden');
+ }
+
+ /**
+  * Snapshot the slice toggles.
+  *
+  * @returns {Object<string, boolean>} One entry per SLICE_DEFS id, in
+  *   SLICE_DEFS order; true when the matching button (by data-slice)
+  *   exists and carries `is-on`, false otherwise.
+  */
+ function getSelectedSlices() {
+   return Object.fromEntries(
+     SLICE_DEFS.map(({ id }) => {
+       const button = els.slices.find((candidate) => candidate.dataset.slice === id);
+       const enabled = Boolean(button && button.classList.contains('is-on'));
+       return [id, enabled];
+     })
+   );
+ }
+
+ /**
+  * @returns {string} Value of the first checked engine radio, or
+  *   'azure_mini' when none is checked.
+  */
+ function getEngine() {
+   for (const radio of els.engineRadios) {
+     if (radio.checked) return radio.value;
+   }
+   return 'azure_mini';
+ }
+
+ /**
+  * Read the advanced-panel sliders into the `controls` object sent with
+  * the request. Counts and limits are parsed as base-10 integers;
+  * similarity threshold and temperature as floats.
+  */
+ function getControls() {
+   const intOf = (el) => parseInt(el.value, 10);
+   const floatOf = (el) => parseFloat(el.value);
+
+   return {
+     sub_q_count: intOf(els.subQ),
+     chunk_limit: intOf(els.chunkLimit),
+     similarity_threshold: floatOf(els.sim),
+     reranker_top_k: intOf(els.topK),
+     temperature: floatOf(els.temp),
+   };
+ }
+
+ async function onSubmit(e) {
+ e.preventDefault();
+ const query = (els.input.value || '').trim();
+ if (!query && uploadFiles.length === 0) {
+ setStatus('Type a question or upload a file before running deep research.', 'error');
+ return;
+ }
+ const slices = getSelectedSlices();
+ if (!Object.values(slices).some(Boolean)) {
+ setStatus('Enable at least one corpus slice.', 'error');
+ return;
+ }
+
+ setStatus('Running deep research…', 'busy');
+ els.runButton.disabled = true;
+ els.results.innerHTML = `Working… The agent is expanding the question, retrieving from the corpus, and synthesising the brief. This usually takes 6–15 seconds.
`;
+
+ // Render placeholder trace with first step running
+ const placeholder = STEP_LABELS.map((label, i) => ({
+ label,
+ detail: i === 0 ? 'Running…' : 'Queued',
+ status: i === 0 ? 'running' : 'idle',
+ }));
+ renderTrace(placeholder);
+
+ const payload = {
+ query,
+ paste_text: '',
+ slices,
+ engine: getEngine(),
+ language: lang,
+ controls: getControls(),
+ };
+
+ let response;
+ try {
+ if (uploadFiles.length > 0) {
+ const form = new FormData();
+ form.append('payload', JSON.stringify(payload));
+ uploadFiles.forEach((f) => form.append('files[]', f));
+ response = await fetch('api/deep-research.php', { method: 'POST', body: form, credentials: 'same-origin' });
+ } else {
+ response = await fetch('api/deep-research.php', {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify(payload),
+ credentials: 'same-origin',
+ });
+ }
+ } catch (err) {
+ setStatus(`Network error: ${err.message || err}`, 'error');
+ els.runButton.disabled = false;
+ return;
+ }
+
+ let data = null;
+ try { data = await response.json(); } catch (_) {}
+
+ if (!response.ok || !data || data.ok === false) {
+ const msg = (data && data.error && data.error.message) || `Request failed (${response.status}).`;
+ setStatus(msg, 'error');
+ els.runButton.disabled = false;
+ renderTrace(placeholder.map((s, i) => i === 0 ? { ...s, status: 'error', detail: msg } : s));
+ return;
+ }
+
+ lastResult = data;
+ setStatus(`Done in ${data.latency_ms || 0} ms · ${data.trace_metadata?.source_count || 0} sources · confidence ${data.trace_metadata?.citation_confidence || '?'}`, 'ok');
+ els.runButton.disabled = false;
+ renderTrace(data.trace || []);
+ renderResults(data);
+ }
+
+ /**
+  * Show a status message and colour it by kind.
+  *
+  * @param {string} message Text to display.
+  * @param {string} kind 'error' (red), 'ok' (teal); anything else is muted.
+  */
+ function setStatus(message, kind) {
+   let color = 'var(--muted)';
+   if (kind === 'error') {
+     color = '#b41e1e';
+   } else if (kind === 'ok') {
+     color = 'var(--teal-dark)';
+   }
+   els.status.textContent = message;
+   els.status.style.color = color;
+ }
+
+ function renderTrace(steps) {
+ if (!els.traceList) return;
+ els.traceList.classList.add('is-rich');
+ els.traceList.innerHTML = steps.map((step, i) => {
+ const statusClass = step.status === 'running' ? 'is-running'
+ : step.status === 'complete' ? 'is-done'
+ : step.status === 'warning' ? 'is-warning'
+ : step.status === 'error' ? 'is-error'
+ : '';
+ const marker = step.status === 'complete' ? '✓'
+ : step.status === 'warning' ? '!'
+ : step.status === 'error' ? '×'
+ : (i + 1);
+ return `
+ ${marker}
+
+ ${escapeHtml(step.label || '')}
+ ${escapeHtml(step.detail || '')}
+
+ `;
+ }).join('');
+ }
+
+ function renderResults(data) {
+ const sources = data.sources || [];
+ const subs = data.sub_questions || [];
+
+ const briefHtml = renderBrief(data.brief_markdown || '', sources);
+
+ const subQHtml = subs.length ? `
+
+
Angles the agent explored
+
+ ${subs.map((sq) => `${escapeHtml(sq.question)} ${sq.rationale ? `${escapeHtml(sq.rationale)} ` : ''} `).join('')}
+
+
` : '';
+
+ const sourcesHtml = `
+
+
+
Sources (${sources.length})
+ Click a card to see the full chunk + scores
+
+
+ ${sources.map((s) => renderSourceCard(s)).join('')}
+
+
`;
+
+ const uncertHtml = (data.what_remains_uncertain || []).length ? `
+
+
What remains uncertain
+
+ ${(data.what_remains_uncertain || []).map((u) => `${escapeHtml(String(u))} `).join('')}
+
+
` : '';
+
+ const nextHtml = data.next_practical_step ? `
+
+
Next practical step
+
${escapeHtml(data.next_practical_step)}
+
` : '';
+
+ els.results.innerHTML = `
+
+ ${subQHtml}
+ ${sourcesHtml}
+ ${uncertHtml}
+ ${nextHtml}
+ `;
+
+ // Bind source-card click handlers + citation marker click handlers
+ els.results.querySelectorAll('[data-source-n]').forEach((node) => {
+ node.addEventListener('click', () => {
+ const n = parseInt(node.dataset.sourceN, 10);
+ const src = sources.find((s) => s.n === n);
+ if (src) {
+ openModal(src);
+ flashSource(n);
+ }
+ });
+ });
+ }
+
+ /**
+  * Briefly highlight the source card numbered `n`: clear any existing
+  * highlight, mark the card, scroll it to the centre of the viewport, and
+  * drop the highlight again after 1.8 s. Does nothing beyond clearing
+  * when no such card exists.
+  *
+  * @param {number} n Source number, matched against data-source-n.
+  */
+ function flashSource(n) {
+   for (const card of document.querySelectorAll('.dr-source-card.is-highlight')) {
+     card.classList.remove('is-highlight');
+   }
+
+   const card = document.querySelector(`.dr-source-card[data-source-n="${n}"]`);
+   if (!card) return;
+
+   card.classList.add('is-highlight');
+   card.scrollIntoView({ behavior: 'smooth', block: 'center' });
+   setTimeout(() => {
+     card.classList.remove('is-highlight');
+   }, 1800);
+ }
+
+ function renderSourceCard(s) {
+ const score = s.reranker_score != null ? s.reranker_score : s.similarity;
+ const originTagClass = s.source_origin === 'upload' ? 'dr-source-tag dr-source-tag--upload' : 'dr-source-tag';
+ const originLabel = s.source_origin === 'upload' ? 'upload' : 'corpus';
+ return `
+ ${s.n}
+
+
${escapeHtml(s.title || 'Untitled')}
+ ${s.section ? `
${escapeHtml(s.section)}
` : ''}
+
+ ${originLabel}
+ ${escapeHtml(s.package_or_corpus || '—')}
+ ${(s.matched_sub_questions || []).map((q) => `${escapeHtml(q)} `).join('')}
+
+
${escapeHtml(truncate(s.excerpt || '', 240))}
+
+
+ score${score != null ? Number(score).toFixed(2) : '—'}
+ ${s.reranker_score != null && s.similarity != null ? `sim${Number(s.similarity).toFixed(2)} ` : ''}
+
+ `;
+ }
+
+ // Markdown renderer — minimal: paragraphs, bold/italic, code, [n] citation badges
+ function renderBrief(markdown, sources) {
+ if (!markdown) return 'No brief was returned.
';
+ const sourceSet = new Set((sources || []).map((s) => s.n));
+ const escaped = escapeHtml(markdown);
+
+ // Citation markers [1], [1,2], [1-3]
+ const withCites = escaped.replace(/\[(\d+(?:\s*[-,]\s*\d+)*)\]/g, (_, group) => {
+ const nums = expandCiteGroup(group);
+ return nums.map((n) => {
+ const known = sourceSet.has(n);
+ const cls = known ? 'dr-cite' : 'dr-cite';
+ return `${n} `;
+ }).join('');
+ });
+
+ // Bold/italic
+ const withBold = withCites
+ .replace(/\*\*([^*]+)\*\*/g, '$1 ')
+ .replace(/(^|[^*])\*([^*]+)\*(?!\*)/g, '$1$2 ')
+ .replace(/`([^`]+)`/g, '$1');
+
+ // Paragraphs
+ const paragraphs = withBold.split(/\n{2,}/).map((p) => {
+ const t = p.trim();
+ if (!t) return '';
+ if (/^### /.test(t)) return `${t.replace(/^### /, '')} `;
+ return `${t.replace(/\n/g, ' ')}
`;
+ }).join('');
+
+ return paragraphs;
+ }
+
+ /**
+  * Expand the inside of a citation marker into individual source numbers.
+  *
+  * Accepts comma-separated numbers and ranges, e.g. "1", "1, 2", "1-3".
+  * The result keeps first-seen order and contains no duplicates.
+  *
+  * The input comes from model-generated text, so ranges are handled
+  * defensively:
+  *  - a reversed range ("3-1") is read as 1..3 rather than expanding to
+  *    nothing, which made the citation disappear from the brief;
+  *  - a range wider than `maxSpan` is not enumerated (something like
+  *    "1-999999999" would otherwise freeze the page); only its two
+  *    endpoints are kept.
+  *
+  * @param {string} group Text between the brackets of a citation marker.
+  * @param {number} [maxSpan=50] Largest (high - low) difference that is enumerated.
+  * @returns {number[]} Unique source numbers in first-seen order.
+  */
+ function expandCiteGroup(group, maxSpan = 50) {
+   const seen = new Set();
+
+   for (const rawPart of group.split(',')) {
+     const part = rawPart.trim();
+     const range = part.match(/^(\d+)\s*-\s*(\d+)$/);
+
+     if (!range) {
+       const n = parseInt(part, 10);
+       if (!Number.isNaN(n)) seen.add(n);
+       continue;
+     }
+
+     const first = parseInt(range[1], 10);
+     const second = parseInt(range[2], 10);
+     const lo = Math.min(first, second);
+     const hi = Math.max(first, second);
+
+     if (hi - lo > maxSpan) {
+       // Implausibly wide range: keep the endpoints, skip the loop.
+       seen.add(lo);
+       seen.add(hi);
+       continue;
+     }
+
+     for (let i = lo; i <= hi; i++) seen.add(i);
+   }
+
+   return Array.from(seen);
+ }
+
+ /**
+  * Escape a value for safe interpolation into HTML text or a quoted
+  * attribute value.
+  *
+  * The replacement strings must be the HTML entities themselves; with the
+  * entities decoded the function escapes nothing, and every template in
+  * this file that interpolates server or user text into innerHTML becomes
+  * an injection point. `&` is replaced first so the ampersands of the
+  * other entities are not escaped a second time.
+  *
+  * @param {*} s Value to escape; coerced with String().
+  * @returns {string} The escaped text.
+  */
+ function escapeHtml(s) {
+   return String(s)
+     .replace(/&/g, '&amp;')
+     .replace(/</g, '&lt;')
+     .replace(/>/g, '&gt;')
+     .replace(/"/g, '&quot;')
+     .replace(/'/g, '&#39;');
+ }
+
+ /**
+  * Shorten a string to at most `n` characters, ending with an ellipsis
+  * when something was cut off.
+  *
+  * @param {string} s Text to shorten; any falsy value yields ''.
+  * @param {number} n Maximum length of the result, ellipsis included.
+  * @returns {string} `s` unchanged when it fits, otherwise its first
+  *   n - 1 characters followed by '…'.
+  */
+ function truncate(s, n) {
+   if (!s) return '';
+   const fits = s.length <= n;
+   return fits ? s : `${s.slice(0, n - 1)}…`;
+ }
+})();
diff --git a/deep-research.php b/deep-research.php
new file mode 100644
index 0000000..572e01d
--- /dev/null
+++ b/deep-research.php
@@ -0,0 +1,162 @@
+
+
+
+
+
+
Ready
+
Pick slices, drop a case file or paste a question, then run. The agent will expand the question, retrieve from the corpus + your upload, rerank, and synthesise a cited brief.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/includes/DeepResearchAgent.php b/includes/DeepResearchAgent.php
new file mode 100644
index 0000000..d08c4d5
--- /dev/null
+++ b/includes/DeepResearchAgent.php
@@ -0,0 +1,727 @@
+azure = $azure ?: new DbnAzureOpenAiGateway();
+ }
+
+ /**
+  * Run the deep-research pipeline end to end and return the response payload.
+  *
+  * Steps, in order: seed interpretation, sub-question expansion, slice
+  * resolution, in-memory upload indexing, per-sub-question retrieval
+  * (corpus + uploads), synthesis, and a citation-confidence rating. Each
+  * step appends an entry to the returned trace; most also record a timing
+  * in trace_metadata.elapsed_ms_per_step.
+  *
+  * Request-level problems (no seed input, no active slice, missing package
+  * or subscription, retrieval pipeline init failure) are reported through
+  * dbnToolsAbort(). Failures inside individual steps are logged and
+  * downgraded to a 'warning' trace entry where the code has a fallback.
+  *
+  * @param string $seedQuery      User question (may be empty).
+  * @param string $pastedText     Pasted text (may be empty).
+  * @param array  $uploadedFiles  Extracted uploads; each entry is read for 'filename' and 'text'.
+  * @param array  $sliceSelection Raw slice selection, normalised via dbnV6NormalizeSliceSelection().
+  * @param string $engine         'azure_mini', 'azure_full' or 'gpu'; anything else becomes 'azure_mini'.
+  * @param string $language       'en' or 'no'; anything else becomes 'en'.
+  * @param array  $controls       Tuning knobs, clamped by normalizeControls().
+  * @return array Response payload: brief, sub-questions, numbered sources, trace, metadata, disclaimer.
+  */
+ public function run(
+     string $seedQuery,
+     string $pastedText,
+     array $uploadedFiles,
+     array $sliceSelection,
+     string $engine,
+     string $language,
+     array $controls
+ ): array {
+     $seedQuery = trim($seedQuery);
+     $pastedText = trim($pastedText);
+     $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true) ? $engine : 'azure_mini';
+     $language = in_array($language, ['en', 'no'], true) ? $language : 'en';
+
+     $controls = $this->normalizeControls($controls);
+
+     if ($seedQuery === '' && $pastedText === '' && empty($uploadedFiles)) {
+         dbnToolsAbort('Provide a question, paste text, or upload at least one file.', 422, 'missing_seed');
+     }
+
+     $client = dbnToolsRequireClient();
+     $package = $this->requireFamilyPackage((int)$client['id']);
+
+     dbnToolsBootCaveau();
+     $aiPortalRoot = dbnToolsAiPortalRoot();
+     require_once $aiPortalRoot . '/platform/includes/dbn_v6.php';
+     require_once $aiPortalRoot . '/lib/ai/AiGateway.php';
+
+     // Validate the slice selection before any LLM call is made, so a
+     // request with no active slice fails fast instead of first paying for
+     // the interpretation and expansion round-trips.
+     $sliceSelectionNormalized = dbnV6NormalizeSliceSelection($sliceSelection);
+     if (!array_filter($sliceSelectionNormalized)) {
+         dbnToolsAbort('Enable at least one corpus slice before running deep research.', 422, 'no_slices');
+     }
+
+     $this->ai = new AiGateway();
+     $this->uploadVecs = [];
+     $this->stepTimings = [];
+
+     $trace = [];
+     $seedDescription = $this->buildSeedDescription($seedQuery, $pastedText, $uploadedFiles);
+
+     // STEP 1: Query interpretation — build research brief
+     $stepStart = microtime(true);
+     $interpretation = $this->interpretSeed($seedDescription, $language);
+     $this->stepTimings['interpretation'] = $this->elapsedMs($stepStart);
+     $trace[] = $this->trace(
+         'Query interpretation',
+         $interpretation['detail'],
+         'complete'
+     );
+
+     // STEP 2: Query expansion
+     $stepStart = microtime(true);
+     $expansion = $this->expandQueries($seedDescription, $interpretation['brief'], $controls['sub_q_count'], $language);
+     $this->stepTimings['expansion'] = $this->elapsedMs($stepStart);
+     $subQuestions = $expansion['questions'];
+     $expansionStatus = $expansion['fallback'] ? 'warning' : 'complete';
+     $trace[] = $this->trace(
+         'Query expansion',
+         $expansion['fallback']
+             ? 'Could not parse sub-questions; falling back to retrieving on the seed query alone.'
+             : sprintf('Generated %d sub-questions to research the corpus from multiple angles.', count($subQuestions)),
+         $expansionStatus
+     );
+
+     // STEP 3: Slice resolution (selection was already validated above)
+     $stepStart = microtime(true);
+     $ragDb = dbnToolsRagDb();
+     try {
+         $sharedDocIds = dbnV6ResolveSelectedDocIds($ragDb, $sliceSelectionNormalized);
+         $sliceStatus = 'complete';
+         $sliceDetail = sprintf(
+             '%d slice(s) active → %d candidate documents constrain the corpus search.',
+             count(array_filter($sliceSelectionNormalized)),
+             count($sharedDocIds)
+         );
+     } catch (Throwable $e) {
+         // Best effort: an empty id list is passed on and the trace says so.
+         error_log('DBN deep research slice resolve failed: ' . $e->getMessage());
+         $sharedDocIds = [];
+         $sliceStatus = 'warning';
+         $sliceDetail = 'Slice resolution failed; corpus search will run unconstrained.';
+     }
+     $this->stepTimings['slice_resolution'] = $this->elapsedMs($stepStart);
+     $trace[] = $this->trace('Slice resolution', $sliceDetail, $sliceStatus);
+
+     // STEP 4: Upload indexing (in-memory, ephemeral)
+     $stepStart = microtime(true);
+     $uploadChunks = [];
+     foreach ($uploadedFiles as $idx => $file) {
+         $filename = (string)($file['filename'] ?? sprintf('upload-%d', $idx + 1));
+         $text = (string)($file['text'] ?? '');
+         $uploadChunks = array_merge($uploadChunks, $this->splitIntoChunks($text, $filename, $idx));
+     }
+     $uploadStatus = 'complete';
+     $uploadDetail = sprintf('%d upload file(s) → %d in-memory chunks indexed with nomic-embed-text.', count($uploadedFiles), count($uploadChunks));
+     if ($uploadChunks) {
+         try {
+             $texts = array_map(fn(array $c) => $c['text'], $uploadChunks);
+             $vecs = $this->ai->embedBatch($texts, 'nomic-embed-text');
+             // Only trust the batch when there is exactly one vector per chunk.
+             if (count($vecs) === count($uploadChunks)) {
+                 foreach ($uploadChunks as $i => $chunk) {
+                     $this->uploadVecs[] = [
+                         'meta' => $chunk,
+                         'vec' => $vecs[$i],
+                     ];
+                 }
+             } else {
+                 $uploadStatus = 'warning';
+                 $uploadDetail = 'Upload embedding returned an unexpected count; uploaded chunks will not participate in retrieval.';
+             }
+         } catch (Throwable $e) {
+             error_log('DBN deep research upload embed failed: ' . $e->getMessage());
+             $uploadStatus = 'warning';
+             $uploadDetail = 'Upload embedding gateway unreachable; uploaded chunks will not participate in retrieval.';
+             $this->uploadVecs = [];
+         }
+     } elseif (empty($uploadedFiles)) {
+         $uploadDetail = 'No files uploaded; agent will research the corpus only.';
+     }
+     $this->stepTimings['upload_indexing'] = $this->elapsedMs($stepStart);
+     $trace[] = $this->trace('Upload indexing', $uploadDetail, $uploadStatus);
+
+     // STEP 5: Retrieval (per sub-question)
+     $stepStart = microtime(true);
+     // When expansion produced nothing, retrieve on a single synthetic query.
+     $retrievalQueries = $subQuestions ?: [[
+         'id' => 'q1',
+         'question' => $seedQuery !== '' ? $seedQuery : ($interpretation['brief'] ?: 'legal research'),
+         'rationale' => 'Seed query (no sub-question expansion).',
+     ]];
+
+     try {
+         $rag = new ClientRagPipeline((int)$client['id'], 'http://10.0.1.10:4000', 60);
+     } catch (Throwable $e) {
+         // Log the cause like every other failure path in this method; the
+         // client only receives the generic message.
+         error_log('DBN deep research RAG init failed: ' . $e->getMessage());
+         dbnToolsAbort('Could not initialise the retrieval pipeline.', 503, 'rag_init_failed');
+     }
+
+     $rawPool = [];
+     $retrievalWarnings = 0;
+     foreach ($retrievalQueries as $sq) {
+         try {
+             $corpusChunks = $rag->searchAll(
+                 $sq['question'],
+                 $controls['chunk_limit'],
+                 null,
+                 [
+                     'search_private' => false,
+                     'search_shared' => true,
+                     'package_ids' => [(int)$package['id']],
+                     'shared_doc_ids' => $sharedDocIds,
+                     'chunk_limit' => $controls['chunk_limit'],
+                     'search_method' => 'hybrid',
+                     'reranker_enabled' => true,
+                 ]
+             );
+         } catch (Throwable $e) {
+             error_log('DBN deep research sub-Q retrieval failed: ' . $e->getMessage());
+             $corpusChunks = [];
+             $retrievalWarnings++;
+         }
+         foreach ($corpusChunks as $chunk) {
+             $rawPool[] = $this->normalizeCorpusChunk($chunk, $sq['id']);
+         }
+
+         // Upload chunk retrieval via cosine sim
+         if (!empty($this->uploadVecs)) {
+             $uploadHits = $this->retrieveFromUploads($sq['question'], $controls['chunk_limit'], $controls['similarity_threshold']);
+             foreach ($uploadHits as $hit) {
+                 $hit['matched_sub_questions'] = [$sq['id']];
+                 $rawPool[] = $hit;
+             }
+         }
+     }
+
+     $merged = $this->mergeAndDedupe($rawPool, self::POOL_CAP);
+     $this->stepTimings['retrieval'] = $this->elapsedMs($stepStart);
+     $retrievalStatus = $retrievalWarnings > 0 ? 'warning' : 'complete';
+     $trace[] = $this->trace(
+         'Retrieval',
+         sprintf(
+             '%d sub-question(s) × hybrid + RRF + rerank → %d raw chunks → %d unique after dedupe.',
+             count($retrievalQueries),
+             count($rawPool),
+             count($merged)
+         ),
+         $retrievalStatus
+     );
+
+     // Cap pool to reranker top-K for synthesis
+     $synthesisPool = array_slice($merged, 0, $controls['reranker_top_k']);
+     $numberedSources = $this->numberSources($synthesisPool);
+
+     // STEP 6: Synthesis
+     $stepStart = microtime(true);
+     $synthesis = $this->synthesise(
+         $seedDescription,
+         $interpretation['brief'],
+         $retrievalQueries,
+         $numberedSources,
+         $engine,
+         $language,
+         $controls['temperature']
+     );
+     $this->stepTimings['synthesis'] = $this->elapsedMs($stepStart);
+     $trace[] = $this->trace(
+         'Synthesis',
+         sprintf('%s synthesised the brief using %d grounded source(s).', $synthesis['deploy_label'], count($numberedSources)),
+         'complete'
+     );
+
+     // STEP 7: Confidence
+     $confidence = $this->citationConfidence($numberedSources);
+     $trace[] = $this->trace(
+         'Citation confidence',
+         sprintf('%s confidence based on %d source(s) and reranker score distribution.', ucfirst($confidence), count($numberedSources)),
+         $confidence === 'low' ? 'warning' : 'complete'
+     );
+
+     // Stitch sub-question chunk_ids
+     $subQOut = [];
+     foreach ($retrievalQueries as $sq) {
+         $matchedChunks = array_values(array_filter(
+             $numberedSources,
+             fn(array $s) => in_array($sq['id'], $s['matched_sub_questions'] ?? [], true)
+         ));
+         $subQOut[] = [
+             'id' => $sq['id'],
+             'question' => $sq['question'],
+             'rationale' => $sq['rationale'] ?? '',
+             'chunk_ids' => array_values(array_map(fn(array $s) => $s['chunk_id'], $matchedChunks)),
+         ];
+     }
+
+     return [
+         'tool' => 'deep_research',
+         'language' => $language,
+         'brief_markdown' => (string)($synthesis['json']['brief_markdown'] ?? $synthesis['json']['answer'] ?? ''),
+         'sub_questions' => $subQOut,
+         'sources' => $numberedSources,
+         'what_we_found' => (string)($synthesis['json']['what_we_found'] ?? ''),
+         'evidence_trail' => $numberedSources,
+         'what_remains_uncertain' => $synthesis['json']['what_remains_uncertain'] ?? [],
+         'next_practical_step' => (string)($synthesis['json']['next_practical_step'] ?? ''),
+         'trace' => $trace,
+         'trace_metadata' => [
+             'chunk_count' => count($merged),
+             'source_count' => count($numberedSources),
+             'sub_question_count' => count($retrievalQueries),
+             'upload_chunk_count' => count($this->uploadVecs),
+             'deployment' => $synthesis['deploy_label'],
+             'engine_used' => $engine,
+             'citation_confidence' => $confidence,
+             'elapsed_ms_per_step' => $this->stepTimings,
+             'slices_active' => array_keys(array_filter($sliceSelectionNormalized)),
+         ],
+         'disclaimer' => dbnToolsDisclaimer($language),
+     ];
+ }
+
+ /**
+  * Coerce the caller-supplied tuning controls to their expected types and
+  * clamp each one into its allowed range, substituting a default for any
+  * control that is missing.
+  *
+  * @param array $controls Raw controls keyed by name.
+  * @return array The five normalised controls, always all present.
+  */
+ private function normalizeControls(array $controls): array
+ {
+     $clampInt = static fn(int $value, int $low, int $high): int => max($low, min($high, $value));
+     $clampFloat = static fn(float $value, float $low, float $high): float => max($low, min($high, $value));
+
+     $subQCount = (int)($controls['sub_q_count'] ?? 4);
+     $chunkLimit = (int)($controls['chunk_limit'] ?? 6);
+     $threshold = (float)($controls['similarity_threshold'] ?? 0.30);
+     $topK = (int)($controls['reranker_top_k'] ?? 12);
+     $temperature = (float)($controls['temperature'] ?? 0.15);
+
+     return [
+         'sub_q_count' => $clampInt($subQCount, 3, 5),
+         'chunk_limit' => $clampInt($chunkLimit, 4, 10),
+         'similarity_threshold' => $clampFloat($threshold, 0.2, 0.6),
+         'reranker_top_k' => $clampInt($topK, 8, 14),
+         'temperature' => $clampFloat($temperature, 0.05, 0.4),
+     ];
+ }
+
+ /**
+  * Fetch the 'family-legal' package and confirm the client may use it.
+  *
+  * Reports a 503 through dbnToolsAbort() when the package is missing or
+  * inactive, or when the client has no active subscription to it.
+  *
+  * @param int $clientId Client whose subscription is checked.
+  * @return array The package row as returned by dbnToolsFetchPackage().
+  */
+ private function requireFamilyPackage(int $clientId): array
+ {
+     $familyPackage = dbnToolsFetchPackage('family-legal');
+
+     $isUsable = $familyPackage && !empty($familyPackage['is_active']);
+     if (!$isUsable) {
+         dbnToolsAbort('The family-legal corpus package is not active.', 503, 'package_unavailable');
+     }
+
+     $packageId = (int)$familyPackage['id'];
+     if (!dbnToolsHasActiveSubscription($clientId, $packageId)) {
+         dbnToolsAbort('Do Better Norge does not have an active family-legal subscription.', 503, 'subscription_missing');
+     }
+
+     return $familyPackage;
+ }
+
+ /**
+  * Assemble one text block describing everything the user supplied: the
+  * question, the pasted text, and each upload that has extracted text.
+  *
+  * Question and pasted text are cut to MAX_SEED_CHARS and each upload to
+  * MAX_UPLOAD_CHARS. Sections are separated by a blank line; empty inputs
+  * contribute no section.
+  *
+  * @param string $seedQuery     User question.
+  * @param string $pastedText    Pasted text.
+  * @param array  $uploadedFiles Uploads, read for 'filename' and 'text'.
+  * @return string The combined description (empty when nothing was supplied).
+  */
+ private function buildSeedDescription(string $seedQuery, string $pastedText, array $uploadedFiles): string
+ {
+     $clip = static fn($value, $limit) => mb_substr($value, 0, $limit, 'UTF-8');
+
+     $sections = [];
+     if ($seedQuery !== '') {
+         $sections[] = "Question:\n" . $clip($seedQuery, self::MAX_SEED_CHARS);
+     }
+     if ($pastedText !== '') {
+         $sections[] = "Pasted text:\n" . $clip($pastedText, self::MAX_SEED_CHARS);
+     }
+
+     foreach ($uploadedFiles as $position => $upload) {
+         $label = (string)($upload['filename'] ?? sprintf('upload-%d', $position + 1));
+         $body = (string)($upload['text'] ?? '');
+         if ($body !== '') {
+             $sections[] = sprintf("Uploaded file [%s]:\n%s", $label, $clip($body, self::MAX_UPLOAD_CHARS));
+         }
+     }
+
+     return implode("\n\n", $sections);
+ }
+
+ /**
+  * Ask the Azure chat model to condense the seed input into a short
+  * research brief.
+  *
+  * The request runs in JSON mode (temperature 0.1, max 400 tokens, 30 s
+  * timeout) and the reply is decoded with decodeJsonObject(). The step is
+  * best effort: if the call throws, or the decoded reply has no non-empty
+  * 'brief', the method returns an empty brief with a "skipped" detail so
+  * the pipeline can continue on the raw seed.
+  *
+  * @param string $seedDescription Combined seed text.
+  * @param string $language        'no' selects Norwegian for $locale, anything else English.
+  * @return array{brief: string, detail: string}
+  */
+ private function interpretSeed(string $seedDescription, string $language): array
+ {
+ $locale = $language === 'no' ? 'Norwegian' : 'English';
+ $prompt = <<azure->chatText([
+ ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
+ ['role' => 'user', 'content' => $prompt],
+ ], ['json' => true, 'temperature' => 0.1, 'max_tokens' => 400, 'timeout' => 30]);
+ $json = $this->azure->decodeJsonObject($raw);
+ if (is_array($json) && !empty($json['brief'])) {
+ $signals = $json['key_signals'] ?? [];
+ // At most six signals are echoed into the trace detail.
+ $signalText = is_array($signals) ? implode(', ', array_slice($signals, 0, 6)) : '';
+ return [
+ 'brief' => (string)$json['brief'],
+ 'detail' => sprintf('Research focus: %s%s', (string)$json['brief'], $signalText ? ' — signals: ' . $signalText : ''),
+ ];
+ }
+ } catch (Throwable $e) {
+ error_log('DBN deep research interpretation failed: ' . $e->getMessage());
+ }
+
+ // Fallback for both a thrown error and an unusable reply.
+ return [
+ 'brief' => '',
+ 'detail' => 'Interpretation step skipped — proceeding with raw seed input.',
+ ];
+ }
+
+ /**
+  * Ask the Azure chat model for sub-questions that approach the seed from
+  * different angles.
+  *
+  * The request runs in JSON mode (temperature 0.2, max 700 tokens, 35 s
+  * timeout). Entries of the reply's 'sub_questions' list that are not
+  * arrays or lack a 'question' are dropped; collection stops once
+  * $targetCount questions are kept. Ids are 'q' plus the entry's position
+  * in the model's list, so they may skip numbers when entries are dropped.
+  *
+  * Fewer than two usable questions, or any thrown error, yields the
+  * fallback result: an empty question list with 'fallback' set to true.
+  *
+  * @param string $seedDescription Combined seed text.
+  * @param string $brief           Research brief from the interpretation step (may be empty).
+  * @param int    $targetCount     Maximum number of sub-questions to keep.
+  * @param string $language        'no' selects Norwegian for $locale, anything else English.
+  * @return array{questions: array, fallback: bool}
+  */
+ private function expandQueries(string $seedDescription, string $brief, int $targetCount, string $language): array
+ {
+ $locale = $language === 'no' ? 'Norwegian' : 'English';
+ $prompt = <<azure->chatText([
+ ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
+ ['role' => 'user', 'content' => $prompt],
+ ], ['json' => true, 'temperature' => 0.2, 'max_tokens' => 700, 'timeout' => 35]);
+ $json = $this->azure->decodeJsonObject($raw);
+ $items = is_array($json['sub_questions'] ?? null) ? $json['sub_questions'] : [];
+ $normalized = [];
+ foreach ($items as $i => $item) {
+ if (!is_array($item) || empty($item['question'])) {
+ continue;
+ }
+ $normalized[] = [
+ 'id' => 'q' . ($i + 1),
+ 'question' => trim((string)$item['question']),
+ 'rationale' => trim((string)($item['rationale'] ?? '')),
+ ];
+ if (count($normalized) >= $targetCount) break;
+ }
+ // A single sub-question is treated as a failed expansion.
+ if (count($normalized) >= 2) {
+ return ['questions' => $normalized, 'fallback' => false];
+ }
+ } catch (Throwable $e) {
+ error_log('DBN deep research expansion failed: ' . $e->getMessage());
+ }
+
+ return ['questions' => [], 'fallback' => true];
+ }
+
+ /**
+  * Cut an upload's text into overlapping word windows for embedding.
+  *
+  * Whitespace is collapsed first. Windows are CHUNK_WORDS long and start
+  * every (CHUNK_WORDS - CHUNK_OVERLAP_WORDS) words (at least 1). A window
+  * shorter than MIN_CHUNK_WORDS is kept only when it is the very first
+  * one, and the first short window ends the scan.
+  *
+  * @param string $text     Extracted upload text.
+  * @param string $filename Name recorded on every chunk.
+  * @param int    $fileIdx  Index of the upload, used in the chunk id.
+  * @return array List of chunks with chunk_id, file_index, chunk_index, filename and text.
+  */
+ private function splitIntoChunks(string $text, string $filename, int $fileIdx): array
+ {
+     $collapsed = preg_replace('/\s+/u', ' ', trim($text)) ?? '';
+     if ($collapsed === '') {
+         return [];
+     }
+
+     $tokens = preg_split('/\s+/u', $collapsed, -1, PREG_SPLIT_NO_EMPTY) ?: [];
+     if (!$tokens) {
+         return [];
+     }
+
+     // Distance between the starts of consecutive windows.
+     $stride = max(1, self::CHUNK_WORDS - self::CHUNK_OVERLAP_WORDS);
+     $tokenCount = count($tokens);
+
+     $pieces = [];
+     for ($offset = 0; $offset < $tokenCount; $offset += $stride) {
+         $window = array_slice($tokens, $offset, self::CHUNK_WORDS);
+         $windowSize = count($window);
+
+         if ($offset === 0 || $windowSize >= self::MIN_CHUNK_WORDS) {
+             // Chunk indices are dense: one per chunk actually emitted.
+             $ordinal = count($pieces);
+             $pieces[] = [
+                 'chunk_id' => sprintf('upload:%d:%d', $fileIdx, $ordinal),
+                 'file_index' => $fileIdx,
+                 'chunk_index' => $ordinal,
+                 'filename' => $filename,
+                 'text' => implode(' ', $window),
+             ];
+         }
+
+         // A short window means the end of the text has been reached.
+         if ($windowSize < self::CHUNK_WORDS) {
+             break;
+         }
+     }
+
+     return $pieces;
+ }
+
+ /**
+  * Rank the in-memory upload chunks against one question by cosine
+  * similarity and return the best matches in source-record form.
+  *
+  * Chunks scoring below $threshold are discarded. The result is sorted by
+  * similarity, highest first, and limited to ceil($limitPerSubQ / 2)
+  * entries (never fewer than one slot). An embedding failure is logged
+  * and produces an empty result.
+  *
+  * @param string $question     Question to embed and compare against.
+  * @param int    $limitPerSubQ Per-sub-question chunk limit.
+  * @param float  $threshold    Minimum similarity for a chunk to qualify.
+  * @return array List of upload hits.
+  */
+ private function retrieveFromUploads(string $question, int $limitPerSubQ, float $threshold): array
+ {
+     if (empty($this->uploadVecs)) {
+         return [];
+     }
+
+     try {
+         $queryVector = $this->ai->embed($question, 'nomic-embed-text');
+     } catch (Throwable $e) {
+         error_log('DBN deep research sub-Q embed failed: ' . $e->getMessage());
+         return [];
+     }
+     if (empty($queryVector)) {
+         return [];
+     }
+
+     $hits = [];
+     foreach ($this->uploadVecs as $indexed) {
+         $score = $this->cosineSim($queryVector, $indexed['vec']);
+         if ($score < $threshold) {
+             continue;
+         }
+
+         $meta = $indexed['meta'];
+         $hits[] = [
+             'chunk_id' => $meta['chunk_id'],
+             'title' => 'uploaded: ' . $meta['filename'],
+             'section' => null,
+             'package_or_corpus' => 'Your upload',
+             'excerpt' => dbnToolsExcerpt($meta['text'], 620),
+             'chunk_text' => $meta['text'],
+             'similarity' => round($score, 4),
+             'reranker_score' => null,
+             'document_id' => null,
+             'source_origin' => 'upload',
+             'authority_type' => null,
+             'jurisdiction' => null,
+         ];
+     }
+
+     usort($hits, fn(array $left, array $right) => ($right['similarity'] <=> $left['similarity']));
+
+     $quota = max(1, (int)ceil($limitPerSubQ / 2));
+     return array_slice($hits, 0, $quota);
+ }
+
+ /**
+  * Cosine similarity of two numeric vectors.
+  *
+  * Only the first min(count($a), count($b)) components are compared.
+  * Returns 0.0 when either vector is empty or has zero magnitude over
+  * the compared components.
+  *
+  * @param array $a First vector (0-indexed).
+  * @param array $b Second vector (0-indexed).
+  * @return float Similarity value.
+  */
+ private function cosineSim(array $a, array $b): float
+ {
+     $dims = min(count($a), count($b));
+     if ($dims === 0) {
+         return 0.0;
+     }
+
+     $dotProduct = 0.0;
+     $sumSqA = 0.0;
+     $sumSqB = 0.0;
+     foreach (range(0, $dims - 1) as $k) {
+         $left = (float)$a[$k];
+         $right = (float)$b[$k];
+         $dotProduct += $left * $right;
+         $sumSqA += $left * $left;
+         $sumSqB += $right * $right;
+     }
+
+     return ($sumSqA !== 0.0 && $sumSqB !== 0.0)
+         ? $dotProduct / (sqrt($sumSqA) * sqrt($sumSqB))
+         : 0.0;
+ }
+
+ /**
+  * Map one corpus search hit onto the common source-record shape used by
+  * the rest of the pipeline, tagging it with the sub-question that found it.
+  *
+  * Scores are rounded to four decimals when present and null otherwise;
+  * missing titles and source names fall back to fixed labels.
+  *
+  * @param array  $chunk  Raw hit from the corpus search.
+  * @param string $subQId Id of the sub-question that produced the hit.
+  * @return array Normalised source record with source_origin 'corpus'.
+  */
+ private function normalizeCorpusChunk(array $chunk, string $subQId): array
+ {
+     $roundedOrNull = static fn(string $field): ?float => isset($chunk[$field])
+         ? round((float)$chunk[$field], 4)
+         : null;
+     $intOrNull = static fn(string $field): ?int => isset($chunk[$field])
+         ? (int)$chunk[$field]
+         : null;
+
+     $content = (string)($chunk['content'] ?? '');
+
+     return [
+         'chunk_id' => $intOrNull('id'),
+         'title' => (string)($chunk['document_title'] ?? $chunk['title'] ?? 'Untitled source'),
+         'section' => $chunk['section_title'] ?? null,
+         'package_or_corpus' => (string)($chunk['source_name'] ?? $chunk['source_type'] ?? 'Do Better Norge'),
+         'excerpt' => dbnToolsExcerpt($content, 620),
+         'chunk_text' => $content,
+         'similarity' => $roundedOrNull('similarity'),
+         'reranker_score' => $roundedOrNull('reranker_score'),
+         'document_id' => $intOrNull('document_id'),
+         'source_origin' => 'corpus',
+         'authority_type' => $chunk['authority_type'] ?? null,
+         'jurisdiction' => $chunk['jurisdiction'] ?? null,
+         'matched_sub_questions' => [$subQId],
+     ];
+ }
+
+ /**
+  * Collapse duplicate hits from different sub-questions into one record
+  * per chunk, then rank and cap the pool.
+  *
+  * Chunks are identified by source origin plus chunk id; a chunk without
+  * an id receives a random key and is therefore never merged. Merging
+  * unions the matched sub-question ids and keeps the larger similarity
+  * and reranker score. The pool is ordered by reranker score, or by
+  * similarity where no reranker score exists, highest first.
+  *
+  * @param array $rawPool All hits gathered across sub-questions.
+  * @param int   $cap     Maximum number of records to return.
+  * @return array Ranked, de-duplicated list of at most $cap records.
+  */
+ private function mergeAndDedupe(array $rawPool, int $cap): array
+ {
+     $pool = [];
+     foreach ($rawPool as $candidate) {
+         $origin = $candidate['source_origin'] ?? 'corpus';
+         $identity = $candidate['chunk_id'] ?? bin2hex(random_bytes(4));
+         $key = $origin . ':' . $identity;
+
+         if (!isset($pool[$key])) {
+             $pool[$key] = $candidate;
+             continue;
+         }
+
+         // Same chunk seen again: remember every sub-question that hit it.
+         $pool[$key]['matched_sub_questions'] = array_values(array_unique(array_merge(
+             $pool[$key]['matched_sub_questions'] ?? [],
+             $candidate['matched_sub_questions'] ?? []
+         )));
+
+         // Keep the better of the two values for each score.
+         foreach (['similarity', 'reranker_score'] as $metric) {
+             if (($candidate[$metric] ?? 0) > ($pool[$key][$metric] ?? 0)) {
+                 $pool[$key][$metric] = $candidate[$metric];
+             }
+         }
+     }
+
+     $ranked = array_values($pool);
+     $rankOf = static fn(array $entry) => $entry['reranker_score'] ?? $entry['similarity'] ?? 0;
+     usort($ranked, static fn(array $a, array $b): int => $rankOf($b) <=> $rankOf($a));
+
+     return array_slice($ranked, 0, $cap);
+ }
+
+ /**
+  * Attach a 1-based citation number 'n' to every chunk, derived from the
+  * chunk's key in the input array, and return the chunks as a list.
+  *
+  * @param array $chunks Ranked chunks.
+  * @return array The same chunks, each with an added 'n'.
+  */
+ private function numberSources(array $chunks): array
+ {
+     return array_map(
+         static function ($chunk, $position) {
+             $chunk['n'] = $position + 1;
+             return $chunk;
+         },
+         $chunks,
+         array_keys($chunks)
+     );
+ }
+
+ /**
+  * Produce the final brief from the numbered sources using the selected
+  * engine.
+  *
+  * With no sources, no model is called and a fixed "not enough source
+  * support" payload is returned. Otherwise each source is rendered as a
+  * numbered context entry (number, origin, title, optional section,
+  * corpus label, excerpt) and the request is sent in JSON mode (max 2200
+  * tokens, 120 s timeout): 'gpu' goes through dbnToolsCallGpuLlm(),
+  * 'azure_full' uses the 'gpt-4o' deployment, and anything else uses the
+  * gateway's default chat deployment.
+  *
+  * A failed model request is reported through dbnToolsAbort() with a 502.
+  * A reply that does not decode to an object with a non-empty
+  * 'brief_markdown' is salvaged by returning the raw text as the brief.
+  *
+  * @param string $seedDescription Combined seed text.
+  * @param string $brief           Research brief (may be empty).
+  * @param array  $subQuestions    Sub-questions that were retrieved on.
+  * @param array  $numberedSources Sources carrying their citation number 'n'.
+  * @param string $engine          'gpu', 'azure_full' or the default Azure engine.
+  * @param string $language        'no' or 'en'.
+  * @param float  $temperature     Sampling temperature for the request.
+  * @return array{json: array, deploy_label: string}
+  */
+ private function synthesise(
+ string $seedDescription,
+ string $brief,
+ array $subQuestions,
+ array $numberedSources,
+ string $engine,
+ string $language,
+ float $temperature
+ ): array {
+ $locale = $language === 'no' ? 'Norwegian' : 'English';
+
+ // Nothing retrieved: answer with a canned payload instead of calling a model.
+ if (empty($numberedSources)) {
+ return [
+ 'json' => [
+ 'brief_markdown' => $language === 'no'
+ ? 'Jeg fant ikke tilstrekkelig kildestøtte i korpuset til å gi et grunnlagsbasert svar.'
+ : 'I did not find enough source support in the corpus to give a grounded answer.',
+ 'what_we_found' => 'No retrieved sources passed the similarity threshold.',
+ 'what_remains_uncertain' => ['No corpus evidence retrieved for the given query and slice selection.'],
+ 'next_practical_step' => 'Try widening slice selection or rephrasing with more specific statutory or party terms.',
+ ],
+ 'deploy_label' => $engine === 'gpu' ? 'GPU (cuttlefish)' : ($engine === 'azure_full' ? 'gpt-4o' : $this->azure->chatDeployment()),
+ ];
+ }
+
+ // One numbered entry per source; the model sees the excerpt, not the full chunk text.
+ $sourcesContext = [];
+ foreach ($numberedSources as $s) {
+ $sourcesContext[] = sprintf(
+ "[%d] (%s) %s%s\n Corpus: %s\n Excerpt: %s",
+ $s['n'],
+ $s['source_origin'] === 'upload' ? 'uploaded doc' : 'corpus',
+ $s['title'],
+ !empty($s['section']) ? ' — ' . $s['section'] : '',
+ $s['package_or_corpus'],
+ $s['excerpt']
+ );
+ }
+ $sourcesText = implode("\n\n", $sourcesContext);
+
+ $subQText = '';
+ if ($subQuestions) {
+ $lines = array_map(
+ fn(array $sq, int $i): string => sprintf('%d. (%s) %s', $i + 1, $sq['id'], $sq['question']),
+ $subQuestions,
+ array_keys($subQuestions)
+ );
+ $subQText = "\nSub-questions explored:\n" . implode("\n", $lines);
+ }
+
+ $prompt = << 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
+ ['role' => 'user', 'content' => $prompt],
+ ];
+ $opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 2200, 'timeout' => 120];
+
+ // Dispatch on the engine; each branch also records the label shown in the trace.
+ try {
+ if ($engine === 'gpu') {
+ $response = dbnToolsCallGpuLlm($messages, $opts);
+ $deployLabel = 'GPU (cuttlefish)';
+ $raw = (string)($response['choices'][0]['message']['content'] ?? '');
+ } elseif ($engine === 'azure_full') {
+ $raw = $this->azure->withDeployment('gpt-4o')->chatText($messages, $opts);
+ $deployLabel = 'gpt-4o';
+ } else {
+ $raw = $this->azure->chatText($messages, $opts);
+ $deployLabel = $this->azure->chatDeployment();
+ }
+ } catch (Throwable $e) {
+ // NOTE(review): the upstream exception text is passed to dbnToolsAbort here;
+ // if that message reaches the client, consider logging it and returning a generic one.
+ dbnToolsAbort('Synthesis LLM request failed: ' . $e->getMessage(), 502, 'llm_error');
+ }
+
+ $json = $this->azure->decodeJsonObject($raw);
+ if (!is_array($json) || empty($json['brief_markdown'])) {
+ // Salvage as plain markdown
+ $json = [
+ 'brief_markdown' => $raw,
+ 'what_we_found' => 'Synthesis returned non-structured output; rendered as raw markdown.',
+ 'what_remains_uncertain' => ['Response format could not be validated as structured JSON.'],
+ 'next_practical_step' => 'Review the brief manually before relying on it.',
+ ];
+ }
+
+ return [
+ 'json' => $json,
+ 'deploy_label' => $deployLabel,
+ ];
+ }
+
+ /**
+  * Rate how well the brief is supported, from the number of sources and
+  * the best score among them.
+  *
+  * Each source contributes its reranker score, or its similarity when no
+  * reranker score exists; non-numeric values are ignored. 'high' needs at
+  * least 6 sources and a best score of 0.5, 'medium' at least 3 sources
+  * and 0.35, and everything else (including no sources) is 'low'.
+  *
+  * @param array $sources Numbered sources.
+  * @return string 'high', 'medium' or 'low'.
+  */
+ private function citationConfidence(array $sources): string
+ {
+     $sourceCount = count($sources);
+     if ($sourceCount === 0) {
+         return 'low';
+     }
+
+     $pickScore = static fn(array $s) => $s['reranker_score'] ?? $s['similarity'] ?? null;
+
+     $scores = [];
+     foreach ($sources as $source) {
+         $candidate = $pickScore($source);
+         if (is_numeric($candidate)) {
+             $scores[] = $candidate;
+         }
+     }
+     $best = $scores ? max($scores) : 0;
+
+     return match (true) {
+         $sourceCount >= 6 && $best >= 0.5 => 'high',
+         $sourceCount >= 3 && $best >= 0.35 => 'medium',
+         default => 'low',
+     };
+ }
+
+ /**
+  * Build one trace entry for the response.
+  *
+  * @param string $label  Step name.
+  * @param string $detail Human-readable description of what happened.
+  * @param string $status Step status; defaults to 'complete'.
+  * @return array{label: string, detail: string, status: string}
+  */
+ private function trace(string $label, string $detail, string $status = 'complete'): array
+ {
+     return compact('label', 'detail', 'status');
+ }
+
+ /**
+  * Milliseconds elapsed since $start, rounded to the nearest integer.
+  *
+  * @param float $start A microtime(true) timestamp taken earlier.
+  * @return int Elapsed time in milliseconds.
+  */
+ private function elapsedMs(float $start): int
+ {
+     $elapsedSeconds = microtime(true) - $start;
+
+     return (int)round($elapsedSeconds * 1000);
+ }
+}
diff --git a/includes/bootstrap.php b/includes/bootstrap.php
index 20668c2..6df79aa 100644
--- a/includes/bootstrap.php
+++ b/includes/bootstrap.php
@@ -487,3 +487,192 @@ function dbnToolsExcerpt(string $text, int $limit = 520): string
}
return rtrim(mb_substr($text, 0, $limit - 1, 'UTF-8')) . '…';
}
+
+// Largest upload, in bytes, accepted for text extraction (4 MiB).
+const DBN_TOOLS_EXTRACT_MAX_BYTES = 4 * 1024 * 1024;
+// Extracted text longer than this many characters is cut and flagged as truncated.
+const DBN_TOOLS_EXTRACT_TEXT_LIMIT = 128000;
+// Lower-case file extensions that have an extractor.
+const DBN_TOOLS_EXTRACT_ALLOWED_EXTS = ['txt', 'pdf', 'docx'];
+
+/**
+ * Validate one uploaded file and extract its text.
+ *
+ * Checks run in this order: PHP upload error code, genuine-upload check,
+ * empty file, size limit, allowed extension. The text is then extracted
+ * by the extension's extractor, trimmed, rejected if empty, and cut to
+ * DBN_TOOLS_EXTRACT_TEXT_LIMIT characters. Validation failures are
+ * reported through dbnToolsAbort().
+ *
+ * @param array $file One $_FILES-style entry (name, tmp_name, error, size).
+ * @return array ok, text, filename, chars (length of the returned text) and truncated.
+ */
+function dbnToolsExtractUploadedFile(array $file): array
+{
+    $uploadStatus = (int)($file['error'] ?? UPLOAD_ERR_NO_FILE);
+    if ($uploadStatus !== UPLOAD_ERR_OK) {
+        $knownFailures = [
+            UPLOAD_ERR_INI_SIZE => 'The file exceeds the allowed size limit.',
+            UPLOAD_ERR_FORM_SIZE => 'The file exceeds the allowed size limit.',
+            UPLOAD_ERR_NO_TMP_DIR => 'No temporary directory is available.',
+            UPLOAD_ERR_CANT_WRITE => 'Unable to save the uploaded file.',
+        ];
+        dbnToolsAbort($knownFailures[$uploadStatus] ?? 'File upload failed.', 422, 'upload_error');
+    }
+
+    // basename() drops any directory part sent by the client.
+    $displayName = basename((string)($file['name'] ?? ''));
+    $storedAt = (string)($file['tmp_name'] ?? '');
+    $byteCount = (int)($file['size'] ?? 0);
+
+    if (!is_uploaded_file($storedAt)) {
+        dbnToolsAbort('Invalid file upload.', 400, 'invalid_upload');
+    }
+    if ($byteCount === 0) {
+        dbnToolsAbort('The uploaded file is empty.', 422, 'file_empty');
+    }
+    if ($byteCount > DBN_TOOLS_EXTRACT_MAX_BYTES) {
+        dbnToolsAbort('File exceeds the 4 MB limit.', 413, 'file_too_large');
+    }
+
+    $extension = strtolower(pathinfo($displayName, PATHINFO_EXTENSION));
+    $isSupported = in_array($extension, DBN_TOOLS_EXTRACT_ALLOWED_EXTS, true);
+    if (!$isSupported) {
+        dbnToolsAbort('Unsupported file type. Upload a .pdf, .docx, or .txt file.', 422, 'unsupported_type');
+    }
+
+    $extracted = trim(match ($extension) {
+        'txt' => dbnToolsExtractTxt($storedAt),
+        'pdf' => dbnToolsExtractPdf($storedAt),
+        'docx' => dbnToolsExtractDocx($storedAt),
+    });
+    if ($extracted === '') {
+        dbnToolsAbort('No text could be extracted from this file.', 422, 'no_text');
+    }
+
+    $wasCut = mb_strlen($extracted, 'UTF-8') > DBN_TOOLS_EXTRACT_TEXT_LIMIT;
+    if ($wasCut) {
+        $extracted = mb_substr($extracted, 0, DBN_TOOLS_EXTRACT_TEXT_LIMIT, 'UTF-8');
+    }
+
+    return [
+        'ok' => true,
+        'text' => $extracted,
+        'filename' => $displayName,
+        'chars' => mb_strlen($extracted, 'UTF-8'),
+        'truncated' => $wasCut,
+    ];
+}
+
+/**
+ * Read a plain-text file and return its content as UTF-8.
+ *
+ * Decoding is deterministic rather than detection-based: a leading UTF-8
+ * byte-order mark is dropped, content that is already valid UTF-8 is
+ * returned unchanged, and anything else is decoded as Windows-1252.
+ * Windows-1252 is used for the legacy case because it agrees with
+ * ISO-8859-1 on all printable characters and additionally maps the
+ * 0x80-0x9F bytes (curly quotes, dashes, the euro sign) to printable
+ * characters instead of control codes.
+ *
+ * @param string $path Path of the file to read.
+ * @return string UTF-8 text.
+ * @throws DbnToolsHttpException When the file cannot be read.
+ */
+function dbnToolsExtractTxt(string $path): string
+{
+    $content = file_get_contents($path);
+    if ($content === false) {
+        throw new DbnToolsHttpException('Unable to read the file.', 500, 'read_error');
+    }
+
+    // A BOM would otherwise survive as an invisible U+FEFF at the start of the text.
+    if (str_starts_with($content, "\xEF\xBB\xBF")) {
+        $content = substr($content, 3);
+    }
+
+    if (mb_check_encoding($content, 'UTF-8')) {
+        return $content;
+    }
+
+    return mb_convert_encoding($content, 'UTF-8', 'Windows-1252');
+}
+
+/**
+ * Extract text from a PDF by running the `pdftotext` command-line tool
+ * and capturing its standard output (its error output is discarded).
+ *
+ * @param string $path Path of the PDF file.
+ * @return string The raw text printed by pdftotext.
+ * @throws DbnToolsHttpException When the command yields no text.
+ */
+function dbnToolsExtractPdf(string $path): string
+{
+    $command = sprintf('pdftotext %s - 2>/dev/null', escapeshellarg($path));
+    $stdout = shell_exec($command);
+
+    $hasText = is_string($stdout) && trim($stdout) !== '';
+    if ($hasText) {
+        return $stdout;
+    }
+
+    throw new DbnToolsHttpException(
+        'PDF text extraction failed. The file may be image-only or encrypted.',
+        422,
+        'pdf_extract_failed'
+    );
+}
+
+/**
+ * Extract the paragraph text of a .docx file.
+ *
+ * The file is opened as a zip archive and `word/document.xml` is parsed.
+ * Each `w:p` element becomes one output line made of its `w:t` text
+ * runs, concatenated in document order.
+ *
+ * Because the file is an untrusted upload, the declared uncompressed
+ * size of document.xml is checked before it is read into memory, network
+ * access is disabled while parsing, and the process-wide libxml error
+ * mode is restored afterwards so later XML handling in the same request
+ * is not affected.
+ *
+ * @param string $path Path of the .docx file.
+ * @return string Paragraphs joined by "\n".
+ * @throws DbnToolsHttpException When the archive cannot be opened, has no
+ *                               usable document.xml, or that part is too large or malformed.
+ */
+function dbnToolsExtractDocx(string $path): string
+{
+    // Upper bound for the decompressed document.xml; guards against zip bombs.
+    $maxXmlBytes = 64 * 1024 * 1024;
+
+    $zip = new ZipArchive();
+    if ($zip->open($path) !== true) {
+        throw new DbnToolsHttpException('Unable to open the .docx file.', 422, 'docx_open_failed');
+    }
+
+    try {
+        $stat = $zip->statName('word/document.xml');
+        if ($stat !== false && (int)($stat['size'] ?? 0) > $maxXmlBytes) {
+            throw new DbnToolsHttpException('The .docx document content is too large to process.', 422, 'docx_too_large');
+        }
+        $xml = $stat === false ? false : $zip->getFromName('word/document.xml');
+    } finally {
+        // Release the archive handle on every path, including exceptions.
+        $zip->close();
+    }
+
+    if ($xml === false || $xml === '') {
+        throw new DbnToolsHttpException('No document content found in this .docx file.', 422, 'docx_no_content');
+    }
+
+    $doc = new DOMDocument();
+    $previousErrorMode = libxml_use_internal_errors(true);
+    try {
+        $loaded = $doc->loadXML($xml, LIBXML_NONET);
+    } finally {
+        libxml_clear_errors();
+        libxml_use_internal_errors($previousErrorMode);
+    }
+    if ($loaded === false) {
+        throw new DbnToolsHttpException('Unable to parse the .docx document content.', 422, 'docx_parse_failed');
+    }
+
+    $xpath = new DOMXPath($doc);
+    $xpath->registerNamespace('w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main');
+
+    $paragraphs = [];
+    foreach ($xpath->query('//w:p') as $para) {
+        $runs = [];
+        foreach ($xpath->query('.//w:t', $para) as $t) {
+            $runs[] = $t->textContent;
+        }
+        $paragraphs[] = implode('', $runs);
+    }
+
+    return implode("\n", $paragraphs);
+}
+
+/**
+ * Send a chat-completion request to the GPU-hosted LiteLLM endpoint and
+ * return the decoded JSON response.
+ *
+ * Uses cURL when available and falls back to an HTTP stream context.
+ * The API key is read from the LITELLM_MASTER_KEY environment setting
+ * only; no key is embedded in the source, so a missing setting fails
+ * loudly instead of silently using a shared credential.
+ *
+ * @param array $messages Chat messages in role/content form.
+ * @param array $options  Optional: model (default 'qwen2.5:14b'), timeout in seconds
+ *                        (default 90), temperature (default 0.1), max_tokens
+ *                        (default 8000), json (truthy requests a JSON object reply).
+ * @return array Decoded response body.
+ * @throws RuntimeException When the key is missing, the payload cannot be encoded,
+ *                          the transport fails, the body is not JSON, or the HTTP
+ *                          status is not 2xx.
+ */
+function dbnToolsCallGpuLlm(array $messages, array $options = []): array
+{
+    $url = 'http://10.0.1.10:4000/v1/chat/completions';
+    $apiKey = (string)(dbnToolsEnv('LITELLM_MASTER_KEY') ?: '');
+    if ($apiKey === '') {
+        throw new RuntimeException('GPU LiteLLM is not configured: LITELLM_MASTER_KEY is not set.');
+    }
+    $model = (string)($options['model'] ?? 'qwen2.5:14b');
+    $timeout = (int)($options['timeout'] ?? 90);
+
+    $payload = [
+        'model' => $model,
+        'messages' => $messages,
+        'temperature' => $options['temperature'] ?? 0.1,
+        'max_tokens' => $options['max_tokens'] ?? 8000,
+    ];
+    if (!empty($options['json'])) {
+        $payload['response_format'] = ['type' => 'json_object'];
+    }
+
+    // Without JSON_THROW_ON_ERROR an unencodable payload (e.g. invalid UTF-8
+    // in an upload excerpt) would be posted as an empty body.
+    try {
+        $body = json_encode($payload, JSON_THROW_ON_ERROR | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
+    } catch (JsonException $e) {
+        throw new RuntimeException('GPU LiteLLM request could not be encoded: ' . $e->getMessage(), 0, $e);
+    }
+    $headers = [
+        'Content-Type: application/json',
+        'Authorization: Bearer ' . $apiKey,
+    ];
+
+    if (function_exists('curl_init')) {
+        $ch = curl_init($url);
+        curl_setopt_array($ch, [
+            CURLOPT_RETURNTRANSFER => true,
+            CURLOPT_POST => true,
+            CURLOPT_POSTFIELDS => $body,
+            CURLOPT_HTTPHEADER => $headers,
+            CURLOPT_TIMEOUT => $timeout,
+        ]);
+        $response = curl_exec($ch);
+        $code = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
+        $err = curl_error($ch);
+        curl_close($ch);
+
+        if ($response === false) {
+            throw new RuntimeException('GPU LiteLLM request failed: ' . $err);
+        }
+    } else {
+        // 'ignore_errors' keeps the body of non-2xx replies so the error
+        // message can be read from it below.
+        $ctx = stream_context_create(['http' => [
+            'method' => 'POST',
+            'header' => implode("\r\n", $headers),
+            'content' => $body,
+            'timeout' => $timeout,
+            'ignore_errors' => true,
+        ]]);
+        $response = @file_get_contents($url, false, $ctx);
+        $code = 0;
+        // $http_response_header is populated in this scope by file_get_contents().
+        if (isset($http_response_header[0]) && preg_match('/\s(\d{3})\s/', $http_response_header[0], $m)) {
+            $code = (int)$m[1];
+        }
+        if ($response === false) {
+            throw new RuntimeException('GPU LiteLLM request failed.');
+        }
+    }
+
+    $decoded = json_decode($response, true);
+    if (!is_array($decoded)) {
+        throw new RuntimeException('GPU LiteLLM returned non-JSON response.');
+    }
+    if ($code < 200 || $code >= 300) {
+        $msg = $decoded['error']['message'] ?? ('HTTP ' . $code);
+        throw new RuntimeException('GPU LiteLLM error: ' . $msg);
+    }
+    return $decoded;
+}
diff --git a/includes/layout.php b/includes/layout.php
index 43f9921..af1bb54 100644
--- a/includes/layout.php
+++ b/includes/layout.php
@@ -9,12 +9,13 @@ if (!dbnToolsIsAuthenticated()) {
}
$navItems = [
- 'ask' => ['Ask', 'Source-grounded'],
- 'search' => ['Search', 'Legal sources'],
- 'summarize' => ['Summarize', 'Pasted text'],
- 'timeline' => ['Timeline', 'Events'],
- 'redact' => ['Redact', 'Privacy'],
- 'transcribe' => ['Transcribe', 'Audio'],
+ 'ask' => ['Ask', 'Source-grounded'],
+ 'search' => ['Search', 'Legal sources'],
+ 'deep-research' => ['Deep research', 'Agent + RAG'],
+ 'summarize' => ['Summarize', 'Pasted text'],
+ 'timeline' => ['Timeline', 'Events'],
+ 'redact' => ['Redact', 'Privacy'],
+ 'transcribe' => ['Transcribe', 'Audio'],
];
$toolName = $toolName ?? 'ask';
$toolTitle = $toolTitle ?? 'Legal Tools';
diff --git a/includes/layout_footer.php b/includes/layout_footer.php
index bc3e187..25450fa 100644
--- a/includes/layout_footer.php
+++ b/includes/layout_footer.php
@@ -18,5 +18,8 @@
+
+
+