Add Deep Research tool — agent + rank/rerank RAG
New surface at /deep-research.php where the user pastes a question or uploads PDF/DOCX/TXT case files and an LLM-orchestrated agent researches the Do Better Norge legal corpus from 3-5 angles, with hybrid retrieval, cross-encoder rerank, and synthesis that emits an inline-[n]-cited markdown brief plus a numbered sources panel. Uploaded documents are chunked + embedded in memory only (nomic-embed-text via LiteLLM) and searched alongside the shared corpus during the same request — never persisted to disk, DB, or Qdrant. Reuses ClientRagPipeline::searchAll (hybrid + rerank), dbnV6 slice helpers, and the existing extract.php text-extraction logic via a new dbnToolsExtractUploadedFile() helper. Also adds dbnToolsCallGpuLlm() helper in bootstrap.php — fixes a latent bug where LegalTools.php was already calling that name with no definition. Search.php is unchanged.
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
<?php

declare(strict_types=1);

/*
 * Deep Research API endpoint (POST, authenticated).
 *
 * The request is accepted in one of two shapes:
 *   - multipart/form-data: a JSON document in the "payload" field plus
 *     optional uploads under "files[]";
 *   - anything else: the body is read through dbnToolsJsonInput().
 *
 * Payload keys read here: query, paste_text, slices, engine, language,
 * controls. The values, together with the text extracted from each upload,
 * are handed to DbnDeepResearchAgent::run() inside dbnToolsWithTelemetry().
 */

require_once __DIR__ . '/../includes/bootstrap.php';
require_once __DIR__ . '/../includes/DeepResearchAgent.php';

dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

$isMultipart = stripos((string)($_SERVER['CONTENT_TYPE'] ?? ''), 'multipart/form-data') !== false;

if ($isMultipart) {
    $payloadRaw = (string)($_POST['payload'] ?? '');
    if ($payloadRaw === '') {
        dbnToolsError('Multipart request is missing the "payload" JSON field.', 422, 'missing_payload');
    }
    $input = json_decode($payloadRaw, true);
    if (!is_array($input)) {
        dbnToolsError('Multipart "payload" field must be valid JSON.', 422, 'invalid_payload_json');
    }
} else {
    $input = dbnToolsJsonInput(120000);
}

$language = dbnToolsNormalizeLanguage($input['language'] ?? 'en');

dbnToolsWithTelemetry('deep_research', $language, function () use ($input, $language) {
    $seedQuery = dbnToolsString($input, 'query', 4000, false);
    $pastedText = dbnToolsString($input, 'paste_text', 64000, false);
    $sliceInput = $input['slices'] ?? null;
    $controls = is_array($input['controls'] ?? null) ? $input['controls'] : [];

    // A non-scalar "engine" (e.g. a JSON array) would otherwise be cast to the
    // literal string "Array" with a warning; fall back to the default instead.
    $engineRaw = $input['engine'] ?? 'azure_mini';
    $engine = is_scalar($engineRaw) ? (string)$engineRaw : 'azure_mini';

    $uploadedFiles = [];
    if (!empty($_FILES['files']) && is_array($_FILES['files']['tmp_name'] ?? null)) {
        // Browsers submit an empty part for a file input with nothing selected
        // (error === UPLOAD_ERR_NO_FILE). Such slots are not real uploads, so
        // they are neither counted against the limit nor sent to extraction.
        $slots = [];
        foreach (array_keys($_FILES['files']['tmp_name']) as $i) {
            $slotError = $_FILES['files']['error'][$i] ?? UPLOAD_ERR_NO_FILE;
            if ($slotError === UPLOAD_ERR_NO_FILE) {
                continue;
            }
            $slots[] = $i;
        }

        if (count($slots) > 5) {
            dbnToolsAbort('At most 5 files can be uploaded per request.', 413, 'too_many_files');
        }

        foreach ($slots as $i) {
            // Re-assemble PHP's column-oriented $_FILES layout into the
            // single-file shape that dbnToolsExtractUploadedFile() is given.
            $file = [
                'name' => $_FILES['files']['name'][$i] ?? '',
                'type' => $_FILES['files']['type'][$i] ?? '',
                'tmp_name' => $_FILES['files']['tmp_name'][$i] ?? '',
                'error' => $_FILES['files']['error'][$i] ?? UPLOAD_ERR_NO_FILE,
                'size' => $_FILES['files']['size'][$i] ?? 0,
            ];
            $extracted = dbnToolsExtractUploadedFile($file);
            $uploadedFiles[] = [
                'filename' => $extracted['filename'],
                'text' => $extracted['text'],
                'chars' => $extracted['chars'],
                'truncated' => $extracted['truncated'],
            ];
        }
    }

    return (new DbnDeepResearchAgent())->run(
        $seedQuery,
        $pastedText,
        $uploadedFiles,
        is_array($sliceInput) ? $sliceInput : [],
        $engine,
        $language,
        $controls
    );
});
|
||||
+2
-118
@@ -6,132 +6,16 @@ require_once __DIR__ . '/../includes/bootstrap.php';
|
||||
dbnToolsRequireMethod('POST');
|
||||
dbnToolsRequireAuth();
|
||||
|
||||
const EXTRACT_MAX_BYTES = 4 * 1024 * 1024;
|
||||
const EXTRACT_TEXT_LIMIT = 128000;
|
||||
const EXTRACT_ALLOWED_EXTS = ['txt', 'pdf', 'docx'];
|
||||
|
||||
try {
    // Reject requests without a "file" part up front so the helper is always
    // handed an array.
    if (empty($_FILES['file']) || !is_array($_FILES['file'])) {
        dbnToolsError('No file was uploaded.', 422, 'missing_file');
    }

    // Validation, text extraction and truncation are delegated to the shared
    // helper; keeping the former inline copy next to this call would extract
    // the file and emit the response twice.
    $result = dbnToolsExtractUploadedFile($_FILES['file']);
    dbnToolsRespond($result);
} catch (DbnToolsHttpException $e) {
    // Errors raised with an explicit HTTP status are relayed as-is.
    dbnToolsError($e->getMessage(), $e->status, $e->errorCode, $e->extra);
} catch (Throwable $e) {
    // Anything unexpected is logged server-side and reported generically.
    error_log('DBN extract error: ' . $e->getMessage());
    dbnToolsError('Text extraction failed.', 500, 'extract_error');
}
|
||||
|
||||
/**
 * Read a plain-text upload and return its content as UTF-8.
 *
 * Content that is already valid UTF-8 is returned unchanged, apart from a
 * leading byte-order mark, which is removed. Anything else is decoded as
 * Windows-1252. That code page agrees with ISO-8859-1 for every printable
 * character and additionally maps 0x80–0x9F to typographic punctuation
 * (curly quotes, dashes, the euro sign) instead of C1 control characters.
 *
 * @param string $path Path of the file to read.
 *
 * @return string The file content encoded as UTF-8.
 *
 * @throws DbnToolsHttpException When the file cannot be read.
 */
function extractTxt(string $path): string
{
    $content = file_get_contents($path);
    if ($content === false) {
        throw new DbnToolsHttpException('Unable to read the file.', 500, 'read_error');
    }

    // A UTF-8 BOM is not whitespace, so a later trim() would not remove it.
    if (str_starts_with($content, "\xEF\xBB\xBF")) {
        $content = substr($content, 3);
    }

    if (mb_check_encoding($content, 'UTF-8')) {
        return $content;
    }

    return mb_convert_encoding($content, 'UTF-8', 'Windows-1252');
}
|
||||
|
||||
/**
 * Extract the text layer of a PDF by running the `pdftotext` command-line tool.
 *
 * The tool writes the text to stdout ("-") and its stderr is discarded. The
 * path is shell-escaped before being placed on the command line.
 *
 * @param string $path Path of the PDF file.
 *
 * @return string The raw text printed by pdftotext.
 *
 * @throws DbnToolsHttpException When the command yields no output or only whitespace.
 */
function extractPdf(string $path): string
{
    $text = shell_exec('pdftotext ' . escapeshellarg($path) . ' - 2>/dev/null');

    // shell_exec() gives null/false when nothing could be captured.
    $hasText = is_string($text) && trim($text) !== '';
    if ($hasText) {
        return $text;
    }

    throw new DbnToolsHttpException(
        'PDF text extraction failed. The file may be image-only or encrypted.',
        422,
        'pdf_extract_failed'
    );
}
|
||||
|
||||
/**
 * Extract the paragraph text of a .docx file.
 *
 * Opens the file as a ZIP archive, reads `word/document.xml`, and joins the
 * text of every `w:t` node inside each `w:p` paragraph. Paragraphs are
 * separated by a newline.
 *
 * @param string $path Path of the .docx file.
 *
 * @return string The paragraphs joined with "\n".
 *
 * @throws DbnToolsHttpException When the archive cannot be opened, has no
 *                               document part, or the part is not parseable XML.
 */
function extractDocx(string $path): string
{
    $zip = new ZipArchive();
    if ($zip->open($path) !== true) {
        throw new DbnToolsHttpException('Unable to open the .docx file.', 422, 'docx_open_failed');
    }

    $xml = $zip->getFromName('word/document.xml');
    $zip->close();

    // An empty entry is treated like a missing one: DOMDocument::loadXML()
    // refuses an empty string with a ValueError.
    if ($xml === false || $xml === '') {
        throw new DbnToolsHttpException('No document content found in this .docx file.', 422, 'docx_no_content');
    }

    $doc = new DOMDocument();

    // Silence parser warnings for this parse only and put the process-wide
    // libxml flag back afterwards, so other XML users are not affected.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        // LIBXML_NONET: never fetch anything over the network while parsing.
        $loaded = $doc->loadXML($xml, LIBXML_NONET);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    if ($loaded === false) {
        throw new DbnToolsHttpException(
            'Unable to parse the .docx document content.',
            422,
            'docx_parse_failed'
        );
    }

    $xpath = new DOMXPath($doc);
    $xpath->registerNamespace('w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main');

    $paragraphs = [];
    foreach ($xpath->query('//w:p') as $para) {
        $runs = [];
        foreach ($xpath->query('.//w:t', $para) as $t) {
            $runs[] = $t->textContent;
        }
        // Runs of one paragraph are concatenated without a separator.
        $paragraphs[] = implode('', $runs);
    }

    return implode("\n", $paragraphs);
}
|
||||
|
||||
@@ -1701,3 +1701,478 @@ p {
|
||||
font-weight: 500;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
/* =========================================================================
|
||||
Deep Research — agent + rank/rerank RAG surface
|
||||
========================================================================= */
|
||||
|
||||
.deep-research .lang-switcher {
|
||||
display: inline-flex;
|
||||
gap: 6px;
|
||||
}
|
||||
|
||||
.deep-research .lang-btn {
|
||||
padding: 6px 10px;
|
||||
border-radius: 999px;
|
||||
background: #fff;
|
||||
border: 1px solid var(--line);
|
||||
color: var(--muted);
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.deep-research .lang-btn.is-active {
|
||||
background: var(--soft-teal);
|
||||
color: var(--teal-dark);
|
||||
border-color: rgba(15, 118, 110, 0.30);
|
||||
}
|
||||
|
||||
.dr-slice-section {
|
||||
display: grid;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.dr-slice-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, minmax(0, 1fr));
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.dr-slice {
|
||||
text-align: left;
|
||||
background: #fbfcfe;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
padding: 12px 13px;
|
||||
cursor: pointer;
|
||||
min-height: 96px;
|
||||
display: grid;
|
||||
gap: 6px;
|
||||
align-content: start;
|
||||
transition: border-color 120ms ease, background 120ms ease;
|
||||
}
|
||||
|
||||
.dr-slice:hover {
|
||||
border-color: rgba(15, 118, 110, 0.30);
|
||||
}
|
||||
|
||||
.dr-slice.is-on {
|
||||
background: var(--soft-teal);
|
||||
border-color: rgba(15, 118, 110, 0.45);
|
||||
}
|
||||
|
||||
.dr-slice__head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.dr-slice__title {
|
||||
font-weight: 800;
|
||||
color: var(--ink);
|
||||
}
|
||||
|
||||
.dr-slice__badge {
|
||||
background: #fff;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 999px;
|
||||
color: var(--muted);
|
||||
font-size: 0.66rem;
|
||||
font-weight: 800;
|
||||
letter-spacing: 0.06em;
|
||||
padding: 3px 8px;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
|
||||
.dr-slice.is-on .dr-slice__badge {
|
||||
background: var(--teal);
|
||||
border-color: var(--teal);
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.dr-slice__tagline {
|
||||
margin: 0;
|
||||
color: var(--muted);
|
||||
font-size: 0.86rem;
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.advanced-panel .dr-control-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(5, minmax(0, 1fr));
|
||||
gap: 8px;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
.dr-control-card {
|
||||
background: #fbfcfe;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
.dr-control-card label {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
gap: 8px;
|
||||
align-items: center;
|
||||
font-weight: 800;
|
||||
color: var(--ink);
|
||||
font-size: 0.85rem;
|
||||
}
|
||||
|
||||
.dr-control-card small {
|
||||
display: block;
|
||||
margin-top: 8px;
|
||||
color: var(--muted);
|
||||
font-size: 0.74rem;
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.dr-control-card input[type="range"] {
|
||||
width: 100%;
|
||||
margin-top: 8px;
|
||||
accent-color: var(--teal);
|
||||
}
|
||||
|
||||
.dr-control-value {
|
||||
color: var(--coral);
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
@media (max-width: 980px) {
|
||||
.advanced-panel .dr-control-grid {
|
||||
grid-template-columns: repeat(2, minmax(0, 1fr));
|
||||
}
|
||||
.dr-slice-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
}
|
||||
|
||||
.deep-research-results {
|
||||
display: grid;
|
||||
gap: 14px;
|
||||
}
|
||||
|
||||
.dr-result-block {
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
padding: 16px;
|
||||
background: #fff;
|
||||
}
|
||||
|
||||
.dr-brief {
|
||||
line-height: 1.65;
|
||||
color: var(--ink);
|
||||
font-size: 1.0rem;
|
||||
}
|
||||
|
||||
.dr-brief p {
|
||||
margin: 0 0 12px;
|
||||
}
|
||||
|
||||
.dr-brief code {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
|
||||
background: var(--soft-teal);
|
||||
padding: 1px 5px;
|
||||
border-radius: 4px;
|
||||
font-size: 0.86em;
|
||||
}
|
||||
|
||||
.dr-brief strong { color: var(--ink); }
|
||||
.dr-brief em { color: var(--muted); }
|
||||
|
||||
.dr-cite {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
min-width: 18px;
|
||||
height: 18px;
|
||||
margin: 0 1px;
|
||||
padding: 0 5px;
|
||||
border-radius: 999px;
|
||||
background: var(--soft-coral);
|
||||
color: var(--coral);
|
||||
font-size: 0.72rem;
|
||||
font-weight: 800;
|
||||
font-variant-numeric: tabular-nums;
|
||||
cursor: pointer;
|
||||
border: 1px solid rgba(194, 65, 12, 0.25);
|
||||
vertical-align: 1px;
|
||||
}
|
||||
|
||||
.dr-cite:hover { background: var(--coral); color: #fff; }
|
||||
|
||||
.dr-sources-head {
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
justify-content: space-between;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
.dr-sources-head h3 {
|
||||
margin: 0;
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
.dr-sources-head small {
|
||||
color: var(--muted);
|
||||
font-size: 0.82rem;
|
||||
}
|
||||
|
||||
.dr-source-list {
|
||||
display: grid;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.dr-source-card {
|
||||
display: grid;
|
||||
grid-template-columns: 34px 1fr auto;
|
||||
gap: 12px;
|
||||
align-items: start;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
padding: 12px;
|
||||
background: #fbfcfe;
|
||||
cursor: pointer;
|
||||
text-align: left;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.dr-source-card:hover { border-color: rgba(15, 118, 110, 0.40); }
|
||||
|
||||
.dr-source-card.is-highlight {
|
||||
border-color: var(--coral);
|
||||
background: var(--soft-coral);
|
||||
}
|
||||
|
||||
.dr-source-number {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
width: 28px;
|
||||
height: 28px;
|
||||
border-radius: 999px;
|
||||
background: var(--soft-coral);
|
||||
color: var(--coral);
|
||||
font-weight: 900;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
.dr-source-body {
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.dr-source-title {
|
||||
font-weight: 800;
|
||||
color: var(--ink);
|
||||
line-height: 1.35;
|
||||
}
|
||||
|
||||
.dr-source-meta {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 6px;
|
||||
margin-top: 6px;
|
||||
}
|
||||
|
||||
.dr-source-tag {
|
||||
background: var(--soft-teal);
|
||||
color: var(--teal-dark);
|
||||
border-radius: 999px;
|
||||
font-size: 0.7rem;
|
||||
font-weight: 800;
|
||||
padding: 3px 8px;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
|
||||
.dr-source-tag--upload { background: #fff0e8; color: #8a4524; }
|
||||
.dr-source-tag--score { background: #eef3fb; color: #314158; }
|
||||
|
||||
.dr-source-excerpt {
|
||||
color: var(--muted);
|
||||
margin-top: 8px;
|
||||
line-height: 1.5;
|
||||
font-size: 0.92rem;
|
||||
}
|
||||
|
||||
.dr-source-aside {
|
||||
align-self: stretch;
|
||||
display: grid;
|
||||
grid-template-rows: auto auto;
|
||||
gap: 6px;
|
||||
font-size: 0.78rem;
|
||||
color: var(--muted);
|
||||
text-align: right;
|
||||
min-width: 90px;
|
||||
}
|
||||
|
||||
.dr-source-aside b {
|
||||
color: var(--ink);
|
||||
font-variant-numeric: tabular-nums;
|
||||
font-size: 0.92rem;
|
||||
}
|
||||
|
||||
/* Method trace — overrides for #traceList rendered in rich mode */
|
||||
.trace-list.is-rich {
|
||||
display: grid;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.trace-list.is-rich .trace-step {
|
||||
display: grid;
|
||||
grid-template-columns: 28px 1fr;
|
||||
gap: 10px;
|
||||
align-items: start;
|
||||
padding: 10px 12px;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
background: #fbfcfe;
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
.trace-list.is-rich .trace-step__marker {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
width: 22px;
|
||||
height: 22px;
|
||||
border-radius: 999px;
|
||||
border: 1px solid var(--line);
|
||||
background: #fff;
|
||||
color: var(--muted);
|
||||
font-size: 0.72rem;
|
||||
font-weight: 900;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
.trace-list.is-rich .trace-step__label {
|
||||
display: block;
|
||||
font-weight: 800;
|
||||
color: var(--ink);
|
||||
font-size: 0.94rem;
|
||||
}
|
||||
|
||||
.trace-list.is-rich .trace-step__detail {
|
||||
display: block;
|
||||
margin-top: 4px;
|
||||
color: var(--muted);
|
||||
font-size: 0.83rem;
|
||||
line-height: 1.45;
|
||||
}
|
||||
|
||||
.trace-list.is-rich .trace-step.is-running {
|
||||
background: var(--soft-coral);
|
||||
}
|
||||
.trace-list.is-rich .trace-step.is-running .trace-step__marker {
|
||||
background: rgba(194, 65, 12, 0.18);
|
||||
border-color: rgba(194, 65, 12, 0.35);
|
||||
color: var(--coral);
|
||||
animation: drTracePulse 950ms ease-in-out infinite;
|
||||
}
|
||||
|
||||
.trace-list.is-rich .trace-step.is-done .trace-step__marker {
|
||||
background: var(--soft-teal);
|
||||
border-color: rgba(15, 118, 110, 0.30);
|
||||
color: var(--teal-dark);
|
||||
}
|
||||
|
||||
.trace-list.is-rich .trace-step.is-warning .trace-step__marker {
|
||||
background: #fff4dc;
|
||||
border-color: rgba(183, 121, 31, 0.35);
|
||||
color: var(--amber);
|
||||
}
|
||||
|
||||
.trace-list.is-rich .trace-step.is-error {
|
||||
background: #fff0e8;
|
||||
}
|
||||
.trace-list.is-rich .trace-step.is-error .trace-step__marker {
|
||||
background: rgba(180, 30, 30, 0.10);
|
||||
border-color: rgba(180, 30, 30, 0.30);
|
||||
color: #b41e1e;
|
||||
}
|
||||
|
||||
@keyframes drTracePulse {
|
||||
0%, 100% { opacity: 0.55; transform: scale(0.92); }
|
||||
50% { opacity: 1; transform: scale(1.04); }
|
||||
}
|
||||
|
||||
/* Source modal */
|
||||
/* Full-viewport backdrop that centres the source dialog. */
.dr-source-modal {
  position: fixed;
  inset: 0;
  background: rgba(23, 32, 51, 0.62);
  display: flex;
  align-items: center;
  justify-content: center;
  padding: 24px;
  z-index: 9999;
}

/*
 * The page script opens and closes the modal by toggling `is-hidden`.
 * The `display: flex` above has the same specificity as a single-class
 * utility rule and appears late in the stylesheet, so it could win over such
 * a rule; this two-class selector makes the hidden state explicit.
 * NOTE(review): a global `.is-hidden` rule is not visible in this hunk — confirm.
 */
.dr-source-modal.is-hidden {
  display: none;
}
|
||||
|
||||
.dr-source-modal__dialog {
|
||||
width: min(960px, 100%);
|
||||
max-height: 90vh;
|
||||
background: #fff;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 28px 92px rgba(0, 0, 0, 0.34);
|
||||
overflow: hidden;
|
||||
display: grid;
|
||||
grid-template-rows: auto 1fr;
|
||||
}
|
||||
|
||||
/* Modal header row: title block on the left, close control on the right. */
.dr-source-modal__head {
  display: flex;
  /* `flex-start` is the long-standing flexbox keyword; bare `start` on a flex
     container is a newer box-alignment value with narrower browser support. */
  align-items: flex-start;
  justify-content: space-between;
  gap: 14px;
  padding: 16px 18px;
  border-bottom: 1px solid var(--line);
}
|
||||
|
||||
.dr-source-modal__head h3 {
|
||||
margin: 0;
|
||||
color: var(--ink);
|
||||
line-height: 1.25;
|
||||
font-size: 1.2rem;
|
||||
}
|
||||
|
||||
.dr-source-modal__body {
|
||||
display: grid;
|
||||
grid-template-columns: 260px minmax(0, 1fr);
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.dr-source-modal__meta,
|
||||
.dr-source-modal__text {
|
||||
padding: 16px 18px;
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
.dr-source-modal__meta {
|
||||
border-right: 1px solid var(--line);
|
||||
background: #fbfcfe;
|
||||
color: var(--muted);
|
||||
font-size: 0.88rem;
|
||||
line-height: 1.55;
|
||||
}
|
||||
|
||||
.dr-source-modal__meta dt {
|
||||
color: var(--ink);
|
||||
font-weight: 800;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
.dr-source-modal__meta dt:first-of-type { margin-top: 0; }
|
||||
|
||||
.dr-source-modal__text {
|
||||
white-space: pre-wrap;
|
||||
line-height: 1.7;
|
||||
color: var(--ink);
|
||||
}
|
||||
|
||||
@media (max-width: 720px) {
|
||||
.dr-source-modal__body { grid-template-columns: 1fr; }
|
||||
.dr-source-modal__meta { border-right: 0; border-bottom: 1px solid var(--line); }
|
||||
.dr-source-card { grid-template-columns: 32px 1fr; }
|
||||
.dr-source-aside { display: none; }
|
||||
}
|
||||
|
||||
@@ -0,0 +1,481 @@
|
||||
/* deep-research.js — page-scoped UI for /deep-research.php */
|
||||
(function () {
|
||||
'use strict';
|
||||
|
||||
const els = {};
|
||||
let lang = 'en';
|
||||
let uploadFiles = [];
|
||||
let lastResult = null;
|
||||
|
||||
const SLICE_DEFS = [
|
||||
{ id: 'family_core', label: 'Family Law Core' },
|
||||
{ id: 'child_welfare', label: 'Child Welfare' },
|
||||
{ id: 'echr_hague', label: 'ECHR and Hague' },
|
||||
{ id: 'broader_legal', label: 'Broader Legal Support' },
|
||||
];
|
||||
|
||||
const STEP_LABELS = [
|
||||
'Query interpretation',
|
||||
'Query expansion',
|
||||
'Slice resolution',
|
||||
'Upload indexing',
|
||||
'Retrieval',
|
||||
'Synthesis',
|
||||
'Citation confidence',
|
||||
];
|
||||
|
||||
// Page bootstrap: runs once the DOM is parsed, caches element references in
// `els`, wires up every control, and draws the idle method trace.
document.addEventListener('DOMContentLoaded', () => {
  // Only boot on the page whose <body> declares data-active-tool="deep-research".
  if (document.body.dataset.activeTool !== 'deep-research') return;

  const byId = (id) => document.getElementById(id);
  const all = (selector) => Array.from(document.querySelectorAll(selector));

  Object.assign(els, {
    form: byId('deepResearchForm'),
    input: byId('drInput'),
    status: byId('drStatus'),
    runButton: byId('drRunButton'),
    results: byId('drResults'),
    traceList: byId('traceList'),
    slices: all('.dr-slice'),
    langButtons: all('#drLangSwitcher .lang-btn'),
    engineRadios: all('input[name="drEngine"]'),
    subQ: byId('drSubQ'),
    subQVal: byId('drSubQValue'),
    chunkLimit: byId('drChunkLimit'),
    chunkLimitVal: byId('drChunkLimitValue'),
    sim: byId('drSim'),
    simVal: byId('drSimValue'),
    topK: byId('drTopK'),
    topKVal: byId('drTopKValue'),
    temp: byId('drTemp'),
    tempVal: byId('drTempValue'),
    uploadZone: byId('drUploadZone'),
    uploadInput: byId('drUploadInput'),
    uploadPrompt: byId('drUploadPrompt'),
    uploadFileInfo: byId('drUploadFileInfo'),
    uploadFileList: byId('drUploadFileList'),
    uploadClear: byId('drUploadClear'),
    modal: byId('drSourceModal'),
    modalClose: byId('drSourceModalClose'),
    modalTitle: byId('drSourceModalTitle'),
    modalEyebrow: byId('drSourceModalEyebrow'),
    modalMeta: byId('drSourceModalMeta'),
    modalText: byId('drSourceModalText'),
  });

  // Without the form there is nothing to drive.
  if (!els.form) return;

  bindSlices();
  bindLang();
  bindRanges();
  bindUpload();
  bindModal();
  els.form.addEventListener('submit', onSubmit);

  // Show every step as idle until the first run.
  const idleSteps = STEP_LABELS.map((label) => ({ label, detail: 'Waiting…', status: 'idle' }));
  renderTrace(idleSteps);
});
|
||||
|
||||
// Makes each corpus-slice button a toggle: flips the `is-on` class, mirrors the
// state in aria-pressed, and updates the button's on/off badge when present.
function bindSlices() {
  for (const button of els.slices) {
    button.addEventListener('click', () => {
      const enabled = button.classList.toggle('is-on');
      button.setAttribute('aria-pressed', String(enabled));

      const badge = button.querySelector('.dr-slice__badge');
      if (badge) badge.textContent = enabled ? 'on' : 'off';
    });
  }
}
|
||||
|
||||
// Language switcher: the clicked button becomes the only `is-active` one and
// its data-lang value (default 'en') is stored in the module-level `lang`.
function bindLang() {
  const activate = (chosen) => {
    for (const other of els.langButtons) other.classList.remove('is-active');
    chosen.classList.add('is-active');
    lang = chosen.dataset.lang || 'en';
  };

  els.langButtons.forEach((button) => {
    button.addEventListener('click', () => activate(button));
  });
}
|
||||
|
||||
// Keeps the value label next to each range slider in sync with the slider.
// Similarity and temperature are shown with two decimals; the rest verbatim.
function bindRanges() {
  const asIs = (value) => value;
  const twoDecimals = (value) => Number(value).toFixed(2);

  const bindings = [
    { range: els.subQ, label: els.subQVal, format: asIs },
    { range: els.chunkLimit, label: els.chunkLimitVal, format: asIs },
    { range: els.sim, label: els.simVal, format: twoDecimals },
    { range: els.topK, label: els.topKVal, format: asIs },
    { range: els.temp, label: els.tempVal, format: twoDecimals },
  ];

  for (const { range, label, format } of bindings) {
    // Skip sliders or labels that are not on the page.
    if (!range || !label) continue;

    const refresh = () => { label.textContent = format(range.value); };
    range.addEventListener('input', refresh);
    refresh(); // paint the initial value
  }
}
|
||||
|
||||
// Wires the upload zone: file picker, drag-and-drop, and the "clear" button.
// Accepted files are appended to the module-level `uploadFiles` array (at most
// 5 files, 4 MB each, extensions pdf/docx/txt) and the list is re-rendered.
function bindUpload() {
  if (!els.uploadZone) return;

  const MAX_FILES = 5;
  const MAX_BYTES = 4 * 1024 * 1024;
  const ALLOWED_EXTS = ['pdf', 'docx', 'txt'];
  const LIMIT_MESSAGE = 'At most 5 files can be uploaded per request.';

  const onFiles = (fileList) => {
    const incoming = Array.from(fileList || []);
    const files = incoming.slice(0, MAX_FILES);
    if (uploadFiles.length + files.length > MAX_FILES) {
      setStatus(LIMIT_MESSAGE, 'error');
      return;
    }

    // Collect every problem so that one message does not overwrite another.
    const problems = [];
    if (incoming.length > files.length) {
      // Files beyond the fifth are not added; tell the user instead of
      // dropping them silently.
      problems.push(LIMIT_MESSAGE);
    }

    files.forEach((f) => {
      if (f.size > MAX_BYTES) {
        problems.push(`${f.name} exceeds the 4 MB limit.`);
        return;
      }
      const ext = (f.name.split('.').pop() || '').toLowerCase();
      if (!ALLOWED_EXTS.includes(ext)) {
        problems.push(`${f.name} is not a supported file type.`);
        return;
      }
      uploadFiles.push(f);
    });

    renderUploadList();
    if (problems.length) setStatus(problems.join(' '), 'error');
  };

  els.uploadInput?.addEventListener('change', (e) => {
    onFiles(e.target.files);
    // The File objects were copied above; resetting the input lets the same
    // file be picked again (otherwise no further `change` event would fire).
    e.target.value = '';
  });

  els.uploadZone.addEventListener('dragover', (e) => {
    e.preventDefault(); // required for the element to accept a drop
    els.uploadZone.classList.add('is-drop');
  });
  els.uploadZone.addEventListener('dragleave', () => els.uploadZone.classList.remove('is-drop'));
  els.uploadZone.addEventListener('drop', (e) => {
    e.preventDefault();
    els.uploadZone.classList.remove('is-drop');
    onFiles(e.dataTransfer?.files);
  });

  els.uploadClear?.addEventListener('click', () => {
    uploadFiles = [];
    if (els.uploadInput) els.uploadInput.value = '';
    renderUploadList();
  });
}
|
||||
|
||||
// Shows either the upload prompt (no files) or the file-info panel with one
// <li> per queued file: escaped name plus size in whole kilobytes.
function renderUploadList() {
  const hasFiles = uploadFiles.length > 0;

  els.uploadFileInfo.classList.toggle('is-hidden', !hasFiles);
  els.uploadPrompt.classList.toggle('is-hidden', hasFiles);
  if (!hasFiles) return;

  const rows = [];
  for (const file of uploadFiles) {
    const sizeKb = (file.size / 1024).toFixed(0);
    rows.push(`<li><span class="upload-filename">${escapeHtml(file.name)}</span><span class="upload-chars">${sizeKb} KB</span></li>`);
  }
  els.uploadFileList.innerHTML = rows.join('');
}
|
||||
|
||||
// Source modal dismissal: the close button, a click on the backdrop itself
// (not on the dialog inside it), or the Escape key while the modal is visible.
function bindModal() {
  if (els.modalClose) {
    els.modalClose.addEventListener('click', closeModal);
  }

  if (els.modal) {
    els.modal.addEventListener('click', (event) => {
      if (event.target === els.modal) closeModal();
    });
  }

  document.addEventListener('keydown', (event) => {
    if (event.key !== 'Escape') return;
    if (!els.modal || els.modal.classList.contains('is-hidden')) return;
    closeModal();
  });
}
|
||||
|
||||
// Hides the source modal by adding the `is-hidden` class (no-op when absent).
function closeModal() {
  const { modal } = els;
  if (modal) modal.classList.add('is-hidden');
}
|
||||
|
||||
// Fills the source modal from one source object and reveals it.
// The metadata list always has "Number" and "Corpus / package"; the other rows
// appear only when the corresponding field is present on the source.
function openModal(source) {
  if (!source) return;

  const fromUpload = source.source_origin === 'upload';
  els.modalEyebrow.textContent = fromUpload ? 'Uploaded file' : 'Corpus source';
  els.modalTitle.textContent = source.title || 'Source';

  const rows = [];
  const addRow = (term, value) => { rows.push([term, value]); };

  addRow('Number', `[${source.n}]`);
  if (source.section) addRow('Section', source.section);
  addRow('Corpus / package', source.package_or_corpus || '—');
  if (source.authority_type) addRow('Authority', source.authority_type);
  if (source.jurisdiction) addRow('Jurisdiction', source.jurisdiction);
  // `!= null` keeps a legitimate score of 0 while skipping null/undefined.
  if (source.similarity != null) addRow('Similarity', String(source.similarity));
  if (source.reranker_score != null) addRow('Rerank score', String(source.reranker_score));
  if (source.matched_sub_questions?.length) {
    addRow('Matched sub-Q', source.matched_sub_questions.join(', '));
  }

  const definitionHtml = rows
    .map(([term, value]) => `<dt>${escapeHtml(term)}</dt><dd>${escapeHtml(String(value))}</dd>`)
    .join('');
  els.modalMeta.innerHTML = '<dl>' + definitionHtml + '</dl>';

  // Set as text, not HTML, so chunk content is never interpreted as markup.
  els.modalText.textContent = source.chunk_text || source.excerpt || '';
  els.modal.classList.remove('is-hidden');
}
|
||||
|
||||
// Returns { <slice id>: boolean } for every entry of SLICE_DEFS. A slice is
// true only when its button exists and currently carries the `is-on` class.
function getSelectedSlices() {
  return SLICE_DEFS.reduce((selection, { id }) => {
    const button = els.slices.find((candidate) => candidate.dataset.slice === id);
    selection[id] = Boolean(button && button.classList.contains('is-on'));
    return selection;
  }, {});
}
|
||||
|
||||
// Value of the checked engine radio button, or 'azure_mini' when none is checked.
function getEngine() {
  for (const radio of els.engineRadios) {
    if (radio.checked) return radio.value;
  }
  return 'azure_mini';
}
|
||||
|
||||
// Reads the advanced sliders into the `controls` object sent with the request.
// Counts and limits are parsed as base-10 integers, the threshold and the
// temperature as floats.
function getControls() {
  const asInt = (input) => parseInt(input.value, 10);
  const asFloat = (input) => parseFloat(input.value);

  return {
    sub_q_count: asInt(els.subQ),
    chunk_limit: asInt(els.chunkLimit),
    similarity_threshold: asFloat(els.sim),
    reranker_top_k: asInt(els.topK),
    temperature: asFloat(els.temp),
  };
}
|
||||
|
||||
// Form submit handler: validates the input, posts the request to
// api/deep-research.php (multipart when files are queued, JSON otherwise) and
// renders the trace and results, or a visible failure state.
async function onSubmit(e) {
  e.preventDefault();

  const query = (els.input.value || '').trim();
  if (!query && uploadFiles.length === 0) {
    setStatus('Type a question or upload a file before running deep research.', 'error');
    return;
  }

  const slices = getSelectedSlices();
  if (!Object.values(slices).some(Boolean)) {
    setStatus('Enable at least one corpus slice.', 'error');
    return;
  }

  setStatus('Running deep research…', 'busy');
  els.runButton.disabled = true;
  els.results.innerHTML = `<div class="empty-state"><h3>Working…</h3><p>The agent is expanding the question, retrieving from the corpus, and synthesising the brief. This usually takes 6–15 seconds.</p></div>`;

  // Placeholder trace with the first step marked as running.
  const placeholder = STEP_LABELS.map((label, i) => ({
    label,
    detail: i === 0 ? 'Running…' : 'Queued',
    status: i === 0 ? 'running' : 'idle',
  }));
  renderTrace(placeholder);

  // Single failure path for network and server errors: without it the trace
  // would keep pulsing on "Running…" and the results pane would keep saying
  // "Working…" after the request had already failed.
  const fail = (msg) => {
    setStatus(msg, 'error');
    renderTrace(placeholder.map((s, i) => (i === 0 ? { ...s, status: 'error', detail: msg } : s)));
    els.results.innerHTML = `<div class="empty-state"><h3>Deep research failed</h3><p>${escapeHtml(msg)}</p></div>`;
  };

  const payload = {
    query,
    paste_text: '',
    slices,
    engine: getEngine(),
    language: lang,
    controls: getControls(),
  };

  try {
    let response;
    try {
      if (uploadFiles.length > 0) {
        // No Content-Type header here: the browser must generate the
        // multipart boundary itself.
        const form = new FormData();
        form.append('payload', JSON.stringify(payload));
        uploadFiles.forEach((f) => form.append('files[]', f));
        response = await fetch('api/deep-research.php', { method: 'POST', body: form, credentials: 'same-origin' });
      } else {
        response = await fetch('api/deep-research.php', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(payload),
          credentials: 'same-origin',
        });
      }
    } catch (err) {
      fail(`Network error: ${err.message || err}`);
      return;
    }

    let data = null;
    try {
      data = await response.json();
    } catch (_) {
      // A non-JSON body leaves `data` null and is reported as a failure below.
    }

    if (!response.ok || !data || data.ok === false) {
      fail((data && data.error && data.error.message) || `Request failed (${response.status}).`);
      return;
    }

    lastResult = data;
    setStatus(`Done in ${data.latency_ms || 0} ms · ${data.trace_metadata?.source_count || 0} sources · confidence ${data.trace_metadata?.citation_confidence || '?'}`, 'ok');
    renderTrace(data.trace || []);
    renderResults(data);
  } finally {
    // Re-enable the button on every exit path, including unexpected errors.
    els.runButton.disabled = false;
  }
}
|
||||
|
||||
// Write `message` into the status line and colour it by `kind`:
// 'error' -> red, 'ok' -> teal, anything else -> muted.
function setStatus(message, kind) {
    let color = 'var(--muted)';
    if (kind === 'error') {
        color = '#b41e1e';
    } else if (kind === 'ok') {
        color = 'var(--teal-dark)';
    }

    const statusNode = els.status;
    statusNode.textContent = message;
    statusNode.style.color = color;
}
|
||||
|
||||
// Render the agent trace as a list of <li> steps inside els.traceList.
// Each step is read via its `status`, `label` and `detail` fields; steps with
// an unrecognised status get no modifier class and show their 1-based index.
function renderTrace(steps) {
    const list = els.traceList;
    if (!list) return;

    const classByStatus = new Map([
        ['running', 'is-running'],
        ['complete', 'is-done'],
        ['warning', 'is-warning'],
        ['error', 'is-error'],
    ]);
    const markerByStatus = new Map([
        ['complete', '✓'],
        ['warning', '!'],
        ['error', '×'],
    ]);

    list.classList.add('is-rich');

    const items = [];
    steps.forEach((step, position) => {
        const statusClass = classByStatus.get(step.status) ?? '';
        const marker = markerByStatus.get(step.status) ?? (position + 1);
        items.push([
            `<li class="trace-step ${statusClass}">`,
            `<span class="trace-step__marker">${marker}</span>`,
            '<div>',
            `<span class="trace-step__label">${escapeHtml(step.label || '')}</span>`,
            `<span class="trace-step__detail">${escapeHtml(step.detail || '')}</span>`,
            '</div>',
            '</li>',
        ].join('\n'));
    });

    list.innerHTML = items.join('');
}
|
||||
|
||||
/**
 * Render the full result view into els.results: the brief, the angles the
 * agent explored, the numbered sources panel, the open uncertainties and the
 * next practical step. Afterwards every element carrying `data-source-n`
 * (source cards and inline citation markers) is wired to open the source
 * modal and flash the matching card.
 *
 * @param {object} data Response payload from api/deep-research.php. Read
 *   fields: brief_markdown, sources, sub_questions, what_remains_uncertain,
 *   next_practical_step.
 */
function renderResults(data) {
    // The list fields come from LLM-produced JSON; accept only real arrays so
    // a stray string or object cannot make `.map` throw further down.
    const sources = Array.isArray(data.sources) ? data.sources : [];
    const subs = Array.isArray(data.sub_questions) ? data.sub_questions : [];
    const rawUncertain = data.what_remains_uncertain;
    const uncertain = Array.isArray(rawUncertain)
        ? rawUncertain
        : (typeof rawUncertain === 'string' && rawUncertain.trim() ? [rawUncertain] : []);

    const briefHtml = renderBrief(data.brief_markdown || '', sources);

    const subQHtml = subs.length ? `
        <div class="dr-result-block">
            <h3 style="margin:0 0 8px;font-size:1rem">Angles the agent explored</h3>
            <ol style="padding-left:1.2em;margin:0;color:var(--muted);line-height:1.55">
                ${subs.map((sq) => `<li><strong style="color:var(--ink)">${escapeHtml(sq.question || '')}</strong>${sq.rationale ? `<br><small>${escapeHtml(sq.rationale)}</small>` : ''}</li>`).join('')}
            </ol>
        </div>` : '';

    const sourcesHtml = `
        <div class="dr-result-block">
            <div class="dr-sources-head">
                <h3>Sources (${sources.length})</h3>
                <small>Click a card to see the full chunk + scores</small>
            </div>
            <div class="dr-source-list">
                ${sources.map((s) => renderSourceCard(s)).join('')}
            </div>
        </div>`;

    const uncertHtml = uncertain.length ? `
        <div class="dr-result-block">
            <h3 style="margin:0 0 8px;font-size:0.95rem;color:var(--muted)">What remains uncertain</h3>
            <ul style="padding-left:1.2em;margin:0;color:var(--muted);line-height:1.55">
                ${uncertain.map((u) => `<li>${escapeHtml(String(u))}</li>`).join('')}
            </ul>
        </div>` : '';

    const nextHtml = data.next_practical_step ? `
        <div class="dr-result-block">
            <h3 style="margin:0 0 6px;font-size:0.95rem">Next practical step</h3>
            <p style="margin:0;color:var(--ink);line-height:1.5">${escapeHtml(data.next_practical_step)}</p>
        </div>` : '';

    els.results.innerHTML = `
        <div class="dr-result-block">
            <div class="dr-brief">${briefHtml}</div>
        </div>
        ${subQHtml}
        ${sourcesHtml}
        ${uncertHtml}
        ${nextHtml}
    `;

    // Bind source cards and inline citation markers.
    els.results.querySelectorAll('[data-source-n]').forEach((node) => {
        const activate = () => {
            const n = parseInt(node.dataset.sourceN, 10);
            const src = sources.find((s) => s.n === n);
            if (src) {
                openModal(src);
                flashSource(n);
            }
        };

        node.addEventListener('click', activate);

        // Native <button> cards already turn Enter/Space into a click. The
        // citation markers are <span role="button" tabindex="0">, which are
        // focusable but need explicit key handling to be operable.
        if (node.tagName !== 'BUTTON') {
            node.addEventListener('keydown', (event) => {
                if (event.key === 'Enter' || event.key === ' ') {
                    event.preventDefault(); // stop Space from scrolling the page
                    activate();
                }
            });
        }
    });
}
|
||||
|
||||
// Briefly highlight source card number `n`: clear any existing highlight,
// scroll the card into view, and drop the highlight again after 1.8 s.
function flashSource(n) {
    const HIGHLIGHT = 'is-highlight';

    for (const card of document.querySelectorAll('.dr-source-card.is-highlight')) {
        card.classList.remove(HIGHLIGHT);
    }

    const target = document.querySelector(`.dr-source-card[data-source-n="${n}"]`);
    if (!target) {
        return;
    }

    target.classList.add(HIGHLIGHT);
    target.scrollIntoView({ behavior: 'smooth', block: 'center' });
    setTimeout(() => {
        target.classList.remove(HIGHLIGHT);
    }, 1800);
}
|
||||
|
||||
// Build the markup for one clickable source card.
// The headline score is the reranker score when present, otherwise the
// similarity; a separate "sim" figure is shown only when both exist.
function renderSourceCard(s) {
    const hasRerank = s.reranker_score != null;
    const score = hasRerank ? s.reranker_score : s.similarity;

    const isUpload = s.source_origin === 'upload';
    const originTagClass = isUpload ? 'dr-source-tag dr-source-tag--upload' : 'dr-source-tag';
    const originLabel = isUpload ? 'upload' : 'corpus';

    const sectionRow = s.section
        ? `<div class="dr-source-meta"><span class="dr-source-tag">${escapeHtml(s.section)}</span></div>`
        : '';
    const subQuestionTags = (s.matched_sub_questions || [])
        .map((q) => `<span class="dr-source-tag">${escapeHtml(q)}</span>`)
        .join('');
    const scoreText = score != null ? Number(score).toFixed(2) : '—';
    const similarityCell = hasRerank && s.similarity != null
        ? `<span>sim<br><b>${Number(s.similarity).toFixed(2)}</b></span>`
        : '';

    return [
        `<button type="button" class="dr-source-card" data-source-n="${s.n}">`,
        `<span class="dr-source-number">${s.n}</span>`,
        '<div class="dr-source-body">',
        `<div class="dr-source-title">${escapeHtml(s.title || 'Untitled')}</div>`,
        sectionRow,
        '<div class="dr-source-meta">',
        `<span class="${originTagClass}">${originLabel}</span>`,
        `<span class="dr-source-tag dr-source-tag--score">${escapeHtml(s.package_or_corpus || '—')}</span>`,
        subQuestionTags,
        '</div>',
        `<p class="dr-source-excerpt">${escapeHtml(truncate(s.excerpt || '', 240))}</p>`,
        '</div>',
        '<div class="dr-source-aside">',
        `<span>score<br><b>${scoreText}</b></span>`,
        similarityCell,
        '</div>',
        '</button>',
    ].join('\n');
}
|
||||
|
||||
// Markdown renderer — minimal: paragraphs, bold/italic, code, [n] citation badges
|
||||
/**
 * Minimal markdown-to-HTML renderer for the synthesised brief.
 *
 * The input is HTML-escaped first; only then are the supported constructs
 * turned into markup: [n] / [n,m] / [n-m] citation markers, **bold**,
 * *italic*, `code`, "### " headings and blank-line separated paragraphs.
 *
 * @param {string} markdown Brief text as returned by the API.
 * @param {Array<{n: number}>} sources Numbered sources; used to tell real
 *   citations from numbers that match no source.
 * @returns {string} HTML string for insertion into the brief container.
 */
function renderBrief(markdown, sources) {
    if (!markdown) return '<p><em>No brief was returned.</em></p>';

    const sourceSet = new Set((sources || []).map((s) => s.n));
    const escaped = escapeHtml(markdown);

    // Citation markers [1], [1,2], [1-3]
    const withCites = escaped.replace(/\[(\d+(?:\s*[-,]\s*\d+)*)\]/g, (_, group) => (
        expandCiteGroup(group).map((n) => {
            if (sourceSet.has(n)) {
                return `<span class="dr-cite" data-source-n="${n}" role="button" tabindex="0">${n}</span>`;
            }
            // No source carries this number: keep the badge visible but inert.
            // Without data-source-n / role / tabindex it is neither focusable
            // nor picked up by the click binding, so it cannot pose as a
            // working button.
            return `<span class="dr-cite dr-cite--unknown">${n}</span>`;
        }).join('')
    ));

    // Bold / italic / inline code
    const withInline = withCites
        .replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>')
        .replace(/(^|[^*])\*([^*]+)\*(?!\*)/g, '$1<em>$2</em>')
        .replace(/`([^`]+)`/g, '<code>$1</code>');

    // Paragraphs: split on blank lines; single newlines become <br>.
    return withInline.split(/\n{2,}/).map((p) => {
        const t = p.trim();
        if (!t) return '';
        if (/^### /.test(t)) {
            return `<h4 style="margin:14px 0 6px;color:var(--ink);font-size:1rem">${t.replace(/^### /, '')}</h4>`;
        }
        return `<p>${t.replace(/\n/g, '<br>')}</p>`;
    }).join('');
}
|
||||
|
||||
/**
 * Expand the inside of a citation marker into a de-duplicated list of source
 * numbers, e.g. "1, 3-5" -> [1, 3, 4, 5].
 *
 * The text comes from LLM output, so ranges are treated defensively:
 *  - a reversed range ("5-3") is read as 3..5 instead of being dropped;
 *  - a single range expands to at most MAX_RANGE_SPAN numbers, so a marker
 *    such as "[1-999999999]" cannot freeze the page or exhaust memory.
 *
 * @param {string} group Comma-separated numbers and/or "a-b" ranges.
 * @returns {number[]} Unique numbers in first-seen order.
 */
function expandCiteGroup(group) {
    const MAX_RANGE_SPAN = 50;
    const seen = new Set();

    String(group).split(',').forEach((part) => {
        const token = part.trim();
        const range = token.match(/^(\d+)\s*-\s*(\d+)$/);
        if (range) {
            const a = parseInt(range[1], 10);
            const b = parseInt(range[2], 10);
            const lo = Math.min(a, b);
            const hi = Math.max(a, b);
            // Count-based loop: always terminates, even when the bounds exceed
            // Number.MAX_SAFE_INTEGER and `i++` would stop advancing.
            const count = Math.min(hi - lo + 1, MAX_RANGE_SPAN);
            for (let k = 0; k < count; k++) seen.add(lo + k);
        } else {
            const n = parseInt(token, 10);
            if (!Number.isNaN(n)) seen.add(n);
        }
    });

    return Array.from(seen);
}
|
||||
|
||||
/**
 * Escape a value for safe interpolation into HTML text or a quoted attribute.
 *
 * The five significant characters are replaced with entities. `&` is handled
 * first so the ampersands introduced by the later replacements are not
 * escaped a second time. null / undefined become the empty string rather
 * than the literal words "null" / "undefined".
 *
 * @param {*} s Value to escape; coerced with String().
 * @returns {string} Entity-escaped text.
 */
function escapeHtml(s) {
    if (s === null || s === undefined) return '';
    return String(s)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}
|
||||
|
||||
// Shorten `s` to at most `n` characters, ending with an ellipsis when text
// was cut. Falsy input yields the empty string.
function truncate(s, n) {
    if (!s) {
        return '';
    }
    const fits = s.length <= n;
    return fits ? s : `${s.slice(0, n - 1)}…`;
}
|
||||
})();
|
||||
@@ -0,0 +1,162 @@
|
||||
<?php
|
||||
declare(strict_types=1);
|
||||
$toolName = 'deep-research';
|
||||
$toolTitle = 'Deep Research';
|
||||
$toolKind = 'Agent + Rank/Rerank RAG';
|
||||
$toolBadge = 'family-legal';
|
||||
$extraScripts = ['assets/js/deep-research.js'];
|
||||
require_once __DIR__ . '/includes/layout.php';
|
||||
?>
|
||||
<form id="deepResearchForm" class="tool-form deep-research" enctype="multipart/form-data">
|
||||
|
||||
<div class="lang-switcher" id="drLangSwitcher" role="group" aria-label="UI language">
|
||||
<button type="button" class="lang-btn is-active" data-lang="en">🇬🇧 EN</button>
|
||||
<button type="button" class="lang-btn" data-lang="no">🇳🇴 NO</button>
|
||||
</div>
|
||||
|
||||
<div class="control-row" id="drEngineControl">
|
||||
<span class="control-label">Engine</span>
|
||||
<label><input type="radio" name="drEngine" value="azure_mini" checked> Azure gpt-4o-mini ★ <small class="control-hint">(fast)</small></label>
|
||||
<label><input type="radio" name="drEngine" value="azure_full"> Azure gpt-4o <small class="control-hint">(best)</small></label>
|
||||
<label><input type="radio" name="drEngine" value="gpu"> GPU (cuttlefish) <small class="control-hint">(local)</small></label>
|
||||
</div>
|
||||
<p class="upload-hint">Azure engines use your BNL Azure credits. GPU runs qwen2.5:14b via LiteLLM on cuttlefish.</p>
|
||||
|
||||
<div class="dr-slice-section">
|
||||
<p class="control-label">Corpus slices</p>
|
||||
<p class="upload-hint">Select which slices of the Do Better Norge legal corpus the agent searches. Toggle Broader Legal on when the question reaches beyond family law.</p>
|
||||
<div class="dr-slice-grid">
|
||||
<button type="button" class="dr-slice is-on" data-slice="family_core" aria-pressed="true">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">Family Law Core</span>
|
||||
<span class="dr-slice__badge">on</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Barneloven, custody, samvær, mediation</p>
|
||||
</button>
|
||||
<button type="button" class="dr-slice is-on" data-slice="child_welfare" aria-pressed="true">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">Child Welfare</span>
|
||||
<span class="dr-slice__badge">on</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Barnevern, omsorgsovertakelse, foster care</p>
|
||||
</button>
|
||||
<button type="button" class="dr-slice is-on" data-slice="echr_hague" aria-pressed="true">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">ECHR and Hague</span>
|
||||
<span class="dr-slice__badge">on</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Article 8, EMD, HCCH, cross-border family</p>
|
||||
</button>
|
||||
<button type="button" class="dr-slice" data-slice="broader_legal" aria-pressed="false">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">Broader Legal Support</span>
|
||||
<span class="dr-slice__badge">off</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Arbeidsmiljøloven, NOUer, statutes, government background</p>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<details class="advanced-panel" id="drAdvanced">
|
||||
<summary class="advanced-toggle">Advanced controls</summary>
|
||||
<div class="dr-control-grid">
|
||||
<div class="dr-control-card">
|
||||
<label>Sub-questions <span id="drSubQValue" class="dr-control-value">4</span></label>
|
||||
<input type="range" id="drSubQ" min="3" max="5" step="1" value="4">
|
||||
<small>How many angles the agent expands the question into before retrieval.</small>
|
||||
</div>
|
||||
<div class="dr-control-card">
|
||||
<label>Chunks / sub-Q <span id="drChunkLimitValue" class="dr-control-value">6</span></label>
|
||||
<input type="range" id="drChunkLimit" min="4" max="10" step="1" value="6">
|
||||
<small>How many corpus chunks the hybrid retriever pulls per sub-question.</small>
|
||||
</div>
|
||||
<div class="dr-control-card">
|
||||
<label>Similarity floor <span id="drSimValue" class="dr-control-value">0.30</span></label>
|
||||
<input type="range" id="drSim" min="0.20" max="0.60" step="0.05" value="0.30">
|
||||
<small>Minimum cosine similarity for uploaded-doc chunks to count as a match.</small>
|
||||
</div>
|
||||
<div class="dr-control-card">
|
||||
<label>Sources kept <span id="drTopKValue" class="dr-control-value">12</span></label>
|
||||
<input type="range" id="drTopK" min="8" max="14" step="1" value="12">
|
||||
<small>Top sources kept after dedupe + rerank to feed synthesis.</small>
|
||||
</div>
|
||||
<div class="dr-control-card">
|
||||
<label>Temperature <span id="drTempValue" class="dr-control-value">0.15</span></label>
|
||||
<input type="range" id="drTemp" min="0.05" max="0.40" step="0.05" value="0.15">
|
||||
<small>Synthesis creativity. Keep low for grounded legal briefs.</small>
|
||||
</div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<div class="upload-zone" id="drUploadZone" role="region" aria-label="File upload">
|
||||
<input type="file" id="drUploadInput" multiple accept=".pdf,.docx,.txt" aria-label="Choose files">
|
||||
<div id="drUploadPrompt" class="upload-prompt">
|
||||
<span class="upload-icon" aria-hidden="true">⇧</span>
|
||||
<p>Drop up to 5 case files here, or <label for="drUploadInput" class="upload-browse">browse</label></p>
|
||||
<p class="upload-hint"><strong>PDF</strong>, <strong>DOCX</strong>, <strong>TXT</strong> — chunked + embedded in memory only, never stored.</p>
|
||||
</div>
|
||||
<div id="drUploadFileInfo" class="upload-file is-hidden">
|
||||
<ul id="drUploadFileList" class="upload-file-list"></ul>
|
||||
<button type="button" id="drUploadClear" class="upload-clear">× Clear</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<label class="input-label" for="drInput">Question or pasted text</label>
|
||||
<textarea id="drInput" name="drInput" rows="8" placeholder="Describe the legal question, paste case notes, or both. The agent will research the corpus from 3–5 angles."></textarea>
|
||||
|
||||
<div class="form-footer">
|
||||
<p id="drStatus" class="form-status" role="status" aria-live="polite"></p>
|
||||
<button id="drRunButton" type="submit">Run deep research</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<section id="drResults" class="results deep-research-results" aria-live="polite">
|
||||
<div class="empty-state">
|
||||
<h3>Ready</h3>
|
||||
<p>Pick slices, drop a case file or paste a question, then run. The agent will expand the question, retrieve from the corpus + your upload, rerank, and synthesise a cited brief.</p>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Source modal -->
|
||||
<div id="drSourceModal" class="dr-source-modal is-hidden" role="dialog" aria-modal="true" aria-labelledby="drSourceModalTitle">
|
||||
<div class="dr-source-modal__dialog">
|
||||
<header class="dr-source-modal__head">
|
||||
<div>
|
||||
<p class="eyebrow" id="drSourceModalEyebrow">Source</p>
|
||||
<h3 id="drSourceModalTitle"></h3>
|
||||
</div>
|
||||
<button type="button" id="drSourceModalClose" class="upload-clear" aria-label="Close">×</button>
|
||||
</header>
|
||||
<div class="dr-source-modal__body">
|
||||
<aside class="dr-source-modal__meta" id="drSourceModalMeta"></aside>
|
||||
<div class="dr-source-modal__text" id="drSourceModalText"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Hidden stubs so tools.js element refs don't crash on this page -->
|
||||
<div class="is-hidden" id="languageControl" aria-hidden="true"><input type="radio" name="language" value="en" checked></div>
|
||||
<div class="is-hidden" id="redactionControl" aria-hidden="true"></div>
|
||||
<div class="is-hidden" id="audioZone" aria-hidden="true">
|
||||
<input type="file" id="audioInput" style="display:none">
|
||||
<div id="audioPrompt"></div>
|
||||
<div id="audioFileInfo"><ol id="audioQueueList"></ol><button type="button" id="audioClear"></button></div>
|
||||
</div>
|
||||
<div class="is-hidden" id="diarizeControl" aria-hidden="true">
|
||||
<input type="checkbox" id="diarizeCheck">
|
||||
<input type="number" id="numSpeakersInput">
|
||||
</div>
|
||||
<div class="is-hidden" id="transcribeLangControl" aria-hidden="true"><input type="radio" name="transcribeLang" value="no" checked></div>
|
||||
<div class="is-hidden" id="vocabControl" aria-hidden="true">
|
||||
<div id="vocabPresets"></div>
|
||||
<textarea id="initPromptInput"></textarea>
|
||||
</div>
|
||||
<div class="is-hidden" id="aliasSection" aria-hidden="true">
|
||||
<button type="button" id="addAliasRow"></button>
|
||||
<div id="aliasRows"></div>
|
||||
</div>
|
||||
<div class="is-hidden" id="exemptSection" aria-hidden="true">
|
||||
<button type="button" id="addExemptRow"></button>
|
||||
<div id="exemptRows"></div>
|
||||
</div>
|
||||
<?php require_once __DIR__ . '/includes/layout_footer.php'; ?>
|
||||
@@ -0,0 +1,727 @@
|
||||
<?php
|
||||
declare(strict_types=1);
|
||||
|
||||
require_once __DIR__ . '/bootstrap.php';
|
||||
require_once __DIR__ . '/AzureOpenAiGateway.php';
|
||||
|
||||
final class DbnDeepResearchAgent
|
||||
{
|
||||
private const MAX_SEED_CHARS = 16000;
|
||||
private const MAX_UPLOAD_CHARS = 64000;
|
||||
private const CHUNK_WORDS = 600;
|
||||
private const CHUNK_OVERLAP_WORDS = 75;
|
||||
private const MIN_CHUNK_WORDS = 50;
|
||||
private const POOL_CAP = 30;
|
||||
|
||||
private DbnAzureOpenAiGateway $azure;
|
||||
private ?AiGateway $ai = null;
|
||||
private array $uploadVecs = [];
|
||||
private array $stepTimings = [];
|
||||
|
||||
/**
 * @param DbnAzureOpenAiGateway|null $azure Gateway to use for Azure chat calls;
 *                                          a default instance is created when omitted.
 */
public function __construct(?DbnAzureOpenAiGateway $azure = null)
{
    if ($azure === null) {
        $azure = new DbnAzureOpenAiGateway();
    }

    $this->azure = $azure;
}
|
||||
|
||||
/**
 * Run one deep-research pass end to end and build the API response payload.
 *
 * Steps, each appended to the returned `trace`: query interpretation, query
 * expansion into sub-questions, slice resolution, in-memory upload indexing,
 * per-sub-question retrieval (shared corpus plus uploaded chunks), synthesis,
 * and a citation-confidence rating. Wall-clock timings per step are collected
 * in `trace_metadata.elapsed_ms_per_step`.
 *
 * Validation failures and hard dependency failures go through dbnToolsAbort()
 * (presumably terminates the request — defined outside this file; confirm).
 * Soft failures (slice resolution, upload embedding, a single sub-question
 * retrieval) are logged and downgraded to a 'warning' trace step.
 *
 * @param string $seedQuery      The user's question; may be empty.
 * @param string $pastedText     Free-form pasted text; may be empty.
 * @param array  $uploadedFiles  List of arrays read via their 'filename' and 'text' keys.
 * @param array  $sliceSelection Raw slice toggles, normalised by dbnV6NormalizeSliceSelection().
 * @param string $engine         'azure_mini', 'azure_full' or 'gpu'; any other value falls back to 'azure_mini'.
 * @param string $language       'en' or 'no'; any other value falls back to 'en'.
 * @param array  $controls       Tuning knobs; clamped by normalizeControls().
 *
 * @return array Payload with brief_markdown, sub_questions, sources, trace, trace_metadata and disclaimer.
 */
public function run(
    string $seedQuery,
    string $pastedText,
    array $uploadedFiles,
    array $sliceSelection,
    string $engine,
    string $language,
    array $controls
): array {
    $seedQuery = trim($seedQuery);
    $pastedText = trim($pastedText);
    $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true) ? $engine : 'azure_mini';
    $language = in_array($language, ['en', 'no'], true) ? $language : 'en';

    $controls = $this->normalizeControls($controls);

    if ($seedQuery === '' && $pastedText === '' && empty($uploadedFiles)) {
        dbnToolsAbort('Provide a question, paste text, or upload at least one file.', 422, 'missing_seed');
    }

    $client = dbnToolsRequireClient();
    $package = $this->requireFamilyPackage((int)$client['id']);

    dbnToolsBootCaveau();
    $aiPortalRoot = dbnToolsAiPortalRoot();
    require_once $aiPortalRoot . '/platform/includes/dbn_v6.php';
    require_once $aiPortalRoot . '/lib/ai/AiGateway.php';

    // Reset per-run state so a reused agent instance never carries vectors or
    // timings over from an earlier request.
    $this->ai = new AiGateway();
    $this->uploadVecs = [];
    $this->stepTimings = [];

    $trace = [];
    $seedDescription = $this->buildSeedDescription($seedQuery, $pastedText, $uploadedFiles);

    // STEP 1: Query interpretation — build research brief
    $stepStart = microtime(true);
    $interpretation = $this->interpretSeed($seedDescription, $language);
    $this->stepTimings['interpretation'] = $this->elapsedMs($stepStart);
    $trace[] = $this->trace(
        'Query interpretation',
        $interpretation['detail'],
        'complete'
    );

    // STEP 2: Query expansion
    $stepStart = microtime(true);
    $expansion = $this->expandQueries($seedDescription, $interpretation['brief'], $controls['sub_q_count'], $language);
    $this->stepTimings['expansion'] = $this->elapsedMs($stepStart);
    $subQuestions = $expansion['questions'];
    $expansionStatus = $expansion['fallback'] ? 'warning' : 'complete';
    $trace[] = $this->trace(
        'Query expansion',
        $expansion['fallback']
            ? 'Could not parse sub-questions; falling back to retrieving on the seed query alone.'
            : sprintf('Generated %d sub-questions to research the corpus from multiple angles.', count($subQuestions)),
        $expansionStatus
    );

    // STEP 3: Slice resolution
    $stepStart = microtime(true);
    $sliceSelectionNormalized = dbnV6NormalizeSliceSelection($sliceSelection);
    if (!array_filter($sliceSelectionNormalized)) {
        dbnToolsAbort('Enable at least one corpus slice before running deep research.', 422, 'no_slices');
    }
    $ragDb = dbnToolsRagDb();
    try {
        $sharedDocIds = dbnV6ResolveSelectedDocIds($ragDb, $sliceSelectionNormalized);
        $sliceStatus = 'complete';
        $sliceDetail = sprintf(
            '%d slice(s) active → %d candidate documents constrain the corpus search.',
            count(array_filter($sliceSelectionNormalized)),
            count($sharedDocIds)
        );
    } catch (Throwable $e) {
        // Best effort: an empty id list is passed on and the trace tells the
        // user that the search ran unconstrained.
        error_log('DBN deep research slice resolve failed: ' . $e->getMessage());
        $sharedDocIds = [];
        $sliceStatus = 'warning';
        $sliceDetail = 'Slice resolution failed; corpus search will run unconstrained.';
    }
    $this->stepTimings['slice_resolution'] = $this->elapsedMs($stepStart);
    $trace[] = $this->trace('Slice resolution', $sliceDetail, $sliceStatus);

    // STEP 4: Upload indexing (in-memory, ephemeral)
    $stepStart = microtime(true);
    $uploadChunks = [];
    foreach ($uploadedFiles as $idx => $file) {
        $filename = (string)($file['filename'] ?? sprintf('upload-%d', $idx + 1));
        $text = (string)($file['text'] ?? '');
        $uploadChunks = array_merge($uploadChunks, $this->splitIntoChunks($text, $filename, $idx));
    }
    $uploadStatus = 'complete';
    $uploadDetail = sprintf('%d upload file(s) → %d in-memory chunks indexed with nomic-embed-text.', count($uploadedFiles), count($uploadChunks));
    if ($uploadChunks) {
        try {
            $texts = array_map(fn(array $c) => $c['text'], $uploadChunks);
            $vecs = $this->ai->embedBatch($texts, 'nomic-embed-text');
            if (count($vecs) === count($uploadChunks)) {
                foreach ($uploadChunks as $i => $chunk) {
                    $this->uploadVecs[] = [
                        'meta' => $chunk,
                        'vec' => $vecs[$i],
                    ];
                }
            } else {
                // Vectors cannot be paired with chunks reliably; leave the
                // in-memory index empty rather than mis-attribute text.
                $uploadStatus = 'warning';
                $uploadDetail = 'Upload embedding returned an unexpected count; uploaded chunks will not participate in retrieval.';
            }
        } catch (Throwable $e) {
            error_log('DBN deep research upload embed failed: ' . $e->getMessage());
            $uploadStatus = 'warning';
            $uploadDetail = 'Upload embedding gateway unreachable; uploaded chunks will not participate in retrieval.';
            $this->uploadVecs = [];
        }
    } elseif (empty($uploadedFiles)) {
        $uploadDetail = 'No files uploaded; agent will research the corpus only.';
    }
    $this->stepTimings['upload_indexing'] = $this->elapsedMs($stepStart);
    $trace[] = $this->trace('Upload indexing', $uploadDetail, $uploadStatus);

    // STEP 5: Retrieval (per sub-question)
    $stepStart = microtime(true);
    // When expansion produced nothing, retrieve once on the seed itself.
    $retrievalQueries = $subQuestions ?: [[
        'id' => 'q1',
        'question' => $seedQuery !== '' ? $seedQuery : ($interpretation['brief'] ?: 'legal research'),
        'rationale' => 'Seed query (no sub-question expansion).',
    ]];

    try {
        $rag = new ClientRagPipeline((int)$client['id'], 'http://10.0.1.10:4000', 60);
    } catch (Throwable $e) {
        // Log the cause like every other failure path in this method; the
        // client only ever sees the generic message below.
        error_log('DBN deep research RAG init failed: ' . $e->getMessage());
        dbnToolsAbort('Could not initialise the retrieval pipeline.', 503, 'rag_init_failed');
    }

    $rawPool = [];
    $retrievalWarnings = 0;
    foreach ($retrievalQueries as $sq) {
        try {
            $corpusChunks = $rag->searchAll(
                $sq['question'],
                $controls['chunk_limit'],
                null,
                [
                    'search_private' => false,
                    'search_shared' => true,
                    'package_ids' => [(int)$package['id']],
                    'shared_doc_ids' => $sharedDocIds,
                    'chunk_limit' => $controls['chunk_limit'],
                    'search_method' => 'hybrid',
                    'reranker_enabled' => true,
                ]
            );
        } catch (Throwable $e) {
            // One failing sub-question must not sink the whole run.
            error_log('DBN deep research sub-Q retrieval failed: ' . $e->getMessage());
            $corpusChunks = [];
            $retrievalWarnings++;
        }
        foreach ($corpusChunks as $chunk) {
            $rawPool[] = $this->normalizeCorpusChunk($chunk, $sq['id']);
        }

        // Upload chunk retrieval via cosine sim
        if (!empty($this->uploadVecs)) {
            $uploadHits = $this->retrieveFromUploads($sq['question'], $controls['chunk_limit'], $controls['similarity_threshold']);
            foreach ($uploadHits as $hit) {
                $hit['matched_sub_questions'] = [$sq['id']];
                $rawPool[] = $hit;
            }
        }
    }

    $merged = $this->mergeAndDedupe($rawPool, self::POOL_CAP);
    $this->stepTimings['retrieval'] = $this->elapsedMs($stepStart);
    $retrievalStatus = $retrievalWarnings > 0 ? 'warning' : 'complete';
    $trace[] = $this->trace(
        'Retrieval',
        sprintf(
            '%d sub-question(s) × hybrid + RRF + rerank → %d raw chunks → %d unique after dedupe.',
            count($retrievalQueries),
            count($rawPool),
            count($merged)
        ),
        $retrievalStatus
    );

    // Cap pool to reranker top-K for synthesis
    $synthesisPool = array_slice($merged, 0, $controls['reranker_top_k']);
    $numberedSources = $this->numberSources($synthesisPool);

    // STEP 6: Synthesis
    $stepStart = microtime(true);
    $synthesis = $this->synthesise(
        $seedDescription,
        $interpretation['brief'],
        $retrievalQueries,
        $numberedSources,
        $engine,
        $language,
        $controls['temperature']
    );
    $this->stepTimings['synthesis'] = $this->elapsedMs($stepStart);
    $trace[] = $this->trace(
        'Synthesis',
        sprintf('%s synthesised the brief using %d grounded source(s).', $synthesis['deploy_label'], count($numberedSources)),
        'complete'
    );

    // STEP 7: Confidence
    $confidence = $this->citationConfidence($numberedSources);
    $trace[] = $this->trace(
        'Citation confidence',
        sprintf('%s confidence based on %d source(s) and reranker score distribution.', ucfirst($confidence), count($numberedSources)),
        $confidence === 'low' ? 'warning' : 'complete'
    );

    // Stitch sub-question chunk_ids
    $subQOut = [];
    foreach ($retrievalQueries as $sq) {
        $matchedChunks = array_values(array_filter(
            $numberedSources,
            fn(array $s) => in_array($sq['id'], $s['matched_sub_questions'] ?? [], true)
        ));
        $subQOut[] = [
            'id' => $sq['id'],
            'question' => $sq['question'],
            'rationale' => $sq['rationale'] ?? '',
            'chunk_ids' => array_values(array_map(fn(array $s) => $s['chunk_id'], $matchedChunks)),
        ];
    }

    // The front end iterates what_remains_uncertain as a list. The value comes
    // from model-generated JSON, so coerce a lone string into a one-item list
    // and anything else that is not an array into an empty list.
    $uncertain = $synthesis['json']['what_remains_uncertain'] ?? [];
    if (is_string($uncertain)) {
        $uncertain = trim($uncertain) === '' ? [] : [$uncertain];
    } elseif (is_array($uncertain)) {
        $uncertain = array_values($uncertain);
    } else {
        $uncertain = [];
    }

    return [
        'tool' => 'deep_research',
        'language' => $language,
        'brief_markdown' => (string)($synthesis['json']['brief_markdown'] ?? $synthesis['json']['answer'] ?? ''),
        'sub_questions' => $subQOut,
        'sources' => $numberedSources,
        'what_we_found' => (string)($synthesis['json']['what_we_found'] ?? ''),
        'evidence_trail' => $numberedSources,
        'what_remains_uncertain' => $uncertain,
        'next_practical_step' => (string)($synthesis['json']['next_practical_step'] ?? ''),
        'trace' => $trace,
        'trace_metadata' => [
            'chunk_count' => count($merged),
            'source_count' => count($numberedSources),
            'sub_question_count' => count($retrievalQueries),
            'upload_chunk_count' => count($this->uploadVecs),
            'deployment' => $synthesis['deploy_label'],
            'engine_used' => $engine,
            'citation_confidence' => $confidence,
            'elapsed_ms_per_step' => $this->stepTimings,
            'slices_active' => array_keys(array_filter($sliceSelectionNormalized)),
        ],
        'disclaimer' => dbnToolsDisclaimer($language),
    ];
}
|
||||
|
||||
/**
 * Clamp the user-supplied tuning knobs into their allowed ranges, filling in
 * defaults for missing keys.
 *
 * @param array $controls Raw controls from the request payload.
 *
 * @return array{sub_q_count:int, chunk_limit:int, similarity_threshold:float, reranker_top_k:int, temperature:float}
 */
private function normalizeControls(array $controls): array
{
    $clampInt = static fn($value, int $lo, int $hi): int => max($lo, min($hi, (int)$value));
    $clampFloat = static fn($value, float $lo, float $hi): float => max($lo, min($hi, (float)$value));

    $normalized = [];
    $normalized['sub_q_count'] = $clampInt($controls['sub_q_count'] ?? 4, 3, 5);
    $normalized['chunk_limit'] = $clampInt($controls['chunk_limit'] ?? 6, 4, 10);
    $normalized['similarity_threshold'] = $clampFloat($controls['similarity_threshold'] ?? 0.30, 0.2, 0.6);
    $normalized['reranker_top_k'] = $clampInt($controls['reranker_top_k'] ?? 12, 8, 14);
    $normalized['temperature'] = $clampFloat($controls['temperature'] ?? 0.15, 0.05, 0.4);

    return $normalized;
}
|
||||
|
||||
/**
 * Fetch the 'family-legal' corpus package and check that it is active and
 * that the given client holds an active subscription to it. Either failed
 * check is reported through dbnToolsAbort() with a 503.
 *
 * @param int $clientId Client whose subscription is checked.
 *
 * @return array The package record.
 */
private function requireFamilyPackage(int $clientId): array
{
    $package = dbnToolsFetchPackage('family-legal');

    $isActive = $package && !empty($package['is_active']);
    if (!$isActive) {
        dbnToolsAbort('The family-legal corpus package is not active.', 503, 'package_unavailable');
    }

    $isSubscribed = dbnToolsHasActiveSubscription($clientId, (int)$package['id']);
    if (!$isSubscribed) {
        dbnToolsAbort('Do Better Norge does not have an active family-legal subscription.', 503, 'subscription_missing');
    }

    return $package;
}
|
||||
|
||||
/**
 * Assemble the text block handed to the LLM prompts: the question, the pasted
 * text and each non-empty uploaded file, every part under its own heading and
 * separated by blank lines. Question and pasted text are cut to
 * MAX_SEED_CHARS each, every file to MAX_UPLOAD_CHARS.
 *
 * @param string $seedQuery     Trimmed question; skipped when empty.
 * @param string $pastedText    Trimmed pasted text; skipped when empty.
 * @param array  $uploadedFiles List of arrays read via their 'filename' and 'text' keys.
 *
 * @return string Combined description; empty when nothing was supplied.
 */
private function buildSeedDescription(string $seedQuery, string $pastedText, array $uploadedFiles): string
{
    $sections = [];

    $headed = ['Question' => $seedQuery, 'Pasted text' => $pastedText];
    foreach ($headed as $heading => $body) {
        if ($body === '') {
            continue;
        }
        $sections[] = $heading . ":\n" . mb_substr($body, 0, self::MAX_SEED_CHARS, 'UTF-8');
    }

    foreach ($uploadedFiles as $idx => $file) {
        $filename = (string)($file['filename'] ?? sprintf('upload-%d', $idx + 1));
        $text = (string)($file['text'] ?? '');
        if ($text !== '') {
            $clipped = mb_substr($text, 0, self::MAX_UPLOAD_CHARS, 'UTF-8');
            $sections[] = sprintf("Uploaded file [%s]:\n%s", $filename, $clipped);
        }
    }

    return implode("\n\n", $sections);
}
|
||||
|
||||
/**
 * Ask the Azure gateway for a short research brief and a few retrieval
 * keywords describing the seed input.
 *
 * Best effort: a gateway error, unparsable JSON, or a missing / non-string
 * brief all result in an empty brief with an explanatory detail line, so the
 * caller can continue on the raw seed.
 *
 * @param string $seedDescription Combined seed text from buildSeedDescription().
 * @param string $language        'no' selects Norwegian output; anything else English.
 *
 * @return array{brief:string, detail:string} `detail` is the trace line shown to the user.
 */
private function interpretSeed(string $seedDescription, string $language): array
{
    $locale = $language === 'no' ? 'Norwegian' : 'English';
    $prompt = <<<PROMPT
You are reviewing the input below to set up a deep legal research pass against the Do Better Norge family-law corpus.

Input:
{$seedDescription}

In {$locale}, produce JSON with:
{
"brief": "1-3 sentence description of what the user is trying to research (≤ 220 chars)",
"key_signals": ["short keywords or terms that should drive retrieval"]
}
PROMPT;

    try {
        $raw = $this->azure->chatText([
            ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
            ['role' => 'user', 'content' => $prompt],
        ], ['json' => true, 'temperature' => 0.1, 'max_tokens' => 400, 'timeout' => 30]);
        $json = $this->azure->decodeJsonObject($raw);

        // The model controls the JSON shape. Accept only a non-empty string
        // brief and string keywords, so a nested array or object never ends up
        // as the literal "Array" in the user-visible trace.
        $brief = is_array($json) && is_string($json['brief'] ?? null) ? trim($json['brief']) : '';
        if ($brief !== '') {
            $signals = is_array($json['key_signals'] ?? null) ? $json['key_signals'] : [];
            $signals = array_values(array_filter(
                $signals,
                static fn($signal): bool => is_string($signal) && trim($signal) !== ''
            ));
            $signalText = implode(', ', array_slice($signals, 0, 6));

            return [
                'brief' => $brief,
                'detail' => sprintf('Research focus: %s%s', $brief, $signalText !== '' ? ' — signals: ' . $signalText : ''),
            ];
        }
    } catch (Throwable $e) {
        error_log('DBN deep research interpretation failed: ' . $e->getMessage());
    }

    return [
        'brief' => '',
        'detail' => 'Interpretation step skipped — proceeding with raw seed input.',
    ];
}
|
||||
|
||||
/**
 * Ask the default chat deployment to decompose the request into
 * `$targetCount` focused sub-questions.
 *
 * The model output is validated item by item: entries that are not arrays,
 * or whose `question` is not a non-blank string, are dropped. Accepted
 * entries are numbered sequentially (q1, q2, …) in the order they are kept,
 * independent of the keys the model used, so ids never have gaps.
 *
 * Best-effort step: exceptions are logged and reported as a fallback.
 *
 * @param string $seedDescription Raw user input, interpolated into the prompt.
 * @param string $brief           Research brief, interpolated into the prompt.
 * @param int    $targetCount     Number of sub-questions requested; also the cap.
 * @param string $language        'no' selects Norwegian; anything else English.
 *
 * @return array{questions: list<array{id: string, question: string, rationale: string}>, fallback: bool}
 *         `fallback` is true (with no questions) when fewer than two valid
 *         sub-questions were obtained.
 */
private function expandQueries(string $seedDescription, string $brief, int $targetCount, string $language): array
{
    $locale = $language === 'no' ? 'Norwegian' : 'English';
    $prompt = <<<PROMPT
    You are decomposing a Do Better Norge legal-research request into {$targetCount} focused sub-questions that should each be answered by the legal corpus (Norwegian family law, child welfare, ECHR/Hague).

    Research brief:
    {$brief}

    Raw input:
    {$seedDescription}

    Return JSON only:
    {
    "sub_questions": [
    {"id":"q1","question":"... ({$locale})","rationale":"why this angle matters (≤ 140 chars)"}
    ]
    }

    Rules:
    - Exactly {$targetCount} sub-questions, no more, no fewer.
    - Each sub-question must be answerable with Norwegian family-law, child-welfare, or ECHR sources.
    - Each sub-question must explore a DIFFERENT angle (statute interpretation, procedural fairness, ECHR case law, evidence/factual frame, comparative authority).
    - Sub-questions must be self-contained — readable without seeing the seed text.
    - Write the questions in {$locale}.
    PROMPT;

    try {
        $raw = $this->azure->chatText([
            ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
            ['role' => 'user', 'content' => $prompt],
        ], ['json' => true, 'temperature' => 0.2, 'max_tokens' => 700, 'timeout' => 35]);
        $json = $this->azure->decodeJsonObject($raw);
        $items = is_array($json) && is_array($json['sub_questions'] ?? null) ? $json['sub_questions'] : [];

        $normalized = [];
        foreach ($items as $item) {
            if (count($normalized) >= $targetCount) {
                break;
            }
            if (!is_array($item) || !is_string($item['question'] ?? null)) {
                continue;
            }
            // Reject whitespace-only questions: they would produce empty retrieval queries.
            $question = trim($item['question']);
            if ($question === '') {
                continue;
            }
            $rationale = $item['rationale'] ?? '';
            $normalized[] = [
                // Number by accepted position, not by the model's array key:
                // skipped items must not leave gaps and string keys must not break arithmetic.
                'id' => 'q' . (count($normalized) + 1),
                'question' => $question,
                'rationale' => is_string($rationale) ? trim($rationale) : '',
            ];
        }

        if (count($normalized) >= 2) {
            return ['questions' => $normalized, 'fallback' => false];
        }
    } catch (Throwable $e) {
        error_log('DBN deep research expansion failed: ' . $e->getMessage());
    }

    return ['questions' => [], 'fallback' => true];
}
|
||||
|
||||
/**
 * Split extracted upload text into overlapping word windows.
 *
 * Whitespace is collapsed first, then the text is cut into windows of
 * self::CHUNK_WORDS words that advance by CHUNK_WORDS - CHUNK_OVERLAP_WORDS
 * (at least one word). The first window is always kept; later windows
 * shorter than self::MIN_CHUNK_WORDS are dropped. Iteration stops at the
 * first window that is not full.
 *
 * @param string $text     Raw text of one uploaded file.
 * @param string $filename Stored on every chunk as `filename`.
 * @param int    $fileIdx  Stored as `file_index` and used in `chunk_id`.
 *
 * @return list<array{chunk_id: string, file_index: int, chunk_index: int, filename: string, text: string}>
 */
private function splitIntoChunks(string $text, string $filename, int $fileIdx): array
{
    // preg_replace() yields null on a PCRE error; treat that as "no text".
    $flattened = preg_replace('/\s+/u', ' ', trim($text)) ?? '';
    if ($flattened === '') {
        return [];
    }

    $tokens = preg_split('/\s+/u', $flattened, -1, PREG_SPLIT_NO_EMPTY) ?: [];
    if ($tokens === []) {
        return [];
    }

    // Step between window starts; clamped so the loop always advances.
    $stride = max(1, self::CHUNK_WORDS - self::CHUNK_OVERLAP_WORDS);
    $tokenCount = count($tokens);
    $windows = [];

    for ($offset = 0; $offset < $tokenCount; $offset += $stride) {
        $window = array_slice($tokens, $offset, self::CHUNK_WORDS);
        $windowSize = count($window);

        if ($offset === 0 || $windowSize >= self::MIN_CHUNK_WORDS) {
            // Chunk indices count only the windows actually kept.
            $ordinal = count($windows);
            $windows[] = [
                'chunk_id' => sprintf('upload:%d:%d', $fileIdx, $ordinal),
                'file_index' => $fileIdx,
                'chunk_index' => $ordinal,
                'filename' => $filename,
                'text' => implode(' ', $window),
            ];
        }

        // A short window means the end of the text has been reached.
        if ($windowSize < self::CHUNK_WORDS) {
            break;
        }
    }

    return $windows;
}
|
||||
|
||||
/**
 * Rank the in-memory upload chunks against one sub-question.
 *
 * The question is embedded with the 'nomic-embed-text' model and compared
 * by cosine similarity with every entry in $this->uploadVecs. Entries
 * below `$threshold` are discarded; the rest are sorted best-first and
 * trimmed to ceil($limitPerSubQ / 2) results (never fewer than one slot).
 *
 * An embedding failure is logged and yields an empty result.
 *
 * @param string $question     Sub-question text to embed.
 * @param int    $limitPerSubQ Per-sub-question budget; uploads get half of it.
 * @param float  $threshold    Minimum cosine similarity to keep a chunk.
 *
 * @return list<array<string, mixed>> Source records with `source_origin` = 'upload'.
 */
private function retrieveFromUploads(string $question, int $limitPerSubQ, float $threshold): array
{
    if (empty($this->uploadVecs)) {
        return [];
    }

    try {
        $queryVector = $this->ai->embed($question, 'nomic-embed-text');
    } catch (Throwable $e) {
        error_log('DBN deep research sub-Q embed failed: ' . $e->getMessage());
        return [];
    }

    if (empty($queryVector)) {
        return [];
    }

    $hits = [];
    foreach ($this->uploadVecs as $candidate) {
        $score = $this->cosineSim($queryVector, $candidate['vec']);
        if ($score >= $threshold) {
            $meta = $candidate['meta'];
            $hits[] = [
                'chunk_id' => $meta['chunk_id'],
                'title' => 'uploaded: ' . $meta['filename'],
                'section' => null,
                'package_or_corpus' => 'Your upload',
                'excerpt' => dbnToolsExcerpt($meta['text'], 620),
                'chunk_text' => $meta['text'],
                'similarity' => round($score, 4),
                'reranker_score' => null,
                'document_id' => null,
                'source_origin' => 'upload',
                'authority_type' => null,
                'jurisdiction' => null,
            ];
        }
    }

    // Highest similarity first.
    usort($hits, static function (array $left, array $right): int {
        return $right['similarity'] <=> $left['similarity'];
    });

    $quota = max(1, (int)ceil($limitPerSubQ / 2));

    return array_slice($hits, 0, $quota);
}
|
||||
|
||||
/**
 * Cosine similarity of two numeric vectors.
 *
 * Only the first min(count($a), count($b)) positions are compared, read by
 * integer index 0..n-1. Returns 0.0 when either vector is empty or has
 * zero magnitude over the compared range.
 *
 * @param array $a First vector.
 * @param array $b Second vector.
 */
private function cosineSim(array $a, array $b): float
{
    $dims = min(count($a), count($b));
    if ($dims === 0) {
        return 0.0;
    }

    $product = 0.0;
    $sumSqA = 0.0;
    $sumSqB = 0.0;
    $k = 0;
    while ($k < $dims) {
        $left = (float)$a[$k];
        $right = (float)$b[$k];
        $product += $left * $right;
        $sumSqA += $left * $left;
        $sumSqB += $right * $right;
        $k++;
    }

    // Guard against division by zero for all-zero vectors.
    if ($sumSqA === 0.0 || $sumSqB === 0.0) {
        return 0.0;
    }

    return $product / (sqrt($sumSqA) * sqrt($sumSqB));
}
|
||||
|
||||
/**
 * Convert a raw corpus retrieval row into the source-record shape used by
 * the rest of this class.
 *
 * Scores are rounded to four decimals (null when absent); ids are cast to
 * int (null when absent); the excerpt is produced by dbnToolsExcerpt()
 * with a 620 limit.
 *
 * @param array  $chunk  Raw row; keys read here: id, document_title, title,
 *                       section_title, source_name, source_type, content,
 *                       similarity, reranker_score, document_id,
 *                       authority_type, jurisdiction.
 * @param string $subQId Id of the sub-question that retrieved this chunk.
 *
 * @return array<string, mixed> Record with `source_origin` = 'corpus'.
 */
private function normalizeCorpusChunk(array $chunk, string $subQId): array
{
    $roundedOrNull = static fn(string $key): ?float
        => isset($chunk[$key]) ? round((float)$chunk[$key], 4) : null;
    $intOrNull = static fn(string $key): ?int
        => isset($chunk[$key]) ? (int)$chunk[$key] : null;

    $body = (string)($chunk['content'] ?? '');

    return [
        'chunk_id' => $intOrNull('id'),
        'title' => (string)($chunk['document_title'] ?? $chunk['title'] ?? 'Untitled source'),
        'section' => $chunk['section_title'] ?? null,
        'package_or_corpus' => (string)($chunk['source_name'] ?? $chunk['source_type'] ?? 'Do Better Norge'),
        'excerpt' => dbnToolsExcerpt($body, 620),
        'chunk_text' => $body,
        'similarity' => $roundedOrNull('similarity'),
        'reranker_score' => $roundedOrNull('reranker_score'),
        'document_id' => $intOrNull('document_id'),
        'source_origin' => 'corpus',
        'authority_type' => $chunk['authority_type'] ?? null,
        'jurisdiction' => $chunk['jurisdiction'] ?? null,
        'matched_sub_questions' => [$subQId],
    ];
}
|
||||
|
||||
/**
 * Collapse duplicate source records, then rank and cap the pool.
 *
 * Records are keyed by "<source_origin>:<chunk_id>"; a record without a
 * chunk_id gets a random key and is therefore never merged. When a
 * duplicate is seen, the first record is kept, its matched_sub_questions
 * are unioned with the duplicate's, and each score takes the larger value.
 * The result is sorted descending by reranker_score, falling back to
 * similarity and then 0, and truncated to `$cap` entries.
 *
 * @param array $rawPool Source records from all sub-questions.
 * @param int   $cap     Maximum number of records to return.
 *
 * @return list<array<string, mixed>>
 */
private function mergeAndDedupe(array $rawPool, int $cap): array
{
    $unique = [];

    foreach ($rawPool as $candidate) {
        $identity = ($candidate['source_origin'] ?? 'corpus')
            . ':'
            . ($candidate['chunk_id'] ?? bin2hex(random_bytes(4)));

        if (isset($unique[$identity])) {
            $kept = &$unique[$identity];
            $kept['matched_sub_questions'] = array_values(array_unique(array_merge(
                $kept['matched_sub_questions'] ?? [],
                $candidate['matched_sub_questions'] ?? []
            )));
            // Keep whichever occurrence scored higher on each metric.
            foreach (['similarity', 'reranker_score'] as $metric) {
                if (($candidate[$metric] ?? 0) > ($kept[$metric] ?? 0)) {
                    $kept[$metric] = $candidate[$metric];
                }
            }
            unset($kept);
        } else {
            $unique[$identity] = $candidate;
        }
    }

    $ranked = array_values($unique);
    usort($ranked, static function (array $left, array $right): int {
        $leftScore = $left['reranker_score'] ?? $left['similarity'] ?? 0;
        $rightScore = $right['reranker_score'] ?? $right['similarity'] ?? 0;

        return $rightScore <=> $leftScore;
    });

    return array_slice($ranked, 0, $cap);
}
|
||||
|
||||
/**
 * Attach a citation number `n` to every source, derived from its array
 * key plus one, and return the sources as a re-indexed list.
 *
 * @param array $chunks Source records.
 *
 * @return list<array<string, mixed>>
 */
private function numberSources(array $chunks): array
{
    return array_map(
        static function ($source, $position) {
            $source['n'] = $position + 1;

            return $source;
        },
        $chunks,
        array_keys($chunks)
    );
}
|
||||
|
||||
/**
 * Produce the final cited brief from the numbered sources.
 *
 * With no sources, a canned "not enough support" result is returned
 * without calling any model. Otherwise a prompt listing every source as
 * `[n]` is sent to the engine selected by `$engine`:
 *  - 'gpu'        → dbnToolsCallGpuLlm()
 *  - 'azure_full' → the 'gpt-4o' deployment
 *  - anything else → the default chat deployment
 *
 * If the reply cannot be decoded into an object with `brief_markdown`, the
 * raw reply is wrapped as the brief with explanatory placeholder fields.
 * A failed model call is reported through dbnToolsAbort() (HTTP 502,
 * 'llm_error'); NOTE(review): this assumes dbnToolsAbort() does not return.
 *
 * @param string $seedDescription Raw user input, interpolated into the prompt.
 * @param string $brief           Research brief, interpolated into the prompt.
 * @param array  $subQuestions    Entries with `id` and `question`.
 * @param array  $numberedSources Entries with n, source_origin, title,
 *                                section, package_or_corpus, excerpt.
 * @param string $engine          Engine selector, see above.
 * @param string $language        'no' selects Norwegian; anything else English.
 * @param float  $temperature     Sampling temperature passed to the model.
 *
 * @return array{json: array, deploy_label: string}
 */
private function synthesise(
    string $seedDescription,
    string $brief,
    array $subQuestions,
    array $numberedSources,
    string $engine,
    string $language,
    float $temperature
): array {
    $locale = $language === 'no' ? 'Norwegian' : 'English';

    if (empty($numberedSources)) {
        $noSupport = $language === 'no'
            ? 'Jeg fant ikke tilstrekkelig kildestøtte i korpuset til å gi et grunnlagsbasert svar.'
            : 'I did not find enough source support in the corpus to give a grounded answer.';

        return [
            'json' => [
                'brief_markdown' => $noSupport,
                'what_we_found' => 'No retrieved sources passed the similarity threshold.',
                'what_remains_uncertain' => ['No corpus evidence retrieved for the given query and slice selection.'],
                'next_practical_step' => 'Try widening slice selection or rephrasing with more specific statutory or party terms.',
            ],
            'deploy_label' => match ($engine) {
                'gpu' => 'GPU (cuttlefish)',
                'azure_full' => 'gpt-4o',
                default => $this->azure->chatDeployment(),
            },
        ];
    }

    // One prompt entry per source, keyed by its citation number.
    $sourcesText = implode("\n\n", array_map(
        static function ($source): string {
            $origin = $source['source_origin'] === 'upload' ? 'uploaded doc' : 'corpus';
            $sectionSuffix = !empty($source['section']) ? ' — ' . $source['section'] : '';

            return sprintf(
                "[%d] (%s) %s%s\n Corpus: %s\n Excerpt: %s",
                $source['n'],
                $origin,
                $source['title'],
                $sectionSuffix,
                $source['package_or_corpus'],
                $source['excerpt']
            );
        },
        $numberedSources
    ));

    $subQText = '';
    if ($subQuestions) {
        $listing = array_map(
            static function (array $sq, int $i): string {
                return sprintf('%d. (%s) %s', $i + 1, $sq['id'], $sq['question']);
            },
            $subQuestions,
            array_keys($subQuestions)
        );
        $subQText = "\nSub-questions explored:\n" . implode("\n", $listing);
    }

    $prompt = <<<PROMPT
    You are Do Better Norge Legal Tools running a deep-research synthesis. You MUST ground every claim in the numbered sources below, using inline `[n]` citation markers that map to the source list. Do NOT cite a source you did not use. Do NOT invent statutes, paragraph numbers, case names, dates, or parties.

    User input:
    {$seedDescription}

    Research brief:
    {$brief}
    {$subQText}

    Sources (numbered):
    {$sourcesText}

    Return JSON only in {$locale}:
    {
    "brief_markdown": "Markdown legal brief, 250-700 words, with inline [n] citation markers keyed to the sources above. Use short paragraphs. End with a one-line caveat. Do NOT include headings above level 3 (###).",
    "what_we_found": "1-2 sentence plain-language summary of the grounded finding",
    "what_remains_uncertain": ["gaps or caveats — what the corpus did not cover or where confidence is limited"],
    "next_practical_step": "one concrete next action the user can take"
    }

    Rules:
    - Every factual claim in `brief_markdown` must end with one or more `[n]` markers.
    - If no source supports a point, omit the point.
    - Respond in {$locale}.
    - Output valid JSON only — no markdown fences around the JSON.
    PROMPT;

    $messages = [
        ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
        ['role' => 'user', 'content' => $prompt],
    ];
    $opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 2200, 'timeout' => 120];

    try {
        if ($engine === 'gpu') {
            // The GPU helper returns the full completion payload; pull out the text.
            $completion = dbnToolsCallGpuLlm($messages, $opts);
            $deployLabel = 'GPU (cuttlefish)';
            $raw = (string)($completion['choices'][0]['message']['content'] ?? '');
        } elseif ($engine === 'azure_full') {
            $raw = $this->azure->withDeployment('gpt-4o')->chatText($messages, $opts);
            $deployLabel = 'gpt-4o';
        } else {
            $raw = $this->azure->chatText($messages, $opts);
            $deployLabel = $this->azure->chatDeployment();
        }
    } catch (Throwable $e) {
        dbnToolsAbort('Synthesis LLM request failed: ' . $e->getMessage(), 502, 'llm_error');
    }

    $structured = $this->azure->decodeJsonObject($raw);
    $usable = is_array($structured) && !empty($structured['brief_markdown']);
    if (!$usable) {
        // Salvage path: surface the raw reply rather than failing the request.
        $structured = [
            'brief_markdown' => $raw,
            'what_we_found' => 'Synthesis returned non-structured output; rendered as raw markdown.',
            'what_remains_uncertain' => ['Response format could not be validated as structured JSON.'],
            'next_practical_step' => 'Review the brief manually before relying on it.',
        ];
    }

    return [
        'json' => $structured,
        'deploy_label' => $deployLabel,
    ];
}
|
||||
|
||||
/**
 * Grade how well the brief is supported, from source count and best score.
 *
 * Each source contributes its reranker_score, or its similarity when the
 * reranker score is null/absent; non-numeric values are ignored.
 *  - 'high'   : at least 6 sources and best score >= 0.5
 *  - 'medium' : at least 3 sources and best score >= 0.35
 *  - 'low'    : everything else, including no sources
 *
 * @param array $sources Source records.
 *
 * @return string 'high', 'medium' or 'low'.
 */
private function citationConfidence(array $sources): string
{
    if (!$sources) {
        return 'low';
    }

    $numericScores = [];
    foreach ($sources as $source) {
        $score = (static fn(array $s) => $s['reranker_score'] ?? $s['similarity'] ?? null)($source);
        if (is_numeric($score)) {
            $numericScores[] = $score;
        }
    }

    $top = $numericScores ? max($numericScores) : 0;
    $howMany = count($sources);

    if ($howMany >= 6 && $top >= 0.5) {
        return 'high';
    }

    return ($howMany >= 3 && $top >= 0.35) ? 'medium' : 'low';
}
|
||||
|
||||
/**
 * Build one trace entry.
 *
 * @param string $label  Short step name.
 * @param string $detail Description of what happened in the step.
 * @param string $status Step status; defaults to 'complete'.
 *
 * @return array{label: string, detail: string, status: string}
 */
private function trace(string $label, string $detail, string $status = 'complete'): array
{
    return compact('label', 'detail', 'status');
}
|
||||
|
||||
/**
 * Milliseconds elapsed since `$start`, rounded to the nearest integer.
 *
 * @param float $start A timestamp previously taken with microtime(true).
 */
private function elapsedMs(float $start): int
{
    $seconds = microtime(true) - $start;

    return (int)round($seconds * 1000);
}
|
||||
}
|
||||
@@ -487,3 +487,192 @@ function dbnToolsExcerpt(string $text, int $limit = 520): string
|
||||
}
|
||||
return rtrim(mb_substr($text, 0, $limit - 1, 'UTF-8')) . '…';
|
||||
}
|
||||
|
||||
/** Largest accepted upload, in bytes (4 MB). */
const DBN_TOOLS_EXTRACT_MAX_BYTES = 4 * 1024 ** 2;

/** Maximum number of characters of extracted text returned per file. */
const DBN_TOOLS_EXTRACT_TEXT_LIMIT = 128000;

/** Lower-case file extensions that have an extractor. */
const DBN_TOOLS_EXTRACT_ALLOWED_EXTS = ['txt', 'pdf', 'docx'];
|
||||
|
||||
/**
 * Validate one uploaded-file entry and extract its text.
 *
 * Checks, in order: PHP upload error code, is_uploaded_file(), non-zero
 * size, size within DBN_TOOLS_EXTRACT_MAX_BYTES, and an extension listed
 * in DBN_TOOLS_EXTRACT_ALLOWED_EXTS. Each failure is reported through
 * dbnToolsAbort(). The extracted text is trimmed and cut to
 * DBN_TOOLS_EXTRACT_TEXT_LIMIT characters.
 *
 * @param array $file One entry in the shape of $_FILES
 *                    (error, name, tmp_name, size).
 *
 * @return array{ok: true, text: string, filename: string, chars: int, truncated: bool}
 */
function dbnToolsExtractUploadedFile(array $file): array
{
    $uploadStatus = (int)($file['error'] ?? UPLOAD_ERR_NO_FILE);
    if ($uploadStatus !== UPLOAD_ERR_OK) {
        $sizeMessage = 'The file exceeds the allowed size limit.';
        $knownFailures = [
            UPLOAD_ERR_INI_SIZE => $sizeMessage,
            UPLOAD_ERR_FORM_SIZE => $sizeMessage,
            UPLOAD_ERR_NO_TMP_DIR => 'No temporary directory is available.',
            UPLOAD_ERR_CANT_WRITE => 'Unable to save the uploaded file.',
        ];
        dbnToolsAbort($knownFailures[$uploadStatus] ?? 'File upload failed.', 422, 'upload_error');
    }

    // basename() strips any directory part the client put in the name.
    $originalName = basename((string)($file['name'] ?? ''));
    $tmpPath = (string)($file['tmp_name'] ?? '');
    $size = (int)($file['size'] ?? 0);

    if (!is_uploaded_file($tmpPath)) {
        dbnToolsAbort('Invalid file upload.', 400, 'invalid_upload');
    }
    if ($size === 0) {
        dbnToolsAbort('The uploaded file is empty.', 422, 'file_empty');
    }
    if ($size > DBN_TOOLS_EXTRACT_MAX_BYTES) {
        dbnToolsAbort('File exceeds the 4 MB limit.', 413, 'file_too_large');
    }

    $ext = strtolower(pathinfo($originalName, PATHINFO_EXTENSION));
    $supported = in_array($ext, DBN_TOOLS_EXTRACT_ALLOWED_EXTS, true);
    if (!$supported) {
        dbnToolsAbort('Unsupported file type. Upload a .pdf, .docx, or .txt file.', 422, 'unsupported_type');
    }

    $extracted = trim(match ($ext) {
        'txt' => dbnToolsExtractTxt($tmpPath),
        'pdf' => dbnToolsExtractPdf($tmpPath),
        'docx' => dbnToolsExtractDocx($tmpPath),
    });
    if ($extracted === '') {
        dbnToolsAbort('No text could be extracted from this file.', 422, 'no_text');
    }

    $truncated = mb_strlen($extracted, 'UTF-8') > DBN_TOOLS_EXTRACT_TEXT_LIMIT;
    if ($truncated) {
        $extracted = mb_substr($extracted, 0, DBN_TOOLS_EXTRACT_TEXT_LIMIT, 'UTF-8');
    }

    return [
        'ok' => true,
        'text' => $extracted,
        'filename' => $originalName,
        'chars' => mb_strlen($extracted, 'UTF-8'),
        'truncated' => $truncated,
    ];
}
|
||||
|
||||
/**
 * Read a plain-text file and return its contents as UTF-8.
 *
 * The source encoding is picked by mbstring from the candidates UTF-8,
 * ISO-8859-1 and Windows-1252.
 *
 * @param string $path File to read.
 *
 * @throws DbnToolsHttpException (500, 'read_error') when the file cannot be read.
 */
function dbnToolsExtractTxt(string $path): string
{
    $bytes = file_get_contents($path);
    if ($bytes !== false) {
        return mb_convert_encoding($bytes, 'UTF-8', 'UTF-8, ISO-8859-1, Windows-1252');
    }

    throw new DbnToolsHttpException('Unable to read the file.', 500, 'read_error');
}
|
||||
|
||||
/**
 * Extract text from a PDF by running the `pdftotext` command-line tool
 * and capturing its standard output (stderr is discarded).
 *
 * @param string $path PDF file; shell-escaped before use.
 *
 * @throws DbnToolsHttpException (422, 'pdf_extract_failed') when the
 *         command produces no output or only whitespace.
 */
function dbnToolsExtractPdf(string $path): string
{
    $command = sprintf('pdftotext %s - 2>/dev/null', escapeshellarg($path));
    $stdout = shell_exec($command);

    // shell_exec() gives null/false on failure or when nothing was printed.
    if (is_string($stdout) && trim($stdout) !== '') {
        return $stdout;
    }

    throw new DbnToolsHttpException(
        'PDF text extraction failed. The file may be image-only or encrypted.',
        422,
        'pdf_extract_failed'
    );
}
|
||||
|
||||
/**
 * Extract paragraph text from a .docx file.
 *
 * Reads `word/document.xml` from the archive and, for every `w:p`
 * element, concatenates the text of its `w:t` descendants. Paragraphs are
 * joined with "\n".
 *
 * libxml's internal-error mode is switched on only for the duration of
 * parsing and then restored to the caller's setting, because it is
 * process-wide state. Parsing is done with LIBXML_NONET so the document
 * cannot trigger network fetches.
 *
 * @param string $path Path to the .docx archive.
 *
 * @throws DbnToolsHttpException 422 'docx_open_failed' when the archive
 *         cannot be opened, 'docx_no_content' when document.xml is missing
 *         or empty, 'docx_parse_failed' when it is not well-formed XML.
 */
function dbnToolsExtractDocx(string $path): string
{
    $zip = new ZipArchive();
    $result = $zip->open($path);
    if ($result !== true) {
        throw new DbnToolsHttpException('Unable to open the .docx file.', 422, 'docx_open_failed');
    }

    $xml = $zip->getFromName('word/document.xml');
    $zip->close();

    // An empty entry must be rejected here: DOMDocument::loadXML('') throws a ValueError.
    if ($xml === false || $xml === '') {
        throw new DbnToolsHttpException('No document content found in this .docx file.', 422, 'docx_no_content');
    }

    $doc = new DOMDocument();
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $loaded = $doc->loadXML($xml, LIBXML_NONET);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    if ($loaded === false) {
        throw new DbnToolsHttpException('The .docx document XML could not be parsed.', 422, 'docx_parse_failed');
    }

    $xpath = new DOMXPath($doc);
    $xpath->registerNamespace('w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main');

    $paragraphs = [];
    foreach ($xpath->query('//w:p') as $para) {
        $runs = [];
        foreach ($xpath->query('.//w:t', $para) as $t) {
            $runs[] = $t->textContent;
        }
        $paragraphs[] = implode('', $runs);
    }

    return implode("\n", $paragraphs);
}
|
||||
|
||||
/**
 * Send a chat-completions request to the LiteLLM endpoint and return the
 * decoded JSON response.
 *
 * The API key is read from the LITELLM_MASTER_KEY environment setting.
 * There is deliberately no built-in fallback key: credentials must not
 * live in source control, so a missing key is reported as an error.
 *
 * Uses cURL when available, otherwise an HTTP stream context.
 *
 * @param array $messages Chat messages (`role` / `content` entries).
 * @param array $options  Optional keys: model (default 'qwen2.5:14b'),
 *                        timeout in seconds (default 90), temperature
 *                        (default 0.1), max_tokens (default 8000), json
 *                        (truthy requests a JSON-object response format).
 *
 * @return array Decoded response body.
 *
 * @throws RuntimeException when the key is missing, the payload cannot be
 *         encoded, the transport fails, the body is not JSON, or the HTTP
 *         status is not 2xx.
 */
function dbnToolsCallGpuLlm(array $messages, array $options = []): array
{
    $url = 'http://10.0.1.10:4000/v1/chat/completions';
    $apiKey = (string)(dbnToolsEnv('LITELLM_MASTER_KEY') ?: '');
    if ($apiKey === '') {
        throw new RuntimeException('GPU LiteLLM API key is not configured (LITELLM_MASTER_KEY).');
    }
    $model = (string)($options['model'] ?? 'qwen2.5:14b');
    $timeout = (int)($options['timeout'] ?? 90);

    $payload = [
        'model' => $model,
        'messages' => $messages,
        'temperature' => $options['temperature'] ?? 0.1,
        'max_tokens' => $options['max_tokens'] ?? 8000,
    ];
    if (!empty($options['json'])) {
        $payload['response_format'] = ['type' => 'json_object'];
    }

    // Message text can originate from uploaded documents; substitute invalid
    // UTF-8 sequences instead of letting json_encode() fail on them.
    $body = json_encode(
        $payload,
        JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
    );
    if ($body === false) {
        throw new RuntimeException('GPU LiteLLM request could not be encoded: ' . json_last_error_msg());
    }

    $headers = [
        'Content-Type: application/json',
        'Authorization: Bearer ' . $apiKey,
    ];

    if (function_exists('curl_init')) {
        $ch = curl_init($url);
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_TIMEOUT => $timeout,
        ]);
        $response = curl_exec($ch);
        $code = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
        $err = curl_error($ch);
        curl_close($ch);

        if ($response === false) {
            throw new RuntimeException('GPU LiteLLM request failed: ' . $err);
        }
    } else {
        $ctx = stream_context_create(['http' => [
            'method' => 'POST',
            'header' => implode("\r\n", $headers),
            'content' => $body,
            'timeout' => $timeout,
            // Return the body of 4xx/5xx replies so the error message can be read.
            'ignore_errors' => true,
        ]]);
        $response = @file_get_contents($url, false, $ctx);
        $code = 0;
        // $http_response_header is populated by the HTTP stream wrapper.
        if (isset($http_response_header[0]) && preg_match('/\s(\d{3})\s/', $http_response_header[0], $m)) {
            $code = (int)$m[1];
        }
        if ($response === false) {
            throw new RuntimeException('GPU LiteLLM request failed.');
        }
    }

    $decoded = json_decode($response, true);
    if (!is_array($decoded)) {
        // Include the status so proxy/HTML error pages are diagnosable.
        throw new RuntimeException('GPU LiteLLM returned non-JSON response (HTTP ' . $code . ').');
    }
    if ($code < 200 || $code >= 300) {
        $msg = $decoded['error']['message'] ?? ('HTTP ' . $code);
        throw new RuntimeException('GPU LiteLLM error: ' . $msg);
    }

    return $decoded;
}
|
||||
|
||||
+7
-6
@@ -9,12 +9,13 @@ if (!dbnToolsIsAuthenticated()) {
|
||||
}
|
||||
|
||||
$navItems = [
|
||||
'ask' => ['Ask', 'Source-grounded'],
|
||||
'search' => ['Search', 'Legal sources'],
|
||||
'summarize' => ['Summarize', 'Pasted text'],
|
||||
'timeline' => ['Timeline', 'Events'],
|
||||
'redact' => ['Redact', 'Privacy'],
|
||||
'transcribe' => ['Transcribe', 'Audio'],
|
||||
'ask' => ['Ask', 'Source-grounded'],
|
||||
'search' => ['Search', 'Legal sources'],
|
||||
'deep-research' => ['Deep research', 'Agent + RAG'],
|
||||
'summarize' => ['Summarize', 'Pasted text'],
|
||||
'timeline' => ['Timeline', 'Events'],
|
||||
'redact' => ['Redact', 'Privacy'],
|
||||
'transcribe' => ['Transcribe', 'Audio'],
|
||||
];
|
||||
$toolName = $toolName ?? 'ask';
|
||||
$toolTitle = $toolTitle ?? 'Legal Tools';
|
||||
|
||||
@@ -18,5 +18,8 @@
|
||||
</section><!-- /workspace -->
|
||||
</main><!-- /appShell -->
|
||||
<script src="assets/js/tools.js" defer></script>
|
||||
<?php if (!empty($extraScripts) && is_array($extraScripts)): foreach ($extraScripts as $extraScript): ?>
|
||||
<script src="<?= htmlspecialchars((string)$extraScript) ?>" defer></script>
|
||||
<?php endforeach; endif; ?>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -91,7 +91,7 @@ if (dbnToolsIsAuthenticated()) {
|
||||
|
||||
<section class="cap-section">
|
||||
<div class="section-inner">
|
||||
<h2 class="section-heading">Six tools, one suite</h2>
|
||||
<h2 class="section-heading">Seven tools, one suite</h2>
|
||||
<div class="cap-grid">
|
||||
<div class="cap-card">
|
||||
<span class="cap-label">Ask</span>
|
||||
@@ -103,6 +103,11 @@ if (dbnToolsIsAuthenticated()) {
|
||||
<h3>Search</h3>
|
||||
<p>Retrieve up to seven relevant legal sources with titles, sections, and excerpts.</p>
|
||||
</div>
|
||||
<div class="cap-card">
|
||||
<span class="cap-label">Deep research</span>
|
||||
<h3>Deep research</h3>
|
||||
<p>Upload a case file or paste a question. An agent expands it into 3–5 angles, runs hybrid rank/rerank RAG across the corpus + your upload, and returns a cited brief.</p>
|
||||
</div>
|
||||
<div class="cap-card">
|
||||
<span class="cap-label">Summarize</span>
|
||||
<h3>Summarize</h3>
|
||||
|
||||
Reference in New Issue
Block a user