4cbe0a4ac4
New surface at /deep-research.php where the user pastes a question or uploads PDF/DOCX/TXT case files and an LLM-orchestrated agent researches the Do Better Norge legal corpus from 3-5 angles, with hybrid retrieval, cross-encoder rerank, and synthesis that emits an inline-[n]-cited markdown brief plus a numbered sources panel. Uploaded documents are chunked + embedded in memory only (nomic-embed-text via LiteLLM) and searched alongside the shared corpus during the same request — never persisted to disk, DB, or Qdrant. Reuses ClientRagPipeline::searchAll (hybrid + rerank), dbnV6 slice helpers, and the existing extract.php text-extraction logic via a new dbnToolsExtractUploadedFile() helper. Also adds dbnToolsCallGpuLlm() helper in bootstrap.php — fixes a latent bug where LegalTools.php was already calling that name with no definition. Search.php is unchanged.
68 lines
2.5 KiB
PHP
68 lines
2.5 KiB
PHP
<?php
|
|
declare(strict_types=1);
|
|
|
|
require_once __DIR__ . '/../includes/bootstrap.php';
|
|
require_once __DIR__ . '/../includes/DeepResearchAgent.php';
|
|
|
|
// Gate the endpoint before touching any input: the method check runs first,
// then the auth check. (Both helpers live in bootstrap.php; presumably they
// terminate the request on failure — confirm there.)
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

// The request body comes in one of two shapes:
//   * multipart/form-data — the JSON document travels in a "payload" form
//     field (file parts are read from $_FILES further down in this script);
//   * anything else       — the body is handed to dbnToolsJsonInput().
// The content-type match is case-insensitive and substring-based, so
// parameters such as "; boundary=..." do not interfere.
$contentType = (string)($_SERVER['CONTENT_TYPE'] ?? '');
$isMultipart = stripos($contentType, 'multipart/form-data') !== false;

if (!$isMultipart) {
    // NOTE(review): 120000 looks like a byte cap on the JSON body — confirm
    // against dbnToolsJsonInput() in bootstrap.php.
    $input = dbnToolsJsonInput(120000);
} else {
    $rawPayload = (string)($_POST['payload'] ?? '');
    if ($rawPayload === '') {
        dbnToolsError('Multipart request is missing the "payload" JSON field.', 422, 'missing_payload');
    }

    // Decode to an associative array; anything that does not decode to an
    // array (invalid JSON, or a bare scalar/null) is rejected.
    $input = json_decode($rawPayload, true);
    if (!is_array($input)) {
        dbnToolsError('Multipart "payload" field must be valid JSON.', 422, 'invalid_payload_json');
    }
}

// Response/telemetry language; falls back to 'en' when the client sent none.
$language = dbnToolsNormalizeLanguage($input['language'] ?? 'en');
|
|
|
|
// Run the deep-research request inside the telemetry wrapper. The closure
// reads the already-decoded $input, extracts text from any uploaded files,
// and returns whatever DbnDeepResearchAgent::run() returns.
dbnToolsWithTelemetry('deep_research', $language, function () use ($input, $language) {
    // NOTE(review): the numeric argument to dbnToolsString() looks like a
    // maximum length and the trailing false like a "required" flag — confirm
    // against bootstrap.php.
    $seedQuery = dbnToolsString($input, 'query', 4000, false);
    $pastedText = dbnToolsString($input, 'paste_text', 64000, false);
    $sliceInput = $input['slices'] ?? null;

    // Only scalar values are cast to string. A non-scalar "engine" (e.g. a
    // JSON array) falls back to the default instead of becoming the literal
    // string "Array" with an "Array to string conversion" warning.
    $engineInput = $input['engine'] ?? 'azure_mini';
    $engine = is_scalar($engineInput) ? (string)$engineInput : 'azure_mini';

    $controls = is_array($input['controls'] ?? null) ? $input['controls'] : [];

    $uploadedFiles = [];
    $upload = $_FILES['files'] ?? null;
    if (is_array($upload) && isset($upload['tmp_name'])) {
        // PHP reports `files[]` uploads as parallel arrays and a plain `files`
        // field as scalars; the (array) casts give both the same shape so a
        // single-file upload is no longer silently dropped.
        $tmpNames = (array)$upload['tmp_name'];
        $names = (array)($upload['name'] ?? []);
        $types = (array)($upload['type'] ?? []);
        $errors = (array)($upload['error'] ?? []);
        $sizes = (array)($upload['size'] ?? []);

        // Iterate by key rather than 0..n-1 so non-sequential form keys
        // (e.g. files[a], files[b]) line up across the parallel arrays.
        $pendingFiles = [];
        foreach ($tmpNames as $key => $tmpName) {
            $errorCode = $errors[$key] ?? UPLOAD_ERR_NO_FILE;

            // An empty file input in the form yields a slot with
            // UPLOAD_ERR_NO_FILE. That is "nothing uploaded", not a failed
            // upload, so it is neither extracted nor counted toward the cap.
            if ($errorCode === UPLOAD_ERR_NO_FILE) {
                continue;
            }

            $pendingFiles[] = [
                'name' => $names[$key] ?? '',
                'type' => $types[$key] ?? '',
                'tmp_name' => $tmpName,
                'error' => $errorCode,
                'size' => $sizes[$key] ?? 0,
            ];
        }

        // Enforce the cap before any extraction work starts.
        // NOTE(review): every other error path in this file uses
        // dbnToolsError(); confirm dbnToolsAbort() is defined in bootstrap.php.
        if (count($pendingFiles) > 5) {
            dbnToolsAbort('At most 5 files can be uploaded per request.', 413, 'too_many_files');
        }

        foreach ($pendingFiles as $file) {
            $extracted = dbnToolsExtractUploadedFile($file);

            // Forward only the fields the agent call below receives.
            $uploadedFiles[] = [
                'filename' => $extracted['filename'],
                'text' => $extracted['text'],
                'chars' => $extracted['chars'],
                'truncated' => $extracted['truncated'],
            ];
        }
    }

    // Positional arguments: query, pasted text, extracted uploads, slices
    // (forced to an array), engine, language, controls.
    return (new DbnDeepResearchAgent())->run(
        $seedQuery,
        $pastedText,
        $uploadedFiles,
        is_array($sliceInput) ? $sliceInput : [],
        $engine,
        $language,
        $controls
    );
});
|