feat(corpus): add save-to-corpus + private corpus search scope

- POST /api/save-to-corpus.php — saves tool output text to user's default CaveauAI corpus via ClientRagPipeline
- api/case/upload.php — dual-writes uploaded PDFs to CaveauAI client_documents (best-effort)
- assets/js/corpus-save.js — shared <dialog> handler for .js-save-corpus buttons on all tool pages
- includes/layout_footer.php — injects corpus-save.js + shared save dialog markup
- korrespond/deep-research/barnevernet/discrepancy JS — save-to-corpus buttons on output sections
- api/search.php + LegalTools::search() — corpus_scope param ('shared'|'private'|'both'), merges personal CaveauAI corpus with shared legal library when 'both'
- includes/tool_form.php + assets/js/tools.js — corpus scope radio toggle shown on search tab
- api/user-docs.php — add POST upload method for non-SSO authenticated users

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-22 17:50:32 +02:00
parent ed329f9d05
commit b014638f39
13 changed files with 465 additions and 33 deletions
+45 -5
View File
@@ -40,11 +40,51 @@ if (strncmp($head, '%PDF-', 5) !== 0) {
// Register the upload and queue it for ingestion. The HTTP response is sent
// only once, at the end of this section, so that the best-effort CaveauAI
// dual-write below actually runs and its document id can be reported.
try {
    $doc = CaseStore::registerUpload($userId, $name, $tmp, $size);
    CaseStore::caseEnqueueIngest((int)$doc['doc_id'], $userId);
} catch (Throwable $e) {
    dbnToolsError($e->getMessage(), 400, 'upload_failed');
}

// Dual-write to CaveauAI corpus (best-effort — never fails the upload)
$caveauDocId = null;
$clientId    = (int)($_SESSION['dbn_tools_client_id'] ?? 0);
if ($clientId > 0 && !empty($doc['storage_path'])) {
    try {
        dbnToolsBootCaveau();
        $textExtractFile = dbnToolsAiPortalRoot() . '/platform/includes/text_extract.php';
        if (is_file($textExtractFile)) {
            require_once $textExtractFile;
            $content = extractPdfText($doc['storage_path']);

            // Skip PDFs that yield no usable text (e.g. scans without a text layer).
            if (is_string($content) && strlen($content) > 30) {
                $caveauDb = getDb();
                $corpusSt = $caveauDb->prepare(
                    'SELECT id FROM client_corpora WHERE client_id = ? AND is_default = 1 LIMIT 1'
                );
                $corpusSt->execute([$clientId]);
                $corpusId = (int)($corpusSt->fetchColumn() ?: 0);

                if ($corpusId > 0) {
                    $title = pathinfo($doc['filename'], PATHINFO_FILENAME);

                    // str_word_count() does not support multibyte text and splits
                    // words at letters such as æ/ø/å; count whitespace-separated
                    // tokens instead, falling back if the text is not valid UTF-8.
                    $wordCount = preg_match_all('/\S+/u', $content);
                    if ($wordCount === false) {
                        $wordCount = str_word_count($content);
                    }

                    $caveauDb->prepare("
                        INSERT INTO client_documents
                            (client_id, corpus_id, title, source_type, content, category,
                             import_method, word_count, status)
                        VALUES (?, ?, ?, 'pdf', ?, 'user-upload', 'dbn_upload', ?, 'pending')
                    ")->execute([$clientId, $corpusId, $title, $content, $wordCount]);
                    $caveauDocId = (int)$caveauDb->lastInsertId();

                    try {
                        $rag = new ClientRagPipeline($clientId);
                        $rag->ingestDocument($caveauDocId);
                    } catch (Throwable $ingestError) {
                        // Do not leave the row in 'pending' when indexing failed;
                        // record the failure the same way save-to-corpus does.
                        $caveauDb->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
                            ->execute([$ingestError->getMessage(), $caveauDocId]);
                        throw $ingestError; // logged by the outer handler
                    }
                }
            }
        }
    } catch (Throwable $e) {
        // Non-fatal: log and continue
        error_log('[upload] CaveauAI dual-write failed for doc ' . ($doc['doc_id'] ?? '?') . ': ' . $e->getMessage());
    }
}

// Single response for the upload; caveau_doc_id stays null when nothing was
// written to the CaveauAI corpus.
dbnToolsRespond([
    'ok' => true,
    'doc_id' => $doc['doc_id'],
    'filename' => $doc['filename'],
    'caveau_doc_id' => $caveauDocId,
]);
+95
View File
@@ -0,0 +1,95 @@
<?php
/**
* POST /api/save-to-corpus.php
*
* Save tool output text into the user's CaveauAI corpus.
* Uses dbnToolsBootCaveau() to call ClientRagPipeline directly via filesystem include.
*
* Request body (JSON, max 500 KB):
* title string (required)
* content string (required, min 30 chars)
* source_tool string (optional, slug)
* tags string (optional, comma-separated)
*/
declare(strict_types=1);

require_once dirname(__DIR__) . '/includes/bootstrap.php';

dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

// The document is stored under the CaveauAI client linked to the authenticated user.
$user     = dbnToolsAuthenticatedUser();
$clientId = (int)($user['client_id'] ?? 0);
if ($clientId <= 0) {
    dbnToolsError('No linked CaveauAI workspace. Log in via the CaveauAI portal first.', 403, 'no_workspace');
}

$input = dbnToolsJsonInput(500_000);

// JSON clients may send numbers, arrays or null for any field. Under
// strict_types, trim()/strtolower() would throw a TypeError on those values,
// so non-strings are treated as absent and rejected by the checks below
// (required fields) or ignored (optional fields).
$stringField = static function (string $key) use ($input): string {
    $value = $input[$key] ?? '';

    return is_string($value) ? trim($value) : '';
};

$title   = $stringField('title');
$content = $stringField('content');

// Slug: lowercase ASCII letters, digits, '-' and '_' only, at most 64 bytes.
// An empty slug is stored as NULL.
$slug       = (string)preg_replace('/[^a-z0-9\-_]/', '', strtolower($stringField('source_tool')));
$sourceTool = $slug !== '' ? substr($slug, 0, 64) : null;

// Comma-separated tags become a JSON list; only empty entries are dropped
// (a literal "0" is a valid tag).
$tagList = array_values(array_filter(
    array_map('trim', explode(',', $stringField('tags'))),
    static fn (string $tag): bool => $tag !== ''
));
$tags = json_encode($tagList, JSON_UNESCAPED_UNICODE);

if ($title === '') {
    dbnToolsError('title is required.', 400, 'bad_request');
}
// The limit is expressed in characters, so count characters rather than bytes.
if (mb_strlen($content, 'UTF-8') < 30) {
    dbnToolsError('content too short (min 30 chars).', 400, 'bad_request');
}
// NOTE(review): the request body is already capped via dbnToolsJsonInput(500_000),
// so this byte limit may never trigger — confirm and align the two limits.
if (strlen($content) > 2_000_000) {
    dbnToolsError('content exceeds 2 MB limit.', 400, 'too_large');
}

// Load CaveauAI platform (getDb, ClientRagPipeline, etc.)
dbnToolsBootCaveau();

try {
    $db = getDb();
} catch (Throwable $e) {
    // Connection errors can carry host names or credentials: log the detail,
    // return a generic message to the client.
    error_log('[save-to-corpus] CaveauAI database unavailable: ' . $e->getMessage());
    dbnToolsError('CaveauAI database unavailable.', 503, 'db_unavailable');
}

// Resolve default corpus for this client
$stmt = $db->prepare('SELECT id FROM client_corpora WHERE client_id = ? AND is_default = 1 LIMIT 1');
$stmt->execute([$clientId]);
$corpusId = (int)($stmt->fetchColumn() ?: 0);
if ($corpusId === 0) {
    dbnToolsError(
        'No default corpus found for your account. Set one up in the CaveauAI portal.',
        409,
        'no_corpus'
    );
}

// str_word_count() does not support multibyte text and splits words at letters
// such as æ/ø/å; count whitespace-separated tokens instead, falling back to the
// byte-based count if the text cannot be processed as UTF-8.
$wordCount = preg_match_all('/\S+/u', $content);
if ($wordCount === false) {
    $wordCount = str_word_count($content);
}

$ins = $db->prepare("
    INSERT INTO client_documents
        (client_id, corpus_id, title, source_type, content, category,
         tags, import_method, source_tool, word_count, status)
    VALUES (?, ?, ?, 'text', ?, 'tool-output', ?, 'tool_output', ?, ?, 'pending')
");
$ins->execute([$clientId, $corpusId, $title, $content, $tags, $sourceTool, $wordCount]);
$docId = (int)$db->lastInsertId();

try {
    $rag    = new ClientRagPipeline($clientId);
    $chunks = $rag->ingestDocument($docId);
} catch (Throwable $e) {
    // Document is saved but not indexed — mark error and return partial success.
    error_log('[save-to-corpus] indexing failed for document ' . $docId . ': ' . $e->getMessage());
    try {
        $db->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
            ->execute([$e->getMessage(), $docId]);
    } catch (Throwable $statusError) {
        // A failing status update must not replace the index_failed response.
        error_log('[save-to-corpus] could not flag document ' . $docId . ' as failed: ' . $statusError->getMessage());
    }
    // Pipeline errors may mention internal hosts; keep the client message generic.
    dbnToolsError(
        'Saved to corpus but indexing failed.',
        500,
        'index_failed',
        ['document_id' => $docId]
    );
}

dbnToolsRespond(['ok' => true, 'document_id' => $docId, 'chunks' => $chunks], 201);
+4 -1
View File
@@ -17,5 +17,8 @@ dbnToolsWithTelemetry('search', $language, function () use ($input, $language):
$asOfDate = isset($input['as_of_date']) && preg_match('/^\d{4}(-\d{2}(-\d{2})?)?$/', $input['as_of_date'])
? $input['as_of_date']
: null;
return (new DbnLegalToolsService())->search($query, $language, $limit, $temporalMode, $asOfDate);
$scope = in_array($input['corpus_scope'] ?? '', ['shared', 'private', 'both'], true)
? $input['corpus_scope']
: 'both';
return (new DbnLegalToolsService())->search($query, $language, $limit, $temporalMode, $asOfDate, $scope);
});
+56 -14
View File
@@ -2,16 +2,18 @@
declare(strict_types=1);
/**
 * GET    /api/user-docs.php       — list uploaded documents for the current user
 * DELETE /api/user-docs.php?id=X  — remove a document
 * POST   /api/user-docs.php       — upload a document (file field = 'file')
 *
 * SSO users (dbn_tools_sso_uid set in session) are keyed by their SSO uid.
 * Other authenticated users are keyed by session_id() as a fallback.
 * Reads/writes the shared dobetternorge.dbn_user_docs table.
 * Requires DBN_DB_* env vars pointing at the dobetternorge database.
 */
require_once __DIR__ . '/../includes/bootstrap.php';
// Allowed HTTP verbs for this endpoint: GET lists documents, DELETE removes one,
// POST uploads one. Exactly one guard is used so POST is not turned away by a
// narrower GET/DELETE-only check before the upload handler runs.
dbnToolsRequireMethod('GET', 'DELETE', 'POST');
if (!dbnToolsIsAuthenticated()) {
http_response_code(401);
@@ -20,13 +22,9 @@ if (!dbnToolsIsAuthenticated()) {
exit;
}
// Key under which this user's documents are stored and looked up: the SSO uid
// when the session carries one, otherwise a key built from the PHP session id
// so that non-SSO authenticated users can upload, list and delete as well.
// There is deliberately no early exit for non-SSO users here; it would make
// the fallback key unreachable.
// NOTE(review): the fallback writes the raw session id into
// dbn_user_docs.user_id and into the Qdrant delete filter, and it changes
// whenever the session id changes — consider a hashed or otherwise stable
// identifier; confirm against the session setup.
$ssoUid  = (string)($_SESSION['dbn_tools_sso_uid'] ?? '');
$userKey = $ssoUid !== '' ? $ssoUid : 'sess_' . session_id();
header('Content-Type: application/json; charset=utf-8');
@@ -51,6 +49,50 @@ function dbnSharedDb(): ?PDO
$method = $_SERVER['REQUEST_METHOD'];
// ── POST — upload a document ──────────────────────────────────────────────────
// Runs the uploaded file through dbnToolsExtractUploadedFile(), records a
// metadata row in dbn_user_docs under $userKey (when the shared database is
// available) and echoes the new document descriptor as JSON.
if ($method === 'POST') {
    $upload = $_FILES['file'] ?? null;
    if (empty($upload) || !is_array($upload)) {
        http_response_code(422);
        echo json_encode(['ok' => false, 'error' => 'No file uploaded.']);
        exit;
    }

    try {
        $extracted = dbnToolsExtractUploadedFile($upload);
    } catch (Throwable $extractError) {
        // Extraction problems are reported to the caller as a 422 with the message.
        http_response_code(422);
        echo json_encode(['ok' => false, 'error' => $extractError->getMessage()]);
        exit;
    }

    // One chunk per started 1000 characters of extracted text (at least one);
    // zero when the extractor returned no 'text' entry.
    $chunkCount = 0;
    if (isset($extracted['text'])) {
        $chunkCount = max(1, (int)ceil(mb_strlen($extracted['text']) / 1000));
    }

    $safeName = basename((string)($upload['name'] ?? 'document'));
    $record   = [
        'doc_id'      => uniqid('wbd_', true),
        'filename'    => $safeName,
        'file_type'   => strtolower(pathinfo($safeName, PATHINFO_EXTENSION)),
        'chunk_count' => $chunkCount,
        'source'      => 'workbench',
        'created_at'  => gmdate('Y-m-d H:i:s'),
    ];

    // The row is only written when the shared database handle is available.
    $sharedDb = dbnSharedDb();
    if ($sharedDb) {
        $insert = $sharedDb->prepare(
            'INSERT INTO dbn_user_docs (id, user_id, filename, file_type, chunk_count, source, status, created_at)
             VALUES (?, ?, ?, ?, ?, ?, ?, ?)'
        );
        $insert->execute([
            $record['doc_id'],
            $userKey,
            $record['filename'],
            $record['file_type'],
            $record['chunk_count'],
            $record['source'],
            'ready',
            $record['created_at'],
        ]);
    }

    echo json_encode(['ok' => true, 'doc' => $record]);
    exit;
}
// ── DELETE ────────────────────────────────────────────────────────────────────
if ($method === 'DELETE') {
$docId = trim($_GET['id'] ?? '');
@@ -63,10 +105,10 @@ if ($method === 'DELETE') {
$db = dbnSharedDb();
if ($db) {
$stmt = $db->prepare('SELECT id FROM dbn_user_docs WHERE id = ? AND user_id = ?');
$stmt->execute([$docId, $ssoUid]);
$stmt->execute([$docId, $userKey]);
if ($stmt->fetch()) {
$db->prepare('DELETE FROM dbn_user_docs WHERE id = ? AND user_id = ?')
->execute([$docId, $ssoUid]);
->execute([$docId, $userKey]);
// Delete Qdrant points for this doc
$qdrantUrl = 'http://10.0.2.10:6333';
@@ -74,7 +116,7 @@ if ($method === 'DELETE') {
'filter' => [
'must' => [
['key' => 'doc_id', 'match' => ['value' => $docId]],
['key' => 'user_id', 'match' => ['value' => $ssoUid]],
['key' => 'user_id', 'match' => ['value' => $userKey]],
],
],
];
@@ -108,7 +150,7 @@ $stmt = $db->prepare(
ORDER BY created_at DESC
LIMIT 50'
);
$stmt->execute([$ssoUid, 'ready']);
$stmt->execute([$userKey, 'ready']);
$rows = $stmt->fetchAll();
$docs = array_map(static fn($r) => [