feat(corpus): add save-to-corpus + private corpus search scope
- POST /api/save-to-corpus.php — saves tool output text to user's default CaveauAI corpus via ClientRagPipeline
- api/case/upload.php — dual-writes uploaded PDFs to CaveauAI client_documents (best-effort)
- assets/js/corpus-save.js — shared <dialog> handler for .js-save-corpus buttons on all tool pages
- includes/layout_footer.php — injects corpus-save.js + shared save dialog markup
- korrespond/deep-research/barnevernet/discrepancy JS — save-to-corpus buttons on output sections
- api/search.php + LegalTools::search() — corpus_scope param ('shared'|'private'|'both'), merges personal CaveauAI corpus with shared legal library when 'both'
- includes/tool_form.php + assets/js/tools.js — corpus scope radio toggle shown on search tab
- api/user-docs.php — add POST upload method for non-SSO authenticated users
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+45
-5
@@ -40,11 +40,51 @@ if (strncmp($head, '%PDF-', 5) !== 0) {
|
||||
try {
|
||||
$doc = CaseStore::registerUpload($userId, $name, $tmp, $size);
|
||||
CaseStore::caseEnqueueIngest((int)$doc['doc_id'], $userId);
|
||||
dbnToolsRespond([
|
||||
'ok' => true,
|
||||
'doc_id' => $doc['doc_id'],
|
||||
'filename' => $doc['filename'],
|
||||
]);
|
||||
} catch (Throwable $e) {
|
||||
dbnToolsError($e->getMessage(), 400, 'upload_failed');
|
||||
}
|
||||
|
||||
// Dual-write to CaveauAI corpus (best-effort — never fails the upload).
// Any failure in this section is logged and swallowed, so the response at the
// bottom is sent regardless of what happens here.
$caveauDocId = null;
$clientId = (int)($_SESSION['dbn_tools_client_id'] ?? 0);
if ($clientId > 0 && !empty($doc['storage_path'])) {
    try {
        dbnToolsBootCaveau();
        $textExtractFile = dbnToolsAiPortalRoot() . '/platform/includes/text_extract.php';
        if (is_file($textExtractFile)) {
            require_once $textExtractFile;
            $content = extractPdfText($doc['storage_path']);
            // Skip PDFs whose extracted text is 30 bytes or shorter.
            if (strlen($content) > 30) {
                $caveauDb = getDb();
                $corpusSt = $caveauDb->prepare(
                    'SELECT id FROM client_corpora WHERE client_id = ? AND is_default = 1 LIMIT 1'
                );
                $corpusSt->execute([$clientId]);
                $corpusId = (int)($corpusSt->fetchColumn() ?: 0);
                if ($corpusId > 0) {
                    $title = pathinfo($doc['filename'], PATHINFO_FILENAME);

                    // str_word_count() is byte/locale based and splits words at
                    // multi-byte letters, so count whitespace-separated tokens
                    // instead. preg_split() returns false on invalid UTF-8; fall
                    // back to the old counter in that case.
                    $words = preg_split('/\s+/u', $content, -1, PREG_SPLIT_NO_EMPTY);
                    $wordCount = $words === false ? str_word_count($content) : count($words);

                    $caveauDb->prepare("
                        INSERT INTO client_documents
                        (client_id, corpus_id, title, source_type, content, category,
                        import_method, word_count, status)
                        VALUES (?, ?, ?, 'pdf', ?, 'user-upload', 'dbn_upload', ?, 'pending')
                    ")->execute([$clientId, $corpusId, $title, $content, $wordCount]);
                    $caveauDocId = (int)$caveauDb->lastInsertId();

                    try {
                        $rag = new ClientRagPipeline($clientId);
                        $rag->ingestDocument($caveauDocId);
                    } catch (Throwable $ingestError) {
                        // The row already exists; flag it so it does not stay
                        // 'pending' forever after a failed ingest.
                        try {
                            $caveauDb->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
                                ->execute([$ingestError->getMessage(), $caveauDocId]);
                        } catch (Throwable $markError) {
                            error_log('[upload] could not flag CaveauAI doc ' . $caveauDocId . ' as failed: ' . $markError->getMessage());
                        }
                        // Re-throw so the outer handler logs the original failure.
                        throw $ingestError;
                    }
                }
            }
        }
    } catch (Throwable $e) {
        // Non-fatal: log and continue
        error_log('[upload] CaveauAI dual-write failed for doc ' . ($doc['doc_id'] ?? '?') . ': ' . $e->getMessage());
    }
}

// caveau_doc_id is null when no CaveauAI row was created.
dbnToolsRespond([
    'ok' => true,
    'doc_id' => $doc['doc_id'],
    'filename' => $doc['filename'],
    'caveau_doc_id' => $caveauDocId,
]);
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
<?php
|
||||
/**
|
||||
* POST /api/save-to-corpus.php
|
||||
*
|
||||
* Save tool output text into the user's CaveauAI corpus.
|
||||
* Uses dbnToolsBootCaveau() to call ClientRagPipeline directly via filesystem include.
|
||||
*
|
||||
* Request body (JSON, max 500 KB):
|
||||
* title string (required)
|
||||
* content string (required, min 30 chars)
|
||||
* source_tool string (optional, slug)
|
||||
* tags string (optional, comma-separated)
|
||||
*/
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
require_once dirname(__DIR__) . '/includes/bootstrap.php';
|
||||
|
||||
// Gate the endpoint: POST requests from authenticated callers only.
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

// A positive client_id on the authenticated user record is required; without
// it the request is rejected with 403 / no_workspace.
$authUser = dbnToolsAuthenticatedUser();
$clientId = (int)($authUser['client_id'] ?? 0);
if ($clientId < 1) {
    dbnToolsError('No linked CaveauAI workspace. Log in via the CaveauAI portal first.', 403, 'no_workspace');
}
|
||||
|
||||
// Decode the JSON request body (dbnToolsJsonInput is called with a 500 000 limit).
$input = dbnToolsJsonInput(500_000);

// JSON values are not guaranteed to be strings. With strict_types enabled,
// handing an array/number/bool to trim() or strtolower() throws a TypeError,
// so any non-string value is treated as absent and then rejected (or ignored,
// for the optional fields) by the checks below.
$readString = static function (string $key) use ($input): string {
    $value = is_array($input) ? ($input[$key] ?? '') : '';
    return is_string($value) ? trim($value) : '';
};

$title   = $readString('title');
$content = $readString('content');

// source_tool is reduced to a lowercase slug of at most 64 characters.
// An empty result means "not supplied" and is stored as NULL.
$slug = substr((string)preg_replace('/[^a-z0-9\-_]/', '', strtolower($readString('source_tool'))), 0, 64);
$sourceTool = $slug !== '' ? $slug : null;

// tags arrive comma-separated and are stored as a JSON list of trimmed,
// non-empty strings. The explicit callback keeps a literal "0" tag, which a
// callback-less array_filter() would discard.
$tagList = array_values(array_filter(
    array_map('trim', explode(',', $readString('tags'))),
    static fn (string $tag): bool => $tag !== ''
));
$tags = json_encode($tagList, JSON_UNESCAPED_UNICODE);
if ($tags === false) {
    // Never bind boolean false into the tags column.
    $tags = '[]';
}

if ($title === '') {
    dbnToolsError('title is required.', 400, 'bad_request');
}
// The minimum is documented in characters, so count characters rather than bytes.
if (mb_strlen($content, 'UTF-8') < 30) {
    dbnToolsError('content too short (min 30 chars).', 400, 'bad_request');
}
// The upper bound is a size limit and is therefore measured in bytes.
if (strlen($content) > 2_000_000) {
    dbnToolsError('content exceeds 2 MB limit.', 400, 'too_large');
}
|
||||
|
||||
// Load CaveauAI platform (getDb, ClientRagPipeline, etc.) and open its database.
// Both steps are inside the try so a failure in either one produces the JSON
// 503 response instead of an uncaught exception.
try {
    dbnToolsBootCaveau();
    $db = getDb();
} catch (Throwable $e) {
    // Exception messages from the platform/driver may contain hosts, user names
    // or paths: keep them in the server log and send the client a generic message.
    error_log('[save-to-corpus] CaveauAI database unavailable: ' . $e->getMessage());
    dbnToolsError('CaveauAI database unavailable.', 503, 'db_unavailable');
}
|
||||
|
||||
// Find the corpus flagged is_default for this client; a missing one is
// reported as 409 / no_corpus.
$corpusLookup = $db->prepare('SELECT id FROM client_corpora WHERE client_id = ? AND is_default = 1 LIMIT 1');
$corpusLookup->execute([$clientId]);
$defaultCorpus = $corpusLookup->fetchColumn();
$corpusId = $defaultCorpus ? (int)$defaultCorpus : 0;

if ($corpusId === 0) {
    dbnToolsError(
        'No default corpus found for your account. Set one up in the CaveauAI portal.',
        409,
        'no_corpus'
    );
}
|
||||
|
||||
// str_word_count() is byte/locale based and splits words at multi-byte letters,
// so count whitespace-separated tokens instead. preg_split() returns false on
// invalid UTF-8; fall back to the old counter in that case.
$words = preg_split('/\s+/u', $content, -1, PREG_SPLIT_NO_EMPTY);
$wordCount = $words === false ? str_word_count($content) : count($words);

// Store the document first (status 'pending'); indexing happens afterwards.
$docId = 0;
try {
    $ins = $db->prepare("
        INSERT INTO client_documents
        (client_id, corpus_id, title, source_type, content, category,
        tags, import_method, source_tool, word_count, status)
        VALUES (?, ?, ?, 'text', ?, 'tool-output', ?, 'tool_output', ?, ?, 'pending')
    ");
    // Checked explicitly so a driver in a non-throwing error mode is also caught.
    if ($ins->execute([$clientId, $corpusId, $title, $content, $tags, $sourceTool, $wordCount])) {
        $docId = (int)$db->lastInsertId();
    }
} catch (Throwable $e) {
    error_log('[save-to-corpus] insert failed for client ' . $clientId . ': ' . $e->getMessage());
}
if ($docId <= 0) {
    dbnToolsError('Could not save the document.', 500, 'save_failed');
}

try {
    $rag = new ClientRagPipeline($clientId);
    $chunks = $rag->ingestDocument($docId);
} catch (Throwable $e) {
    // Document is saved but not indexed — mark error and return partial success.
    // The detailed message goes to the server log and the document row only;
    // the client receives a generic message plus the document id.
    error_log('[save-to-corpus] indexing failed for doc ' . $docId . ': ' . $e->getMessage());
    try {
        $db->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
            ->execute([$e->getMessage(), $docId]);
    } catch (Throwable $markError) {
        // Failing to flag the row must not hide the index_failed response below.
        error_log('[save-to-corpus] could not flag doc ' . $docId . ' as failed: ' . $markError->getMessage());
    }
    dbnToolsError(
        'Saved to corpus but indexing failed.',
        500,
        'index_failed',
        ['document_id' => $docId]
    );
}

// 201: the document was stored and indexed; chunks is whatever ingestDocument() returned.
dbnToolsRespond(['ok' => true, 'document_id' => $docId, 'chunks' => $chunks], 201);
|
||||
+4
-1
@@ -17,5 +17,8 @@ dbnToolsWithTelemetry('search', $language, function () use ($input, $language):
|
||||
$asOfDate = isset($input['as_of_date']) && preg_match('/^\d{4}(-\d{2}(-\d{2})?)?$/', $input['as_of_date'])
|
||||
? $input['as_of_date']
|
||||
: null;
|
||||
return (new DbnLegalToolsService())->search($query, $language, $limit, $temporalMode, $asOfDate);
|
||||
$scope = in_array($input['corpus_scope'] ?? '', ['shared', 'private', 'both'], true)
|
||||
? $input['corpus_scope']
|
||||
: 'both';
|
||||
return (new DbnLegalToolsService())->search($query, $language, $limit, $temporalMode, $asOfDate, $scope);
|
||||
});
|
||||
|
||||
+56
-14
@@ -2,16 +2,18 @@
|
||||
declare(strict_types=1);
|
||||
|
||||
/**
|
||||
* GET /api/user-docs.php — list SSO user's uploaded documents
|
||||
* GET /api/user-docs.php — list uploaded documents for current user
|
||||
* DELETE /api/user-docs.php?id=X — remove a document
|
||||
* POST /api/user-docs.php — upload a document (file field = 'file')
|
||||
*
|
||||
* Only available for SSO users (dbn_tools_sso_uid set in session).
|
||||
* Reads from the shared dobetternorge.dbn_user_docs table, keyed by sso_uid.
|
||||
* SSO users (dbn_tools_sso_uid) are keyed by their SSO uid.
|
||||
* Other authenticated users are keyed by session_id() as a fallback.
|
||||
* Reads/writes the shared dobetternorge.dbn_user_docs table.
|
||||
* Requires DBN_DB_* env vars pointing at the dobetternorge database.
|
||||
*/
|
||||
require_once __DIR__ . '/../includes/bootstrap.php';
|
||||
|
||||
dbnToolsRequireMethod('GET', 'DELETE');
|
||||
dbnToolsRequireMethod('GET', 'DELETE', 'POST');
|
||||
|
||||
if (!dbnToolsIsAuthenticated()) {
|
||||
http_response_code(401);
|
||||
@@ -20,13 +22,9 @@ if (!dbnToolsIsAuthenticated()) {
|
||||
exit;
|
||||
}
|
||||
|
||||
// Only SSO users have shared docs
|
||||
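// SSO uid for SSO users; other authenticated sessions fall back to a key built from the PHP session id.
// NOTE: that fallback key only lasts as long as the session id does — it changes when the session is regenerated or expires.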
|
||||
$ssoUid = (string)($_SESSION['dbn_tools_sso_uid'] ?? '');
|
||||
if ($ssoUid === '') {
|
||||
header('Content-Type: application/json');
|
||||
echo json_encode(['ok' => true, 'docs' => [], 'reason' => 'sso_only']);
|
||||
exit;
|
||||
}
|
||||
$userKey = $ssoUid !== '' ? $ssoUid : 'sess_' . session_id();
|
||||
|
||||
header('Content-Type: application/json; charset=utf-8');
|
||||
|
||||
@@ -51,6 +49,50 @@ function dbnSharedDb(): ?PDO
|
||||
|
||||
$method = $_SERVER['REQUEST_METHOD'];
|
||||
|
||||
// ── POST — upload a document ──────────────────────────────────────────────────
|
||||
if ($method === 'POST') {
    // Emits a JSON error body with the given HTTP status and ends the request.
    $fail = static function (int $status, string $message): void {
        http_response_code($status);
        echo json_encode(['ok' => false, 'error' => $message]);
        exit;
    };

    if (empty($_FILES['file']) || !is_array($_FILES['file'])) {
        $fail(422, 'No file uploaded.');
    }

    try {
        $extracted = dbnToolsExtractUploadedFile($_FILES['file']);
    } catch (Throwable $e) {
        $fail(422, $e->getMessage());
    }

    $docId    = uniqid('wbd_', true);
    $filename = basename((string)($_FILES['file']['name'] ?? 'document'));
    $fileType = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
    // chunk_count: one chunk per started 1000 characters of extracted text
    // (minimum 1), or 0 when the extractor returned no 'text' entry.
    $chunks = isset($extracted['text']) ? max(1, (int)ceil(mb_strlen($extracted['text']) / 1000)) : 0;
    $now    = gmdate('Y-m-d H:i:s');

    // The dbn_user_docs row is the only thing this branch persists, so a missing
    // database connection is reported as a failure instead of ok:true.
    $db = dbnSharedDb();
    if (!$db) {
        $fail(503, 'Document storage is unavailable.');
    }

    try {
        $db->prepare(
            'INSERT INTO dbn_user_docs (id, user_id, filename, file_type, chunk_count, source, status, created_at)
             VALUES (?, ?, ?, ?, ?, ?, ?, ?)'
        )->execute([$docId, $userKey, $filename, $fileType, $chunks, 'workbench', 'ready', $now]);
    } catch (Throwable $e) {
        // Keep driver details in the server log; the client gets valid JSON.
        error_log('[user-docs] insert failed: ' . $e->getMessage());
        $fail(500, 'Could not save the document.');
    }

    echo json_encode([
        'ok' => true,
        'doc' => [
            'doc_id' => $docId,
            'filename' => $filename,
            'file_type' => $fileType,
            'chunk_count' => $chunks,
            'source' => 'workbench',
            'created_at' => $now,
        ],
    ]);
    exit;
}
|
||||
|
||||
// ── DELETE ────────────────────────────────────────────────────────────────────
|
||||
if ($method === 'DELETE') {
|
||||
$docId = trim($_GET['id'] ?? '');
|
||||
@@ -63,10 +105,10 @@ if ($method === 'DELETE') {
|
||||
$db = dbnSharedDb();
|
||||
if ($db) {
|
||||
$stmt = $db->prepare('SELECT id FROM dbn_user_docs WHERE id = ? AND user_id = ?');
|
||||
$stmt->execute([$docId, $ssoUid]);
|
||||
$stmt->execute([$docId, $userKey]);
|
||||
if ($stmt->fetch()) {
|
||||
$db->prepare('DELETE FROM dbn_user_docs WHERE id = ? AND user_id = ?')
|
||||
->execute([$docId, $ssoUid]);
|
||||
->execute([$docId, $userKey]);
|
||||
|
||||
// Delete Qdrant points for this doc
|
||||
$qdrantUrl = 'http://10.0.2.10:6333';
|
||||
@@ -74,7 +116,7 @@ if ($method === 'DELETE') {
|
||||
'filter' => [
|
||||
'must' => [
|
||||
['key' => 'doc_id', 'match' => ['value' => $docId]],
|
||||
['key' => 'user_id', 'match' => ['value' => $ssoUid]],
|
||||
['key' => 'user_id', 'match' => ['value' => $userKey]],
|
||||
],
|
||||
],
|
||||
];
|
||||
@@ -108,7 +150,7 @@ $stmt = $db->prepare(
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 50'
|
||||
);
|
||||
$stmt->execute([$ssoUid, 'ready']);
|
||||
$stmt->execute([$userKey, 'ready']);
|
||||
$rows = $stmt->fetchAll();
|
||||
|
||||
$docs = array_map(static fn($r) => [
|
||||
|
||||
Reference in New Issue
Block a user