feat(corpus): add save-to-corpus + private corpus search scope

- POST /api/save-to-corpus.php — saves tool output text to user's default CaveauAI corpus via ClientRagPipeline
- api/case/upload.php — dual-writes uploaded PDFs to CaveauAI client_documents (best-effort)
- assets/js/corpus-save.js — shared <dialog> handler for .js-save-corpus buttons on all tool pages
- includes/layout_footer.php — injects corpus-save.js + shared save dialog markup
- korrespond/deep-research/barnevernet/discrepancy JS — save-to-corpus buttons on output sections
- api/search.php + LegalTools::search() — corpus_scope param ('shared'|'private'|'both'), merges personal CaveauAI corpus with shared legal library when 'both'
- includes/tool_form.php + assets/js/tools.js — corpus scope radio toggle shown on search tab
- api/user-docs.php — add POST upload method for non-SSO authenticated users

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-22 17:50:32 +02:00
parent ed329f9d05
commit b014638f39
13 changed files with 465 additions and 33 deletions
+95
View File
@@ -0,0 +1,95 @@
<?php
/**
 * POST /api/save-to-corpus.php
 *
 * Save tool output text into the user's CaveauAI corpus.
 * Uses dbnToolsBootCaveau() to call ClientRagPipeline directly via filesystem include.
 *
 * Request body (JSON, max 500 KB):
 *   title        string  (required)
 *   content      string  (required, min 30 chars)
 *   source_tool  string  (optional, slug)
 *   tags         string  (optional, comma-separated)
 *
 * Responses:
 *   201  {ok: true, document_id, chunks}            document stored and indexed
 *   400  bad_request / too_large                    invalid or missing fields
 *   403  no_workspace                               user has no linked client_id
 *   409  no_corpus                                  client has no default corpus
 *   500  index_failed (+ document_id)               row stored, indexing failed
 *   503  db_unavailable                             getDb() threw
 *
 * NOTE(review): every dbnToolsError() call below is assumed to terminate the
 * request; the control flow relies on it (e.g. $db is only defined on success).
 */
declare(strict_types=1);

require_once dirname(__DIR__) . '/includes/bootstrap.php';

dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

$user = dbnToolsAuthenticatedUser();
$clientId = (int)($user['client_id'] ?? 0);
if ($clientId <= 0) {
    dbnToolsError('No linked CaveauAI workspace. Log in via the CaveauAI portal first.', 403, 'no_workspace');
}

$input = dbnToolsJsonInput(500_000);

// With strict_types enabled, trim()/strtolower() throw a TypeError on non-string
// JSON values (numbers, arrays, objects). Reject those as a client error instead
// of letting them surface as an uncaught 500.
foreach (['title', 'content', 'source_tool', 'tags'] as $field) {
    if (isset($input[$field]) && !is_string($input[$field])) {
        dbnToolsError($field . ' must be a string.', 400, 'bad_request');
    }
}

$title = trim($input['title'] ?? '');
$content = trim($input['content'] ?? '');

// Reduce source_tool to a lowercase slug of at most 64 characters; empty means "not given".
$slug = (string)preg_replace('/[^a-z0-9\-_]/', '', strtolower($input['source_tool'] ?? ''));
$slug = substr($slug, 0, 64);
$sourceTool = $slug !== '' ? $slug : null;

// Tags arrive comma-separated and are stored as a JSON list. Filter on the empty
// string explicitly so that a tag such as "0" is not discarded as falsy.
$rawTags = trim($input['tags'] ?? '');
$tags = json_encode(
    array_values(array_filter(
        array_map('trim', explode(',', $rawTags)),
        static fn (string $tag): bool => $tag !== ''
    )),
    JSON_UNESCAPED_UNICODE
);

if ($title === '') {
    dbnToolsError('title is required.', 400, 'bad_request');
}
if (strlen($content) < 30) {
    dbnToolsError('content too short (min 30 chars).', 400, 'bad_request');
}
// NOTE(review): the body is read with a 500_000 limit above, so this 2 MB guard
// is presumably unreachable — confirm what dbnToolsJsonInput() enforces.
if (strlen($content) > 2_000_000) {
    dbnToolsError('content exceeds 2 MB limit.', 400, 'too_large');
}

// Load CaveauAI platform (getDb, ClientRagPipeline, etc.)
dbnToolsBootCaveau();

try {
    $db = getDb();
} catch (Throwable $e) {
    // Keep driver/connection details in the server log; do not echo them to the client.
    error_log('[save-to-corpus] getDb() failed: ' . $e->getMessage());
    dbnToolsError('CaveauAI database unavailable. Please try again later.', 503, 'db_unavailable');
}

// Resolve default corpus for this client
$stmt = $db->prepare('SELECT id FROM client_corpora WHERE client_id = ? AND is_default = 1 LIMIT 1');
$stmt->execute([$clientId]);
$corpusId = (int)($stmt->fetchColumn() ?: 0);
if ($corpusId === 0) {
    dbnToolsError(
        'No default corpus found for your account. Set one up in the CaveauAI portal.',
        409,
        'no_corpus'
    );
}

// str_word_count() is not multibyte-aware and splits words at UTF-8 letters such
// as æ/ø/å. Count runs of Unicode letters/digits (allowing inner ' and -) instead,
// and fall back to str_word_count() only if the content is not valid UTF-8.
$wordCount = preg_match_all("/[\\p{L}\\p{N}][\\p{L}\\p{N}'\\-]*/u", $content);
if ($wordCount === false) {
    $wordCount = str_word_count($content);
}

// Store the document as 'pending'; the ingest step below is expected to update it.
$ins = $db->prepare("
    INSERT INTO client_documents
        (client_id, corpus_id, title, source_type, content, category,
         tags, import_method, source_tool, word_count, status)
    VALUES (?, ?, ?, 'text', ?, 'tool-output', ?, 'tool_output', ?, ?, 'pending')
");
$ins->execute([$clientId, $corpusId, $title, $content, $tags, $sourceTool, $wordCount]);
$docId = (int)$db->lastInsertId();

try {
    $rag = new ClientRagPipeline($clientId);
    $chunks = $rag->ingestDocument($docId);
} catch (Throwable $e) {
    // Document is saved but not indexed — mark error and return partial success.
    error_log('[save-to-corpus] indexing failed for document ' . $docId . ': ' . $e->getMessage());
    try {
        $db->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
            ->execute([$e->getMessage(), $docId]);
    } catch (Throwable $markError) {
        // Failing to record the status must not mask the indexing failure reported below.
        error_log('[save-to-corpus] could not mark document ' . $docId . ' as error: ' . $markError->getMessage());
    }
    dbnToolsError(
        'Saved to corpus but indexing failed.',
        500,
        'index_failed',
        ['document_id' => $docId]
    );
}

dbnToolsRespond(['ok' => true, 'document_id' => $docId, 'chunks' => $chunks], 201);