feat(corpus): add save-to-corpus + private corpus search scope

- POST /api/save-to-corpus.php — saves tool output text to user's default CaveauAI corpus via ClientRagPipeline
- api/case/upload.php — dual-writes uploaded PDFs to CaveauAI client_documents (best-effort)
- assets/js/corpus-save.js — shared <dialog> handler for .js-save-corpus buttons on all tool pages
- includes/layout_footer.php — injects corpus-save.js + shared save dialog markup
- korrespond/deep-research/barnevernet/discrepancy JS — save-to-corpus buttons on output sections
- api/search.php + LegalTools::search() — corpus_scope param ('shared'|'private'|'both'), merges personal CaveauAI corpus with shared legal library when 'both'
- includes/tool_form.php + assets/js/tools.js — corpus scope radio toggle shown on search tab
- api/user-docs.php — add POST upload method for non-SSO authenticated users

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-22 17:50:32 +02:00
parent ed329f9d05
commit b014638f39
13 changed files with 465 additions and 33 deletions
+71 -13
View File
@@ -20,7 +20,8 @@ final class DbnLegalToolsService
string $language = 'en',
int $limit = 6,
string $temporalMode = 'disabled',
?string $asOfDate = null
?string $asOfDate = null,
string $scope = 'both'
): array {
$query = trim($query);
if (mb_strlen($query, 'UTF-8') < 3) {
@@ -28,15 +29,24 @@ final class DbnLegalToolsService
}
$limit = max(1, min(10, $limit));
$temporalMode = in_array($temporalMode, ['legal_conservative', 'disabled'], true) ? $temporalMode : 'disabled';
$scope = in_array($scope, ['shared', 'private', 'both'], true) ? $scope : 'both';
$scopeLabel = match ($scope) {
'private' => 'personal corpus only',
'shared' => 'Legal Library only',
default => 'Legal Library + personal corpus',
};
$trace = [
$this->trace('Query interpretation', 'Searching Do Better Norge private corpus plus the subscribed family-legal package.', 'complete'),
$this->trace('Search tools used', 'ClientRagPipeline::searchAll with keyword mode, private corpus enabled, shared package filter set to family-legal.', 'running'),
$this->trace('Query interpretation', "Searching Do Better Norge {$scopeLabel}.", 'complete'),
$this->trace('Search tools used', 'ClientRagPipeline::searchAll with keyword mode.', 'running'),
];
$client = dbnToolsRequireClient();
$package = $this->requireFamilyPackage((int)$client['id']);
// Personal corpus client_id from session (may be 0 if user has no linked workspace)
$personalClientId = (int)($_SESSION['dbn_tools_client_id'] ?? 0);
$chunks = [];
$retrievalNote = 'ClientRagPipeline keyword retrieval';
try {
@@ -52,16 +62,64 @@ final class DbnLegalToolsService
// Retrieval still works in keyword mode without gateway config.
}
$rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30);
$chunks = $rag->searchAll($query, $limit, null, [
'search_private' => true,
'search_shared' => true,
'package_ids' => [(int)$package['id']],
'chunk_limit' => $limit,
'search_method' => 'keyword',
'min_private' => 0,
'include_beta_website' => true,
]);
if ($scope === 'private') {
// Search only the user's personal corpus; no search is run when the session has no personal client id
if ($personalClientId > 0) {
$rag = new ClientRagPipeline($personalClientId, $gatewayUrl, 30);
$chunks = $rag->searchAll($query, $limit, null, [
'search_private' => true,
'search_shared' => false,
'chunk_limit' => $limit,
'search_method' => 'keyword',
'min_private' => 0,
]);
}
} elseif ($scope === 'shared') {
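// Search only the shared legal library: the $client corpus (search_private) plus the family package; the user's personal corpus is not queried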
$rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30);
$chunks = $rag->searchAll($query, $limit, null, [
'search_private' => true,
'search_shared' => true,
'package_ids' => [(int)$package['id']],
'chunk_limit' => $limit,
'search_method' => 'keyword',
'min_private' => 0,
'include_beta_website' => true,
]);
} else {
// 'both': shared library + personal corpus merged and re-ranked by score
$rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30);
$sharedChunks = $rag->searchAll($query, $limit, null, [
'search_private' => true,
'search_shared' => true,
'package_ids' => [(int)$package['id']],
'chunk_limit' => $limit,
'search_method' => 'keyword',
'min_private' => 0,
'include_beta_website' => true,
]);
$privateChunks = [];
if ($personalClientId > 0) {
try {
$ragPrivate = new ClientRagPipeline($personalClientId, $gatewayUrl, 30);
$privateChunks = $ragPrivate->searchAll($query, $limit, null, [
'search_private' => true,
'search_shared' => false,
'chunk_limit' => $limit,
'search_method' => 'keyword',
'min_private' => 0,
]);
} catch (Throwable $e) {
error_log('[search] personal corpus query failed for client ' . $personalClientId . ': ' . $e->getMessage());
}
}
// Merge by score descending, cap at $limit
$merged = array_merge($sharedChunks, $privateChunks);
usort($merged, fn($a, $b) => ($b['score'] ?? 0) <=> ($a['score'] ?? 0));
$chunks = array_slice($merged, 0, $limit);
}
// Apply temporal reranking after retrieval (optional)
if ($temporalMode === 'legal_conservative' && !empty($chunks)) {