feat(corpus): add save-to-corpus + private corpus search scope

- POST /api/save-to-corpus.php — saves tool output text to user's default CaveauAI corpus via ClientRagPipeline
- api/case/upload.php — dual-writes uploaded PDFs to CaveauAI client_documents (best-effort)
- assets/js/corpus-save.js — shared <dialog> handler for .js-save-corpus buttons on all tool pages
- includes/layout_footer.php — injects corpus-save.js + shared save dialog markup
- korrespond/deep-research/barnevernet/discrepancy JS — save-to-corpus buttons on output sections
- api/search.php + LegalTools::search() — corpus_scope param ('shared'|'private'|'both'), merges personal CaveauAI corpus with shared legal library when 'both'
- includes/tool_form.php + assets/js/tools.js — corpus scope radio toggle shown on search tab
- api/user-docs.php — add POST upload method for non-SSO authenticated users

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-22 17:50:32 +02:00
parent ed329f9d05
commit b014638f39
13 changed files with 465 additions and 33 deletions
+71 -13
View File
@@ -20,7 +20,8 @@ final class DbnLegalToolsService
string $language = 'en',
int $limit = 6,
string $temporalMode = 'disabled',
?string $asOfDate = null
?string $asOfDate = null,
string $scope = 'both'
): array {
$query = trim($query);
if (mb_strlen($query, 'UTF-8') < 3) {
@@ -28,15 +29,24 @@ final class DbnLegalToolsService
}
$limit = max(1, min(10, $limit));
$temporalMode = in_array($temporalMode, ['legal_conservative', 'disabled'], true) ? $temporalMode : 'disabled';
$scope = in_array($scope, ['shared', 'private', 'both'], true) ? $scope : 'both';
$scopeLabel = match ($scope) {
'private' => 'personal corpus only',
'shared' => 'Legal Library only',
default => 'Legal Library + personal corpus',
};
$trace = [
$this->trace('Query interpretation', 'Searching Do Better Norge private corpus plus the subscribed family-legal package.', 'complete'),
$this->trace('Search tools used', 'ClientRagPipeline::searchAll with keyword mode, private corpus enabled, shared package filter set to family-legal.', 'running'),
$this->trace('Query interpretation', "Searching Do Better Norge {$scopeLabel}.", 'complete'),
$this->trace('Search tools used', 'ClientRagPipeline::searchAll with keyword mode.', 'running'),
];
$client = dbnToolsRequireClient();
$package = $this->requireFamilyPackage((int)$client['id']);
// Personal corpus client_id from session (may be 0 if user has no linked workspace)
$personalClientId = (int)($_SESSION['dbn_tools_client_id'] ?? 0);
$chunks = [];
$retrievalNote = 'ClientRagPipeline keyword retrieval';
try {
@@ -52,16 +62,64 @@ final class DbnLegalToolsService
// Retrieval still works in keyword mode without gateway config.
}
$rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30);
$chunks = $rag->searchAll($query, $limit, null, [
'search_private' => true,
'search_shared' => true,
'package_ids' => [(int)$package['id']],
'chunk_limit' => $limit,
'search_method' => 'keyword',
'min_private' => 0,
'include_beta_website' => true,
]);
if ($scope === 'private') {
// Search only the user's personal corpus
if ($personalClientId > 0) {
$rag = new ClientRagPipeline($personalClientId, $gatewayUrl, 30);
$chunks = $rag->searchAll($query, $limit, null, [
'search_private' => true,
'search_shared' => false,
'chunk_limit' => $limit,
'search_method' => 'keyword',
'min_private' => 0,
]);
}
} elseif ($scope === 'shared') {
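// "Legal Library" scope: query via the Do Better Norge client with the family-legal package filter.
// The user's personal corpus ($personalClientId) is not queried here; note that search_private stays
// true, so this client's own corpus is presumably still searched alongside the shared package.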
$rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30);
$chunks = $rag->searchAll($query, $limit, null, [
'search_private' => true,
'search_shared' => true,
'package_ids' => [(int)$package['id']],
'chunk_limit' => $limit,
'search_method' => 'keyword',
'min_private' => 0,
'include_beta_website' => true,
]);
} else {
// 'both': shared library + personal corpus merged and re-ranked by score
$rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30);
$sharedChunks = $rag->searchAll($query, $limit, null, [
'search_private' => true,
'search_shared' => true,
'package_ids' => [(int)$package['id']],
'chunk_limit' => $limit,
'search_method' => 'keyword',
'min_private' => 0,
'include_beta_website' => true,
]);
$privateChunks = [];
if ($personalClientId > 0) {
try {
$ragPrivate = new ClientRagPipeline($personalClientId, $gatewayUrl, 30);
$privateChunks = $ragPrivate->searchAll($query, $limit, null, [
'search_private' => true,
'search_shared' => false,
'chunk_limit' => $limit,
'search_method' => 'keyword',
'min_private' => 0,
]);
} catch (Throwable $e) {
error_log('[search] personal corpus query failed for client ' . $personalClientId . ': ' . $e->getMessage());
}
}
// Merge by score descending, cap at $limit
$merged = array_merge($sharedChunks, $privateChunks);
usort($merged, fn($a, $b) => ($b['score'] ?? 0) <=> ($a['score'] ?? 0));
$chunks = array_slice($merged, 0, $limit);
}
// Apply temporal reranking after retrieval (optional)
if ($temporalMode === 'legal_conservative' && !empty($chunks)) {
+21
View File
@@ -26,5 +26,26 @@
<?php if (!empty($extraScripts) && is_array($extraScripts)): foreach ($extraScripts as $extraScript): ?>
<script src="<?= htmlspecialchars((string)$extraScript) ?>" defer></script>
<?php endforeach; endif; ?>
<script src="assets/js/corpus-save.js" defer></script>
<!-- Save-to-corpus dialog (shared across all tool pages).
     aria-labelledby points at the heading so assistive technology announces the
     dialog with a name when it opens. Element ids and classes used as script
     hooks are unchanged. -->
<dialog id="save-corpus-dialog" class="save-corpus-dialog" aria-labelledby="save-corpus-heading">
  <form method="dialog" id="save-corpus-form">
    <h3 id="save-corpus-heading">Save to corpus</h3>
    <p class="save-corpus-hint">This will be indexed and searchable in your private corpus.</p>
    <label>
      <!-- The asterisk is decorative; the required attribute conveys the constraint. -->
      <span>Title <span aria-hidden="true">*</span></span>
      <input id="save-corpus-title" type="text" required placeholder="Give this entry a title…" autocomplete="off">
    </label>
    <label>
      <span>Tags <span class="save-corpus-optional">(comma-separated)</span></span>
      <input id="save-corpus-tags" type="text" placeholder="e.g. barnevern, 2024, kjennelse">
    </label>
    <menu>
      <button type="submit" class="btn-primary">Save to corpus</button>
      <!-- type="button" keeps Cancel from submitting the method="dialog" form. -->
      <button type="button" id="save-corpus-cancel">Cancel</button>
    </menu>
  </form>
</dialog>
</body>
</html>
+7
View File
@@ -19,6 +19,13 @@
<input type="number" id="numSpeakersInput" name="num_speakers" min="2" max="10" placeholder="auto" class="num-speakers-input" aria-label="Expected speaker count">
</div>
<!-- Corpus scope selector for search. The row carries the is-hidden class
     (presumably toggled by script when the search tab is active; confirm in tools.js).
     Radio values both / shared / private match the scope values accepted by the
     search service; "both" is preselected. -->
<div class="control-row is-hidden" id="corpusScopeControl">
<span class="control-label">Search</span>
<label><input type="radio" name="corpusScope" value="both" checked> Legal Library + My Docs</label>
<label><input type="radio" name="corpusScope" value="shared"> Legal Library only</label>
<label><input type="radio" name="corpusScope" value="private"> My Docs only</label>
</div>
<div class="control-row is-hidden" id="redactionControl">
<span class="control-label">Mode</span>
<label><input type="radio" name="redactionMode" value="standard" checked> Standard</label>