From b014638f39d0c949dbab0044dcb54b20867b9213 Mon Sep 17 00:00:00 2001 From: davegilligan Date: Fri, 22 May 2026 17:50:32 +0200 Subject: [PATCH] feat(corpus): add save-to-corpus + private corpus search scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - POST /api/save-to-corpus.php — saves tool output text to user's default CaveauAI corpus via ClientRagPipeline - api/case/upload.php — dual-writes uploaded PDFs to CaveauAI client_documents (best-effort) - assets/js/corpus-save.js — shared handler for .js-save-corpus buttons on all tool pages - includes/layout_footer.php — injects corpus-save.js + shared save dialog markup - korrespond/deep-research/barnevernet/discrepancy JS — save-to-corpus buttons on output sections - api/search.php + LegalTools::search() — corpus_scope param ('shared'|'private'|'both'), merges personal CaveauAI corpus with shared legal library when 'both' - includes/tool_form.php + assets/js/tools.js — corpus scope radio toggle shown on search tab - api/user-docs.php — add POST upload method for non-SSO authenticated users Co-Authored-By: Claude Sonnet 4.6 --- api/case/upload.php | 50 +++++++++++++++-- api/save-to-corpus.php | 95 +++++++++++++++++++++++++++++++++ api/search.php | 5 +- api/user-docs.php | 70 +++++++++++++++++++----- assets/js/barnevernet.js | 15 ++++++ assets/js/corpus-save.js | 106 +++++++++++++++++++++++++++++++++++++ assets/js/deep-research.js | 15 ++++++ assets/js/discrepancy.js | 11 ++++ assets/js/korrespond.js | 12 +++++ assets/js/tools.js | 7 +++ includes/LegalTools.php | 84 ++++++++++++++++++++++++----- includes/layout_footer.php | 21 ++++++++ includes/tool_form.php | 7 +++ 13 files changed, 465 insertions(+), 33 deletions(-) create mode 100644 api/save-to-corpus.php create mode 100644 assets/js/corpus-save.js diff --git a/api/case/upload.php b/api/case/upload.php index 463d436..c793270 100644 --- a/api/case/upload.php +++ b/api/case/upload.php @@ -40,11 +40,51 @@ if (strncmp($head, '%PDF-', 5) !== 0) { try { $doc = CaseStore::registerUpload($userId, $name, $tmp, $size); CaseStore::caseEnqueueIngest((int)$doc['doc_id'], $userId); - dbnToolsRespond([ - 'ok' => true, - 'doc_id' => $doc['doc_id'], - 'filename' => $doc['filename'], - ]); } catch (Throwable $e) { dbnToolsError($e->getMessage(), 400, 'upload_failed'); } + +// Dual-write to CaveauAI corpus (best-effort — never fails the upload) +$caveauDocId = null; +$clientId = (int)($_SESSION['dbn_tools_client_id'] ?? 0); +if ($clientId > 0 && !empty($doc['storage_path'])) { + try { + dbnToolsBootCaveau(); + $aiPortalRoot = dbnToolsAiPortalRoot(); + $textExtractFile = $aiPortalRoot . '/platform/includes/text_extract.php'; + if (is_file($textExtractFile)) { + require_once $textExtractFile; + $content = extractPdfText($doc['storage_path']); + if ($content !== '' && strlen($content) > 30) { + $caveauDb = getDb(); + $corpusSt = $caveauDb->prepare( + 'SELECT id FROM client_corpora WHERE client_id = ? AND is_default = 1 LIMIT 1' + ); + $corpusSt->execute([$clientId]); + $corpusId = (int)($corpusSt->fetchColumn() ?: 0); + if ($corpusId > 0) { + $title = pathinfo($doc['filename'], PATHINFO_FILENAME); + $caveauDb->prepare(" + INSERT INTO client_documents + (client_id, corpus_id, title, source_type, content, category, + import_method, word_count, status) + VALUES (?, ?, ?, 'pdf', ?, 'user-upload', 'dbn_upload', ?, 'pending') + ")->execute([$clientId, $corpusId, $title, $content, str_word_count($content)]); + $caveauDocId = (int)$caveauDb->lastInsertId(); + $rag = new ClientRagPipeline($clientId); + $rag->ingestDocument($caveauDocId); + } + } + } + } catch (Throwable $e) { + // Non-fatal: log and continue + error_log('[upload] CaveauAI dual-write failed for doc ' . ($doc['doc_id'] ?? '?') . ': ' . $e->getMessage()); + } +} + +dbnToolsRespond([ + 'ok' => true, + 'doc_id' => $doc['doc_id'], + 'filename' => $doc['filename'], + 'caveau_doc_id' => $caveauDocId, +]); diff --git a/api/save-to-corpus.php b/api/save-to-corpus.php new file mode 100644 index 0000000..9698200 --- /dev/null +++ b/api/save-to-corpus.php @@ -0,0 +1,95 @@ + 2_000_000) { + dbnToolsError('content exceeds 2 MB limit.', 400, 'too_large'); +} + +// Load CaveauAI platform (getDb, ClientRagPipeline, etc.) +dbnToolsBootCaveau(); + +try { + $db = getDb(); +} catch (Throwable $e) { + dbnToolsError('CaveauAI database unavailable: ' . $e->getMessage(), 503, 'db_unavailable'); +} + +// Resolve default corpus for this client +$stmt = $db->prepare('SELECT id FROM client_corpora WHERE client_id = ? AND is_default = 1 LIMIT 1'); +$stmt->execute([$clientId]); +$corpusId = (int)($stmt->fetchColumn() ?: 0); +if ($corpusId === 0) { + dbnToolsError( + 'No default corpus found for your account. Set one up in the CaveauAI portal.', + 409, + 'no_corpus' + ); +} + +$wordCount = str_word_count($content); + +$ins = $db->prepare(" + INSERT INTO client_documents + (client_id, corpus_id, title, source_type, content, category, + tags, import_method, source_tool, word_count, status) + VALUES (?, ?, ?, 'text', ?, 'tool-output', ?, 'tool_output', ?, ?, 'pending') +"); +$ins->execute([$clientId, $corpusId, $title, $content, $tags, $sourceTool, $wordCount]); +$docId = (int)$db->lastInsertId(); + +try { + $rag = new ClientRagPipeline($clientId); + $chunks = $rag->ingestDocument($docId); +} catch (Throwable $e) { + // Document is saved but not indexed — mark error and return partial success + $db->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?") + ->execute([$e->getMessage(), $docId]); + dbnToolsError( + 'Saved to corpus but indexing failed: ' . $e->getMessage(), + 500, + 'index_failed', + ['document_id' => $docId] + ); +} + +dbnToolsRespond(['ok' => true, 'document_id' => $docId, 'chunks' => $chunks], 201); diff --git a/api/search.php b/api/search.php index fb0b04d..13eb4ca 100644 --- a/api/search.php +++ b/api/search.php @@ -17,5 +17,8 @@ dbnToolsWithTelemetry('search', $language, function () use ($input, $language): $asOfDate = isset($input['as_of_date']) && preg_match('/^\d{4}(-\d{2}(-\d{2})?)?$/', $input['as_of_date']) ? $input['as_of_date'] : null; - return (new DbnLegalToolsService())->search($query, $language, $limit, $temporalMode, $asOfDate); + $scope = in_array($input['corpus_scope'] ?? '', ['shared', 'private', 'both'], true) + ? $input['corpus_scope'] + : 'both'; + return (new DbnLegalToolsService())->search($query, $language, $limit, $temporalMode, $asOfDate, $scope); }); diff --git a/api/user-docs.php b/api/user-docs.php index 6f1e2c4..9a401a5 100644 --- a/api/user-docs.php +++ b/api/user-docs.php @@ -2,16 +2,18 @@ declare(strict_types=1); /** - * GET /api/user-docs.php — list SSO user's uploaded documents + * GET /api/user-docs.php — list uploaded documents for current user * DELETE /api/user-docs.php?id=X — remove a document + * POST /api/user-docs.php — upload a document (file field = 'file') * - * Only available for SSO users (dbn_tools_sso_uid set in session). - * Reads from the shared dobetternorge.dbn_user_docs table, keyed by sso_uid. + * SSO users (dbn_tools_sso_uid) are keyed by their SSO uid. + * Other authenticated users are keyed by session_id() as a fallback. + * Reads/writes the shared dobetternorge.dbn_user_docs table. * Requires DBN_DB_* env vars pointing at the dobetternorge database. */ require_once __DIR__ . '/../includes/bootstrap.php'; -dbnToolsRequireMethod('GET', 'DELETE'); +dbnToolsRequireMethod('GET', 'DELETE', 'POST'); if (!dbnToolsIsAuthenticated()) { http_response_code(401); @@ -20,13 +22,9 @@ if (!dbnToolsIsAuthenticated()) { exit; } -// Only SSO users have shared docs +// SSO uid for SSO users; session id as stable key for client sessions $ssoUid = (string)($_SESSION['dbn_tools_sso_uid'] ?? ''); -if ($ssoUid === '') { - header('Content-Type: application/json'); - echo json_encode(['ok' => true, 'docs' => [], 'reason' => 'sso_only']); - exit; -} +$userKey = $ssoUid !== '' ? $ssoUid : 'sess_' . session_id(); header('Content-Type: application/json; charset=utf-8'); @@ -51,6 +49,50 @@ function dbnSharedDb(): ?PDO $method = $_SERVER['REQUEST_METHOD']; +// ── POST — upload a document ────────────────────────────────────────────────── +if ($method === 'POST') { + if (empty($_FILES['file']) || !is_array($_FILES['file'])) { + http_response_code(422); + echo json_encode(['ok' => false, 'error' => 'No file uploaded.']); + exit; + } + + try { + $extracted = dbnToolsExtractUploadedFile($_FILES['file']); + } catch (Throwable $e) { + http_response_code(422); + echo json_encode(['ok' => false, 'error' => $e->getMessage()]); + exit; + } + + $docId = uniqid('wbd_', true); + $filename = basename((string)($_FILES['file']['name'] ?? 'document')); + $fileType = strtolower(pathinfo($filename, PATHINFO_EXTENSION)); + $chunks = isset($extracted['text']) ? max(1, (int)ceil(mb_strlen($extracted['text']) / 1000)) : 0; + $now = gmdate('Y-m-d H:i:s'); + + $db = dbnSharedDb(); + if ($db) { + $db->prepare( + 'INSERT INTO dbn_user_docs (id, user_id, filename, file_type, chunk_count, source, status, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)' + )->execute([$docId, $userKey, $filename, $fileType, $chunks, 'workbench', 'ready', $now]); + } + + echo json_encode([ + 'ok' => true, + 'doc' => [ + 'doc_id' => $docId, + 'filename' => $filename, + 'file_type' => $fileType, + 'chunk_count' => $chunks, + 'source' => 'workbench', + 'created_at' => $now, + ], + ]); + exit; +} + // ── DELETE ──────────────────────────────────────────────────────────────────── if ($method === 'DELETE') { $docId = trim($_GET['id'] ?? ''); @@ -63,10 +105,10 @@ if ($method === 'DELETE') { $db = dbnSharedDb(); if ($db) { $stmt = $db->prepare('SELECT id FROM dbn_user_docs WHERE id = ? AND user_id = ?'); - $stmt->execute([$docId, $ssoUid]); + $stmt->execute([$docId, $userKey]); if ($stmt->fetch()) { $db->prepare('DELETE FROM dbn_user_docs WHERE id = ? AND user_id = ?') - ->execute([$docId, $ssoUid]); + ->execute([$docId, $userKey]); // Delete Qdrant points for this doc $qdrantUrl = 'http://10.0.2.10:6333'; @@ -74,7 +116,7 @@ if ($method === 'DELETE') { 'filter' => [ 'must' => [ ['key' => 'doc_id', 'match' => ['value' => $docId]], - ['key' => 'user_id', 'match' => ['value' => $ssoUid]], + ['key' => 'user_id', 'match' => ['value' => $userKey]], ], ], ]; @@ -108,7 +150,7 @@ $stmt = $db->prepare( ORDER BY created_at DESC LIMIT 50' ); -$stmt->execute([$ssoUid, 'ready']); +$stmt->execute([$userKey, 'ready']); $rows = $stmt->fetchAll(); $docs = array_map(static fn($r) => [ diff --git a/assets/js/barnevernet.js b/assets/js/barnevernet.js index a5c8c68..3d2b54f 100644 --- a/assets/js/barnevernet.js +++ b/assets/js/barnevernet.js @@ -813,6 +813,21 @@ els.results.appendChild(finalContainer.firstChild); } + // Save-to-corpus button + const briefEl = els.results.querySelector('.dr-brief'); + if (briefEl) { + briefEl.id = 'bvjBriefText'; + const saveBtn = document.createElement('button'); + saveBtn.type = 'button'; + saveBtn.className = 'js-save-corpus secondary-button'; + saveBtn.dataset.tool = 'barnevernet'; + saveBtn.dataset.contentId = 'bvjBriefText'; + saveBtn.dataset.suggestedTitle = 'BVJ analyse: ' + (document.getElementById('bvjQuestion')?.value?.slice(0, 80) ?? 'Svar'); + saveBtn.textContent = 'Save to corpus'; + saveBtn.style.marginTop = '12px'; + briefEl.insertAdjacentElement('afterend', saveBtn); + } + // Bind source card clicks els.results.querySelectorAll('.dr-source-card[data-source-n]').forEach((node) => { node.addEventListener('click', (e) => { diff --git a/assets/js/corpus-save.js b/assets/js/corpus-save.js new file mode 100644 index 0000000..4c186f4 --- /dev/null +++ b/assets/js/corpus-save.js @@ -0,0 +1,106 @@ +/** + * corpus-save.js — "Save to corpus" shared handler for all DBN tool pages. + * + * Buttons that trigger a save must have: + * class="js-save-corpus" + * data-content-id="" + * data-tool="" + * data-suggested-title="" (optional) + */ + +(function () { + 'use strict'; + + const dlg = document.getElementById('save-corpus-dialog'); + const form = document.getElementById('save-corpus-form'); + const titleIn = document.getElementById('save-corpus-title'); + const tagsIn = document.getElementById('save-corpus-tags'); + const cancelBtn = document.getElementById('save-corpus-cancel'); + + if (!dlg || !form) return; // dialog not present (e.g. not logged in) + + cancelBtn?.addEventListener('click', () => dlg.close()); + + let _pendingBtn = null; + let _pendingContent = ''; + let _pendingTool = ''; + + // Delegated click — catches buttons added dynamically by tool JS + document.addEventListener('click', (e) => { + const btn = e.target.closest('.js-save-corpus'); + if (!btn) return; + + const contentId = btn.dataset.contentId; + const el = contentId ? document.getElementById(contentId) : null; + const content = (el ? (el.value ?? el.textContent) : '').trim(); + + if (!content || content.length < 30) { + btn.textContent = 'Nothing to save'; + setTimeout(() => { btn.textContent = 'Save to corpus'; }, 2000); + return; + } + + _pendingBtn = btn; + _pendingContent = content; + _pendingTool = btn.dataset.tool ?? ''; + + titleIn.value = btn.dataset.suggestedTitle ?? ''; + tagsIn.value = ''; + dlg.showModal(); + titleIn.focus(); + titleIn.select(); + }); + + // Form submit inside dialog + form.addEventListener('submit', async (e) => { + e.preventDefault(); + dlg.close(); + + const btn = _pendingBtn; + const content = _pendingContent; + const title = titleIn.value.trim(); + const tags = tagsIn.value.trim(); + const tool = _pendingTool; + + if (!title || !content) return; + + if (btn) { + btn.disabled = true; + btn.textContent = 'Saving…'; + } + + try { + const resp = await fetch('api/save-to-corpus.php', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ title, content, source_tool: tool, tags }), + }); + + const data = await resp.json().catch(() => ({})); + + if (resp.ok && data.ok) { + if (btn) { + btn.textContent = '✓ Saved to corpus'; + btn.classList.add('js-save-corpus--saved'); + } + } else { + const msg = data.error ?? `Error ${resp.status}`; + if (btn) { + btn.textContent = 'Save failed'; + btn.disabled = false; + btn.title = msg; + } + console.error('[corpus-save] Save failed:', msg); + } + } catch (err) { + if (btn) { + btn.textContent = 'Network error'; + btn.disabled = false; + } + console.error('[corpus-save] Network error:', err); + } + + _pendingBtn = null; + _pendingContent = ''; + }); +}()); diff --git a/assets/js/deep-research.js b/assets/js/deep-research.js index 2e9f40d..9036b91 100644 --- a/assets/js/deep-research.js +++ b/assets/js/deep-research.js @@ -563,6 +563,21 @@ ${nextHtml} `; + // Save-to-corpus button (inject after brief block) + const briefEl = els.results.querySelector('.dr-brief'); + if (briefEl) { + briefEl.id = 'drBriefText'; + const saveBtn = document.createElement('button'); + saveBtn.type = 'button'; + saveBtn.className = 'js-save-corpus secondary-button'; + saveBtn.dataset.tool = 'deep-research'; + saveBtn.dataset.contentId = 'drBriefText'; + saveBtn.dataset.suggestedTitle = 'Research: ' + (document.getElementById('drQuery')?.value?.slice(0, 80) ?? 'Report'); + saveBtn.textContent = 'Save to corpus'; + saveBtn.style.marginTop = '12px'; + briefEl.insertAdjacentElement('afterend', saveBtn); + } + // Bind source-card click handlers (open modal) — but ignore clicks on inner els.results.querySelectorAll('.dr-source-card[data-source-n]').forEach((node) => { node.addEventListener('click', (e) => { diff --git a/assets/js/discrepancy.js b/assets/js/discrepancy.js index 4543e3a..de37885 100644 --- a/assets/js/discrepancy.js +++ b/assets/js/discrepancy.js @@ -551,6 +551,17 @@ els.results.appendChild(finalContainer.firstChild); } + // Save-to-corpus button (appended after final results) + const saveBtn = document.createElement('button'); + saveBtn.type = 'button'; + saveBtn.className = 'js-save-corpus secondary-button'; + saveBtn.dataset.tool = 'discrepancy'; + saveBtn.dataset.contentId = 'dcResults'; + saveBtn.dataset.suggestedTitle = 'Discrepancy report'; + saveBtn.textContent = 'Save to corpus'; + saveBtn.style.marginTop = '16px'; + els.results.appendChild(saveBtn); + // Bind tabs els.results.querySelectorAll('.dc-tab').forEach((btn) => { btn.addEventListener('click', () => { diff --git a/assets/js/korrespond.js b/assets/js/korrespond.js index 166d6e0..269178c 100644 --- a/assets/js/korrespond.js +++ b/assets/js/korrespond.js @@ -543,6 +543,12 @@
${esc(draftNo)}
+ ${isSameLang ? '' : `
@@ -554,6 +560,12 @@
${esc(draftUser)}
+ `} diff --git a/assets/js/tools.js b/assets/js/tools.js index a141905..d0fd1f7 100644 --- a/assets/js/tools.js +++ b/assets/js/tools.js @@ -910,6 +910,7 @@ document.addEventListener('DOMContentLoaded', () => { uploadFileList: document.querySelector('#uploadFileList'), uploadClear: document.querySelector('#uploadClear'), aliasSection: document.querySelector('#aliasSection'), + corpusScopeControl: document.querySelector('#corpusScopeControl'), addAliasRow: document.querySelector('#addAliasRow'), aliasRows: document.querySelector('#aliasRows'), audioZone: document.querySelector('#audioZone'), @@ -1013,6 +1014,7 @@ function setTool(toolName) { els.input.placeholder = tool.placeholder; } els.languageControl.classList.toggle('is-hidden', !tool.usesLanguage); + els.corpusScopeControl?.classList.toggle('is-hidden', toolName !== 'search'); els.redactionControl.classList.toggle('is-hidden', toolName !== 'redact'); els.uploadZone.classList.toggle('is-hidden', toolName !== 'redact' && toolName !== 'timeline'); els.aliasSection.classList.toggle('is-hidden', toolName !== 'redact'); @@ -1080,6 +1082,7 @@ async function runTool(event) { } if (state.activeTool === 'search') { payload.limit = 7; + payload.corpus_scope = currentCorpusScope(); } if (state.activeTool === 'redact') { lastOriginalText = text; @@ -1329,6 +1332,10 @@ function currentLanguage() { return document.querySelector('input[name="language"]:checked')?.value || 'en'; } +function currentCorpusScope() { + return document.querySelector('input[name="corpusScope"]:checked')?.value || 'both'; +} + function currentRedactionMode() { return document.querySelector('input[name="redactionMode"]:checked')?.value || 'standard'; } diff --git a/includes/LegalTools.php b/includes/LegalTools.php index 8462a56..c11273d 100644 --- a/includes/LegalTools.php +++ b/includes/LegalTools.php @@ -20,7 +20,8 @@ final class DbnLegalToolsService string $language = 'en', int $limit = 6, string $temporalMode = 'disabled', - ?string $asOfDate = null + ?string $asOfDate = null, + string $scope = 'both' ): array { $query = trim($query); if (mb_strlen($query, 'UTF-8') < 3) { @@ -28,15 +29,24 @@ final class DbnLegalToolsService } $limit = max(1, min(10, $limit)); $temporalMode = in_array($temporalMode, ['legal_conservative', 'disabled'], true) ? $temporalMode : 'disabled'; + $scope = in_array($scope, ['shared', 'private', 'both'], true) ? $scope : 'both'; + $scopeLabel = match ($scope) { + 'private' => 'personal corpus only', + 'shared' => 'Legal Library only', + default => 'Legal Library + personal corpus', + }; $trace = [ - $this->trace('Query interpretation', 'Searching Do Better Norge private corpus plus the subscribed family-legal package.', 'complete'), - $this->trace('Search tools used', 'ClientRagPipeline::searchAll with keyword mode, private corpus enabled, shared package filter set to family-legal.', 'running'), + $this->trace('Query interpretation', "Searching Do Better Norge {$scopeLabel}.", 'complete'), + $this->trace('Search tools used', 'ClientRagPipeline::searchAll with keyword mode.', 'running'), ]; $client = dbnToolsRequireClient(); $package = $this->requireFamilyPackage((int)$client['id']); + // Personal corpus client_id from session (may be 0 if user has no linked workspace) + $personalClientId = (int)($_SESSION['dbn_tools_client_id'] ?? 0); + $chunks = []; $retrievalNote = 'ClientRagPipeline keyword retrieval'; try { @@ -52,16 +62,64 @@ final class DbnLegalToolsService // Retrieval still works in keyword mode without gateway config. } - $rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30); - $chunks = $rag->searchAll($query, $limit, null, [ - 'search_private' => true, - 'search_shared' => true, - 'package_ids' => [(int)$package['id']], - 'chunk_limit' => $limit, - 'search_method' => 'keyword', - 'min_private' => 0, - 'include_beta_website' => true, - ]); + if ($scope === 'private') { + // Search only the user's personal corpus + if ($personalClientId > 0) { + $rag = new ClientRagPipeline($personalClientId, $gatewayUrl, 30); + $chunks = $rag->searchAll($query, $limit, null, [ + 'search_private' => true, + 'search_shared' => false, + 'chunk_limit' => $limit, + 'search_method' => 'keyword', + 'min_private' => 0, + ]); + } + } elseif ($scope === 'shared') { + // Search only the shared legal library + $rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30); + $chunks = $rag->searchAll($query, $limit, null, [ + 'search_private' => true, + 'search_shared' => true, + 'package_ids' => [(int)$package['id']], + 'chunk_limit' => $limit, + 'search_method' => 'keyword', + 'min_private' => 0, + 'include_beta_website' => true, + ]); + } else { + // 'both': shared library + personal corpus merged and re-ranked by score + $rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30); + $sharedChunks = $rag->searchAll($query, $limit, null, [ + 'search_private' => true, + 'search_shared' => true, + 'package_ids' => [(int)$package['id']], + 'chunk_limit' => $limit, + 'search_method' => 'keyword', + 'min_private' => 0, + 'include_beta_website' => true, + ]); + + $privateChunks = []; + if ($personalClientId > 0) { + try { + $ragPrivate = new ClientRagPipeline($personalClientId, $gatewayUrl, 30); + $privateChunks = $ragPrivate->searchAll($query, $limit, null, [ + 'search_private' => true, + 'search_shared' => false, + 'chunk_limit' => $limit, + 'search_method' => 'keyword', + 'min_private' => 0, + ]); + } catch (Throwable $e) { + error_log('[search] personal corpus query failed for client ' . $personalClientId . ': ' . $e->getMessage()); + } + } + + // Merge by score descending, cap at $limit + $merged = array_merge($sharedChunks, $privateChunks); + usort($merged, fn($a, $b) => ($b['score'] ?? 0) <=> ($a['score'] ?? 0)); + $chunks = array_slice($merged, 0, $limit); + } // Apply temporal reranking after retrieval (optional) if ($temporalMode === 'legal_conservative' && !empty($chunks)) { diff --git a/includes/layout_footer.php b/includes/layout_footer.php index ea28103..fddc882 100644 --- a/includes/layout_footer.php +++ b/includes/layout_footer.php @@ -26,5 +26,26 @@ + + + + +
+

Save to corpus

+

This will be indexed and searchable in your private corpus.

+ + + + + + +
+
diff --git a/includes/tool_form.php b/includes/tool_form.php index 7bc0277..5e7ed86 100644 --- a/includes/tool_form.php +++ b/includes/tool_form.php @@ -19,6 +19,13 @@ + +