feat(corpus): add save-to-corpus + private corpus search scope
- POST /api/save-to-corpus.php — saves tool output text to user's default CaveauAI corpus via ClientRagPipeline
- api/case/upload.php — dual-writes uploaded PDFs to CaveauAI client_documents (best-effort)
- assets/js/corpus-save.js — shared <dialog> handler for .js-save-corpus buttons on all tool pages
- includes/layout_footer.php — injects corpus-save.js + shared save dialog markup
- korrespond/deep-research/barnevernet/discrepancy JS — save-to-corpus buttons on output sections
- api/search.php + DbnLegalToolsService::search() — corpus_scope param ('shared'|'private'|'both'); when 'both', merges the user's personal CaveauAI corpus with the shared legal library
- includes/tool_form.php + assets/js/tools.js — corpus scope radio toggle shown on search tab
- api/user-docs.php — add POST upload method for non-SSO authenticated users
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+43
-3
@@ -40,11 +40,51 @@ if (strncmp($head, '%PDF-', 5) !== 0) {
|
||||
try {
    // Register the uploaded PDF in the case store and queue it for ingestion.
    $doc = CaseStore::registerUpload($userId, $name, $tmp, $size);
    CaseStore::caseEnqueueIngest((int)$doc['doc_id'], $userId);

    // Dual-write to the CaveauAI corpus. Best-effort: every failure in this
    // section is logged and swallowed so that it can never fail the upload.
    $caveauDocId = null;
    $clientId    = (int)($_SESSION['dbn_tools_client_id'] ?? 0);
    if ($clientId > 0 && !empty($doc['storage_path'])) {
        try {
            dbnToolsBootCaveau();
            $aiPortalRoot    = dbnToolsAiPortalRoot();
            $textExtractFile = $aiPortalRoot . '/platform/includes/text_extract.php';
            if (is_file($textExtractFile)) {
                require_once $textExtractFile;
                $content = extractPdfText($doc['storage_path']);

                // Skip PDFs that yield no usable text (e.g. image-only scans).
                if (is_string($content) && strlen($content) > 30) {
                    $caveauDb = getDb();
                    $corpusSt = $caveauDb->prepare(
                        'SELECT id FROM client_corpora WHERE client_id = ? AND is_default = 1 LIMIT 1'
                    );
                    $corpusSt->execute([$clientId]);
                    $corpusId = (int)($corpusSt->fetchColumn() ?: 0);

                    if ($corpusId > 0) {
                        $title = pathinfo($doc['filename'], PATHINFO_FILENAME);

                        // str_word_count() is byte/locale based and splits words at
                        // multi-byte letters (æ, ø, å). Count Unicode-aware instead and
                        // fall back only if the text is not valid UTF-8.
                        $wordCount = preg_match_all('/\p{L}[\p{L}\p{M}\'\-]*/u', $content);
                        if ($wordCount === false) {
                            $wordCount = str_word_count($content);
                        }

                        $caveauDb->prepare("
                            INSERT INTO client_documents
                                (client_id, corpus_id, title, source_type, content, category,
                                 import_method, word_count, status)
                            VALUES (?, ?, ?, 'pdf', ?, 'user-upload', 'dbn_upload', ?, 'pending')
                        ")->execute([$clientId, $corpusId, $title, $content, $wordCount]);
                        $caveauDocId = (int)$caveauDb->lastInsertId();

                        try {
                            $rag = new ClientRagPipeline($clientId);
                            $rag->ingestDocument($caveauDocId);
                        } catch (Throwable $ingestError) {
                            // Do not leave the row stuck in 'pending': record the failure
                            // the same way save-to-corpus.php does, then let the outer
                            // handler log it.
                            $caveauDb->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
                                ->execute([$ingestError->getMessage(), $caveauDocId]);
                            throw $ingestError;
                        }
                    }
                }
            }
        } catch (Throwable $e) {
            // Non-fatal: log and continue
            error_log('[upload] CaveauAI dual-write failed for doc ' . ($doc['doc_id'] ?? '?') . ': ' . $e->getMessage());
        }
    }

    // caveau_doc_id is null when no CaveauAI row was created.
    dbnToolsRespond([
        'ok'            => true,
        'doc_id'        => $doc['doc_id'],
        'filename'      => $doc['filename'],
        'caveau_doc_id' => $caveauDocId,
    ]);
} catch (Throwable $e) {
    dbnToolsError($e->getMessage(), 400, 'upload_failed');
}
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
<?php
/**
 * POST /api/save-to-corpus.php
 *
 * Save tool output text into the user's CaveauAI corpus.
 * Uses dbnToolsBootCaveau() to call ClientRagPipeline directly via filesystem include.
 *
 * Request body (JSON, max 500 KB):
 *   title        string  (required)
 *   content      string  (required, min 30 chars)
 *   source_tool  string  (optional, slug)
 *   tags         string  (optional, comma-separated)
 *
 * Responses:
 *   201  { ok, document_id, chunks }   document stored and indexed
 *   400  bad_request / too_large       validation failure
 *   403  no_workspace                  session has no linked CaveauAI client
 *   409  no_corpus                     client has no default corpus
 *   500  save_failed / index_failed    storage or indexing error
 *   503  db_unavailable                CaveauAI database could not be opened
 */

declare(strict_types=1);

require_once dirname(__DIR__) . '/includes/bootstrap.php';

dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

$user     = dbnToolsAuthenticatedUser();
$clientId = (int)($user['client_id'] ?? 0);
if ($clientId <= 0) {
    dbnToolsError('No linked CaveauAI workspace. Log in via the CaveauAI portal first.', 403, 'no_workspace');
}

$input = dbnToolsJsonInput(500_000);

// strict_types is enabled, so handing a decoded JSON number/array/object to
// trim()/strtolower() would raise an uncaught TypeError. Treat any non-string
// value as absent so it is rejected by the normal validation below.
$stringField = static function (string $key) use ($input): string {
    $value = $input[$key] ?? null;
    return is_string($value) ? $value : '';
};

$title   = trim($stringField('title'));
$content = trim($stringField('content'));

// Reduce the tool name to a lowercase slug of at most 64 characters; empty means "not given".
$sourceTool = substr((string)preg_replace('/[^a-z0-9\-_]/', '', strtolower($stringField('source_tool'))), 0, 64);
if ($sourceTool === '') {
    $sourceTool = null;
}

// Comma-separated tags -> JSON list. Only empty entries are dropped (a tag of "0" is kept).
$tagList = array_values(array_filter(
    array_map('trim', explode(',', $stringField('tags'))),
    static fn (string $tag): bool => $tag !== ''
));
$tags = json_encode($tagList, JSON_UNESCAPED_UNICODE);

if ($title === '') {
    dbnToolsError('title is required.', 400, 'bad_request');
}
// The minimum is expressed in characters, so measure characters rather than bytes.
if (mb_strlen($content, 'UTF-8') < 30) {
    dbnToolsError('content too short (min 30 chars).', 400, 'bad_request');
}
// The size cap is a byte limit.
if (strlen($content) > 2_000_000) {
    dbnToolsError('content exceeds 2 MB limit.', 400, 'too_large');
}

// Load CaveauAI platform (getDb, ClientRagPipeline, etc.)
dbnToolsBootCaveau();

try {
    $db = getDb();
} catch (Throwable $e) {
    // Connection errors can contain host/user details: log them, do not echo them.
    error_log('[save-to-corpus] getDb() failed: ' . $e->getMessage());
    dbnToolsError('CaveauAI database unavailable.', 503, 'db_unavailable');
}

// str_word_count() is byte/locale based and splits words at multi-byte letters
// (æ, ø, å). Count Unicode-aware and fall back only if PCRE rejects the text.
$wordCount = preg_match_all('/\p{L}[\p{L}\p{M}\'\-]*/u', $content);
if ($wordCount === false) {
    $wordCount = str_word_count($content);
}

// Resolve the client's default corpus and store the document. Errors are
// collected and reported after the try block so the error responder is never
// invoked from inside it.
$corpusId     = 0;
$docId        = 0;
$storageError = null;
try {
    $stmt = $db->prepare('SELECT id FROM client_corpora WHERE client_id = ? AND is_default = 1 LIMIT 1');
    $stmt->execute([$clientId]);
    $corpusId = (int)($stmt->fetchColumn() ?: 0);

    if ($corpusId > 0) {
        $ins = $db->prepare("
            INSERT INTO client_documents
                (client_id, corpus_id, title, source_type, content, category,
                 tags, import_method, source_tool, word_count, status)
            VALUES (?, ?, ?, 'text', ?, 'tool-output', ?, 'tool_output', ?, ?, 'pending')
        ");
        $ins->execute([$clientId, $corpusId, $title, $content, $tags, $sourceTool, $wordCount]);
        $docId = (int)$db->lastInsertId();
    }
} catch (Throwable $e) {
    $storageError = $e;
}

if ($storageError !== null) {
    error_log('[save-to-corpus] storing document failed for client ' . $clientId . ': ' . $storageError->getMessage());
    dbnToolsError('Could not save the document to your corpus.', 500, 'save_failed');
}
if ($corpusId === 0) {
    dbnToolsError(
        'No default corpus found for your account. Set one up in the CaveauAI portal.',
        409,
        'no_corpus'
    );
}

try {
    $rag    = new ClientRagPipeline($clientId);
    $chunks = $rag->ingestDocument($docId);
} catch (Throwable $e) {
    // Document is saved but not indexed — mark error and return partial success.
    // The status update is itself best-effort so a second failure cannot mask the first.
    try {
        $db->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
           ->execute([$e->getMessage(), $docId]);
    } catch (Throwable $updateError) {
        error_log('[save-to-corpus] could not flag doc ' . $docId . ' as errored: ' . $updateError->getMessage());
    }
    dbnToolsError(
        'Saved to corpus but indexing failed: ' . $e->getMessage(),
        500,
        'index_failed',
        ['document_id' => $docId]
    );
}

dbnToolsRespond(['ok' => true, 'document_id' => $docId, 'chunks' => $chunks], 201);
|
||||
+4
-1
@@ -17,5 +17,8 @@ dbnToolsWithTelemetry('search', $language, function () use ($input, $language):
|
||||
$asOfDate = isset($input['as_of_date']) && preg_match('/^\d{4}(-\d{2}(-\d{2})?)?$/', $input['as_of_date'])
|
||||
? $input['as_of_date']
|
||||
: null;
|
||||
return (new DbnLegalToolsService())->search($query, $language, $limit, $temporalMode, $asOfDate);
|
||||
$scope = in_array($input['corpus_scope'] ?? '', ['shared', 'private', 'both'], true)
|
||||
? $input['corpus_scope']
|
||||
: 'both';
|
||||
return (new DbnLegalToolsService())->search($query, $language, $limit, $temporalMode, $asOfDate, $scope);
|
||||
});
|
||||
|
||||
+56
-14
@@ -2,16 +2,18 @@
|
||||
declare(strict_types=1);
|
||||
|
||||
/**
|
||||
* GET /api/user-docs.php — list SSO user's uploaded documents
|
||||
* GET /api/user-docs.php — list uploaded documents for current user
|
||||
* DELETE /api/user-docs.php?id=X — remove a document
|
||||
* POST /api/user-docs.php — upload a document (file field = 'file')
|
||||
*
|
||||
* Only available for SSO users (dbn_tools_sso_uid set in session).
|
||||
* Reads from the shared dobetternorge.dbn_user_docs table, keyed by sso_uid.
|
||||
* SSO users (dbn_tools_sso_uid) are keyed by their SSO uid.
|
||||
* Other authenticated users are keyed by session_id() as a fallback.
|
||||
* Reads/writes the shared dobetternorge.dbn_user_docs table.
|
||||
* Requires DBN_DB_* env vars pointing at the dobetternorge database.
|
||||
*/
|
||||
require_once __DIR__ . '/../includes/bootstrap.php';
|
||||
|
||||
dbnToolsRequireMethod('GET', 'DELETE');
|
||||
dbnToolsRequireMethod('GET', 'DELETE', 'POST');
|
||||
|
||||
if (!dbnToolsIsAuthenticated()) {
|
||||
http_response_code(401);
|
||||
@@ -20,13 +22,9 @@ if (!dbnToolsIsAuthenticated()) {
|
||||
exit;
|
||||
}
|
||||
|
||||
// Only SSO users have shared docs
|
||||
// SSO uid for SSO users; session id as stable key for client sessions
|
||||
$ssoUid = (string)($_SESSION['dbn_tools_sso_uid'] ?? '');
|
||||
if ($ssoUid === '') {
|
||||
header('Content-Type: application/json');
|
||||
echo json_encode(['ok' => true, 'docs' => [], 'reason' => 'sso_only']);
|
||||
exit;
|
||||
}
|
||||
$userKey = $ssoUid !== '' ? $ssoUid : 'sess_' . session_id();
|
||||
|
||||
header('Content-Type: application/json; charset=utf-8');
|
||||
|
||||
@@ -51,6 +49,50 @@ function dbnSharedDb(): ?PDO
|
||||
|
||||
$method = $_SERVER['REQUEST_METHOD'];
|
||||
|
||||
// ── POST — upload a document ──────────────────────────────────────────────────
|
||||
// Handles POST: extracts the uploaded file, records a metadata row in
// dbn_user_docs under the caller's $userKey, and echoes the new document as JSON.
if ($method === 'POST') {
    if (empty($_FILES['file']) || !is_array($_FILES['file'])) {
        http_response_code(422);
        echo json_encode(['ok' => false, 'error' => 'No file uploaded.']);
        exit;
    }

    try {
        $extracted = dbnToolsExtractUploadedFile($_FILES['file']);
    } catch (Throwable $e) {
        http_response_code(422);
        echo json_encode(['ok' => false, 'error' => $e->getMessage()]);
        exit;
    }

    $docId    = uniqid('wbd_', true);
    $filename = basename((string)($_FILES['file']['name'] ?? 'document'));
    $fileType = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
    // Chunk count is an estimate: one chunk per started 1000 characters of extracted text.
    $chunks   = isset($extracted['text']) ? max(1, (int)ceil(mb_strlen($extracted['text']) / 1000)) : 0;
    $now      = gmdate('Y-m-d H:i:s');

    // Record the upload. A database failure is reported as a JSON 500 instead of
    // escaping as an uncaught exception.
    $persisted = false;
    try {
        $db = dbnSharedDb();
        if ($db) {
            $inserted = $db->prepare(
                'INSERT INTO dbn_user_docs (id, user_id, filename, file_type, chunk_count, source, status, created_at)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)'
            )->execute([$docId, $userKey, $filename, $fileType, $chunks, 'workbench', 'ready', $now]);
            if ($inserted === false) {
                // Only reachable when PDO is not in exception mode.
                throw new RuntimeException('INSERT into dbn_user_docs returned false');
            }
            $persisted = true;
        }
    } catch (Throwable $e) {
        error_log('[user-docs] failed to record upload ' . $docId . ': ' . $e->getMessage());
        http_response_code(500);
        echo json_encode(['ok' => false, 'error' => 'Could not save the document record.']);
        exit;
    }

    if (!$persisted) {
        // No shared database handle: the response stays successful for backward
        // compatibility, but 'persisted' tells the client nothing was stored.
        error_log('[user-docs] shared DB unavailable; upload ' . $docId . ' was not recorded');
    }

    echo json_encode([
        'ok'        => true,
        'persisted' => $persisted,
        'doc'       => [
            'doc_id'      => $docId,
            'filename'    => $filename,
            'file_type'   => $fileType,
            'chunk_count' => $chunks,
            'source'      => 'workbench',
            'created_at'  => $now,
        ],
    ]);
    exit;
}
|
||||
|
||||
// ── DELETE ────────────────────────────────────────────────────────────────────
|
||||
if ($method === 'DELETE') {
|
||||
$docId = trim($_GET['id'] ?? '');
|
||||
@@ -63,10 +105,10 @@ if ($method === 'DELETE') {
|
||||
$db = dbnSharedDb();
|
||||
if ($db) {
|
||||
$stmt = $db->prepare('SELECT id FROM dbn_user_docs WHERE id = ? AND user_id = ?');
|
||||
$stmt->execute([$docId, $ssoUid]);
|
||||
$stmt->execute([$docId, $userKey]);
|
||||
if ($stmt->fetch()) {
|
||||
$db->prepare('DELETE FROM dbn_user_docs WHERE id = ? AND user_id = ?')
|
||||
->execute([$docId, $ssoUid]);
|
||||
->execute([$docId, $userKey]);
|
||||
|
||||
// Delete Qdrant points for this doc
|
||||
$qdrantUrl = 'http://10.0.2.10:6333';
|
||||
@@ -74,7 +116,7 @@ if ($method === 'DELETE') {
|
||||
'filter' => [
|
||||
'must' => [
|
||||
['key' => 'doc_id', 'match' => ['value' => $docId]],
|
||||
['key' => 'user_id', 'match' => ['value' => $ssoUid]],
|
||||
['key' => 'user_id', 'match' => ['value' => $userKey]],
|
||||
],
|
||||
],
|
||||
];
|
||||
@@ -108,7 +150,7 @@ $stmt = $db->prepare(
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 50'
|
||||
);
|
||||
$stmt->execute([$ssoUid, 'ready']);
|
||||
$stmt->execute([$userKey, 'ready']);
|
||||
$rows = $stmt->fetchAll();
|
||||
|
||||
$docs = array_map(static fn($r) => [
|
||||
|
||||
@@ -813,6 +813,21 @@
|
||||
els.results.appendChild(finalContainer.firstChild);
|
||||
}
|
||||
|
||||
// Save-to-corpus button: inserted right after the rendered brief. The shared
// handler in corpus-save.js reads the text from the element named by data-content-id.
const briefEl = els.results.querySelector('.dr-brief');
if (briefEl) {
  briefEl.id = 'bvjBriefText';

  // An empty input has value '' which is not nullish, so `?? 'Svar'` never
  // applied and the title ended up as "BVJ analyse: ". Fall back on any blank value.
  const question = (document.getElementById('bvjQuestion')?.value ?? '').trim().slice(0, 80);

  const saveBtn = document.createElement('button');
  saveBtn.type = 'button';
  saveBtn.className = 'js-save-corpus secondary-button';
  saveBtn.dataset.tool = 'barnevernet';
  saveBtn.dataset.contentId = 'bvjBriefText';
  saveBtn.dataset.suggestedTitle = 'BVJ analyse: ' + (question || 'Svar');
  saveBtn.textContent = 'Save to corpus';
  saveBtn.style.marginTop = '12px';
  briefEl.insertAdjacentElement('afterend', saveBtn);
}
|
||||
|
||||
// Bind source card clicks
|
||||
els.results.querySelectorAll('.dr-source-card[data-source-n]').forEach((node) => {
|
||||
node.addEventListener('click', (e) => {
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
/**
 * corpus-save.js — "Save to corpus" shared handler for all DBN tool pages.
 *
 * Buttons that trigger a save must have:
 *   class="js-save-corpus"
 *   data-content-id="<id of element containing the text to save>"
 *   data-tool="<source_tool slug, e.g. korrespond>"
 *   data-suggested-title="<pre-filled title string>"   (optional)
 *
 * Flow: a click on such a button captures the text and opens the shared
 * <dialog>; submitting the dialog POSTs { title, content, source_tool, tags }
 * as JSON to api/save-to-corpus.php and reflects the outcome on the button.
 */

(function () {
  'use strict';

  const dlg       = document.getElementById('save-corpus-dialog');
  const form      = document.getElementById('save-corpus-form');
  const titleIn   = document.getElementById('save-corpus-title');
  const tagsIn    = document.getElementById('save-corpus-tags');
  const cancelBtn = document.getElementById('save-corpus-cancel');

  // Dialog markup not present, or incomplete: nothing to wire up. Both inputs
  // are dereferenced below, so they are required as well.
  if (!dlg || !form || !titleIn || !tagsIn) return;

  cancelBtn?.addEventListener('click', () => dlg.close());

  // State captured when the dialog opens and consumed on submit.
  let _pendingBtn     = null;
  let _pendingContent = '';
  let _pendingTool    = '';

  /**
   * Return the trimmed text to save from a content element.
   * Form controls contribute their string value; anything else contributes its
   * textContent, minus any save buttons nested inside it so that a container
   * which also hosts its own trigger does not leak the button label into the
   * saved text.
   */
  function readSaveableText(el) {
    if (!el) return '';
    // Only trust a string `value`; some elements expose a numeric one.
    if (typeof el.value === 'string') return el.value.trim();

    let source = el;
    if (el.querySelector('.js-save-corpus')) {
      source = el.cloneNode(true);
      source.querySelectorAll('.js-save-corpus').forEach((node) => node.remove());
    }
    return (source.textContent ?? '').trim();
  }

  // Delegated click — catches buttons added dynamically by tool JS
  document.addEventListener('click', (e) => {
    const btn = e.target instanceof Element ? e.target.closest('.js-save-corpus') : null;
    if (!btn) return;

    const contentId = btn.dataset.contentId;
    const content   = readSaveableText(contentId ? document.getElementById(contentId) : null);

    // Same 30-character floor as the client has always applied before opening the dialog.
    if (!content || content.length < 30) {
      btn.textContent = 'Nothing to save';
      setTimeout(() => { btn.textContent = 'Save to corpus'; }, 2000);
      return;
    }

    _pendingBtn     = btn;
    _pendingContent = content;
    _pendingTool    = btn.dataset.tool ?? '';

    titleIn.value = btn.dataset.suggestedTitle ?? '';
    tagsIn.value  = '';
    dlg.showModal();
    titleIn.focus();
    titleIn.select();
  });

  // Form submit inside dialog
  form.addEventListener('submit', async (e) => {
    e.preventDefault();
    dlg.close();

    const btn     = _pendingBtn;
    const content = _pendingContent;
    const title   = titleIn.value.trim();
    const tags    = tagsIn.value.trim();
    const tool    = _pendingTool;

    if (!title || !content) return;

    if (btn) {
      btn.disabled    = true;
      btn.textContent = 'Saving…';
    }

    try {
      const resp = await fetch('api/save-to-corpus.php', {
        method:  'POST',
        headers: { 'Content-Type': 'application/json' },
        body:    JSON.stringify({ title, content, source_tool: tool, tags }),
      });

      // A non-JSON body (e.g. an HTML error page) is treated as an empty payload.
      const data = await resp.json().catch(() => ({}));

      if (resp.ok && data.ok) {
        // The button stays disabled after a successful save.
        if (btn) {
          btn.textContent = '✓ Saved to corpus';
          btn.classList.add('js-save-corpus--saved');
        }
      } else {
        const msg = data.error ?? `Error ${resp.status}`;
        if (btn) {
          btn.textContent = 'Save failed';
          btn.disabled    = false;
          btn.title       = msg;
        }
        console.error('[corpus-save] Save failed:', msg);
      }
    } catch (err) {
      if (btn) {
        btn.textContent = 'Network error';
        btn.disabled    = false;
      }
      console.error('[corpus-save] Network error:', err);
    }

    // Clear all captured state, including the tool slug.
    _pendingBtn     = null;
    _pendingContent = '';
    _pendingTool    = '';
  });
}());
|
||||
@@ -563,6 +563,21 @@
|
||||
${nextHtml}
|
||||
`;
|
||||
|
||||
// Save-to-corpus button (inject after brief block). The shared handler in
// corpus-save.js reads the text from the element named by data-content-id.
const briefEl = els.results.querySelector('.dr-brief');
if (briefEl) {
  briefEl.id = 'drBriefText';

  // An empty input has value '' which is not nullish, so `?? 'Report'` never
  // applied and the title ended up as "Research: ". Fall back on any blank value.
  const queryText = (document.getElementById('drQuery')?.value ?? '').trim().slice(0, 80);

  const saveBtn = document.createElement('button');
  saveBtn.type = 'button';
  saveBtn.className = 'js-save-corpus secondary-button';
  saveBtn.dataset.tool = 'deep-research';
  saveBtn.dataset.contentId = 'drBriefText';
  saveBtn.dataset.suggestedTitle = 'Research: ' + (queryText || 'Report');
  saveBtn.textContent = 'Save to corpus';
  saveBtn.style.marginTop = '12px';
  briefEl.insertAdjacentElement('afterend', saveBtn);
}
|
||||
|
||||
// Bind source-card click handlers (open modal) — but ignore clicks on inner <a>
|
||||
els.results.querySelectorAll('.dr-source-card[data-source-n]').forEach((node) => {
|
||||
node.addEventListener('click', (e) => {
|
||||
|
||||
@@ -551,6 +551,17 @@
|
||||
els.results.appendChild(finalContainer.firstChild);
|
||||
}
|
||||
|
||||
// Build the "Save to corpus" trigger and place it at the end of the results
// container; the shared corpus-save.js handler picks it up via its class.
const saveBtn = Object.assign(document.createElement('button'), {
  type: 'button',
  className: 'js-save-corpus secondary-button',
  textContent: 'Save to corpus',
});
// data-* attributes consumed by the shared handler.
Object.assign(saveBtn.dataset, {
  tool: 'discrepancy',
  contentId: 'dcResults',
  suggestedTitle: 'Discrepancy report',
});
saveBtn.style.marginTop = '16px';
els.results.appendChild(saveBtn);
|
||||
|
||||
// Bind tabs
|
||||
els.results.querySelectorAll('.dc-tab').forEach((btn) => {
|
||||
btn.addEventListener('click', () => {
|
||||
|
||||
@@ -543,6 +543,12 @@
|
||||
</div>
|
||||
</div>
|
||||
<pre class="korr-draft-body" id="korrDraftNo">${esc(draftNo)}</pre>
|
||||
<button type="button" class="js-save-corpus secondary-button"
|
||||
data-tool="korrespond"
|
||||
data-content-id="korrDraftNo"
|
||||
data-suggested-title="${esc((data.output_type || 'Brev') + ' — ' + (data.recipient_body || ''))}">
|
||||
Save to corpus
|
||||
</button>
|
||||
</div>
|
||||
${isSameLang ? '' : `
|
||||
<div class="korr-draft-col">
|
||||
@@ -554,6 +560,12 @@
|
||||
</div>
|
||||
</div>
|
||||
<pre class="korr-draft-body" id="korrDraftUser">${esc(draftUser)}</pre>
|
||||
<button type="button" class="js-save-corpus secondary-button"
|
||||
data-tool="korrespond"
|
||||
data-content-id="korrDraftUser"
|
||||
data-suggested-title="${esc((data.output_type || 'Brev') + ' — ' + (data.recipient_body || '') + ' (translation)')}">
|
||||
Save to corpus
|
||||
</button>
|
||||
</div>`}
|
||||
</div>
|
||||
|
||||
|
||||
@@ -910,6 +910,7 @@ document.addEventListener('DOMContentLoaded', () => {
|
||||
uploadFileList: document.querySelector('#uploadFileList'),
|
||||
uploadClear: document.querySelector('#uploadClear'),
|
||||
aliasSection: document.querySelector('#aliasSection'),
|
||||
corpusScopeControl: document.querySelector('#corpusScopeControl'),
|
||||
addAliasRow: document.querySelector('#addAliasRow'),
|
||||
aliasRows: document.querySelector('#aliasRows'),
|
||||
audioZone: document.querySelector('#audioZone'),
|
||||
@@ -1013,6 +1014,7 @@ function setTool(toolName) {
|
||||
els.input.placeholder = tool.placeholder;
|
||||
}
|
||||
els.languageControl.classList.toggle('is-hidden', !tool.usesLanguage);
|
||||
els.corpusScopeControl?.classList.toggle('is-hidden', toolName !== 'search');
|
||||
els.redactionControl.classList.toggle('is-hidden', toolName !== 'redact');
|
||||
els.uploadZone.classList.toggle('is-hidden', toolName !== 'redact' && toolName !== 'timeline');
|
||||
els.aliasSection.classList.toggle('is-hidden', toolName !== 'redact');
|
||||
@@ -1080,6 +1082,7 @@ async function runTool(event) {
|
||||
}
|
||||
if (state.activeTool === 'search') {
|
||||
payload.limit = 7;
|
||||
payload.corpus_scope = currentCorpusScope();
|
||||
}
|
||||
if (state.activeTool === 'redact') {
|
||||
lastOriginalText = text;
|
||||
@@ -1329,6 +1332,10 @@ function currentLanguage() {
|
||||
return document.querySelector('input[name="language"]:checked')?.value || 'en';
|
||||
}
|
||||
|
||||
/**
 * Read the selected corpus-scope radio button.
 * @returns {string} the checked radio's value, or 'both' when none is checked
 *   or the value is empty.
 */
function currentCorpusScope() {
  const checked = document.querySelector('input[name="corpusScope"]:checked');
  if (checked && checked.value) {
    return checked.value;
  }
  return 'both';
}
|
||||
|
||||
/**
 * Read the selected redaction-mode radio button.
 * @returns {string} the checked radio's value, or 'standard' when none is
 *   checked or the value is empty.
 */
function currentRedactionMode() {
  const checked = document.querySelector('input[name="redactionMode"]:checked');
  if (checked && checked.value) {
    return checked.value;
  }
  return 'standard';
}
|
||||
|
||||
+61
-3
@@ -20,7 +20,8 @@ final class DbnLegalToolsService
|
||||
string $language = 'en',
|
||||
int $limit = 6,
|
||||
string $temporalMode = 'disabled',
|
||||
?string $asOfDate = null
|
||||
?string $asOfDate = null,
|
||||
string $scope = 'both'
|
||||
): array {
|
||||
$query = trim($query);
|
||||
if (mb_strlen($query, 'UTF-8') < 3) {
|
||||
@@ -28,15 +29,24 @@ final class DbnLegalToolsService
|
||||
}
|
||||
$limit = max(1, min(10, $limit));
|
||||
$temporalMode = in_array($temporalMode, ['legal_conservative', 'disabled'], true) ? $temporalMode : 'disabled';
|
||||
$scope = in_array($scope, ['shared', 'private', 'both'], true) ? $scope : 'both';
|
||||
|
||||
$scopeLabel = match ($scope) {
|
||||
'private' => 'personal corpus only',
|
||||
'shared' => 'Legal Library only',
|
||||
default => 'Legal Library + personal corpus',
|
||||
};
|
||||
$trace = [
|
||||
$this->trace('Query interpretation', 'Searching Do Better Norge private corpus plus the subscribed family-legal package.', 'complete'),
|
||||
$this->trace('Search tools used', 'ClientRagPipeline::searchAll with keyword mode, private corpus enabled, shared package filter set to family-legal.', 'running'),
|
||||
$this->trace('Query interpretation', "Searching Do Better Norge {$scopeLabel}.", 'complete'),
|
||||
$this->trace('Search tools used', 'ClientRagPipeline::searchAll with keyword mode.', 'running'),
|
||||
];
|
||||
|
||||
$client = dbnToolsRequireClient();
|
||||
$package = $this->requireFamilyPackage((int)$client['id']);
|
||||
|
||||
// Personal corpus client_id from session (may be 0 if user has no linked workspace)
|
||||
$personalClientId = (int)($_SESSION['dbn_tools_client_id'] ?? 0);
|
||||
|
||||
$chunks = [];
|
||||
$retrievalNote = 'ClientRagPipeline keyword retrieval';
|
||||
try {
|
||||
@@ -52,6 +62,20 @@ final class DbnLegalToolsService
|
||||
// Retrieval still works in keyword mode without gateway config.
|
||||
}
|
||||
|
||||
if ($scope === 'private') {
|
||||
// Search only the user's personal corpus
|
||||
if ($personalClientId > 0) {
|
||||
$rag = new ClientRagPipeline($personalClientId, $gatewayUrl, 30);
|
||||
$chunks = $rag->searchAll($query, $limit, null, [
|
||||
'search_private' => true,
|
||||
'search_shared' => false,
|
||||
'chunk_limit' => $limit,
|
||||
'search_method' => 'keyword',
|
||||
'min_private' => 0,
|
||||
]);
|
||||
}
|
||||
} elseif ($scope === 'shared') {
|
||||
// Search only the shared legal library
|
||||
$rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30);
|
||||
$chunks = $rag->searchAll($query, $limit, null, [
|
||||
'search_private' => true,
|
||||
@@ -62,6 +86,40 @@ final class DbnLegalToolsService
|
||||
'min_private' => 0,
|
||||
'include_beta_website' => true,
|
||||
]);
|
||||
} else {
|
||||
// 'both': shared library + personal corpus merged and re-ranked by score
|
||||
$rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30);
|
||||
$sharedChunks = $rag->searchAll($query, $limit, null, [
|
||||
'search_private' => true,
|
||||
'search_shared' => true,
|
||||
'package_ids' => [(int)$package['id']],
|
||||
'chunk_limit' => $limit,
|
||||
'search_method' => 'keyword',
|
||||
'min_private' => 0,
|
||||
'include_beta_website' => true,
|
||||
]);
|
||||
|
||||
$privateChunks = [];
|
||||
if ($personalClientId > 0) {
|
||||
try {
|
||||
$ragPrivate = new ClientRagPipeline($personalClientId, $gatewayUrl, 30);
|
||||
$privateChunks = $ragPrivate->searchAll($query, $limit, null, [
|
||||
'search_private' => true,
|
||||
'search_shared' => false,
|
||||
'chunk_limit' => $limit,
|
||||
'search_method' => 'keyword',
|
||||
'min_private' => 0,
|
||||
]);
|
||||
} catch (Throwable $e) {
|
||||
error_log('[search] personal corpus query failed for client ' . $personalClientId . ': ' . $e->getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// Merge by score descending, cap at $limit
|
||||
$merged = array_merge($sharedChunks, $privateChunks);
|
||||
usort($merged, fn($a, $b) => ($b['score'] ?? 0) <=> ($a['score'] ?? 0));
|
||||
$chunks = array_slice($merged, 0, $limit);
|
||||
}
|
||||
|
||||
// Apply temporal reranking after retrieval (optional)
|
||||
if ($temporalMode === 'legal_conservative' && !empty($chunks)) {
|
||||
|
||||
@@ -26,5 +26,26 @@
|
||||
<?php if (!empty($extraScripts) && is_array($extraScripts)): foreach ($extraScripts as $extraScript): ?>
|
||||
<script src="<?= htmlspecialchars((string)$extraScript) ?>" defer></script>
|
||||
<?php endforeach; endif; ?>
|
||||
<script src="assets/js/corpus-save.js" defer></script>
|
||||
|
||||
<!-- Save-to-corpus dialog (shared across all tool pages) -->
|
||||
<dialog id="save-corpus-dialog" class="save-corpus-dialog">
|
||||
<form method="dialog" id="save-corpus-form">
|
||||
<h3>Save to corpus</h3>
|
||||
<p class="save-corpus-hint">This will be indexed and searchable in your private corpus.</p>
|
||||
<label>
|
||||
<span>Title <span aria-hidden="true">*</span></span>
|
||||
<input id="save-corpus-title" type="text" required placeholder="Give this entry a title…" autocomplete="off">
|
||||
</label>
|
||||
<label>
|
||||
<span>Tags <span class="save-corpus-optional">(comma-separated)</span></span>
|
||||
<input id="save-corpus-tags" type="text" placeholder="e.g. barnevern, 2024, kjennelse">
|
||||
</label>
|
||||
<menu>
|
||||
<button type="submit" class="btn-primary">Save to corpus</button>
|
||||
<button type="button" id="save-corpus-cancel">Cancel</button>
|
||||
</menu>
|
||||
</form>
|
||||
</dialog>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -19,6 +19,13 @@
|
||||
<input type="number" id="numSpeakersInput" name="num_speakers" min="2" max="10" placeholder="auto" class="num-speakers-input" aria-label="Expected speaker count">
|
||||
</div>
|
||||
|
||||
<div class="control-row is-hidden" id="corpusScopeControl">
|
||||
<span class="control-label">Search</span>
|
||||
<label><input type="radio" name="corpusScope" value="both" checked> Legal Library + My Docs</label>
|
||||
<label><input type="radio" name="corpusScope" value="shared"> Legal Library only</label>
|
||||
<label><input type="radio" name="corpusScope" value="private"> My Docs only</label>
|
||||
</div>
|
||||
|
||||
<div class="control-row is-hidden" id="redactionControl">
|
||||
<span class="control-label">Mode</span>
|
||||
<label><input type="radio" name="redactionMode" value="standard" checked> Standard</label>
|
||||
|
||||
Reference in New Issue
Block a user