diff --git a/api/redact.php b/api/redact.php index ae6acd5..4126347 100644 --- a/api/redact.php +++ b/api/redact.php @@ -5,8 +5,15 @@ require_once __DIR__ . '/../includes/LegalTools.php'; dbnToolsRequireMethod('POST'); dbnToolsRequireAuth(); -$ftUid = dbnToolsFreeTierCheck('redact'); + +// Determine engine and its credit cost before the pre-flight credit check $input = dbnToolsJsonInput(400000); +$_validEngines = ['azure_mini', 'azure_full']; +$_engine = in_array((string)($input['engine'] ?? ''), $_validEngines, true) + ? (string)$input['engine'] : 'azure_mini'; +$_engineCredits = $_engine === 'azure_full' ? 2 : 1; + +$ftUid = dbnToolsFreeTierCheckAmount('redact', $_engineCredits); dbnToolsWithChargedTelemetry('redact', '', $ftUid, function () use ($input): array { $text = dbnToolsInjectDocContent($input, dbnToolsString($input, 'text', 128000, false)); @@ -17,7 +24,7 @@ dbnToolsWithChargedTelemetry('redact', '', $ftUid, function () use ($input): arr $region = dbnToolsNormalizeRegion($input['region'] ?? 'nordic'); $language = dbnToolsNormalizeLanguage($input['language'] ?? 'en'); - $validEngines = ['azure_mini', 'azure_full', 'gpu', 'regex']; + $validEngines = ['azure_mini', 'azure_full']; $engine = in_array((string)($input['engine'] ?? ''), $validEngines, true) ? (string)$input['engine'] : 'azure_mini'; @@ -67,4 +74,4 @@ dbnToolsWithChargedTelemetry('redact', '', $ftUid, function () use ($input): arr $text, $mode, $region, $language, $aliases, $engine, $outputFormat, $keepOfficials, $exemptNames, $redactTypes ); -}); +}, $_engineCredits); diff --git a/api/save-to-corpus.php b/api/save-to-corpus.php index 9698200..c3d3c9d 100644 --- a/api/save-to-corpus.php +++ b/api/save-to-corpus.php @@ -35,6 +35,13 @@ $tags = json_encode( JSON_UNESCAPED_UNICODE ); +$rawSourceDocIds = $input['source_doc_ids'] ?? null; +$sourceDocIdArr = is_array($rawSourceDocIds) + ? $rawSourceDocIds + : (is_string($rawSourceDocIds) ? array_filter(array_map('trim', explode(',', $rawSourceDocIds))) : []); +$firstSourceDocId = (int)(reset($sourceDocIdArr) ?: 0); +$sourceUrl = $firstSourceDocId > 0 ? "corpus-doc:{$firstSourceDocId}" : null; + if ($title === '') { dbnToolsError('title is required.', 400, 'bad_request'); } @@ -71,10 +78,10 @@ $wordCount = str_word_count($content); $ins = $db->prepare(" INSERT INTO client_documents (client_id, corpus_id, title, source_type, content, category, - tags, import_method, source_tool, word_count, status) - VALUES (?, ?, ?, 'text', ?, 'tool-output', ?, 'tool_output', ?, ?, 'pending') + tags, import_method, source_tool, source_url, word_count, status) + VALUES (?, ?, ?, 'text', ?, 'tool-output', ?, 'tool_output', ?, ?, ?, 'pending') "); -$ins->execute([$clientId, $corpusId, $title, $content, $tags, $sourceTool, $wordCount]); +$ins->execute([$clientId, $corpusId, $title, $content, $tags, $sourceTool, $sourceUrl, $wordCount]); $docId = (int)$db->lastInsertId(); try { diff --git a/assets/css/doc-picker.css b/assets/css/doc-picker.css index 1ef3313..0690b3c 100644 --- a/assets/css/doc-picker.css +++ b/assets/css/doc-picker.css @@ -270,3 +270,23 @@ transition: border-color 0.15s, color 0.15s; } .audio-corpus-upload:hover { border-color: var(--dbn-accent, #00205B); color: var(--dbn-accent, #00205B); } + + +/* ── Redacted document badge ─────────────────────────────────────────────── */ + +.doc-item__badge { + display: inline-block; + margin-left: 0.45em; + padding: 0.1em 0.45em; + border-radius: 4px; + font-size: 0.72rem; + font-weight: 600; + vertical-align: middle; + line-height: 1.4; +} + +.doc-item__badge--redact { + background: #ede8f7; + color: #5b35a8; + border: 1px solid rgba(91, 53, 168, 0.2); +} diff --git a/assets/css/tools.css b/assets/css/tools.css index 0baae97..774a013 100644 --- a/assets/css/tools.css +++ b/assets/css/tools.css @@ -2266,6 +2266,38 @@ p { cursor: progress; } +@keyframes redact-spin { + to { transform: rotate(360deg); } +} + +.redact-working { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + gap: 12px; + padding: 32px 16px; + border: 1px solid var(--line); + border-radius: 8px; + background: #fbfcfe; +} + +.redact-working p { + margin: 0; + color: var(--muted); + font-size: 0.92rem; +} + +.redact-working__spinner { + display: block; + width: 22px; + height: 22px; + border: 3px solid var(--line); + border-top-color: var(--teal); + border-radius: 50%; + animation: redact-spin 0.9s linear infinite; +} + /* ── Feedback widget ──────────────────────────────────────────── */ .feedback-widget { margin-top: 1.5rem; diff --git a/assets/js/corpus-save.js b/assets/js/corpus-save.js index 92a9c52..8543f04 100644 --- a/assets/js/corpus-save.js +++ b/assets/js/corpus-save.js @@ -41,6 +41,7 @@ } function bodyFor(kind, payload) { + const sourceDocIds = (payload.sourceDocIds || '').split(',').map(s => s.trim()).filter(Boolean); if (window.DBN_DASHBOARD) { return JSON.stringify({ title: payload.title, @@ -48,6 +49,7 @@ source_tool: payload.tool || 'dashboard-save', tags: payload.tags, kind, + ...(sourceDocIds.length ? { source_doc_ids: sourceDocIds } : {}), }); } return JSON.stringify({ @@ -55,6 +57,7 @@ content: payload.content, source_tool: payload.tool || '', tags: payload.tags, + ...(sourceDocIds.length ? { source_doc_ids: sourceDocIds } : {}), }); } @@ -74,9 +77,10 @@ } _pendingBtn = btn; - dlg.dataset.pendingContent = content; - dlg.dataset.pendingTool = btn.dataset.tool || ''; - dlg.dataset.pendingKind = 'tool_output'; + dlg.dataset.pendingContent = content; + dlg.dataset.pendingTool = btn.dataset.tool || ''; + dlg.dataset.pendingKind = 'tool_output'; + dlg.dataset.pendingSourceDocIds = btn.dataset.sourceDocIds || ''; titleIn.value = btn.dataset.suggestedTitle || ''; tagsIn.value = ''; @@ -90,12 +94,13 @@ e.preventDefault(); dlg.close(); - const btn = _pendingBtn; - const content = dlg.dataset.pendingContent || ''; - const tool = dlg.dataset.pendingTool || ''; - const kind = dlg.dataset.pendingKind || 'tool_output'; - const title = titleIn.value.trim(); - const tags = tagsIn.value.trim(); + const btn = _pendingBtn; + const content = dlg.dataset.pendingContent || ''; + const tool = dlg.dataset.pendingTool || ''; + const kind = dlg.dataset.pendingKind || 'tool_output'; + const sourceDocIds = dlg.dataset.pendingSourceDocIds || ''; + const title = titleIn.value.trim(); + const tags = tagsIn.value.trim(); if (!title || !content) return; @@ -109,7 +114,7 @@ method: 'POST', credentials: 'same-origin', headers: { 'Content-Type': 'application/json' }, - body: bodyFor(kind, { title, content, tool, tags }), + body: bodyFor(kind, { title, content, tool, tags, sourceDocIds }), }); const data = await resp.json().catch(() => ({})); @@ -146,6 +151,7 @@ delete dlg.dataset.pendingContent; delete dlg.dataset.pendingTool; delete dlg.dataset.pendingKind; + delete dlg.dataset.pendingSourceDocIds; }); function showToast(msg, isError) { diff --git a/assets/js/doc-picker.js b/assets/js/doc-picker.js index e6ed2db..9853ed4 100644 --- a/assets/js/doc-picker.js +++ b/assets/js/doc-picker.js @@ -97,10 +97,13 @@ .toLocaleDateString(undefined, { dateStyle: 'medium' })); } catch (_) {} } + var redactBadge = doc.source_tool === 'redact' + ? '✂ Redacted' + : ''; return '
' + '' + '
' - + '
' + esc(doc.title || 'Untitled') + '
' + + '
' + esc(doc.title || 'Untitled') + redactBadge + '
' + (meta.length ? '
' + esc(meta.join(' · ')) + '
' : '') + '
' + '
'; diff --git a/assets/js/tools.js b/assets/js/tools.js index 7fed5a7..9eda1c6 100644 --- a/assets/js/tools.js +++ b/assets/js/tools.js @@ -8,9 +8,7 @@ const REDACT_I18N = { redactEngine: 'Engine', redactEngineAzureMini: 'Azure gpt-4o-mini', redactEngineAzureFull: 'Azure gpt-4o', - redactEngineGpu: 'GPU (cuttlefish)', - redactEngineRegex: 'Regex only', - redactEngineHint: 'Azure engines use your BNL Azure credits. GPU runs the local LiteLLM proxy. Regex-only is instant and free but finds no names or organisations.', + redactEngineHint: 'gpt-4o-mini: 1 credit — fast, handles most documents well. gpt-4o: 2 credits — higher accuracy for complex or multi-person cases.', redactMode: 'Mode', redactModeStandard: 'Standard', redactModeStrict: 'Strict', @@ -42,7 +40,7 @@ const REDACT_I18N = { redactAliasAdd: 'Add', redactAliasHint: 'Replace a specific name with a custom bracketed label, e.g. "David Jr" → [Junior].', redactUploadAria: 'File upload', - redactUploadDrop: 'Drop up to 5 files here, or', + redactUploadDrop: 'Drop one file here, or', redactUploadBrowse: 'browse', redactUploadHint: 'text extracted in memory, never stored', redactUploadClear: '× Clear', @@ -63,9 +61,7 @@ const REDACT_I18N = { redactEngine: 'Motor', redactEngineAzureMini: 'Azure gpt-4o-mini', redactEngineAzureFull: 'Azure gpt-4o', - redactEngineGpu: 'GPU (cuttlefish)', - redactEngineRegex: 'Kun regex', - redactEngineHint: 'Azure-motorer bruker BNL Azure-kreditter. GPU kjører lokal LiteLLM-proxy. Kun regex er øyeblikkelig og gratis, men finner ingen navn eller organisasjoner.', + redactEngineHint: 'gpt-4o-mini: 1 kreditt — rask, håndterer de fleste dokumenter godt. gpt-4o: 2 kreditter — høyere nøyaktighet for komplekse eller flerpersonssaker.', redactMode: 'Modus', redactModeStandard: 'Standard', redactModeStrict: 'Strikt', @@ -97,7 +93,7 @@ const REDACT_I18N = { redactAliasAdd: 'Legg til', redactAliasHint: 'Erstatt et spesifikt navn med en egendefinert merkelapp, f.eks. «David Jr» → [Junior].', redactUploadAria: 'Filopplasting', - redactUploadDrop: 'Slipp opptil 5 filer her, eller', + redactUploadDrop: 'Slipp én fil her, eller', redactUploadBrowse: 'bla', redactUploadHint: 'tekst hentes i minnet, lagres aldri', redactUploadClear: '× Tøm', @@ -118,9 +114,7 @@ const REDACT_I18N = { redactEngine: 'Рушій', redactEngineAzureMini: 'Azure gpt-4o-mini', redactEngineAzureFull: 'Azure gpt-4o', - redactEngineGpu: 'GPU (cuttlefish)', - redactEngineRegex: 'Лише регулярні вирази', - redactEngineHint: 'Рушії Azure використовують кредити BNL Azure. GPU запускає локальний проксі LiteLLM. Лише regex — миттєво і безкоштовно, але не знаходить імен або організацій.', + redactEngineHint: 'gpt-4o-mini: 1 кредит — швидко, добре обробляє більшість документів. gpt-4o: 2 кредити — вища точність для складних або багатоособових справ.', redactMode: 'Режим', redactModeStandard: 'Стандартний', redactModeStrict: 'Суворий', @@ -152,7 +146,7 @@ const REDACT_I18N = { redactAliasAdd: 'Додати', redactAliasHint: 'Замініть конкретне ім\'я на власну мітку, напр. «David Jr» → [Junior].', redactUploadAria: 'Завантаження файлів', - redactUploadDrop: 'Перетягніть до 5 файлів сюди, або', + redactUploadDrop: 'Перетягніть один файл сюди, або', redactUploadBrowse: 'огляд', redactUploadHint: 'текст обробляється в пам\'яті, ніколи не зберігається', redactUploadClear: '× Очистити', @@ -173,9 +167,7 @@ const REDACT_I18N = { redactEngine: 'Silnik', redactEngineAzureMini: 'Azure gpt-4o-mini', redactEngineAzureFull: 'Azure gpt-4o', - redactEngineGpu: 'GPU (cuttlefish)', - redactEngineRegex: 'Tylko regex', - redactEngineHint: 'Silniki Azure używają kredytów Azure BNL. GPU korzysta z lokalnego proxy LiteLLM. Tylko regex jest natychmiastowy i bezpłatny, ale nie znajdzie imion ani organizacji.', + redactEngineHint: 'gpt-4o-mini: 1 kredyt — szybko, dobrze radzi sobie z większością dokumentów. gpt-4o: 2 kredyty — wyższa dokładność dla złożonych lub wieloosobowych spraw.', redactMode: 'Tryb', redactModeStandard: 'Standardowy', redactModeStrict: 'Ścisły', @@ -207,7 +199,7 @@ const REDACT_I18N = { redactAliasAdd: 'Dodaj', redactAliasHint: 'Zastąp konkretną nazwę własną etykietą, np. «David Jr» → [Junior].', redactUploadAria: 'Przesyłanie pliku', - redactUploadDrop: 'Upuść do 5 plików tutaj lub', + redactUploadDrop: 'Upuść jeden plik tutaj lub', redactUploadBrowse: 'przeglądaj', redactUploadHint: 'tekst wyodrębniany w pamięci, nigdy nie przechowywany', redactUploadClear: '× Wyczyść', @@ -1126,6 +1118,9 @@ async function runTool(event) { lastToolPayload = { ...payload }; setBusy(true); + if (state.activeTool === 'redact') { + els.results.innerHTML = '

Redacting document…

'; + } renderTrace([ { label: 'Query interpretation', detail: 'Preparing request.', status: 'running' }, ]); @@ -1218,7 +1213,7 @@ function setupUpload() { async function handleFiles(fileList) { const allowed = ['pdf', 'docx', 'txt']; - const files = Array.from(fileList).slice(0, 5); + const files = Array.from(fileList).slice(0, 1); for (const file of files) { const ext = file.name.split('.').pop().toLowerCase(); @@ -1228,7 +1223,7 @@ async function handleFiles(fileList) { } } - els.status.textContent = files.length === 1 ? `Extracting ${files[0].name}…` : `Extracting ${files.length} files…`; + els.status.textContent = `Extracting ${files[0].name}…`; setBusy(true); const parts = []; @@ -1256,9 +1251,7 @@ async function handleFiles(fileList) { if (data.truncated) anyTruncated = true; } - const combined = parts.length === 1 - ? parts[0].text - : parts.map((p) => `--- Document: ${p.filename} ---\n\n${p.text}`).join('\n\n'); + const combined = parts[0].text; const MAX_COMBINED = 128000; const combinedTruncated = combined.length > MAX_COMBINED; @@ -1271,9 +1264,7 @@ async function handleFiles(fileList) { els.uploadFileInfo.classList.remove('is-hidden'); const truncNote = (anyTruncated || combinedTruncated) ? ' — truncated to 128 000 char limit' : ''; - els.status.textContent = parts.length === 1 - ? `Extracted ${totalChars.toLocaleString()} chars from ${parts[0].filename}${truncNote}.` - : `Extracted ${totalChars.toLocaleString()} chars total from ${parts.length} files${truncNote}.`; + els.status.textContent = `Extracted ${totalChars.toLocaleString()} chars from ${parts[0].filename}${truncNote}.`; } catch (err) { els.status.textContent = err.message; resetUpload(); @@ -1560,10 +1551,13 @@ function renderMainFinding(data) { ? `` : ''; + const sourceDocIds = lastToolPayload?.doc_ids?.join(',') || ''; + const suggestedTitle = `Redacted document — ${new Date().toLocaleDateString()}`; const dlRow = `
+
`; return `${viewToggle}
${highlightRedactedText(lastRedactedText)}
${inventoryHtml}${upgradeBtn}${dlRow}`; diff --git a/includes/LegalTools.php b/includes/LegalTools.php index f19e483..25acb3a 100644 --- a/includes/LegalTools.php +++ b/includes/LegalTools.php @@ -1120,7 +1120,7 @@ PROMPT; // Build officials note $officialsNote = ''; if ($keepOfficials) { - $officialsNote = "\n\nOFFICIALS — for persons identified as JUDGE, EXPERT_WITNESS, or CASEWORKER in an official capacity: do NOT replace their name with a plain bracket tag. Instead use the format [ROLE: Name], e.g. [JUDGE: Andersen] or [EXPERT_WITNESS: Dr. Larsen]. Their name must remain visible inside the tag."; + $officialsNote = "\n\nOFFICIALS — for persons identified as JUDGE, ATTORNEY, EXPERT_WITNESS, or CASEWORKER in an official capacity: do NOT replace their name with a plain bracket tag. Instead use the format [ROLE: Name], e.g. [JUDGE: Andersen], [ATTORNEY: Skretting] or [EXPERT_WITNESS: Dr. Larsen]. Their name must remain visible inside the tag."; } $allowedTypesNote = ''; @@ -1363,7 +1363,7 @@ PROMPT; private function applyGenericTags(string $text): string { // Collapse contextual role tags (e.g. [FATHER], [JUDGE: Andersen], [CHILD_1]) → [PERSON] - $text = preg_replace('/\[(?:FATHER|MOTHER|CHILD(?:_\d+)?|GRANDPARENT|SIBLING|ATTORNEY|JUDGE(?::\s*[^\]]+)?|CASEWORKER(?::\s*[^\]]+)?|EXPERT_WITNESS(?::\s*[^\]]+)?|PERSON(?:_\d+)?)\]/u', '[PERSON]', $text) ?? $text; + $text = preg_replace('/\[(?:FATHER|MOTHER|CHILD(?:_\d+)?|GRANDPARENT|SIBLING|ATTORNEY(?::\s*[^\]]+)?|JUDGE(?::\s*[^\]]+)?|CASEWORKER(?::\s*[^\]]+)?|EXPERT_WITNESS(?::\s*[^\]]+)?|PERSON(?:_\d+)?)\]/u', '[PERSON]', $text) ?? $text; return $text; } @@ -1382,7 +1382,7 @@ PROMPT; // Replace named role tags (keeping consistent mapping per unique tag) $text = preg_replace_callback( - '/\[(FATHER|MOTHER|CHILD(?:_\d+)?|GRANDPARENT|SIBLING|ATTORNEY|JUDGE(?::\s*[^\]]+)?|CASEWORKER(?::\s*[^\]]+)?|EXPERT_WITNESS(?::\s*[^\]]+)?|PERSON(?:_\d+)?)\]/u', + '/\[(FATHER|MOTHER|CHILD(?:_\d+)?|GRANDPARENT|SIBLING|ATTORNEY(?::\s*[^\]]+)?|JUDGE(?::\s*[^\]]+)?|CASEWORKER(?::\s*[^\]]+)?|EXPERT_WITNESS(?::\s*[^\]]+)?|PERSON(?:_\d+)?)\]/u', function (array $m) use (&$nameCursor, &$personMap, $norwegianNames): string { $key = $m[1]; if (!isset($personMap[$key])) { diff --git a/includes/PricingCatalog.php b/includes/PricingCatalog.php index 60bd70b..9169f06 100644 --- a/includes/PricingCatalog.php +++ b/includes/PricingCatalog.php @@ -133,7 +133,7 @@ final class PricingCatalog 'translate' => 1, 'korrespond_refine' => 1, 'timeline' => 2, - 'redact' => 2, + 'redact' => 1, // minimum (gpt-4o-mini); azure_full overrides to 2 in api/redact.php 'barnevernet' => 3, 'advocate' => 3, 'korrespond' => 3, diff --git a/redact.php b/redact.php index 338111e..e9d577f 100644 --- a/redact.php +++ b/redact.php @@ -19,10 +19,8 @@ require_once __DIR__ . '/includes/layout.php'; Engine - - -

Azure engines use your BNL Azure credits. GPU runs the local LiteLLM proxy. Regex-only is instant and free but finds no names or organisations.

+

gpt-4o-mini: 1 credit — fast, handles most documents well. gpt-4o: 2 credits — higher accuracy for complex or multi-person cases.

Advanced settings @@ -95,10 +93,10 @@ require_once __DIR__ . '/includes/layout.php';
- +
-

Drop up to 5 files here, or

+

Drop one file here, or

PDF, DOCX, TXTtext extracted in memory, never stored