Redact: collapsible advanced settings, download TXT/DOCX/copy

- Wrap Mode/Region/Entities/Officials/Output/Exempt/Aliases in a
  <details> toggle so the form opens clean with only engine + input visible
- After redaction: Copy, Download .txt, Download .docx buttons appear
  below the redacted output (all four languages translated)
- New api/redact-download.php: returns plain text or a minimal valid
  DOCX built from scratch with ZipArchive (no external dependencies)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-15 00:33:50 +02:00
parent 8c12d5e778
commit 30915bcb09
4 changed files with 321 additions and 52 deletions
+129
View File
@@ -0,0 +1,129 @@
<?php
declare(strict_types=1);
require_once __DIR__ . '/../includes/LegalTools.php';
// Endpoint flow: require an authenticated POST, read {text, format} from the JSON
// body, then stream the text back as a file download (plain text or minimal DOCX).
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

// NOTE(review): 600000 / 500000 look like size limits for the JSON body and the
// "text" field respectively — confirm against the helpers in LegalTools.php.
$input = dbnToolsJsonInput(600000);
$text = dbnToolsString($input, 'text', 500000);

// Only the exact string 'docx' selects DOCX; anything else (missing, unknown, or a
// non-string value) falls back to plain text. The strict comparison avoids the
// "Array to string conversion" warning a (string) cast raises on array input.
$format = ($input['format'] ?? null) === 'docx' ? 'docx' : 'txt';

// Emit a JSON error envelope with a matching Content-Type, then stop.
$failWithJson = static function (string $message): void {
    http_response_code(500);
    header('Content-Type: application/json; charset=UTF-8');
    header('Cache-Control: no-store');
    echo json_encode(['error' => ['message' => $message]]);
    exit;
};

if ($format === 'txt') {
    header('Content-Type: text/plain; charset=UTF-8');
    header('Content-Disposition: attachment; filename="redacted.txt"');
    header('Cache-Control: no-store');
    echo $text;
    exit;
}

// DOCX: minimal valid Office Open XML package built with ZipArchive
if (!class_exists('ZipArchive')) {
    $failWithJson('ZipArchive extension not available.');
}

try {
    $docx = buildMinimalDocx($text);
} catch (Throwable $e) {
    // Boundary handler: log the detail server-side and return a clean JSON error
    // instead of leaking an uncaught-exception page to the client.
    error_log('redact-download: DOCX build failed: ' . $e->getMessage());
    $failWithJson('Could not build the DOCX file.');
}

header('Content-Type: application/vnd.openxmlformats-officedocument.wordprocessingml.document');
header('Content-Disposition: attachment; filename="redacted.docx"');
// strlen() is intentionally byte-wise here: Content-Length counts bytes.
header('Content-Length: ' . strlen($docx));
header('Cache-Control: no-store');
echo $docx;
exit;
/**
 * Build a minimal Office Open XML (.docx) package containing the given text.
 *
 * The archive is assembled in a temporary file with ZipArchive, read back into
 * memory, and the temporary file is always removed afterwards.
 *
 * @param string $text Plain text to place in the document body.
 *
 * @return string Raw bytes of the .docx (ZIP) package.
 *
 * @throws RuntimeException When the temporary file or the archive cannot be written or read.
 */
function buildMinimalDocx(string $text): string
{
    // Keep the file tempnam() created instead of unlinking it and re-creating a
    // predictable "<name>.docx" path: that gap let another process claim the path.
    $tmp = tempnam(sys_get_temp_dir(), 'dbn_docx_');
    if ($tmp === false) {
        throw new RuntimeException('Could not create a temporary file for the DOCX package.');
    }

    try {
        $zip = new ZipArchive();
        // OVERWRITE makes ZipArchive treat the existing zero-byte temp file as an empty archive.
        $opened = $zip->open($tmp, ZipArchive::CREATE | ZipArchive::OVERWRITE);
        if ($opened !== true) {
            throw new RuntimeException('Could not open the DOCX archive (ZipArchive error code ' . $opened . ').');
        }

        // Entry name => XML content. [Content_Types].xml is added first, as in the original order.
        $parts = [
            '[Content_Types].xml' => contentTypesXml(),
            '_rels/.rels' => relsXml(),
            'word/document.xml' => documentXml($text),
            'word/_rels/document.xml.rels' => wordRelsXml(),
            'docProps/app.xml' => appXml(),
            'docProps/core.xml' => coreXml(),
        ];
        foreach ($parts as $entryName => $xml) {
            if (!$zip->addFromString($entryName, $xml)) {
                throw new RuntimeException('Could not add "' . $entryName . '" to the DOCX archive.');
            }
        }

        // close() is where the archive is actually written to disk, so its result matters.
        if (!$zip->close()) {
            throw new RuntimeException('Could not finalize the DOCX archive.');
        }

        $bytes = file_get_contents($tmp);
        if ($bytes === false) {
            throw new RuntimeException('Could not read back the DOCX archive.');
        }

        return $bytes;
    } finally {
        // Remove the temp file on success and on every failure path.
        if (is_file($tmp)) {
            unlink($tmp);
        }
    }
}
/**
 * Render plain text as the word/document.xml part of a DOCX package.
 *
 * Each input line becomes one paragraph; empty lines become empty paragraphs.
 * Text runs use Courier New at w:sz 20 (half-points, i.e. 10 pt) and preserve
 * whitespace. The section is US Letter (12240 x 15840 twips) with 1440-twip margins.
 *
 * @param string $text Text to render; CRLF, CR and LF line endings are all accepted.
 *
 * @return string Complete XML document for word/document.xml.
 */
function documentXml(string $text): string
{
    // Replace CRLF before lone CR. The previous order converted "\r" first, which
    // turned every "\r\n" into "\n\n" and inserted a blank paragraph after each line.
    $normalized = str_replace(["\r\n", "\r"], "\n", $text);

    // ENT_SUBSTITUTE: invalid UTF-8 becomes U+FFFD instead of htmlspecialchars()
    // returning '' and silently dropping the whole line.
    // ENT_DISALLOWED: code points illegal in XML 1.0 (e.g. most C0 control
    // characters) become U+FFFD so the part stays well-formed.
    $escapeFlags = ENT_XML1 | ENT_COMPAT | ENT_SUBSTITUTE | ENT_DISALLOWED;

    $runOpen = '<w:p><w:r><w:rPr><w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/><w:sz w:val="20"/></w:rPr>'
        . '<w:t xml:space="preserve">';
    $runClose = '</w:t></w:r></w:p>';

    $paras = [];
    foreach (explode("\n", $normalized) as $line) {
        if ($line === '') {
            $paras[] = '<w:p/>';
            continue;
        }
        $paras[] = $runOpen . htmlspecialchars($line, $escapeFlags, 'UTF-8') . $runClose;
    }

    return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        . '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
        . '<w:body>' . implode('', $paras)
        . '<w:sectPr><w:pgSz w:w="12240" w:h="15840"/>'
        . '<w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440"/>'
        . '</w:sectPr></w:body></w:document>';
}
/**
 * Return the [Content_Types].xml part of the DOCX package.
 *
 * Declares default content types for .rels and .xml entries and overrides for
 * the main document, the extended (app) properties and the core properties.
 *
 * @return string Complete XML document for [Content_Types].xml.
 */
function contentTypesXml(): string
{
    // The core properties part uses the OPC package content type
    // "application/vnd.openxmlformats-package.core-properties+xml"; the previous
    // value was not a registered content type for that part.
    return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        . '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
        . '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
        . '<Default Extension="xml" ContentType="application/xml"/>'
        . '<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
        . '<Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>'
        . '<Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>'
        . '</Types>';
}
/**
 * Return the package-level relationships part (_rels/.rels).
 *
 * Links the package root to the main document, the core properties and the
 * extended (app) properties, with ids rId1..rId3 in that order.
 *
 * @return string Complete XML document for _rels/.rels.
 */
function relsXml(): string
{
    // Target part => relationship type, in id order.
    $links = [
        'word/document.xml' => 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument',
        'docProps/core.xml' => 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties',
        'docProps/app.xml' => 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties',
    ];

    $xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>';
    $xml .= '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">';

    $seq = 0;
    foreach ($links as $target => $type) {
        $seq++;
        $xml .= '<Relationship Id="rId' . $seq . '" Type="' . $type . '" Target="' . $target . '"/>';
    }

    return $xml . '</Relationships>';
}
/**
 * Return the document-level relationships part (word/_rels/document.xml.rels).
 *
 * The document references no other parts, so the relationship list is empty.
 *
 * @return string Complete XML document containing an empty Relationships element.
 */
function wordRelsXml(): string
{
    $declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>';
    $emptyRelationships = '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"/>';

    return $declaration . $emptyRelationships;
}
/**
 * Return the extended properties part (docProps/app.xml).
 *
 * Only the producing application name is recorded.
 *
 * @return string Complete XML document for docProps/app.xml.
 */
function appXml(): string
{
    $pieces = [
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
        '<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties">',
        '<Application>DoBetterNorge Redact</Application>',
        '</Properties>',
    ];

    return implode('', $pieces);
}
/**
 * Return the core properties part (docProps/core.xml).
 *
 * Records the creator and a creation timestamp in W3CDTF form.
 *
 * @return string Complete XML document for docProps/core.xml.
 */
function coreXml(): string
{
    // The format ends in a literal "Z" (UTC designator), so the clock value must
    // be UTC too. date() used the server's default timezone, which mislabelled
    // local time as UTC on any non-UTC host; gmdate() always formats in UTC.
    $createdUtc = gmdate('Y-m-d\TH:i:s\Z');

    return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        . '<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"'
        . ' xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/"'
        . ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">'
        . '<dc:creator>DoBetterNorge</dc:creator>'
        . '<dcterms:created xsi:type="dcterms:W3CDTF">' . $createdUtc . '</dcterms:created>'
        . '</cp:coreProperties>';
}
+68
View File
@@ -1470,3 +1470,71 @@ p {
accent-color: var(--teal);
cursor: pointer;
}
/* ─── Advanced settings panel (Redact tool) ───────────────────────────────── */
/* .advanced-panel is the <details> element; .advanced-toggle is its <summary>. */
.advanced-panel {
border-top: 1px solid var(--line);
margin-top: 0.6rem;
}
.advanced-toggle {
display: flex;
align-items: center;
gap: 0.4rem;
padding: 0.55rem 0;
font-size: 0.82rem;
font-weight: 500;
color: var(--teal);
cursor: pointer;
user-select: none;
/* Hide the default disclosure marker; a custom arrow is drawn via ::before below. */
list-style: none;
}
/* WebKit-specific pseudo-element for the default disclosure marker. */
.advanced-toggle::-webkit-details-marker { display: none; }
/* Custom arrow; inline-block so the transform below can rotate it. */
.advanced-toggle::before {
content: '▸';
font-size: 0.7rem;
transition: transform 0.18s ease;
display: inline-block;
}
/* Rotate the arrow to point down while the panel is expanded. */
.advanced-panel[open] .advanced-toggle::before {
transform: rotate(90deg);
}
/* Add spacing between the summary and the revealed controls when expanded. */
.advanced-panel[open] .advanced-toggle {
margin-bottom: 0.5rem;
}
/* ─── Redact download buttons ─────────────────────────────────────────────── */
/* Row of Copy / Download buttons rendered below the redacted output; wraps on narrow widths. */
.redact-downloads {
    display: flex;
    flex-wrap: wrap;
    gap: 0.5rem;
    margin-top: 1rem;
}

.redact-dl-btn {
    padding: 0.38rem 0.9rem;
    border-radius: 6px;
    font-size: 0.82rem;
    font-weight: 500;
    background: var(--soft-teal);
    color: var(--teal-dark);
    border: 1px solid rgba(15, 118, 110, 0.2);
    cursor: pointer;
    transition: background 0.15s, border-color 0.15s;
}

/* Hover feedback only for enabled buttons: a disabled (busy) button must not
   look interactive while a download is in flight. */
.redact-dl-btn:hover:not(:disabled) {
    background: #d0ede9;
    border-color: var(--teal);
}

/* Disabled state is set by the script while the DOCX request is pending. */
.redact-dl-btn:disabled {
    opacity: 0.55;
    cursor: progress;
}
+68 -1
View File
@@ -52,6 +52,11 @@ const REDACT_I18N = {
redactRunning: 'Redacting…',
redactReadyTitle: 'Ready',
redactReadyDesc: 'Paste text or upload a file, configure redaction options, then run.',
redactAdvancedToggle: 'Advanced settings',
redactDownloadTxt: 'Download .txt',
redactDownloadDocx: 'Download .docx',
redactCopy: 'Copy',
redactCopied: 'Copied!',
},
no: {
redactEngine: 'Motor',
@@ -101,6 +106,11 @@ const REDACT_I18N = {
redactRunning: 'Redigerer…',
redactReadyTitle: 'Klar',
redactReadyDesc: 'Lim inn tekst eller last opp en fil, konfigurer redigeringsalternativene, og kjør.',
redactAdvancedToggle: 'Avanserte innstillinger',
redactDownloadTxt: 'Last ned .txt',
redactDownloadDocx: 'Last ned .docx',
redactCopy: 'Kopier',
redactCopied: 'Kopiert!',
},
uk: {
redactEngine: 'Рушій',
@@ -150,6 +160,11 @@ const REDACT_I18N = {
redactRunning: 'Редагування…',
redactReadyTitle: 'Готово',
redactReadyDesc: 'Вставте текст або завантажте файл, налаштуйте параметри, запустіть.',
redactAdvancedToggle: 'Розширені налаштування',
redactDownloadTxt: 'Завантажити .txt',
redactDownloadDocx: 'Завантажити .docx',
redactCopy: 'Копіювати',
redactCopied: 'Скопійовано!',
},
pl: {
redactEngine: 'Silnik',
@@ -199,12 +214,18 @@ const REDACT_I18N = {
redactRunning: 'Redagowanie…',
redactReadyTitle: 'Gotowe',
redactReadyDesc: 'Wklej tekst lub wgraj plik, skonfiguruj opcje redakcji, uruchom.',
redactAdvancedToggle: 'Ustawienia zaawansowane',
redactDownloadTxt: 'Pobierz .txt',
redactDownloadDocx: 'Pobierz .docx',
redactCopy: 'Kopiuj',
redactCopied: 'Skopiowano!',
},
};
// Module-level caches of the most recent tool results, read by the export/download helpers.
// Events from the last timeline result.
let lastTimelineEvents = [];
let audioQueue = []; // [{file, status: 'pending'|'processing'|'done'|'error', result}]
// Presumably the last transcription result used by the transcript downloads — TODO confirm.
let lastTranscriptData = null;
// Redacted text from the last redact result; null until one has been rendered.
// Read by copyRedactedText / downloadRedactedTxt / downloadRedactedDocx.
let lastRedactedText = null;
const VOCAB_PRESETS = {
barnerett: 'Barnevernet, Fylkesnemnda, barnevernloven, barneloven, barnets beste, samvær, foreldreansvar, omsorgsovertakelse, sakkyndig, advokat, prosessfullmektig, dommer, vitne, tolk, bistandsadvokat, fosterforeldre, fosterhjem, akuttvedtak, statsforvalter, Bufetat, saksbehandler, rettslig medhold, begjæring, samtykke, tilsynsfører',
@@ -729,6 +750,9 @@ document.addEventListener('DOMContentLoaded', () => {
if (e.target.closest('#dlTxt')) downloadTranscriptTxt();
if (e.target.closest('#dlSrt')) downloadTranscriptSrt();
if (e.target.closest('#dlVtt')) downloadTranscriptVtt();
if (e.target.closest('#rdlCopy')) copyRedactedText();
if (e.target.closest('#rdlTxt')) downloadRedactedTxt();
if (e.target.closest('#rdlDocx')) downloadRedactedDocx();
});
const activeTool = document.body.dataset.activeTool || state.activeTool;
setTool(activeTool);
@@ -1074,7 +1098,14 @@ function renderMainFinding(data) {
return `<p class="answer">${escapeHtml(data.answer || data.what_we_found || '')}</p>`;
}
if (data.tool === 'redact') {
return `<pre class="redacted-output">${escapeHtml(data.redacted_text || '')}</pre>${renderEntityCounts(data.entity_counts)}`;
lastRedactedText = data.redacted_text || '';
const t = (k) => currentRedactT(k) || k;
const dlRow = `<div class="redact-downloads">
<button type="button" class="redact-dl-btn" id="rdlCopy">${t('redactCopy')}</button>
<button type="button" class="redact-dl-btn" id="rdlTxt">${t('redactDownloadTxt')}</button>
<button type="button" class="redact-dl-btn" id="rdlDocx">${t('redactDownloadDocx')}</button>
</div>`;
return `<pre class="redacted-output">${escapeHtml(lastRedactedText)}</pre>${renderEntityCounts(data.entity_counts)}${dlRow}`;
}
if (data.tool === 'timeline') {
lastTimelineEvents = data.events || [];
@@ -1459,6 +1490,42 @@ function downloadTranscriptVtt() {
downloadBlob(new Blob([lines.join('\n')], { type: 'text/vtt' }), 'transcript.vtt');
}
/**
 * Copy the most recent redacted text to the clipboard and briefly show a
 * confirmation label on the Copy button (#rdlCopy).
 *
 * Does nothing when there is no redacted text. If the clipboard write fails
 * (permission denied, or the Clipboard API is unavailable), the error is shown
 * and the button label is left untouched.
 */
async function copyRedactedText() {
    if (!lastRedactedText) return;

    try {
        // Also throws synchronously (caught here) when navigator.clipboard is undefined.
        await navigator.clipboard.writeText(lastRedactedText);
    } catch (err) {
        alert(err.message);
        return;
    }

    const btn = document.getElementById('rdlCopy');
    if (!btn) return;

    // Remember the idle label once. Re-reading textContent on every click would
    // capture "Copied!" on a quick second click and leave the button stuck on it.
    if (btn.dataset.idleLabel === undefined) {
        btn.dataset.idleLabel = btn.textContent;
    }

    // Restart the revert timer so repeated clicks extend the confirmation
    // instead of stacking timers.
    clearTimeout(copyRedactedText.resetTimer);
    btn.textContent = currentRedactT('redactCopied') || 'Copied!';
    copyRedactedText.resetTimer = setTimeout(() => {
        btn.textContent = btn.dataset.idleLabel;
    }, 1800);
}
/**
 * Offer the most recent redacted text as a "redacted.txt" download.
 * Does nothing when no redacted text is available.
 */
function downloadRedactedTxt() {
    if (lastRedactedText) {
        const txtBlob = new Blob([lastRedactedText], { type: 'text/plain' });
        downloadBlob(txtBlob, 'redacted.txt');
    }
}
/**
 * Request a DOCX rendering of the most recent redacted text from the server
 * and offer it as a "redacted.docx" download.
 *
 * The DOCX button (#rdlDocx) is disabled while the request is in flight.
 * Failures are reported with alert(); when the server replies with a JSON
 * error envelope ({ error: { message } }), that message is shown instead of
 * the generic fallback.
 */
async function downloadRedactedDocx() {
    if (!lastRedactedText) return;

    const btn = document.getElementById('rdlDocx');
    if (btn) btn.disabled = true;

    try {
        const resp = await fetch('/api/redact-download.php', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text: lastRedactedText, format: 'docx' }),
        });

        if (!resp.ok) {
            let message = 'Download failed';
            try {
                // The endpoint reports failures as { error: { message: '…' } }.
                const payload = await resp.json();
                if (payload && payload.error && payload.error.message) {
                    message = payload.error.message;
                }
            } catch (_) {
                // Error body was not JSON; keep the generic message.
            }
            throw new Error(message);
        }

        const blob = await resp.blob();
        downloadBlob(blob, 'redacted.docx');
    } catch (e) {
        alert(e.message);
    } finally {
        // Re-enable the button on success and on every failure path.
        if (btn) btn.disabled = false;
    }
}
function resetAudio() {
audioQueue = [];
if (!els.audioInput) return;
+56 -51
View File
@@ -24,61 +24,66 @@ require_once __DIR__ . '/includes/layout.php';
</div>
<p class="upload-hint" data-i18n="redactEngineHint">Azure engines use your BNL Azure credits. GPU runs the local LiteLLM proxy. Regex-only is instant and free but finds no names or organisations.</p>
<div class="control-row" id="redactModeControl">
<span class="control-label" data-i18n="redactMode">Mode</span>
<label><input type="radio" name="redactionMode" value="standard" checked> <span data-i18n="redactModeStandard">Standard</span></label>
<label><input type="radio" name="redactionMode" value="strict"> <span data-i18n="redactModeStrict">Strict</span></label>
</div>
<p class="upload-hint" data-i18n="redactModeHint">Standard: regex patterns + LLM scan for names/orgs/places. Strict: also replaces any capitalised two-word phrase as a potential name — more aggressive, may produce false positives.</p>
<details class="advanced-panel" id="redactAdvanced">
<summary class="advanced-toggle" data-i18n="redactAdvancedToggle">Advanced settings</summary>
<div class="control-row" id="redactRegionControl">
<span class="control-label" data-i18n="redactRegion">Region</span>
<label><input type="radio" name="redactionRegion" value="nordic" checked> <span data-i18n="redactRegionNordic">Nordic</span></label>
<label><input type="radio" name="redactionRegion" value="european"> <span data-i18n="redactRegionEuropean">European</span></label>
<label><input type="radio" name="redactionRegion" value="echr"> <span data-i18n="redactRegionEchr">ECHR</span></label>
<label><input type="radio" name="redactionRegion" value="global"> <span data-i18n="redactRegionGlobal">Global</span></label>
</div>
<p class="upload-hint" data-i18n="redactRegionHint">Nordic: Norwegian fødselsnummer, phone, email, addresses. European: adds IBAN, SE personnummer, UK NI. ECHR: adds application numbers, DOB phrases. Global: adds US SSN, document numbers.</p>
<div class="control-row entity-toggles" id="redactEntityControl">
<span class="control-label" data-i18n="redactEntities">Redact</span>
<label><input type="checkbox" name="redactNames" id="redactNames" checked> <span data-i18n="redactEntityNames">Names</span></label>
<label><input type="checkbox" name="redactOrgs" id="redactOrgs" checked> <span data-i18n="redactEntityOrgs">Organisations</span></label>
<label><input type="checkbox" name="redactPlaces" id="redactPlaces" checked> <span data-i18n="redactEntityPlaces">Places</span></label>
<label><input type="checkbox" name="redactDob" id="redactDob" checked> <span data-i18n="redactEntityDob">Dates of birth</span></label>
</div>
<div class="control-row" id="redactOfficialsControl">
<span class="control-label" data-i18n="redactOfficials">Officials</span>
<label><input type="checkbox" name="keepOfficials" id="keepOfficialsCheck"> <span data-i18n="redactKeepOfficials">Keep official names (judges, experts)</span></label>
</div>
<p class="upload-hint" data-i18n="redactOfficialsHint">When checked, judges, expert witnesses and caseworkers keep their names in a labelled tag: [JUDGE: Andersen]. Uncheck to replace all names with generic role tags.</p>
<div class="control-row" id="redactOutputControl">
<span class="control-label" data-i18n="redactOutput">Output</span>
<label><input type="radio" name="outputFormat" value="contextual" checked> <span data-i18n="redactOutputContextual">Contextual tags</span> &#9733; <small class="control-hint">[FATHER], [JUDGE: Name]</small></label>
<label><input type="radio" name="outputFormat" value="generic"> <span data-i18n="redactOutputGeneric">Generic tags</span> <small class="control-hint">[PERSON], [ORG]</small></label>
<label><input type="radio" name="outputFormat" value="pseudonym"> <span data-i18n="redactOutputPseudo">Pseudonyms</span> <small class="control-hint">Ola Nordmann, +47 400 00 001</small></label>
</div>
<p class="upload-hint" data-i18n="redactOutputHint">Contextual: each person gets a role tag so their identity is traceable within the document. Generic: all names become [PERSON]. Pseudonyms: replaced with plausible fake Norwegian values.</p>
<div class="exempt-section" id="exemptSection">
<div class="alias-header">
<span class="control-label" data-i18n="redactExempt">Exempt names</span>
<button type="button" id="addExemptRow" class="alias-add-btn">+ <span data-i18n="redactExemptAdd">Add</span></button>
<div class="control-row" id="redactModeControl">
<span class="control-label" data-i18n="redactMode">Mode</span>
<label><input type="radio" name="redactionMode" value="standard" checked> <span data-i18n="redactModeStandard">Standard</span></label>
<label><input type="radio" name="redactionMode" value="strict"> <span data-i18n="redactModeStrict">Strict</span></label>
</div>
<div id="exemptRows"></div>
<p class="alias-hint" data-i18n="redactExemptHint">Names listed here will never be redacted, even if the AI would otherwise remove them — e.g. a judge or expert who must remain identifiable.</p>
</div>
<p class="upload-hint" data-i18n="redactModeHint">Standard: regex patterns + LLM scan for names/orgs/places. Strict: also replaces any capitalised two-word phrase as a potential name — more aggressive, may produce false positives.</p>
<div class="alias-section" id="aliasSection">
<div class="alias-header">
<span class="control-label" data-i18n="redactAliases">Name aliases</span>
<button type="button" id="addAliasRow" class="alias-add-btn">+ <span data-i18n="redactAliasAdd">Add</span></button>
<div class="control-row" id="redactRegionControl">
<span class="control-label" data-i18n="redactRegion">Region</span>
<label><input type="radio" name="redactionRegion" value="nordic" checked> <span data-i18n="redactRegionNordic">Nordic</span></label>
<label><input type="radio" name="redactionRegion" value="european"> <span data-i18n="redactRegionEuropean">European</span></label>
<label><input type="radio" name="redactionRegion" value="echr"> <span data-i18n="redactRegionEchr">ECHR</span></label>
<label><input type="radio" name="redactionRegion" value="global"> <span data-i18n="redactRegionGlobal">Global</span></label>
</div>
<div id="aliasRows"></div>
<p class="alias-hint" data-i18n="redactAliasHint">Replace a specific name with a custom bracketed label, e.g. &ldquo;David Jr&rdquo; &rarr; [Junior].</p>
</div>
<p class="upload-hint" data-i18n="redactRegionHint">Nordic: Norwegian fødselsnummer, phone, email, addresses. European: adds IBAN, SE personnummer, UK NI. ECHR: adds application numbers, DOB phrases. Global: adds US SSN, document numbers.</p>
<div class="control-row entity-toggles" id="redactEntityControl">
<span class="control-label" data-i18n="redactEntities">Redact</span>
<label><input type="checkbox" name="redactNames" id="redactNames" checked> <span data-i18n="redactEntityNames">Names</span></label>
<label><input type="checkbox" name="redactOrgs" id="redactOrgs" checked> <span data-i18n="redactEntityOrgs">Organisations</span></label>
<label><input type="checkbox" name="redactPlaces" id="redactPlaces" checked> <span data-i18n="redactEntityPlaces">Places</span></label>
<label><input type="checkbox" name="redactDob" id="redactDob" checked> <span data-i18n="redactEntityDob">Dates of birth</span></label>
</div>
<div class="control-row" id="redactOfficialsControl">
<span class="control-label" data-i18n="redactOfficials">Officials</span>
<label><input type="checkbox" name="keepOfficials" id="keepOfficialsCheck"> <span data-i18n="redactKeepOfficials">Keep official names (judges, experts)</span></label>
</div>
<p class="upload-hint" data-i18n="redactOfficialsHint">When checked, judges, expert witnesses and caseworkers keep their names in a labelled tag: [JUDGE: Andersen]. Uncheck to replace all names with generic role tags.</p>
<div class="control-row" id="redactOutputControl">
<span class="control-label" data-i18n="redactOutput">Output</span>
<label><input type="radio" name="outputFormat" value="contextual" checked> <span data-i18n="redactOutputContextual">Contextual tags</span> &#9733; <small class="control-hint">[FATHER], [JUDGE: Name]</small></label>
<label><input type="radio" name="outputFormat" value="generic"> <span data-i18n="redactOutputGeneric">Generic tags</span> <small class="control-hint">[PERSON], [ORG]</small></label>
<label><input type="radio" name="outputFormat" value="pseudonym"> <span data-i18n="redactOutputPseudo">Pseudonyms</span> <small class="control-hint">Ola Nordmann, +47 400 00 001</small></label>
</div>
<p class="upload-hint" data-i18n="redactOutputHint">Contextual: each person gets a role tag so their identity is traceable within the document. Generic: all names become [PERSON]. Pseudonyms: replaced with plausible fake Norwegian values.</p>
<div class="exempt-section" id="exemptSection">
<div class="alias-header">
<span class="control-label" data-i18n="redactExempt">Exempt names</span>
<button type="button" id="addExemptRow" class="alias-add-btn">+ <span data-i18n="redactExemptAdd">Add</span></button>
</div>
<div id="exemptRows"></div>
<p class="alias-hint" data-i18n="redactExemptHint">Names listed here will never be redacted, even if the AI would otherwise remove them — e.g. a judge or expert who must remain identifiable.</p>
</div>
<div class="alias-section" id="aliasSection">
<div class="alias-header">
<span class="control-label" data-i18n="redactAliases">Name aliases</span>
<button type="button" id="addAliasRow" class="alias-add-btn">+ <span data-i18n="redactAliasAdd">Add</span></button>
</div>
<div id="aliasRows"></div>
<p class="alias-hint" data-i18n="redactAliasHint">Replace a specific name with a custom bracketed label, e.g. &ldquo;David Jr&rdquo; &rarr; [Junior].</p>
</div>
</details>
<div class="upload-zone" id="uploadZone" role="region" aria-label="File upload" data-i18n-aria="redactUploadAria">
<input type="file" id="uploadInput" multiple accept=".pdf,.docx,.txt" aria-label="Choose files">