Redact tool: rich UI, multilingual, engine choice, output formats
- Custom inline form (EN/NO/UK/PL language switcher) replacing the generic stub
- Engine selector: Azure gpt-4o-mini (default), gpt-4o, GPU cuttlefish, regex-only
- Entity type toggles: names, organisations, places, dates of birth
- Output formats: contextual role tags, generic [PERSON], Norwegian pseudonyms
- Keep-officials mode: judges/experts kept in [JUDGE: Andersen] format
- Exempt names list: specific names excluded from redaction
- Hint paragraphs explaining each option in all four languages
- Backend: engine routing, callGpuLlm(), applyGenericTags(), applyPseudonymization()
- AzureOpenAiGateway: withDeployment() clone pattern for per-call model override

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+35
-1
@@ -13,6 +13,18 @@ dbnToolsWithTelemetry('redact', '', function () use ($input): array {
|
||||
$region = dbnToolsNormalizeRegion($input['region'] ?? 'nordic');
|
||||
$language = dbnToolsNormalizeLanguage($input['language'] ?? 'en');
|
||||
|
||||
$validEngines = ['azure_mini', 'azure_full', 'gpu', 'regex'];
|
||||
$engine = in_array((string)($input['engine'] ?? ''), $validEngines, true)
|
||||
? (string)$input['engine']
|
||||
: 'azure_mini';
|
||||
|
||||
$validFormats = ['contextual', 'generic', 'pseudonym'];
|
||||
$outputFormat = in_array((string)($input['output_format'] ?? ''), $validFormats, true)
|
||||
? (string)$input['output_format']
|
||||
: 'contextual';
|
||||
|
||||
$keepOfficials = (bool)($input['keep_officials'] ?? false);
|
||||
|
||||
$aliases = [];
|
||||
$rawAliases = $input['aliases'] ?? [];
|
||||
if (is_array($rawAliases)) {
|
||||
@@ -28,5 +40,27 @@ dbnToolsWithTelemetry('redact', '', function () use ($input): array {
|
||||
}
|
||||
}
|
||||
|
||||
return (new DbnLegalToolsService())->redact($text, $mode, $region, $language, $aliases);
|
||||
$exemptNames = [];
|
||||
$rawExempt = $input['exempt_names'] ?? [];
|
||||
if (is_array($rawExempt)) {
|
||||
foreach (array_slice($rawExempt, 0, 20) as $name) {
|
||||
$name = substr(trim((string)$name), 0, 100);
|
||||
if ($name !== '') {
|
||||
$exemptNames[] = $name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$rawTypes = $input['redact_types'] ?? [];
|
||||
$redactTypes = [
|
||||
'names' => ($rawTypes['names'] ?? true) !== false,
|
||||
'orgs' => ($rawTypes['orgs'] ?? true) !== false,
|
||||
'places' => ($rawTypes['places'] ?? true) !== false,
|
||||
'dob' => ($rawTypes['dob'] ?? true) !== false,
|
||||
];
|
||||
|
||||
return (new DbnLegalToolsService())->redact(
|
||||
$text, $mode, $region, $language, $aliases,
|
||||
$engine, $outputFormat, $keepOfficials, $exemptNames, $redactTypes
|
||||
);
|
||||
});
|
||||
|
||||
@@ -1415,3 +1415,58 @@ p {
|
||||
}
|
||||
|
||||
/* Small muted helper text shown next to a control label. */
.control-hint {
  color: var(--muted);
  font-weight: 400;
  font-size: 0.74rem;
}

/* ─── Exempt names section (Redact tool) ──────────────────────────────────── */

/* Visually separates the exempt-names list from the controls above it. */
.exempt-section {
  border-top: 1px solid var(--line);
  margin-top: 0.75rem;
  padding-top: 0.75rem;
}

/* One exempt name per row: text input followed by its remove button. */
.exempt-row {
  display: flex;
  gap: 0.5rem;
  align-items: center;
  margin-bottom: 0.4rem;
}

/* The input takes the remaining row width; min-width: 0 lets it shrink inside the flex row. */
.exempt-name-input {
  flex: 1;
  min-width: 0;
  padding: 0.3rem 0.55rem;
  font-size: 0.875rem;
  color: var(--ink);
  background: var(--panel);
  border: 1px solid var(--line);
  border-radius: 6px;
}

/* Focus ring for the exempt-name input. */
.exempt-name-input:focus {
  border-color: var(--teal);
  outline: 3px solid rgba(15, 118, 110, 0.28);
  outline-offset: 1px;
}

/* ─── Entity type toggles (Redact tool) ───────────────────────────────────── */

/* NOTE(review): no `display` is set here, so flex-wrap/gap only take effect if
   another rule makes this element a flex or grid container — confirm. */
.entity-toggles {
  gap: 0.4rem 1.1rem;
  flex-wrap: wrap;
}

/* Each toggle is a clickable label holding a checkbox and its caption. */
.entity-toggles label {
  display: flex;
  gap: 0.3rem;
  align-items: center;
  font-size: 0.875rem;
  user-select: none;
  cursor: pointer;
}

.entity-toggles input[type="checkbox"] {
  height: 15px;
  width: 15px;
  cursor: pointer;
  accent-color: var(--teal);
}
|
||||
|
||||
+304
-8
@@ -3,6 +3,205 @@ const state = {
|
||||
authenticated: Boolean(window.DBN_TOOLS_AUTHENTICATED),
|
||||
};
|
||||
|
||||
/**
 * UI strings for the Redact tool, keyed by interface language code
 * (en, no, uk, pl) and then by translation key.
 *
 * Every language table defines the same set of keys; the English table is
 * the fallback used by the lookup helper when a language or key is missing.
 * Region labels must match the prefixes used inside the matching
 * `redactRegionHint` so the hint reads consistently with the radio labels.
 */
const REDACT_I18N = {
  en: {
    redactEngine: 'Engine',
    redactEngineAzureMini: 'Azure gpt-4o-mini',
    redactEngineAzureFull: 'Azure gpt-4o',
    redactEngineGpu: 'GPU (cuttlefish)',
    redactEngineRegex: 'Regex only',
    redactEngineHint: 'Azure engines use your BNL Azure credits. GPU runs the local LiteLLM proxy. Regex-only is instant and free but finds no names or organisations.',
    redactMode: 'Mode',
    redactModeStandard: 'Standard',
    redactModeStrict: 'Strict',
    redactModeHint: 'Standard: regex patterns + LLM scan for names/orgs/places. Strict: also replaces any capitalised two-word phrase as a potential name — more aggressive, may produce false positives.',
    redactRegion: 'Region',
    redactRegionNordic: 'Nordic',
    redactRegionEuropean: 'European',
    redactRegionEchr: 'ECHR',
    redactRegionGlobal: 'Global',
    redactRegionHint: 'Nordic: Norwegian fødselsnummer, phone, email, addresses. European: adds IBAN, SE personnummer, UK NI. ECHR: adds application numbers, DOB phrases. Global: adds US SSN, document numbers.',
    redactEntities: 'Redact',
    redactEntityNames: 'Names',
    redactEntityOrgs: 'Organisations',
    redactEntityPlaces: 'Places',
    redactEntityDob: 'Dates of birth',
    redactOfficials: 'Officials',
    redactKeepOfficials: 'Keep official names (judges, experts)',
    redactOfficialsHint: 'When checked, judges, expert witnesses and caseworkers keep their names in a labelled tag: [JUDGE: Andersen]. Uncheck to replace all names with generic role tags.',
    redactOutput: 'Output',
    redactOutputContextual: 'Contextual tags',
    redactOutputGeneric: 'Generic tags',
    redactOutputPseudo: 'Pseudonyms',
    redactOutputHint: 'Contextual: each person gets a role tag so their identity is traceable within the document. Generic: all names become [PERSON]. Pseudonyms: replaced with plausible fake Norwegian values.',
    redactExempt: 'Exempt names',
    redactExemptAdd: 'Add',
    redactExemptHint: 'Names listed here will never be redacted, even if the AI would otherwise remove them — e.g. a judge or expert who must remain identifiable.',
    redactExemptPlaceholder: 'Name to keep (e.g. Judge Andersen)',
    redactAliases: 'Name aliases',
    redactAliasAdd: 'Add',
    redactAliasHint: 'Replace a specific name with a custom bracketed label, e.g. "David Jr" → [Junior].',
    redactUploadAria: 'File upload',
    redactUploadDrop: 'Drop up to 5 files here, or',
    redactUploadBrowse: 'browse',
    redactUploadHint: 'text extracted in memory, never stored',
    redactUploadClear: '× Clear',
    redactInputLabel: 'Pasted text',
    redactInputPlaceholder: 'Paste text containing names, phone numbers, emails, addresses, or national ID numbers.',
    redactRun: 'Run',
    redactRunning: 'Redacting…',
    redactReadyTitle: 'Ready',
    redactReadyDesc: 'Paste text or upload a file, configure redaction options, then run.',
  },
  no: {
    redactEngine: 'Motor',
    redactEngineAzureMini: 'Azure gpt-4o-mini',
    redactEngineAzureFull: 'Azure gpt-4o',
    redactEngineGpu: 'GPU (cuttlefish)',
    redactEngineRegex: 'Kun regex',
    redactEngineHint: 'Azure-motorer bruker BNL Azure-kreditter. GPU kjører lokal LiteLLM-proxy. Kun regex er øyeblikkelig og gratis, men finner ingen navn eller organisasjoner.',
    redactMode: 'Modus',
    redactModeStandard: 'Standard',
    redactModeStrict: 'Strikt',
    redactModeHint: 'Standard: regex-mønstre + LLM-skanning for navn/org/steder. Strikt: erstatter også enhver stor-stav-kombinasjon som potensielt navn — mer aggressivt, kan gi falske positiver.',
    redactRegion: 'Region',
    redactRegionNordic: 'Nordisk',
    redactRegionEuropean: 'Europeisk',
    redactRegionEchr: 'EMD',
    redactRegionGlobal: 'Global',
    redactRegionHint: 'Nordisk: norsk fødselsnummer, telefon, e-post, adresser. Europeisk: legger til IBAN, SE personnummer, UK NI. EMD: legger til saksnummer, fødselsdatofraser. Global: legger til US SSN, dokumentnummer.',
    redactEntities: 'Rediger',
    redactEntityNames: 'Navn',
    redactEntityOrgs: 'Organisasjoner',
    redactEntityPlaces: 'Steder',
    redactEntityDob: 'Fødselsdatoer',
    redactOfficials: 'Offisielle',
    redactKeepOfficials: 'Behold offisielle navn (dommere, sakkyndige)',
    redactOfficialsHint: 'Når avkrysset beholder dommere, sakkyndige og saksbehandlere sine navn i en merket tagg: [DOMMER: Andersen]. Fjern haken for å erstatte alle navn med generiske rolletaggar.',
    redactOutput: 'Utdata',
    redactOutputContextual: 'Kontekstuelle taggar',
    redactOutputGeneric: 'Generiske taggar',
    redactOutputPseudo: 'Pseudonymer',
    redactOutputHint: 'Kontekstuell: hver person får en rolletagg slik at identiteten kan spores i dokumentet. Generisk: alle navn blir [PERSON]. Pseudonymer: erstattes med troverdige falske norske verdier.',
    redactExempt: 'Unntak',
    redactExemptAdd: 'Legg til',
    redactExemptHint: 'Navn oppført her vil aldri bli redigert, selv om AI ellers ville fjernet dem — f.eks. en dommer eller sakkyndig som må forbli identifiserbar.',
    redactExemptPlaceholder: 'Navn som skal beholdes (f.eks. Dommer Andersen)',
    redactAliases: 'Navnealiaser',
    redactAliasAdd: 'Legg til',
    redactAliasHint: 'Erstatt et spesifikt navn med en egendefinert merkelapp, f.eks. «David Jr» → [Junior].',
    redactUploadAria: 'Filopplasting',
    redactUploadDrop: 'Slipp opptil 5 filer her, eller',
    redactUploadBrowse: 'bla',
    redactUploadHint: 'tekst hentes i minnet, lagres aldri',
    redactUploadClear: '× Tøm',
    redactInputLabel: 'Limt inn tekst',
    redactInputPlaceholder: 'Lim inn tekst med navn, telefonnummer, e-poster, adresser eller personnummer.',
    redactRun: 'Kjør',
    redactRunning: 'Redigerer…',
    redactReadyTitle: 'Klar',
    redactReadyDesc: 'Lim inn tekst eller last opp en fil, konfigurer redigeringsalternativene, og kjør.',
  },
  uk: {
    redactEngine: 'Рушій',
    redactEngineAzureMini: 'Azure gpt-4o-mini',
    redactEngineAzureFull: 'Azure gpt-4o',
    redactEngineGpu: 'GPU (cuttlefish)',
    redactEngineRegex: 'Лише регулярні вирази',
    redactEngineHint: 'Рушії Azure використовують кредити BNL Azure. GPU запускає локальний проксі LiteLLM. Лише regex — миттєво і безкоштовно, але не знаходить імен або організацій.',
    redactMode: 'Режим',
    redactModeStandard: 'Стандартний',
    redactModeStrict: 'Суворий',
    redactModeHint: 'Стандарт: шаблони regex + LLM-сканування для імен/орг/місць. Суворий: також замінює будь-яку комбінацію слів з великої літери як потенційне ім\'я.',
    redactRegion: 'Регіон',
    // Was the Norwegian word 'Nordisk'; the Ukrainian UI needs a Ukrainian label.
    redactRegionNordic: 'Північноєвропейський',
    redactRegionEuropean: 'Європейський',
    redactRegionEchr: 'ЄСПЛ',
    redactRegionGlobal: 'Глобальний',
    // Prefix matches the region label above; 'fødselsnummer' is spelled with a Latin 'f'.
    redactRegionHint: 'Північноєвропейський: норвезький fødselsnummer, телефон, email, адреси. Європейський: додає IBAN, SE personnummer, UK NI. ЄСПЛ: додає номери справ, фрази дати народження. Глобальний: додає US SSN.',
    redactEntities: 'Редагувати',
    redactEntityNames: 'Імена',
    redactEntityOrgs: 'Організації',
    redactEntityPlaces: 'Місця',
    redactEntityDob: 'Дати народження',
    redactOfficials: 'Офіційні особи',
    redactKeepOfficials: 'Зберігати офіційні імена (судді, експерти)',
    redactOfficialsHint: 'Якщо позначено, судді, експерти та соціальні працівники зберігають свої імена у позначеному тезі: [СУДДЯ: Andersen].',
    redactOutput: 'Вивід',
    redactOutputContextual: 'Контекстні теги',
    redactOutputGeneric: 'Загальні теги',
    redactOutputPseudo: 'Псевдоніми',
    redactOutputHint: 'Контекстний: кожна особа отримує тег ролі. Загальний: всі імена стають [PERSON]. Псевдоніми: замінюються правдоподібними норвезькими значеннями.',
    redactExempt: 'Виключені імена',
    redactExemptAdd: 'Додати',
    redactExemptHint: 'Імена, перелічені тут, ніколи не будуть відредаговані.',
    redactExemptPlaceholder: 'Ім\'я для збереження (напр. суддя Andersen)',
    redactAliases: 'Псевдоніми імен',
    redactAliasAdd: 'Додати',
    redactAliasHint: 'Замініть конкретне ім\'я на власну мітку, напр. «David Jr» → [Junior].',
    redactUploadAria: 'Завантаження файлів',
    redactUploadDrop: 'Перетягніть до 5 файлів сюди, або',
    redactUploadBrowse: 'огляд',
    redactUploadHint: 'текст обробляється в пам\'яті, ніколи не зберігається',
    redactUploadClear: '× Очистити',
    redactInputLabel: 'Вставлений текст',
    redactInputPlaceholder: 'Вставте текст з іменами, телефонами, адресами або ідентифікаційними номерами.',
    redactRun: 'Запустити',
    redactRunning: 'Редагування…',
    redactReadyTitle: 'Готово',
    redactReadyDesc: 'Вставте текст або завантажте файл, налаштуйте параметри, запустіть.',
  },
  pl: {
    redactEngine: 'Silnik',
    redactEngineAzureMini: 'Azure gpt-4o-mini',
    redactEngineAzureFull: 'Azure gpt-4o',
    redactEngineGpu: 'GPU (cuttlefish)',
    redactEngineRegex: 'Tylko regex',
    redactEngineHint: 'Silniki Azure używają kredytów Azure BNL. GPU korzysta z lokalnego proxy LiteLLM. Tylko regex jest natychmiastowy i bezpłatny, ale nie znajdzie imion ani organizacji.',
    redactMode: 'Tryb',
    redactModeStandard: 'Standardowy',
    redactModeStrict: 'Ścisły',
    redactModeHint: 'Standardowy: wzorce regex + skanowanie LLM dla imion/org/miejsc. Ścisły: zastępuje też każdą kombinację słów pisanych wielką literą jako potencjalne imię.',
    redactRegion: 'Region',
    redactRegionNordic: 'Nordycki',
    redactRegionEuropean: 'Europejski',
    redactRegionEchr: 'ETPC',
    redactRegionGlobal: 'Globalny',
    redactRegionHint: 'Nordycki: norweski fødselsnummer, telefon, email, adresy. Europejski: dodaje IBAN, SE personnummer, UK NI. ETPC: dodaje numery spraw, frazy daty urodzenia. Globalny: dodaje US SSN.',
    redactEntities: 'Redaguj',
    redactEntityNames: 'Imiona',
    redactEntityOrgs: 'Organizacje',
    redactEntityPlaces: 'Miejsca',
    redactEntityDob: 'Daty urodzenia',
    redactOfficials: 'Urzędnicy',
    redactKeepOfficials: 'Zachowaj oficjalne nazwy (sędziowie, eksperci)',
    redactOfficialsHint: 'Gdy zaznaczone, sędziowie, biegli i pracownicy socjalni zachowują swoje nazwiska w oznaczonym tagu: [SĘDZIA: Andersen].',
    redactOutput: 'Wyjście',
    redactOutputContextual: 'Tagi kontekstowe',
    redactOutputGeneric: 'Tagi ogólne',
    redactOutputPseudo: 'Pseudonimy',
    redactOutputHint: 'Kontekstowe: każda osoba otrzymuje tag roli. Ogólne: wszystkie imiona stają się [PERSON]. Pseudonimy: zastąpione wiarygodnymi fałszywymi wartościami norweskimi.',
    redactExempt: 'Zwolnione nazwy',
    redactExemptAdd: 'Dodaj',
    redactExemptHint: 'Nazwy tu wpisane nigdy nie zostaną zredagowane.',
    redactExemptPlaceholder: 'Nazwa do zachowania (np. Sędzia Andersen)',
    redactAliases: 'Aliasy nazw',
    redactAliasAdd: 'Dodaj',
    redactAliasHint: 'Zastąp konkretną nazwę własną etykietą, np. «David Jr» → [Junior].',
    redactUploadAria: 'Przesyłanie pliku',
    redactUploadDrop: 'Upuść do 5 plików tutaj lub',
    redactUploadBrowse: 'przeglądaj',
    redactUploadHint: 'tekst wyodrębniany w pamięci, nigdy nie przechowywany',
    redactUploadClear: '× Wyczyść',
    redactInputLabel: 'Wklejony tekst',
    redactInputPlaceholder: 'Wklej tekst zawierający imiona, numery telefonów, adresy lub numery identyfikacyjne.',
    redactRun: 'Uruchom',
    redactRunning: 'Redagowanie…',
    redactReadyTitle: 'Gotowe',
    redactReadyDesc: 'Wklej tekst lub wgraj plik, skonfiguruj opcje redakcji, uruchom.',
  },
};
|
||||
|
||||
let lastTimelineEvents = [];
|
||||
let audioQueue = []; // [{file, status: 'pending'|'processing'|'done'|'error', result}]
|
||||
let lastTranscriptData = null;
|
||||
@@ -310,6 +509,89 @@ function applyTranscribeI18n(lang) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Look up a Redact-tool UI string for the active interface language.
 *
 * Resolution order: the table for the current `uiLang`, then the English
 * table, then the key itself, so a missing translation shows up as its key
 * instead of as blank text.
 *
 * Only own properties are consulted. The previous `key in table` check also
 * matched inherited Object members, so a key such as "toString" or
 * "constructor" returned a function instead of a string.
 *
 * @param {string} key - Translation key, e.g. 'redactRun'.
 * @returns {string} The translated text, or `key` when no table defines it.
 */
function currentRedactT(key) {
  const owns = (table, name) => Object.prototype.hasOwnProperty.call(table, name);

  // Unknown or inherited language codes fall back to English.
  const active = (owns(REDACT_I18N, uiLang) && REDACT_I18N[uiLang]) || REDACT_I18N.en;
  if (owns(active, key)) {
    return active[key];
  }
  if (owns(REDACT_I18N.en, key)) {
    return REDACT_I18N.en[key] ?? key;
  }
  return key;
}
|
||||
|
||||
/**
 * Switch the Redact tool UI to `lang` and re-render every translatable node.
 *
 * Side effects:
 *  - stores `lang` in the shared `uiLang` variable and in localStorage under
 *    'dbn-ui-lang';
 *  - rewrites textContent of `[data-i18n]`, placeholder of
 *    `[data-i18n-placeholder]`, and aria-label of `[data-i18n-aria]` elements;
 *  - toggles `is-active` on the `#redactLangSwitcher .lang-btn` buttons.
 *
 * Elements whose key is not defined in REDACT_I18N (for `lang` or English) are
 * left untouched. The lookup helper falls back to returning the key itself, so
 * without this check such elements had their text replaced by the raw key.
 *
 * @param {string} lang - Language code, e.g. 'en', 'no', 'uk', 'pl'.
 * @returns {void}
 */
function applyRedactI18n(lang) {
  uiLang = lang;
  localStorage.setItem('dbn-ui-lang', lang);

  const owns = (table, key) =>
    table != null && Object.prototype.hasOwnProperty.call(table, key);

  // Resolve a key only when the Redact tables actually define it.
  const translate = (key) =>
    (owns(REDACT_I18N[lang], key) || owns(REDACT_I18N.en, key))
      ? currentRedactT(key)
      : null;

  document.querySelectorAll('[data-i18n]').forEach((el) => {
    const text = translate(el.dataset.i18n);
    if (text != null) el.textContent = text;
  });

  document.querySelectorAll('[data-i18n-placeholder]').forEach((el) => {
    const text = translate(el.dataset.i18nPlaceholder);
    if (text != null) el.placeholder = text;
  });

  document.querySelectorAll('[data-i18n-aria]').forEach((el) => {
    const text = translate(el.dataset.i18nAria);
    if (text != null) el.setAttribute('aria-label', text);
  });

  document.querySelectorAll('#redactLangSwitcher .lang-btn').forEach((btn) => {
    btn.classList.toggle('is-active', btn.dataset.lang === lang);
  });
}
|
||||
|
||||
/**
 * Read the redaction engine chosen in the "redactEngine" radio group.
 *
 * @returns {string} Value of the checked radio, or 'azure_mini' when none is
 *   checked or its value is empty.
 */
function currentRedactEngine() {
  const checkedRadio = document.querySelector('input[name="redactEngine"]:checked');
  const engine = checkedRadio?.value;
  if (engine) {
    return engine;
  }
  return 'azure_mini';
}
|
||||
|
||||
/**
 * Read the output format chosen in the "outputFormat" radio group.
 *
 * @returns {string} Value of the checked radio, or 'contextual' when none is
 *   checked or its value is empty.
 */
function currentOutputFormat() {
  const checkedRadio = document.querySelector('input[name="outputFormat"]:checked');
  const format = checkedRadio?.value;
  if (format) {
    return format;
  }
  return 'contextual';
}
|
||||
|
||||
/**
 * Report whether the "keep officials" checkbox is ticked.
 *
 * @returns {boolean} The checkbox's `checked` state; false when the
 *   #keepOfficialsCheck element is absent.
 */
function currentKeepOfficials() {
  const checkbox = document.getElementById('keepOfficialsCheck');
  if (checkbox == null) {
    return false;
  }
  return checkbox.checked ?? false;
}
|
||||
|
||||
/**
 * Collect the entity-type toggles of the Redact form.
 *
 * Each flag mirrors the `checked` state of its checkbox and defaults to true
 * when the checkbox is not present in the document.
 *
 * @returns {{names: boolean, orgs: boolean, places: boolean, dob: boolean}}
 */
function currentRedactTypes() {
  // Payload key -> id of the checkbox that controls it.
  const checkboxIds = [
    ['names', 'redactNames'],
    ['orgs', 'redactOrgs'],
    ['places', 'redactPlaces'],
    ['dob', 'redactDob'],
  ];

  const flags = {};
  for (const [type, id] of checkboxIds) {
    const checkbox = document.getElementById(id);
    flags[type] = checkbox?.checked ?? true;
  }
  return flags;
}
|
||||
|
||||
/**
 * Wire the Redact page language switcher and render the initial language.
 *
 * Does nothing when #redactLangSwitcher is not in the document. Otherwise each
 * `.lang-btn` inside it switches the Redact UI to its `data-lang` on click,
 * and the UI is rendered once for the current `uiLang`.
 *
 * @returns {void}
 */
function setupRedactControls() {
  const langSwitcher = document.getElementById('redactLangSwitcher');
  if (langSwitcher) {
    for (const button of langSwitcher.querySelectorAll('.lang-btn')) {
      button.addEventListener('click', () => {
        applyRedactI18n(button.dataset.lang);
      });
    }
    applyRedactI18n(uiLang);
  }
}
|
||||
|
||||
/**
 * Wire the "exempt names" editor of the Redact form.
 *
 * Requires both #addExemptRow (the add button) and #exemptRows (the row
 * container); returns without doing anything if either is missing.
 *  - Clicking the add button appends a new `.exempt-row` holding a text input
 *    and a remove button, then focuses the input.
 *  - Clicks inside the container are delegated: a click on (or within) an
 *    `.alias-remove` button removes its enclosing `.exempt-row`.
 *
 * @returns {void}
 */
function setupExemptNames() {
  const rowContainer = document.getElementById('exemptRows');
  const addButton = document.getElementById('addExemptRow');
  if (!(addButton && rowContainer)) {
    return;
  }

  const appendExemptRow = () => {
    // The placeholder is escaped because it is interpolated into markup.
    const placeholder = escapeHtml(currentRedactT('redactExemptPlaceholder'));
    const inputMarkup = `<input type="text" class="exempt-name-input" placeholder="${placeholder}" maxlength="100">`;
    const removeMarkup = '<button type="button" class="alias-remove" aria-label="Remove exempt name">×</button>';

    const newRow = document.createElement('div');
    newRow.className = 'exempt-row';
    newRow.innerHTML = `${inputMarkup}${removeMarkup}`;
    rowContainer.appendChild(newRow);
    newRow.querySelector('.exempt-name-input').focus();
  };

  const removeClickedRow = (event) => {
    const removeButton = event.target.closest('.alias-remove');
    if (!removeButton) {
      return;
    }
    removeButton.closest('.exempt-row').remove();
  };

  addButton.addEventListener('click', appendExemptRow);
  rowContainer.addEventListener('click', removeClickedRow);
}
|
||||
|
||||
/**
 * Collect the names entered in the exempt-names editor.
 *
 * @returns {string[]} Trimmed values of every `.exempt-name-input` inside
 *   #exemptRows, in document order, with empty entries dropped.
 */
function getExemptNames() {
  const names = [];
  for (const input of document.querySelectorAll('#exemptRows .exempt-name-input')) {
    const trimmed = input.value.trim();
    if (trimmed) {
      names.push(trimmed);
    }
  }
  return names;
}
|
||||
|
||||
const tools = {
|
||||
ask: {
|
||||
kind: 'Source-grounded Legal Ask',
|
||||
@@ -433,10 +715,15 @@ document.addEventListener('DOMContentLoaded', () => {
|
||||
setupAudio();
|
||||
setupTranscribeControls();
|
||||
setupVocabPresets();
|
||||
document.querySelectorAll('.lang-btn').forEach((btn) => {
|
||||
setupRedactControls();
|
||||
setupExemptNames();
|
||||
// Wire transcribe lang switcher (only present on transcribe page)
|
||||
document.querySelectorAll('#uiLangSwitcher .lang-btn').forEach((btn) => {
|
||||
btn.addEventListener('click', () => applyTranscribeI18n(btn.dataset.lang));
|
||||
});
|
||||
applyTranscribeI18n(uiLang);
|
||||
if (document.getElementById('uiLangSwitcher')) {
|
||||
applyTranscribeI18n(uiLang);
|
||||
}
|
||||
els.results.addEventListener('click', (e) => {
|
||||
if (e.target.closest('#exportCsvBtn')) exportTimelineCSV(lastTimelineEvents);
|
||||
if (e.target.closest('#dlTxt')) downloadTranscriptTxt();
|
||||
@@ -532,9 +819,14 @@ async function runTool(event) {
|
||||
payload.limit = 7;
|
||||
}
|
||||
if (state.activeTool === 'redact') {
|
||||
payload.mode = currentRedactionMode();
|
||||
payload.region = currentRedactionRegion();
|
||||
payload.aliases = getAliases();
|
||||
payload.mode = currentRedactionMode();
|
||||
payload.region = currentRedactionRegion();
|
||||
payload.aliases = getAliases();
|
||||
payload.engine = currentRedactEngine();
|
||||
payload.output_format = currentOutputFormat();
|
||||
payload.keep_officials = currentKeepOfficials();
|
||||
payload.exempt_names = getExemptNames();
|
||||
payload.redact_types = currentRedactTypes();
|
||||
}
|
||||
|
||||
setBusy(true);
|
||||
@@ -742,9 +1034,13 @@ async function postJson(url, payload) {
|
||||
/**
 * Toggle the busy state of the Run button.
 *
 * Disables or enables #runButton and sets its label according to the active
 * tool: the transcribe tool and every other tool use the general UI strings,
 * while the redact tool uses its own translation table.
 *
 * The label is assigned exactly once; the earlier unconditional ternary
 * assignment was always overwritten by the per-tool branch and has been
 * removed. If #runButton is not in the document the call is a no-op instead
 * of throwing on a null element.
 *
 * @param {boolean} isBusy - True while a tool request is in flight.
 * @returns {void}
 */
function setBusy(isBusy) {
  const button = document.querySelector('#runButton');
  if (!button) {
    return;
  }

  button.disabled = isBusy;
  if (state.activeTool === 'transcribe') {
    button.textContent = isBusy ? currentUiT('running') : currentUiT('run');
  } else if (state.activeTool === 'redact') {
    button.textContent = isBusy ? currentRedactT('redactRunning') : currentRedactT('redactRun');
  } else {
    button.textContent = isBusy ? currentUiT('runningOther') : currentUiT('run');
  }
}
|
||||
|
||||
function currentLanguage() {
|
||||
|
||||
@@ -40,6 +40,13 @@ final class DbnAzureOpenAiGateway
|
||||
return $missing;
|
||||
}
|
||||
|
||||
/**
 * Return a copy of this gateway whose chat deployment is overridden.
 *
 * The gateway is cloned and the clone's `chat_deployment` config entry is
 * set to $deployment, allowing a per-call model choice.
 * NOTE(review): this leaves the original untouched only if `$config` is a
 * plain PHP array (copied by value on clone) — confirm against the
 * property declaration.
 *
 * @param string $deployment Deployment name to use for chat calls on the copy.
 * @return static The cloned gateway with the overridden deployment.
 */
public function withDeployment(string $deployment): static
{
    $gateway = clone $this;
    $gateway->config['chat_deployment'] = $deployment;

    return $gateway;
}
|
||||
|
||||
public function chatDeployment(): string
|
||||
{
|
||||
return (string)$this->config['chat_deployment'];
|
||||
|
||||
+268
-30
@@ -343,15 +343,33 @@ PROMPT;
|
||||
];
|
||||
}
|
||||
|
||||
public function redact(string $text, string $mode = 'standard', string $region = 'nordic', string $language = 'en', array $aliases = []): array
|
||||
{
|
||||
$text = $this->requirePasteText($text);
|
||||
$mode = $mode === 'strict' ? 'strict' : 'standard';
|
||||
$region = in_array($region, ['nordic', 'european', 'echr', 'global'], true) ? $region : 'nordic';
|
||||
public function redact(
|
||||
string $text,
|
||||
string $mode = 'standard',
|
||||
string $region = 'nordic',
|
||||
string $language = 'en',
|
||||
array $aliases = [],
|
||||
string $engine = 'azure_mini',
|
||||
string $outputFormat = 'contextual',
|
||||
bool $keepOfficials = false,
|
||||
array $exemptNames = [],
|
||||
array $redactTypes = []
|
||||
): array {
|
||||
$text = $this->requirePasteText($text);
|
||||
$mode = $mode === 'strict' ? 'strict' : 'standard';
|
||||
$region = in_array($region, ['nordic', 'european', 'echr', 'global'], true) ? $region : 'nordic';
|
||||
$engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu', 'regex'], true) ? $engine : 'azure_mini';
|
||||
$outputFormat = in_array($outputFormat, ['contextual', 'generic', 'pseudonym'], true) ? $outputFormat : 'contextual';
|
||||
|
||||
// Normalise entity-type flags (all on by default)
|
||||
$doNames = ($redactTypes['names'] ?? true) !== false;
|
||||
$doOrgs = ($redactTypes['orgs'] ?? true) !== false;
|
||||
$doPlaces = ($redactTypes['places'] ?? true) !== false;
|
||||
$doDob = ($redactTypes['dob'] ?? true) !== false;
|
||||
|
||||
// Pass 1 — deterministic regex
|
||||
[$preRedacted, $pass1Counts] = $this->deterministicRedaction($text, $mode, $region);
|
||||
$pass1Total = array_sum($pass1Counts);
|
||||
$pass1Total = array_sum($pass1Counts);
|
||||
$pass1Detail = $pass1Total
|
||||
? implode(', ', array_map(
|
||||
fn($k, $v) => "{$k}: {$v}",
|
||||
@@ -360,8 +378,15 @@ PROMPT;
|
||||
))
|
||||
: 'none detected';
|
||||
|
||||
$engineLabel = match ($engine) {
|
||||
'azure_full' => 'Azure gpt-4o',
|
||||
'gpu' => 'GPU (cuttlefish)',
|
||||
'regex' => 'Regex only',
|
||||
default => 'Azure gpt-4o-mini',
|
||||
};
|
||||
|
||||
$trace = [
|
||||
$this->trace('Query interpretation', "Redact PII from pasted text. Region: {$region}. Mode: {$mode}.", 'complete'),
|
||||
$this->trace('Query interpretation', "Redact PII from pasted text. Region: {$region}. Mode: {$mode}. Engine: {$engineLabel}.", 'complete'),
|
||||
$this->trace('Pass 1 — Deterministic patterns', "Applied {$region} pattern pack. {$pass1Detail}.", $pass1Total > 0 ? 'complete' : 'warning'),
|
||||
];
|
||||
|
||||
@@ -370,10 +395,14 @@ PROMPT;
|
||||
$pass2Counts = [];
|
||||
$llmDeployment = null;
|
||||
|
||||
$llmResult = $this->llmRedactionPass($preRedacted, $language, $aliases);
|
||||
$llmResult = $this->llmRedactionPass(
|
||||
$preRedacted, $language, $aliases, $engine,
|
||||
$keepOfficials, $exemptNames,
|
||||
$doNames, $doOrgs, $doPlaces, $doDob
|
||||
);
|
||||
|
||||
if (!empty($llmResult['skipped'])) {
|
||||
$trace[] = $this->trace('Pass 2 — LLM semantic scan', 'Skipped: ' . ($llmResult['reason'] ?? 'Azure not configured') . '.', 'warning');
|
||||
$trace[] = $this->trace('Pass 2 — LLM semantic scan', 'Skipped: ' . ($llmResult['reason'] ?? 'not configured') . '.', 'warning');
|
||||
} elseif (!empty($llmResult['error'])) {
|
||||
$trace[] = $this->trace('Pass 2 — LLM semantic scan', 'Skipped due to error: ' . dbnToolsExcerpt($llmResult['error'], 100) . '.', 'warning');
|
||||
} else {
|
||||
@@ -391,7 +420,8 @@ PROMPT;
|
||||
if ($original === '' || str_starts_with($original, '[')) {
|
||||
continue;
|
||||
}
|
||||
if (!preg_match('/^\[[A-Za-z0-9_\- ]+\]$/', $tag)) {
|
||||
// Allow [ROLE: Name] format when keepOfficials is on, else require plain bracket tag
|
||||
if (!preg_match('/^\[[A-Za-z0-9_\- ]+(?::\s*[^\]]+)?\]$/', $tag)) {
|
||||
$tag = '[IDENTIFIER]';
|
||||
}
|
||||
if (str_contains($finalRedacted, $original)) {
|
||||
@@ -405,12 +435,24 @@ PROMPT;
|
||||
? "{$applied} additional: " . implode(', ', array_map(fn($k, $v) => "{$k}: {$v}", array_keys($pass2Counts), $pass2Counts))
|
||||
: 'no additional entities found';
|
||||
|
||||
$trace[] = $this->trace('Pass 2 — LLM semantic scan', "Azure reviewed pre-redacted text for names, orgs, and places. {$pass2Detail}.", 'complete');
|
||||
$trace[] = $this->trace('Pass 2 — LLM semantic scan', "{$engineLabel} reviewed pre-redacted text for names, orgs, and places. {$pass2Detail}.", 'complete');
|
||||
}
|
||||
|
||||
// Apply output format post-processing
|
||||
$allCounts = array_merge($pass1Counts, $pass2Counts);
|
||||
if ($outputFormat === 'generic') {
|
||||
$finalRedacted = $this->applyGenericTags($finalRedacted);
|
||||
} elseif ($outputFormat === 'pseudonym') {
|
||||
$finalRedacted = $this->applyPseudonymization($finalRedacted, $allCounts);
|
||||
}
|
||||
|
||||
$allCounts = array_merge($pass1Counts, $pass2Counts);
|
||||
$categories = array_keys(array_filter($allCounts, fn($v): bool => $v > 0));
|
||||
|
||||
$trace[] = $this->trace('Output format', match ($outputFormat) {
|
||||
'generic' => 'All identifiers normalised to generic tags ([PERSON], [ORG], etc.).',
|
||||
'pseudonym' => 'Identifiers replaced with plausible pseudonymous values.',
|
||||
default => 'Contextual role tags used (e.g. [FATHER], [JUDGE: Name]).',
|
||||
}, 'complete');
|
||||
$trace[] = $this->trace('Uncertainty / missing evidence', 'Human review recommended for contextual identification and unusual formatting.', 'warning');
|
||||
$trace[] = $this->trace('Next practical step', 'Review the output and rerun in strict mode if the text will be shared broadly.', 'complete');
|
||||
|
||||
@@ -418,7 +460,9 @@ PROMPT;
|
||||
'tool' => 'redact',
|
||||
'mode' => $mode,
|
||||
'region' => $region,
|
||||
'what_we_found' => "Applied {$region} pattern pack" . ($llmDeployment ? ' and LLM semantic scan' : '') . '.',
|
||||
'engine_used' => $engineLabel,
|
||||
'output_format' => $outputFormat,
|
||||
'what_we_found' => "Applied {$region} pattern pack" . ($llmDeployment || $engine === 'gpu' ? " and {$engineLabel} semantic scan" : '') . '.',
|
||||
'redacted_text' => $finalRedacted,
|
||||
'detected_entity_categories' => $categories,
|
||||
'entity_counts' => $allCounts,
|
||||
@@ -429,7 +473,7 @@ PROMPT;
|
||||
'trace_metadata' => [
|
||||
'chunk_count' => 1,
|
||||
'source_count' => 1,
|
||||
'deployment' => $llmDeployment,
|
||||
'deployment' => $llmDeployment ?? $engineLabel,
|
||||
],
|
||||
'disclaimer' => 'Privacy support tool. Review before disclosure.',
|
||||
];
|
||||
@@ -793,15 +837,32 @@ PROMPT;
|
||||
]);
|
||||
}
|
||||
|
||||
private function llmRedactionPass(string $preRedacted, string $language = 'en', array $aliases = []): array
|
||||
{
|
||||
$missing = $this->azure->missingChatConfig();
|
||||
if ($missing) {
|
||||
return ['skipped' => true, 'reason' => 'Azure chat not configured (' . implode(', ', $missing) . ')'];
|
||||
private function llmRedactionPass(
|
||||
string $preRedacted,
|
||||
string $language = 'en',
|
||||
array $aliases = [],
|
||||
string $engine = 'azure_mini',
|
||||
bool $keepOfficials = false,
|
||||
array $exemptNames = [],
|
||||
bool $doNames = true,
|
||||
bool $doOrgs = true,
|
||||
bool $doPlaces = true,
|
||||
bool $doDob = true
|
||||
): array {
|
||||
if ($engine === 'regex') {
|
||||
return ['skipped' => true, 'reason' => 'Regex-only mode selected'];
|
||||
}
|
||||
|
||||
if ($engine !== 'gpu') {
|
||||
$missing = $this->azure->missingChatConfig();
|
||||
if ($missing) {
|
||||
return ['skipped' => true, 'reason' => 'Azure chat not configured (' . implode(', ', $missing) . ')'];
|
||||
}
|
||||
}
|
||||
|
||||
$languageNote = $language === 'no' ? "\n • The document may contain Norwegian or mixed-language content." : '';
|
||||
|
||||
// Build alias block
|
||||
$aliasBlock = '';
|
||||
if (!empty($aliases)) {
|
||||
$lines = [];
|
||||
@@ -817,6 +878,32 @@ PROMPT;
|
||||
}
|
||||
}
|
||||
|
||||
// Build exempt names block
|
||||
$exemptBlock = '';
|
||||
if (!empty($exemptNames)) {
|
||||
$quoted = array_map(fn($n) => '"' . str_replace(['"', "\n"], ['\\"', ' '], $n) . '"', array_slice($exemptNames, 0, 20));
|
||||
$exemptBlock = "\n\nEXEMPT NAMES — these names must NOT be redacted under any circumstances:\n " . implode(', ', $quoted);
|
||||
}
|
||||
|
||||
// Build entity-type restriction note
|
||||
$skipTypes = [];
|
||||
if (!$doOrgs) $skipTypes[] = 'organisation names';
|
||||
if (!$doPlaces) $skipTypes[] = 'place names';
|
||||
if (!$doDob) $skipTypes[] = 'dates of birth';
|
||||
if (!$doNames) $skipTypes[] = 'person names';
|
||||
$skipNote = $skipTypes ? "\n\nSKIP these entity types — do NOT redact them: " . implode(', ', $skipTypes) . '.' : '';
|
||||
|
||||
// Build officials note
|
||||
$officialsNote = '';
|
||||
if ($keepOfficials) {
|
||||
$officialsNote = "\n\nOFFICIALS — for persons identified as JUDGE, EXPERT_WITNESS, or CASEWORKER in an official capacity: do NOT replace their name with a plain bracket tag. Instead use the format [ROLE: Name], e.g. [JUDGE: Andersen] or [EXPERT_WITNESS: Dr. Larsen]. Their name must remain visible inside the tag.";
|
||||
}
|
||||
|
||||
$allowedTypesNote = '';
|
||||
if (!$doNames) {
|
||||
$allowedTypesNote = "\n\nDo NOT include person_name entries in your output.";
|
||||
}
|
||||
|
||||
$system = <<<PROMPT
|
||||
You are a privacy redaction assistant for legal documents (ECHR judgements, Norwegian family law cases, EU child welfare documents). The text below has already had mechanical identifiers replaced with placeholder tags in [BRACKETS].
|
||||
|
||||
@@ -827,7 +914,7 @@ Assign each person a consistent contextual tag used for every occurrence of thei
|
||||
• Family roles: FATHER, MOTHER, CHILD, CHILD_1, CHILD_2, GRANDPARENT, SIBLING
|
||||
• Professional roles: ATTORNEY, JUDGE, CASEWORKER, EXPERT_WITNESS
|
||||
• Generic fallback: PERSON_1, PERSON_2 (use only when role cannot be determined)
|
||||
The same individual MUST receive the same tag every time they appear.{$aliasBlock}
|
||||
The same individual MUST receive the same tag every time they appear.{$aliasBlock}{$exemptBlock}{$officialsNote}{$skipNote}{$allowedTypesNote}
|
||||
|
||||
Return ONLY a valid JSON object:
|
||||
{"redactions":[{"original":"exact text as it appears","type":"person_name","tag":"[FATHER]"}]}
|
||||
@@ -848,16 +935,23 @@ Rules:
|
||||
• Short common words, conjunctions, and prepositions are NOT PII.{$languageNote}
|
||||
PROMPT;
|
||||
|
||||
$messages = [
|
||||
['role' => 'system', 'content' => $system],
|
||||
['role' => 'user', 'content' => $preRedacted],
|
||||
];
|
||||
$chatOptions = ['temperature' => 0.1, 'max_tokens' => 8000, 'json' => true, 'timeout' => 90];
|
||||
|
||||
try {
|
||||
$response = $this->azure->chat([
|
||||
['role' => 'system', 'content' => $system],
|
||||
['role' => 'user', 'content' => $preRedacted],
|
||||
], [
|
||||
'temperature' => 0.1,
|
||||
'max_tokens' => 8000,
|
||||
'json' => true,
|
||||
'timeout' => 90,
|
||||
]);
|
||||
if ($engine === 'gpu') {
|
||||
$response = $this->callGpuLlm($messages, $chatOptions);
|
||||
$deployLabel = 'GPU (cuttlefish)';
|
||||
} elseif ($engine === 'azure_full') {
|
||||
$response = $this->azure->withDeployment('gpt-4o')->chat($messages, $chatOptions);
|
||||
$deployLabel = 'gpt-4o';
|
||||
} else {
|
||||
$response = $this->azure->chat($messages, $chatOptions);
|
||||
$deployLabel = $this->azure->chatDeployment();
|
||||
}
|
||||
|
||||
$content = (string)($response['choices'][0]['message']['content'] ?? '');
|
||||
$json = $this->azure->decodeJsonObject($content);
|
||||
@@ -869,7 +963,7 @@ PROMPT;
|
||||
return [
|
||||
'skipped' => false,
|
||||
'entities' => is_array($json['redactions']) ? $json['redactions'] : [],
|
||||
'deployment' => $this->azure->chatDeployment(),
|
||||
'deployment' => $deployLabel,
|
||||
];
|
||||
} catch (Throwable $e) {
|
||||
error_log('DBN tools LLM redaction pass failed: ' . $e->getMessage());
|
||||
@@ -877,6 +971,150 @@ PROMPT;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Send an OpenAI-style chat-completion request to the GPU LiteLLM proxy.
 *
 * The endpoint, API key and model may be overridden with the environment
 * variables DBN_GPU_LLM_URL, DBN_GPU_LLM_API_KEY and DBN_GPU_LLM_MODEL.
 * When a variable is unset or empty the previous built-in value is used,
 * so existing deployments keep working unchanged.
 *
 * @param array $messages Chat messages, each ['role' => ..., 'content' => ...].
 * @param array $options  Optional settings: 'temperature' (default 0.1),
 *                        'max_tokens' (default 8000), 'timeout' in seconds
 *                        (default 90), and 'json' (truthy to request a
 *                        JSON-object response format).
 *
 * @return array The decoded JSON response body.
 *
 * @throws RuntimeException When the payload cannot be encoded, the transport
 *                          fails, the proxy answers with a non-2xx status, or
 *                          the body is not a JSON object/array.
 */
private function callGpuLlm(array $messages, array $options = []): array
{
    $url = getenv('DBN_GPU_LLM_URL') ?: 'http://10.0.1.10:4000/v1/chat/completions';
    // SECURITY(review): a credential is committed in source as the fallback below.
    // Rotate it, supply the new value through DBN_GPU_LLM_API_KEY, then remove the literal.
    $apiKey = getenv('DBN_GPU_LLM_API_KEY') ?: 'sk-bnl-litellm-26xR9mK4qvN3wL8sTj7pB2d';
    $model = getenv('DBN_GPU_LLM_MODEL') ?: 'qwen2.5:14b';
    $timeout = (int)($options['timeout'] ?? 90);

    $payload = [
        'model' => $model,
        'messages' => $messages,
        'temperature' => $options['temperature'] ?? 0.1,
        'max_tokens' => $options['max_tokens'] ?? 8000,
    ];
    if (!empty($options['json'])) {
        $payload['response_format'] = ['type' => 'json_object'];
    }

    $body = json_encode($payload, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    if ($body === false) {
        // json_encode() fails on e.g. malformed UTF-8; posting `false` would send an empty body.
        throw new RuntimeException('GPU LiteLLM request could not be encoded: ' . json_last_error_msg());
    }

    $headers = [
        'Content-Type: application/json',
        'Authorization: Bearer ' . $apiKey,
    ];

    if (function_exists('curl_init')) {
        $ch = curl_init($url);
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_TIMEOUT => $timeout,
        ]);
        $response = curl_exec($ch);
        $code = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
        $err = curl_error($ch);
        curl_close($ch);

        if ($response === false) {
            throw new RuntimeException('GPU LiteLLM request failed: ' . $err);
        }
    } else {
        $ctx = stream_context_create(['http' => [
            'method' => 'POST',
            'header' => implode("\r\n", $headers),
            'content' => $body,
            'timeout' => $timeout,
            // Return the body for 4xx/5xx too, so the proxy's error message can be surfaced.
            'ignore_errors' => true,
        ]]);
        $response = @file_get_contents($url, false, $ctx);
        if ($response === false) {
            throw new RuntimeException('GPU LiteLLM request failed.');
        }

        // Use the last status line (redirects add earlier ones) and do not
        // require a reason phrase after the three-digit code.
        $code = 0;
        foreach (($http_response_header ?? []) as $headerLine) {
            if (preg_match('/^HTTP\/\S+\s+(\d{3})\b/', $headerLine, $m)) {
                $code = (int)$m[1];
            }
        }
    }

    $decoded = json_decode((string)$response, true);

    // Check the status first so that an HTML/plain-text error page is reported
    // with its HTTP code instead of a generic "non-JSON" message.
    if ($code < 200 || $code >= 300) {
        $msg = is_array($decoded) ? ($decoded['error']['message'] ?? null) : null;
        if (!is_string($msg) || $msg === '') {
            $msg = 'HTTP ' . $code;
        }
        throw new RuntimeException('GPU LiteLLM error: ' . $msg);
    }
    if (!is_array($decoded)) {
        throw new RuntimeException('GPU LiteLLM returned non-JSON response.');
    }

    return $decoded;
}
|
||||
|
||||
/**
 * Flatten every contextual person tag into the generic [PERSON] tag.
 *
 * Matches the family/professional role tags, numbered variants such as
 * [CHILD_1] or [PERSON_2], and labelled official tags such as
 * [JUDGE: Andersen]. All other bracket tags are left untouched.
 *
 * @param string $text Redacted text containing bracket tags.
 *
 * @return string The text with person tags collapsed; the input is returned
 *                unchanged if the regex engine reports a failure.
 */
private function applyGenericTags(string $text): string
{
    $personTagPattern = '/\[(?:FATHER|MOTHER|CHILD(?:_\d+)?|GRANDPARENT|SIBLING|ATTORNEY|JUDGE(?::\s*[^\]]+)?|CASEWORKER(?::\s*[^\]]+)?|EXPERT_WITNESS(?::\s*[^\]]+)?|PERSON(?:_\d+)?)\]/u';

    $collapsed = preg_replace($personTagPattern, '[PERSON]', $text);

    // preg_replace() yields null on a PCRE error; fall back to the input in that case.
    if ($collapsed === null) {
        return $text;
    }

    return $collapsed;
}
|
||||
|
||||
/**
 * Replace bracket placeholder tags with plausible fake Norwegian values.
 *
 * Person tags are mapped consistently: every occurrence of the same tag
 * text (e.g. [FATHER] or [JUDGE: Andersen]) receives the same pseudonym, and
 * different tags always receive different pseudonyms. Once the built-in name
 * pool is exhausted, names are reused with a numeric suffix ("Ola Nordmann (2)")
 * so two individuals are never merged into one.
 *
 * [PHONE], [EMAIL], [ADDRESS] and [ORG] receive a fresh numbered value per
 * occurrence. Identifier, place, date-of-birth and IBAN tags are replaced
 * with fixed dummy values.
 *
 * @param string $text      Redacted text containing bracket tags.
 * @param array  $allCounts Not used by this method; kept for signature compatibility.
 *
 * @return string The text with tags replaced by pseudonymous values.
 */
private function applyPseudonymization(string $text, array $allCounts): string
{
    $norwegianNames = [
        'Ola Nordmann', 'Per Hansen', 'Kari Larsen', 'Anne Berg', 'Erik Dahl',
        'Ingrid Holm', 'Lars Moen', 'Silje Bakke', 'Tor Haugen', 'Eva Strand',
    ];
    $nameCursor = 0;
    $phoneBase = 1;
    $emailCursor = 0;
    $addrCursor = 1;
    $orgCursor = 1;
    $personMap = [];

    // Person tags: one stable pseudonym per distinct tag text.
    $text = preg_replace_callback(
        '/\[(FATHER|MOTHER|CHILD(?:_\d+)?|GRANDPARENT|SIBLING|ATTORNEY|JUDGE(?::\s*[^\]]+)?|CASEWORKER(?::\s*[^\]]+)?|EXPERT_WITNESS(?::\s*[^\]]+)?|PERSON(?:_\d+)?)\]/u',
        function (array $m) use (&$nameCursor, &$personMap, $norwegianNames): string {
            $key = $m[1];
            if (!isset($personMap[$key])) {
                $poolSize = count($norwegianNames);
                $base = $norwegianNames[$nameCursor % $poolSize];
                // After the pool wraps, add a suffix so distinct people never share a pseudonym.
                $round = intdiv($nameCursor, $poolSize);
                $personMap[$key] = $round === 0 ? $base : $base . ' (' . ($round + 1) . ')';
                $nameCursor++;
            }
            return $personMap[$key];
        },
        $text
    ) ?? $text;

    $text = preg_replace_callback('/\[PHONE\]/', function () use (&$phoneBase): string {
        return sprintf('+47 400 00 %03d', $phoneBase++);
    }, $text) ?? $text;

    $text = preg_replace_callback('/\[EMAIL\]/', function () use (&$emailCursor): string {
        $letter = chr(ord('a') + ($emailCursor % 26));
        // Past "z" the letters repeat; a numeric suffix keeps the addresses distinct.
        $round = intdiv($emailCursor, 26);
        $suffix = $round === 0 ? '' : (string)($round + 1);
        $emailCursor++;
        return "person.{$letter}{$suffix}@example.no";
    }, $text) ?? $text;

    // The address and organisation counters must advance, otherwise every
    // occurrence collapses to the same "… 1" placeholder.
    $text = preg_replace_callback('/\[ADDRESS\]/', function () use (&$addrCursor): string {
        $number = $addrCursor++;
        return "Eksempelveien {$number}, 0001 Oslo";
    }, $text) ?? $text;

    $text = preg_replace_callback('/\[ORG\]/', function () use (&$orgCursor): string {
        $number = $orgCursor++;
        return "Eksempel AS ({$number})";
    }, $text) ?? $text;

    // Tags with a constant replacement need no callback; plain string replacement suffices.
    $fixedReplacements = [
        '[FNR]' => '010100XXXXX',
        '[SE_PERSONNUMMER]' => '[ID-REDACTED]',
        '[FR_INSEE]' => '[ID-REDACTED]',
        '[UK_NI]' => '[ID-REDACTED]',
        '[SSN]' => '[ID-REDACTED]',
        '[NAT_ID]' => '[ID-REDACTED]',
        '[DOC_NO]' => '[ID-REDACTED]',
        '[ECHR_APP_NO]' => '[ID-REDACTED]',
        '[PLACE]' => 'Eksempelby',
        '[DOB]' => '01.01.0000',
        '[IBAN]' => 'NO00 0000 00 00000',
    ];

    return str_replace(array_keys($fixedReplacements), array_values($fixedReplacements), $text);
}
|
||||
|
||||
private function uncertaintySummary(mixed $uncertainty): string
|
||||
{
|
||||
if (is_array($uncertainty)) {
|
||||
|
||||
+120
-1
@@ -6,5 +6,124 @@ $toolKind = 'Redaction Assistant';
|
||||
$toolBadge = 'deterministic first';
|
||||
require_once __DIR__ . '/includes/layout.php';
|
||||
?>
|
||||
<?php require_once __DIR__ . '/includes/tool_form.php'; ?>
|
||||
<form id="toolForm" class="tool-form">
|
||||
|
||||
<div class="lang-switcher" id="redactLangSwitcher" role="group" aria-label="UI language">
|
||||
<button type="button" class="lang-btn is-active" data-lang="en">🇬🇧 EN</button>
|
||||
<button type="button" class="lang-btn" data-lang="no">🇳🇴 NO</button>
|
||||
<button type="button" class="lang-btn" data-lang="uk">🇺🇦 UK</button>
|
||||
<button type="button" class="lang-btn" data-lang="pl">🇵🇱 PL</button>
|
||||
</div>
|
||||
|
||||
<div class="control-row" id="redactEngineControl">
|
||||
<span class="control-label" data-i18n="redactEngine">Engine</span>
|
||||
<label><input type="radio" name="redactEngine" value="azure_mini" checked id="redactEngineAzureMini"> <span data-i18n="redactEngineAzureMini">Azure gpt-4o-mini</span> ★ <small class="control-hint">(fast)</small></label>
|
||||
<label><input type="radio" name="redactEngine" value="azure_full" id="redactEngineAzureFull"> <span data-i18n="redactEngineAzureFull">Azure gpt-4o</span> <small class="control-hint">(best)</small></label>
|
||||
<label><input type="radio" name="redactEngine" value="gpu" id="redactEngineGpu"> <span data-i18n="redactEngineGpu">GPU (cuttlefish)</span> <small class="control-hint">(local)</small></label>
|
||||
<label><input type="radio" name="redactEngine" value="regex" id="redactEngineRegex"> <span data-i18n="redactEngineRegex">Regex only</span> <small class="control-hint">(free)</small></label>
|
||||
</div>
|
||||
<p class="upload-hint" data-i18n="redactEngineHint">Azure engines use your BNL Azure credits. GPU runs the local LiteLLM proxy. Regex-only is instant and free but finds no names or organisations.</p>
|
||||
|
||||
<div class="control-row" id="redactModeControl">
|
||||
<span class="control-label" data-i18n="redactMode">Mode</span>
|
||||
<label><input type="radio" name="redactionMode" value="standard" checked> <span data-i18n="redactModeStandard">Standard</span></label>
|
||||
<label><input type="radio" name="redactionMode" value="strict"> <span data-i18n="redactModeStrict">Strict</span></label>
|
||||
</div>
|
||||
<p class="upload-hint" data-i18n="redactModeHint">Standard: regex patterns + LLM scan for names/orgs/places. Strict: also replaces any capitalised two-word phrase as a potential name — more aggressive, may produce false positives.</p>
|
||||
|
||||
<div class="control-row" id="redactRegionControl">
|
||||
<span class="control-label" data-i18n="redactRegion">Region</span>
|
||||
<label><input type="radio" name="redactionRegion" value="nordic" checked> <span data-i18n="redactRegionNordic">Nordic</span></label>
|
||||
<label><input type="radio" name="redactionRegion" value="european"> <span data-i18n="redactRegionEuropean">European</span></label>
|
||||
<label><input type="radio" name="redactionRegion" value="echr"> <span data-i18n="redactRegionEchr">ECHR</span></label>
|
||||
<label><input type="radio" name="redactionRegion" value="global"> <span data-i18n="redactRegionGlobal">Global</span></label>
|
||||
</div>
|
||||
<p class="upload-hint" data-i18n="redactRegionHint">Nordic: Norwegian fødselsnummer, phone, email, addresses. European: adds IBAN, SE personnummer, UK NI. ECHR: adds application numbers, DOB phrases. Global: adds US SSN, document numbers.</p>
|
||||
|
||||
<div class="control-row entity-toggles" id="redactEntityControl">
|
||||
<span class="control-label" data-i18n="redactEntities">Redact</span>
|
||||
<label><input type="checkbox" name="redactNames" id="redactNames" checked> <span data-i18n="redactEntityNames">Names</span></label>
|
||||
<label><input type="checkbox" name="redactOrgs" id="redactOrgs" checked> <span data-i18n="redactEntityOrgs">Organisations</span></label>
|
||||
<label><input type="checkbox" name="redactPlaces" id="redactPlaces" checked> <span data-i18n="redactEntityPlaces">Places</span></label>
|
||||
<label><input type="checkbox" name="redactDob" id="redactDob" checked> <span data-i18n="redactEntityDob">Dates of birth</span></label>
|
||||
</div>
|
||||
|
||||
<div class="control-row" id="redactOfficialsControl">
|
||||
<span class="control-label" data-i18n="redactOfficials">Officials</span>
|
||||
<label><input type="checkbox" name="keepOfficials" id="keepOfficialsCheck"> <span data-i18n="redactKeepOfficials">Keep official names (judges, experts)</span></label>
|
||||
</div>
|
||||
<p class="upload-hint" data-i18n="redactOfficialsHint">When checked, judges, expert witnesses and caseworkers keep their names in a labelled tag: [JUDGE: Andersen]. Uncheck to replace all names with generic role tags.</p>
|
||||
|
||||
<div class="control-row" id="redactOutputControl">
|
||||
<span class="control-label" data-i18n="redactOutput">Output</span>
|
||||
<label><input type="radio" name="outputFormat" value="contextual" checked> <span data-i18n="redactOutputContextual">Contextual tags</span> ★ <small class="control-hint">[FATHER], [JUDGE: Name]</small></label>
|
||||
<label><input type="radio" name="outputFormat" value="generic"> <span data-i18n="redactOutputGeneric">Generic tags</span> <small class="control-hint">[PERSON], [ORG]</small></label>
|
||||
<label><input type="radio" name="outputFormat" value="pseudonym"> <span data-i18n="redactOutputPseudo">Pseudonyms</span> <small class="control-hint">Ola Nordmann, +47 400 00 001</small></label>
|
||||
</div>
|
||||
<p class="upload-hint" data-i18n="redactOutputHint">Contextual: each person gets a role tag so their identity is traceable within the document. Generic: all names become [PERSON]. Pseudonyms: replaced with plausible fake Norwegian values.</p>
|
||||
|
||||
<div class="exempt-section" id="exemptSection">
|
||||
<div class="alias-header">
|
||||
<span class="control-label" data-i18n="redactExempt">Exempt names</span>
|
||||
<button type="button" id="addExemptRow" class="alias-add-btn">+ <span data-i18n="redactExemptAdd">Add</span></button>
|
||||
</div>
|
||||
<div id="exemptRows"></div>
|
||||
<p class="alias-hint" data-i18n="redactExemptHint">Names listed here will never be redacted, even if the AI would otherwise remove them — e.g. a judge or expert who must remain identifiable.</p>
|
||||
</div>
|
||||
|
||||
<div class="alias-section" id="aliasSection">
|
||||
<div class="alias-header">
|
||||
<span class="control-label" data-i18n="redactAliases">Name aliases</span>
|
||||
<button type="button" id="addAliasRow" class="alias-add-btn">+ <span data-i18n="redactAliasAdd">Add</span></button>
|
||||
</div>
|
||||
<div id="aliasRows"></div>
|
||||
<p class="alias-hint" data-i18n="redactAliasHint">Replace a specific name with a custom bracketed label, e.g. “David Jr” → [Junior].</p>
|
||||
</div>
|
||||
|
||||
<div class="upload-zone" id="uploadZone" role="region" aria-label="File upload" data-i18n-aria="redactUploadAria">
|
||||
<input type="file" id="uploadInput" multiple accept=".pdf,.docx,.txt" aria-label="Choose files">
|
||||
<div id="uploadPrompt" class="upload-prompt">
|
||||
<span class="upload-icon" aria-hidden="true">⇧</span>
|
||||
<p><span data-i18n="redactUploadDrop">Drop up to 5 files here, or</span> <label for="uploadInput" class="upload-browse" data-i18n="redactUploadBrowse">browse</label></p>
|
||||
<p class="upload-hint"><strong>PDF</strong>, <strong>DOCX</strong>, <strong>TXT</strong> — <span data-i18n="redactUploadHint">text extracted in memory, never stored</span></p>
|
||||
</div>
|
||||
<div id="uploadFileInfo" class="upload-file is-hidden">
|
||||
<ul id="uploadFileList" class="upload-file-list"></ul>
|
||||
<button type="button" id="uploadClear" class="upload-clear" data-i18n="redactUploadClear">× Clear</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<label class="input-label" for="toolInput" id="inputLabel" data-i18n="redactInputLabel">Pasted text</label>
|
||||
<textarea id="toolInput" name="toolInput" rows="10" required data-i18n-placeholder="redactInputPlaceholder" placeholder="Paste text containing names, phone numbers, emails, addresses, or national ID numbers."></textarea>
|
||||
|
||||
<div class="form-footer">
|
||||
<p id="toolStatus" class="form-status" role="status" aria-live="polite"></p>
|
||||
<button id="runButton" type="submit" data-i18n="redactRun">Run</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<section id="results" class="results" aria-live="polite">
|
||||
<div class="empty-state">
|
||||
<h3 data-i18n="redactReadyTitle">Ready</h3>
|
||||
<p data-i18n="redactReadyDesc">Paste text or upload a file, configure redaction options, then run.</p>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Hidden stubs so tools.js element refs don't crash on this page -->
|
||||
<div class="is-hidden" id="languageControl" aria-hidden="true"><input type="radio" name="language" value="en" checked></div>
|
||||
<div class="is-hidden" id="redactionControl" aria-hidden="true"></div>
|
||||
<div class="is-hidden" id="audioZone" aria-hidden="true">
|
||||
<input type="file" id="audioInput" style="display:none">
|
||||
<div id="audioPrompt"></div>
|
||||
<div id="audioFileInfo"><ol id="audioQueueList"></ol><button type="button" id="audioClear"></button></div>
|
||||
</div>
|
||||
<div class="is-hidden" id="diarizeControl" aria-hidden="true">
|
||||
<input type="checkbox" id="diarizeCheck">
|
||||
<input type="number" id="numSpeakersInput">
|
||||
</div>
|
||||
<div class="is-hidden" id="transcribeLangControl" aria-hidden="true"><input type="radio" name="transcribeLang" value="no" checked></div>
|
||||
<div class="is-hidden" id="vocabControl" aria-hidden="true">
|
||||
<div id="vocabPresets"></div>
|
||||
<textarea id="initPromptInput"></textarea>
|
||||
</div>
|
||||
<?php require_once __DIR__ . '/includes/layout_footer.php'; ?>
|
||||
|
||||
Reference in New Issue
Block a user