Files
daveadmin b84827ecea Add Transcribe docs (about/guide/tech) + refresh Redact docs
- New: transcribe-about.php, transcribe-guide.php, transcribe-tech.php
  with full en/no/uk/pl translations (3-engine cascade, diarization, vocab)
- New: translations/transcribe-about|guide|tech.php (4-lang strings)
- New: scripts/translate-pages.php (Azure gpt-4o CLI translation helper)
- Add korr-doc-links nav to transcribe.php
- Refresh redact-about|guide|tech.php — point to assets/images/redact/
- Fix all "never written to disk" wording in redact translations
- Add Min Sak/corpus save workflow to redact guide and tech privacy section
- redact.php upload hint: correct in-memory wording

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 10:28:06 +02:00

320 lines
14 KiB
PHP

<?php
declare(strict_types=1);

/*
 * Redact "How it works" documentation page.
 *
 * This prologue prepares everything the template below reads:
 *   $uiLang      language code of the active UI translation
 *   $isAuthed    whether the visitor is signed in to the tools site
 *   $langPath    path the language switcher appends "?lang=<code>" to
 *   $toolsLogin  sign-in URL whose return target is the Redact tool
 *   $registerUrl registration URL on the main site
 *   $t           flat map of translation key => string for this page
 */
require_once __DIR__ . '/includes/bootstrap.php';

// Both helpers are presumably provided by includes/bootstrap.php (not visible here).
$uiLang = dbnToolsCurrentLanguage();
$isAuthed = dbnToolsIsAuthenticated();

$langPath = '/redact-tech.php';
// The return target is the tool itself (/redact.php), not this documentation page.
$toolsLogin = 'https://dobetternorge.no/tools-login.php?return=' . urlencode('/redact.php');
$registerUrl = 'https://dobetternorge.no/register.php';

// The translation file returns an array keyed by language code.
$_pt = require __DIR__ . '/translations/redact-tech.php';

// Fall back to English per key, not only per language: the array union keeps every
// string the active language defines and fills any gap from 'en'. Without this, one
// missing key in a partial translation reaches htmlspecialchars() as null, which is
// a TypeError under strict_types and takes the whole page down.
$t = ($_pt[$uiLang] ?? []) + $_pt['en'];
?>
<!doctype html>
<html lang="<?= /* Document language follows the active UI language.
                   NOTE(review): the title, description and Open Graph tags below are fixed
                   English strings in this template; they are not read from $t. */ htmlspecialchars($uiLang) ?>">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>How Redact works — Two-pass pipeline, regional patterns, entity classification</title>
<meta name="description" content="Technical deep-dive: how Redact uses a two-pass pipeline combining deterministic regex patterns with GPT-4o entity recognition to anonymise Norwegian legal documents.">
<meta name="robots" content="index, follow">
<link rel="canonical" href="https://tools.dobetternorge.no/redact-tech.php">
<meta property="og:title" content="How Redact works — Two-pass redaction pipeline">
<meta property="og:description" content="Pass 1: regex pattern matching. Pass 2: LLM entity sweep. Regional rule sets, output format generation, alias substitution.">
<meta name="theme-color" content="#00205B">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Crimson+Pro:wght@400;600;700&family=IBM+Plex+Sans:wght@400;500;600;700&family=IBM+Plex+Mono:wght@400;500&display=swap">
<link rel="stylesheet" href="assets/css/tools.css">
<link rel="stylesheet" href="assets/css/dbn-tools-redesign.css">
</head>
<body class="kdoc-page">
<header class="lt-nav">
<a href="https://dobetternorge.no" class="lt-nav__brand">
<picture>
<source srcset="assets/images/logo-header.webp" type="image/webp">
<img class="lt-nav__logo" src="assets/images/logo-header.png" alt="Do Better Norge" width="140" height="36" loading="eager">
</picture>
<span class="lt-nav__badge">Legal Tools</span>
</a>
<div class="lt-nav__right">
<nav class="shell-lang-switcher" aria-label="Language">
<?php /* One link per supported language, each pointing back at this page with ?lang=<code>.
         The active language gets the "is-active" class and aria-current="true", so the
         current choice is exposed to assistive technology and not only through styling. */
foreach (dbnToolsSupportedLanguages() as $langCode): ?>
<a href="<?= htmlspecialchars($langPath . '?lang=' . $langCode) ?>" class="<?= $langCode === $uiLang ? 'is-active' : '' ?>"<?= $langCode === $uiLang ? ' aria-current="true"' : '' ?>><?= htmlspecialchars(dbnToolsLanguageLabel($langCode)) ?></a>
<?php endforeach; ?>
</nav>
<?php /* Signed-in visitors get a direct link to the tool; everyone else gets the sign-in link. */
if ($isAuthed): ?>
<a href="/redact.php" class="lt-nav__cta lt-nav__cta--enter"><?= htmlspecialchars($t['nav_open']) ?></a>
<?php else: ?>
<a href="<?= htmlspecialchars($toolsLogin) ?>" class="lt-nav__cta"><?= htmlspecialchars($t['nav_signin']) ?></a>
<?php endif; ?>
</div>
</header>
<nav class="kdoc-doc-nav" aria-label="Redact documentation">
<div class="kdoc-doc-nav__inner">
<a href="/redact-about.php"><?= htmlspecialchars($t['nav_about']) ?></a>
<a href="/redact-guide.php"><?= htmlspecialchars($t['nav_guide']) ?></a>
<a href="/redact-tech.php" class="is-active" aria-current="page"><?= /* this page: marked current for assistive technology as well as visually */ htmlspecialchars($t['nav_howit']) ?></a>
<?php /* The "open tool" shortcut is only rendered for signed-in visitors. */ if ($isAuthed): ?><a href="/redact.php"><?= htmlspecialchars($t['nav_opentool']) ?></a><?php endif; ?>
</div>
</nav>
<!-- Hero -->
<section class="kdoc-hero" style="background: linear-gradient(rgba(5,15,40,0.85),rgba(5,15,40,0.92)), url('assets/images/redact/hero-tech.png') center/cover no-repeat;">
<div class="kdoc-hero__inner">
<p class="kdoc-hero__kicker"><?= /* Hero copy is read from the active translation and HTML-escaped. */ htmlspecialchars($t['hero_kicker']) ?></p>
<h1 class="kdoc-hero__title"><?= htmlspecialchars($t['hero_title']) ?></h1>
<p class="kdoc-hero__sub"><?= htmlspecialchars($t['hero_sub']) ?></p>
<div class="kdoc-hero__stats">
<div class="kdoc-hero__stat">
<strong>2</strong>
<span><?= /* The figures in the <strong> elements are fixed in this template; only their labels (stat1..stat4) are translated. */ htmlspecialchars($t['stat1']) ?></span>
</div>
<div class="kdoc-hero__stat">
<strong>4</strong>
<span><?= htmlspecialchars($t['stat2']) ?></span>
</div>
<div class="kdoc-hero__stat">
<strong>3</strong>
<span><?= htmlspecialchars($t['stat3']) ?></span>
</div>
<div class="kdoc-hero__stat">
<strong>2</strong>
<span><?= htmlspecialchars($t['stat4']) ?></span>
</div>
</div>
</div>
</section>
<!-- Architecture overview -->
<div class="kdoc-section">
<p class="kdoc-section__eyebrow"><?= /* Section heading copy: translated and HTML-escaped. */ htmlspecialchars($t['arch_eyebrow']) ?></p>
<h2 class="kdoc-section__title"><?= htmlspecialchars($t['arch_title']) ?></h2>
<p class="kdoc-section__sub"><?= htmlspecialchars($t['arch_sub']) ?></p>
<div class="kdoc-pipeline">
<div class="kdoc-pipeline__pass">
<span class="kdoc-pipeline__pass-badge kdoc-pipeline__pass-badge--mini">Pass 1 &middot; PHP / regex</span>
<h3 class="kdoc-pipeline__pass-title"><?= /* The badge text above is fixed in the template; the title is translated. */ htmlspecialchars($t['pass1_title']) ?></h3>
<p class="kdoc-pipeline__pass-body"><?=
    /* Keys ending in "_html" are echoed WITHOUT htmlspecialchars(), unlike the plain keys
       around them. Presumably they carry trusted inline markup from the translation
       file - confirm before ever routing user-supplied text through such a key. */
    $t['pass1_p1_html'] ?></p>
<ul>
<li><?= $t['pass1_li1_html'] ?></li>
<li><?= htmlspecialchars($t['pass1_li2']) ?></li>
<li><?= htmlspecialchars($t['pass1_li3']) ?></li>
<li><?= htmlspecialchars($t['pass1_li4']) ?></li>
<li><?= htmlspecialchars($t['pass1_li5']) ?></li>
</ul>
<p class="kdoc-pipeline__pass-body" style="margin-top:0.7rem;"><?= htmlspecialchars($t['pass1_p2']) ?></p>
</div>
<div class="kdoc-pipeline__arrow-down" aria-hidden="true">&rarr;</div>
<div class="kdoc-pipeline__pass">
<span class="kdoc-pipeline__pass-badge">Pass 2 &middot; gpt-4o-mini / gpt-4o</span>
<h3 class="kdoc-pipeline__pass-title"><?= htmlspecialchars($t['pass2_title']) ?></h3>
<p class="kdoc-pipeline__pass-body"><?= /* unescaped "_html" key, see the note on pass1_p1_html */ $t['pass2_p1_html'] ?></p>
<ul>
<li><?= htmlspecialchars($t['pass2_li1']) ?></li>
<li><?= htmlspecialchars($t['pass2_li2']) ?></li>
<li><?= htmlspecialchars($t['pass2_li3']) ?></li>
<li><?= htmlspecialchars($t['pass2_li4']) ?></li>
</ul>
<p class="kdoc-pipeline__pass-body" style="margin-top:0.7rem;"><?= htmlspecialchars($t['pass2_p2']) ?></p>
</div>
<div class="kdoc-pipeline__arrow-down" aria-hidden="true">&rarr;</div>
<div class="kdoc-pipeline__pass">
<span class="kdoc-pipeline__pass-badge kdoc-pipeline__pass-badge--optional">Pass 3 &middot; PHP post-processor</span>
<h3 class="kdoc-pipeline__pass-title"><?= htmlspecialchars($t['pass3_title']) ?></h3>
<p class="kdoc-pipeline__pass-body"><?= /* every Pass 3 body string is an unescaped "_html" key */ $t['pass3_p1_html'] ?></p>
<ul>
<li><?= $t['pass3_f1_html'] ?></li>
<li><?= $t['pass3_f2_html'] ?></li>
<li><?= $t['pass3_f3_html'] ?></li>
<li><?= $t['pass3_f4_html'] ?></li>
</ul>
<p class="kdoc-pipeline__pass-body" style="margin-top:0.7rem;"><?= $t['pass3_p2_html'] ?></p>
</div>
</div>
</div>
<!-- Regional patterns -->
<section class="kdoc-section--alt">
<div class="kdoc-section">
<p class="kdoc-section__eyebrow"><?=
    /* NOTE(review): this section is labelled "Regional patterns" but its heading copy is
       read from date_* keys - confirm these are the intended translation keys. */
    htmlspecialchars($t['date_eyebrow']) ?></p>
<h2 class="kdoc-section__title"><?= htmlspecialchars($t['date_title']) ?></h2>
<p class="kdoc-section__sub"><?= htmlspecialchars($t['date_sub']) ?></p>
<table class="kdoc-table">
<thead>
<tr>
<th><?= htmlspecialchars($t['th_region']) ?></th>
<th><?= htmlspecialchars($t['th_patterns']) ?></th>
<th><?= htmlspecialchars($t['th_notes']) ?></th>
</tr>
</thead>
<tbody>
<tr>
<td>Nordic &#9733;</td>
<td>Fødselsnummer, D-number, +47 phone, email, Norwegian address</td>
<td><?= /* Only the notes column (rn1..rn4) is translated; the region and pattern cells are fixed text in this template. */ htmlspecialchars($t['rn1']) ?></td>
</tr>
<tr>
<td>European</td>
<td>+ IBAN, Swedish personnummer, Danish CPR, Finnish HETU, UK NI</td>
<td><?= htmlspecialchars($t['rn2']) ?></td>
</tr>
<tr>
<td>ECHR</td>
<td>+ ECHR application numbers, DOB phrases, ECtHR case references</td>
<td><?= htmlspecialchars($t['rn3']) ?></td>
</tr>
<tr>
<td>Global</td>
<td>+ US SSN, driver's licence formats, generic document numbers</td>
<td><?= htmlspecialchars($t['rn4']) ?></td>
</tr>
</tbody>
</table>
</div>
</section>
<!-- Classification schema -->
<div class="kdoc-section">
<p class="kdoc-section__eyebrow"><?= /* This section renders two tables: entity types first, then output formats. */ htmlspecialchars($t['class_eyebrow']) ?></p>
<h2 class="kdoc-section__title"><?= htmlspecialchars($t['class_title']) ?></h2>
<h3 style="font-family:'Crimson Pro',serif; font-size:1.15rem; font-weight:700; margin:0 0 0.8rem; color:var(--dbn-blue);"><?= htmlspecialchars($t['class_h1']) ?></h3>
<table class="kdoc-table">
<thead>
<tr>
<th><?= htmlspecialchars($t['th_entity']) ?></th>
<th><?= htmlspecialchars($t['th_definition']) ?></th>
<th><?= htmlspecialchars($t['th_output']) ?></th>
</tr>
</thead>
<tbody>
<tr>
<td><code>person</code></td>
<td><?= /* Entity identifiers in <code> are fixed text; definition and output cells (etN_def / etN_out) are translated. */ htmlspecialchars($t['et1_def']) ?></td>
<td><?= htmlspecialchars($t['et1_out']) ?></td>
</tr>
<tr>
<td><code>organisation</code></td>
<td><?= htmlspecialchars($t['et2_def']) ?></td>
<td><?= htmlspecialchars($t['et2_out']) ?></td>
</tr>
<tr>
<td><code>place</code></td>
<td><?= htmlspecialchars($t['et3_def']) ?></td>
<td><?= htmlspecialchars($t['et3_out']) ?></td>
</tr>
<tr>
<td><code>date</code></td>
<td><?= htmlspecialchars($t['et4_def']) ?></td>
<td><?= htmlspecialchars($t['et4_out']) ?></td>
</tr>
</tbody>
</table>
<h3 style="font-family:'Crimson Pro',serif; font-size:1.15rem; font-weight:700; margin:2rem 0 0.8rem; color:var(--dbn-blue);"><?= htmlspecialchars($t['class_h2']) ?></h3>
<table class="kdoc-table">
<thead>
<tr>
<th><?= htmlspecialchars($t['th_format']) ?></th>
<th><?= htmlspecialchars($t['th_person_ex']) ?></th>
<th><?= htmlspecialchars($t['th_org_ex']) ?></th>
</tr>
</thead>
<tbody>
<tr>
<td>Contextual &#9733;</td>
<td><code><?= /* Format names are fixed text; the example values (fmtN_person / fmtN_org) are translated. */ htmlspecialchars($t['fmt1_person']) ?></code></td>
<td><code><?= htmlspecialchars($t['fmt1_org']) ?></code></td>
</tr>
<tr>
<td>Generic</td>
<td><code><?= htmlspecialchars($t['fmt2_person']) ?></code></td>
<td><code><?= htmlspecialchars($t['fmt2_org']) ?></code></td>
</tr>
<tr>
<td>Pseudonym</td>
<td><em><?= /* pseudonym examples are wrapped in <em>, not <code> like the two rows above */ htmlspecialchars($t['fmt3_person']) ?></em></td>
<td><em><?= htmlspecialchars($t['fmt3_org']) ?></em></td>
</tr>
</tbody>
</table>
</div>
<!-- Multi-engine -->
<section class="kdoc-section--alt">
<div class="kdoc-section">
<p class="kdoc-section__eyebrow"><?= /* Section heading copy: translated and HTML-escaped. */ htmlspecialchars($t['eng_eyebrow']) ?></p>
<h2 class="kdoc-section__title"><?= htmlspecialchars($t['eng_title']) ?></h2>
<p class="kdoc-section__sub"><?= htmlspecialchars($t['eng_sub']) ?></p>
<table class="kdoc-table">
<thead>
<tr>
<th><?= htmlspecialchars($t['th_engine']) ?></th>
<th><?= htmlspecialchars($t['th_model']) ?></th>
<th><?= htmlspecialchars($t['th_latency']) ?></th>
<th><?= htmlspecialchars($t['th_best']) ?></th>
</tr>
</thead>
<tbody>
<tr>
<td>Azure gpt-4o-mini &#9733;</td>
<td><code>gpt-4o-mini</code> (Azure West Europe)</td>
<td>~15 s</td>
<td><?= /* Engine name, model, region and latency figures are fixed text here; only the "best for" column is translated. */ htmlspecialchars($t['eng1_best']) ?></td>
</tr>
<tr>
<td>Azure gpt-4o</td>
<td><code>gpt-4o</code> (Azure West Europe)</td>
<td>~45 s</td>
<td><?= htmlspecialchars($t['eng2_best']) ?></td>
</tr>
</tbody>
</table>
</div>
</section>
<!-- Privacy by design -->
<div class="kdoc-section">
<p class="kdoc-section__eyebrow"><?= /* Section heading copy: translated and HTML-escaped. */ htmlspecialchars($t['priv_eyebrow']) ?></p>
<h2 class="kdoc-section__title"><?= htmlspecialchars($t['priv_title']) ?></h2>
<div class="kdoc-privacy">
<p class="kdoc-privacy__title"><?= htmlspecialchars($t['priv_badge']) ?></p>
<ul>
<li><?=
    /* priv_1, priv_3, priv_4 and priv_5 are "_html" keys echoed without htmlspecialchars();
       presumably they hold trusted inline markup from the translation file - confirm. */
    $t['priv_1_html'] ?></li>
<li><?= /* priv_2 is plain text and is the only escaped item in this list */ htmlspecialchars($t['priv_2']) ?></li>
<li><?= $t['priv_3_html'] ?></li>
<li><?= $t['priv_4_html'] ?></li>
<li><?= $t['priv_5_html'] ?></li>
</ul>
</div>
</div>
<!-- CTA -->
<section class="kdoc-cta-strip">
<h2 class="kdoc-cta-strip__title"><?= htmlspecialchars($t['cta_title']) ?></h2>
<p class="kdoc-cta-strip__sub"><?= htmlspecialchars($t['cta_sub']) ?></p>
<div class="kdoc-hero__ctas">
<?php /* Signed-in visitors get a single "open tool" button; everyone else gets
         sign-in (primary) and register (secondary) buttons. */ if ($isAuthed): ?>
<a href="/redact.php" class="kdoc-btn-primary"><?= htmlspecialchars($t['btn_open']) ?></a>
<?php else: ?>
<a href="<?= htmlspecialchars($toolsLogin) ?>" class="kdoc-btn-primary"><?= htmlspecialchars($t['btn_signin_cta']) ?></a>
<a href="<?= htmlspecialchars($registerUrl) ?>" class="kdoc-btn-secondary"><?= htmlspecialchars($t['btn_register']) ?></a>
<?php endif; ?>
<a href="/redact-guide.php" class="kdoc-btn-secondary"><?= /* the guide link is rendered for every visitor */ htmlspecialchars($t['btn_guide']) ?></a>
</div>
</section>
<?php /* Footer markup comes from the includes/footer.php partial. */ require_once __DIR__ . '/includes/footer.php'; ?>
<script src="assets/js/tools.js" defer></script>
</body>
</html>