Files
dobetternorge-tools/scripts/generate-page-translations.php
T
daveadmin 29579eae66 Add NO/UK/PL translations to all 6 doc pages
Per-page translation arrays in translations/*.php (EN/NO/UK/PL) for
korrespond-about, korrespond-guide, korrespond-tech, timeline-about,
timeline-guide, and timeline-tech. Generated via Azure gpt-4o-mini;
Norwegian legal/institution terms preserved as-is in all languages.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-19 16:58:15 +02:00

829 lines
66 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
/**
* generate-page-translations.php
* One-shot CLI script: php scripts/generate-page-translations.php
* Reads all English strings, calls Azure gpt-4o-mini for NO/UK/PL, writes translations/*.php
*/
declare(strict_types=1);
define('SCRIPT_MODE', true);
require_once dirname(__DIR__) . '/includes/bootstrap.php';
require_once dirname(__DIR__) . '/includes/AzureOpenAiGateway.php';
// Output directory for the generated translation files (translations/*.php).
$outDir = dirname(__DIR__) . '/translations';
// Create the directory on first run. mkdir() returns false on failure, and
// without a check the script would continue and only fail later when writing.
// The trailing is_dir() re-check tolerates another process creating the
// directory between the first is_dir() test and the mkdir() call.
if (!is_dir($outDir) && !mkdir($outDir, 0755, true) && !is_dir($outDir)) {
    throw new RuntimeException(sprintf('Unable to create translations directory "%s".', $outDir));
}
// ─────────────────────────────────────────────────────────────────────────────
// English string definitions for all 6 pages
// Keys ending in _html contain trusted HTML markup — echoed raw in page PHP.
// All other keys are plain text — echoed via htmlspecialchars().
// ─────────────────────────────────────────────────────────────────────────────
// English source strings, keyed first by page slug and then by string key.
$pages = [];
// ═══════════════════════════════════════════════════════════
// korrespond-about.php
// ═══════════════════════════════════════════════════════════
// Key convention: keys ending in _html hold trusted HTML markup; every other
// key holds plain text.
$pages['korrespond-about'] = [
    // Nav
    'nav_about' => 'About',
    'nav_guide' => 'User guide',
    'nav_howit' => 'How it works',
    'nav_opentool' => '← Open the tool',
    'nav_signin' => 'Sign in',
    'nav_open' => 'Open Korrespond →',
    // Hero
    'hero_kicker' => 'AI Correspondence · Norwegian Authorities · Hard-RAG Grounded',
    'hero_title' => 'Draft the letter that changes everything.',
    'hero_sub' => 'Korrespond turns your situation into a polished, statute-grounded letter, email, or court filing — always in Norwegian bokmål plus your working language, side by side. Every § citation is verified against 220,000+ indexed legal passages before it reaches you.',
    'stat_legal' => 'legal passages indexed',
    'stat_authority' => 'authority presets',
    'stat_output' => 'output types',
    'stat_pipeline' => 'pipeline passes',
    'btn_try' => 'Try Korrespond free →',
    'btn_guide' => 'User guide',
    'btn_howit' => 'How it works',
    // What you get
    'what_eyebrow' => 'What you get',
    'what_title' => 'Three things no other tool gives you at once.',
    'f1_title' => 'Always Norwegian + your language',
    'f1_body' => 'The canonical draft is always Norwegian bokmål — the legally operative form. Your working language (English, Polish, or Ukrainian) appears side-by-side as a reference column. Copy either version with one click.',
    'f2_title' => 'Hard-RAG: no hallucinated statutes',
    'f2_body' => 'Every § number in the draft is traced to a specific retrieved passage from the legal corpus before it reaches you. The self-check pass strips any citation that can\'t be verified. If no statute fits — the draft says so plainly, without fake references.',
    'f3_title' => 'Covers the statutes that matter',
    'f3_body_html' => 'forvaltningsloven (fvl §§ 17, 18, 24-25, 28, 32), barnevernsloven, NAV-loven, opplæringslova, barnehageloven, and EMK Art. 6 &amp; 8 — all reachable by selecting the right recipient body. Optional second pass pulls formal ECHR case citations (Strand Lobben, Johansen, K.O. and V.M.).',
    // How it works
    'how_eyebrow' => 'How it works',
    'how_title' => 'Describe → retrieve → draft. In under a minute.',
    'how_sub' => 'Three steps from blank form to ready-to-send letter. No legal training required.',
    's1_title' => 'Describe your situation',
    's1_body' => 'Choose the authority you\'re writing to, select your output type and tone, and describe what happened. Pick a goal chip (appeal, access to documents, request meeting…) or write your own. Upload the original letter if replying.',
    's1_example' => 'E.g. "NAV denied my application. I want to appeal and demand a reasoned decision."',
    's2_title' => 'AI retrieves applicable law & drafts',
    's2_body' => 'The pipeline classifies your situation, identifies the relevant statute set for your chosen authority, retrieves the top matching passages from 220,000+ indexed sources, and drafts a formal letter — citing only what it actually found.',
    's2_example' => 'Produces two-column output: Norsk (bokmål) canonical + your working language reference.',
    's3_title' => 'Refine with formal citations',
    // The paragraph range below is written with an en dash (207–214).
    's3_body_html' => 'Optional second pass: choose Norwegian law, ECHR, or both. The tool rewrites the draft with court-ready citation style — <em>jf. forvaltningsloven § 17</em>, <em>Strand Lobben m.fl. mot Norge, EMD-37283/13, §§ 207–214</em> — and appends a Rettskilder (legal sources) block.',
    's3_example' => 'Costs one extra credit. Takes ~30 seconds.',
    // Screenshots
    'ss_eyebrow' => 'Screenshots',
    'ss_title' => 'See it in action.',
    'g1_caption' => 'The intake form: choose recipient body, output type, and tone in seconds.',
    'g2_caption' => 'Two-column output: Norwegian bokmål canonical on the left, English reference on the right.',
    'g3_caption' => 'The optional Refine pass — choose Norwegian law, ECHR, or both.',
    'g4_caption_html' => 'After refinement: formal citations like <em>jf. opplæringslova § 9 A-4</em> and <em>EMK artikkel 8</em>.',
    // Under the hood
    'hood_eyebrow' => 'Under the hood',
    'hood_title' => 'Powered by a purpose-built legal LLM.',
    'llm_badge' => 'Fine-tuned model',
    'llm_body_html' => 'A QLoRA fine-tune trained on Norwegian child-welfare and administrative law text. Unlike a general-purpose LLM, dbn-legal-agent understands the procedural vocabulary of forvaltningsloven — what a <em>klage</em> requires, what triggers fvl § 17 consultation rights, how Barnevernet decisions must be reasoned under § 6-3. It runs alongside Azure gpt-4o to shape the structure and register of every draft.',
    'llm_s1' => 'fine-tune method',
    'llm_s2' => 'procedural vocabulary',
    'llm_s3' => 'drafting backbone',
    'llm_s4' => 'pipeline architecture',
    // Authority coverage
    'auth_eyebrow' => 'Authority coverage',
    'auth_title' => '11 presets. Each loads the right statutes automatically.',
    'auth_sub' => 'Selecting a recipient body pre-loads the relevant statute set into the Hard-RAG retrieval. No need to know which laws apply — the tool does that for you.',
    // CTA strip
    'cta_title' => 'Ready to draft your letter?',
    'cta_sub' => 'Free for Do Better Norge members. No credit card required.',
    'btn_open' => 'Open Korrespond →',
    'btn_signin_cta' => 'Sign in to use Korrespond →',
    'btn_register' => 'Register free',
];
// ═══════════════════════════════════════════════════════════
// timeline-about.php
// ═══════════════════════════════════════════════════════════
// English source strings for the Timeline "about" page.
// Keys ending in _html carry HTML markup; all other keys are plain text.
$pages['timeline-about'] = [
    // Navigation links
    'nav_about' => 'About',
    'nav_guide' => 'User guide',
    'nav_howit' => 'How it works',
    'nav_opentool' => '← Open the tool',
    'nav_signin' => 'Sign in',
    'nav_open' => 'Open Timeline →',

    // Hero banner, headline stats and primary buttons
    'hero_kicker' => 'Events · Deadlines · Milestones',
    'hero_title' => 'Every date in your case, instantly mapped.',
    'hero_sub' => 'Timeline reads Norwegian case notes, court decisions, and correspondence — then extracts every temporal reference into a sortable, confidence-scored chronology. Know what happened, when, and what comes next.',
    'stat_formats' => 'date formats recognised',
    'stat_types' => 'event types classified',
    'stat_filters' => 'focus filter modes',
    'stat_levels' => 'confidence levels',
    'btn_try' => 'Try Timeline free →',
    'btn_guide' => 'User guide',
    'btn_howit' => 'How it works',

    // "What you get" feature cards
    'what_eyebrow' => 'What you get',
    'what_title' => 'Three things that change how you read a case.',
    'f1_title' => 'AI date extraction',
    'f1_body_html' => 'Timeline recognises 12+ Norwegian date formats — from <code>30.07.2015</code> to <em>tre uker etter vedtaket</em> to <em>hver mandag</em>. Every temporal reference in your document is surfaced, not just the obvious calendar dates.',
    'f2_title' => 'Confidence scoring per event',
    'f2_body' => 'Every extracted event is tagged HIGH, MEDIUM, or LOW confidence based on how explicitly the date appears in the source. Use the confidence filter to hide uncertain events when you need a clean, defensible timeline.',
    'f3_title' => 'Evidence trail & next step',
    'f3_body' => 'Below every timeline: a full Evidence Trail listing every source document, a What Remains Uncertain list for dates the AI couldn\'t resolve, and a single Next Practical Step recommendation for your case.',

    // "How it works" three-step walkthrough
    'how_eyebrow' => 'How it works',
    'how_title' => 'Upload → extract → review. In under a minute.',
    'how_sub' => 'Three steps from raw case documents to a complete, sortable timeline.',
    's1_title' => 'Upload documents or paste text',
    's1_body' => 'Upload up to 5 files (PDF, DOCX, TXT) or paste up to 128,000 characters of case text. Add optional context notes to help the AI interpret abbreviations, actor names, or document-specific date conventions.',
    's1_example' => 'E.g. "D refers to the defendant throughout. All dates are in 2024 unless stated otherwise."',
    's2_title' => 'AI extracts & classifies',
    's2_body' => 'The engine reads every sentence, recognising Norwegian date formats and classifying each event by type (absolute, relative, recurring, conditional, period) and assigning a confidence score. Focus filters scope the extraction to what matters most for your case.',
    's2_example' => 'Returns structured output: date · type · confidence · actor · description · source excerpt',
    's3_title' => 'Review the sortable timeline',
    's3_body' => 'Events appear in document order or sorted chronologically. Search by keyword, filter by confidence, and review each event\'s source excerpt. The evidence trail and next step recommendation appear below the timeline.',
    's3_example' => 'Sort: document order ↔ chronological · Search any keyword · Filter by confidence',

    // Screenshot gallery captions
    'ss_eyebrow' => 'Screenshots',
    'ss_title' => 'See it in action.',
    'g1_caption' => 'The form: choose engine, focus filter, confidence, background events, and date type settings.',
    'g2_caption' => 'Timeline output: each event shows date, type badge, confidence badge, actor, description, and source excerpt.',
    'g3_caption' => 'Detailed events from a Barnevernet case, with named actors and verified source excerpts.',
    'g4_caption' => 'Evidence Trail, What Remains Uncertain, and Next Practical Step appear below every timeline.',

    // "Under the hood" model description
    'hood_eyebrow' => 'Under the hood',
    'hood_title' => 'Powered by a purpose-built legal LLM.',
    'llm_badge' => 'Fine-tuned model',
    'llm_body_html' => 'A QLoRA fine-tune trained on Norwegian child-welfare and administrative law text — case notes, court decisions, and Barnevernet correspondence. Unlike a general-purpose LLM, dbn-legal-agent understands the temporal patterns of Norwegian legal proceedings: when an appeal window closes, what <em>akutt</em> means procedurally, how Fylkesnemnda milestones are sequenced. It runs on the GPU/cuttlefish engine as the local extraction backbone.',
    'llm_s1' => 'fine-tune method',
    'llm_s2' => 'date formats',
    'llm_s3' => 'event types',
    'llm_s4' => 'local engine',

    // Focus filter mode cards
    'focus_eyebrow' => 'Focus filters',
    'focus_title' => 'Extract only what your case needs.',
    'focus_sub' => 'Four modes scope the extraction pipeline to the events that matter most for your situation.',
    'ff1_title' => 'All events ★',
    'ff1_body' => 'Default. Extracts every temporal reference in the document — background dates, operational events, deadlines, and narrative context. Best for building a complete picture from scratch.',
    'ff2_title' => 'Legal deadlines',
    'ff2_body' => 'Scopes extraction to filing deadlines, appeal windows, and statutory time limits. Filters out narrative and background dates. Use when you need to know exactly what dates you must act on.',
    'ff3_title' => 'Court hearings',
    'ff3_body' => 'Focuses on tribunal sessions, mediation dates, and court appearances. Ideal for preparing for a hearing or reconstructing the full history of proceedings.',
    'ff4_title' => 'CPS milestones',
    'ff4_body' => 'Targets Barnevernet interventions, akuttplassering dates, tiltaksplan milestones, and Fylkesnemnda proceedings. The most targeted mode for child welfare cases.',

    // Closing call-to-action strip
    'cta_title' => 'Ready to map your case?',
    'cta_sub' => 'Free for Do Better Norge members. No credit card required.',
    'btn_open' => 'Open Timeline →',
    'btn_signin_cta' => 'Sign in to use Timeline →',
    'btn_register' => 'Register free',
];
// ═══════════════════════════════════════════════════════════
// korrespond-guide.php
// ═══════════════════════════════════════════════════════════
// English source strings for the Korrespond user-guide page.
// Keys ending in _html carry HTML markup; all other keys are plain text.
$pages['korrespond-guide'] = [
    // Navigation links
    'nav_about' => 'About',
    'nav_guide' => 'User guide',
    'nav_howit' => 'How it works',
    'nav_opentool' => '← Open the tool',
    'nav_signin' => 'Sign in',
    'nav_open' => 'Open Korrespond →',

    // Hero banner
    'hero_kicker' => 'User Guide · Korrespond',
    'hero_title' => 'How to use Korrespond.',
    'hero_sub' => 'A step-by-step walkthrough of every control — from choosing your mode to understanding the output and using the formal citation refine pass.',

    // Table of contents
    'toc_title' => 'In this guide',
    'toc_1' => 'Choose a mode: Reply or Initiate',
    'toc_2' => 'Choose the recipient body',
    'toc_3' => 'Output type',
    'toc_4' => 'Tone',
    'toc_5' => 'Context fields',
    'toc_6' => 'Describe the situation & goal chips',
    'toc_7' => 'Upload files (Reply mode)',
    'toc_8' => 'The clarify gate',
    'toc_9' => 'Understanding the output',
    'toc_10' => 'Using the Refine pass',
    'toc_11' => 'Tips & gotchas',

    // Step 1: mode selection
    'step1_title' => 'Choose a mode: Reply or Initiate',
    'step1_p1_html' => '<strong>Reply mode</strong> is for when you have received a letter, decision, or notice and need to respond to it. Upload the document (PDF, DOCX, or TXT) and the tool will read it as the basis for your reply. You can still add narrative context.',
    'step1_p2_html' => '<strong>Initiate mode</strong> is for when you want to start a new correspondence from scratch — no incoming document. You\'ll describe the situation in the "What happened" field. This mode is required for the narrative field.',

    // Step 2: recipient body table (headers and "typical use" cells)
    'step2_title' => 'Choose the recipient body',
    'step2_intro' => 'The recipient body dropdown pre-loads the relevant statute set into the Hard-RAG retrieval pipeline. Choosing correctly means the tool searches the right laws — you don\'t need to know which statutes apply yourself.',
    'th_recipient' => 'Recipient',
    'th_statutes' => 'Statutes loaded',
    'th_typical' => 'Typical use',
    'r1_use' => 'Enrolment disputes, special needs provisions',
    'r2_use' => 'Access to education, psycho-social environment',
    'r3_use' => 'After-school care disputes',
    'r4_use' => 'Benefit denials, appeal of decisions',
    'r5_use' => 'Adoption, surrogacy, family reunification',
    'r6_use' => 'Care orders, emergency placements, tiltaksplan',
    'r7_use' => 'Complaints about municipality / Barnevernet',
    'r8_use' => 'Social security tribunal appeals',
    'r9_use' => 'Court filings, procedural motions',
    'r10_use' => 'Any other municipal body',
    'r11_use' => 'Authorities not in the list above',

    // Step 3: output type table
    'step3_title' => 'Output type',
    'step3_intro' => 'Choose the format that fits what you need to send.',
    'th_type' => 'Type',
    'th_when' => 'When to use it',
    'th_structure' => 'Structure',
    'ot1_when' => 'Day-to-day correspondence, quick inquiries, follow-ups',
    'ot1_struct' => 'Short subject + body with signature',
    'ot2_when' => 'Official complaints, appeals, access-to-documents requests',
    'ot2_struct' => 'Sender/receiver block, date, reference, body, signature',
    'ot3_when' => 'Submissions to Tingretten or Trygderetten',
    'ot3_struct' => 'Numbered sections, legal argument structure, prayer for relief',
    'ot4_when' => 'Before calling a caseworker or authority',
    'ot4_struct' => 'Opening line · key facts · statutes to cite if pressed · questions to ask · escalation path',

    // Step 4: tone table
    'step4_title' => 'Tone',
    'step4_intro' => 'Tone affects the register and directness of the draft — not the legal accuracy. The AI will maintain correct Norwegian procedural formality regardless of tone.',
    'th_tone' => 'Tone',
    'tone1_when' => 'First contact, relationship still intact, no conflict yet',
    'tone2_when' => 'Default. Works for most situations — factual, polite, direct',
    'tone3_when' => 'Deadline has passed, previous requests ignored, clear legal obligation exists',
    'tone4_when' => 'Formal complaints, escalations, when cooperation has broken down completely. Use intentionally — sets a confrontational tone that can close doors.',
    'tone5_when' => 'De-escalation, apology situations, requesting a second chance or meeting',

    // Step 5: context fields
    'step5_title' => 'Context fields',
    'step5_p1_html' => '<strong>Case reference (saksnummer):</strong> The reference number on any letter you\'ve received. Providing this helps the AI draft precise references in the header. If you don\'t have one yet, leave it blank.',
    'step5_p2_html' => '<strong>Where (kommune / fylke):</strong> The geographical location of the authority. This helps the AI address the letter correctly and can affect which specific regulations apply (e.g. local school rules).',
    'step5_p3_html' => '<strong>Next deadline:</strong> If there\'s a deadline for your response or action, enter it here. The AI will include an explicit deadline reference in the letter where appropriate. Accepts YYYY-MM-DD or plain text like "3 weeks from today".',
    'step5_p4_html' => '<strong>Who is involved:</strong> Names and roles of the key parties — you, any caseworker, the child if relevant, a lawyer, etc. Keep it brief (e.g. "Me (parent), caseworker Anna Hansen, son Ola (age 8)"). <em>Tip: use the Redact tool first if you\'ll share this externally.</em>',

    // Step 6: situation narrative and goal chips
    'step6_title' => 'Describe the situation & choose a goal',
    'step6_p1_html' => '<strong>"What happened / context"</strong> is the most important field. Write what happened, when, who decided what, and what outcome you want. The more specific you are, the better the draft. 8,000 characters maximum.',
    'step6_p2_html' => '<strong>Goal chips</strong> let you quickly state your legal goal. Click one to auto-fill the Goal field — you can then edit it. Each chip maps to a specific procedural right:',
    'goal1_html' => '<strong>Access to docs (fvl §18)</strong> — Request access to all documents in your case',
    'goal2_html' => '<strong>Appeal (fvl §28)</strong> — Formally appeal a decision to the klageinstans',
    'goal3_html' => '<strong>Request meeting</strong> — Request a face-to-face meeting with a caseworker',
    'goal4_html' => '<strong>Reasoned decision (fvl §24-25)</strong> — Demand a written, reasoned decision',
    'goal5_html' => '<strong>Right to be heard (fvl §17)</strong> — Invoke your procedural right to be heard before a decision',
    'goal6_html' => '<strong>Complaint</strong> — File a complaint about caseworker conduct',
    'goal7_html' => '<strong>Clarify timeline</strong> — Ask for a status update and expected timeline',
    'goal_note' => 'You can type your own goal in the text field instead of — or in addition to — using a chip.',

    // Step 7: file uploads
    'step7_title' => 'Upload files (Reply mode)',
    'step7_p1_html' => 'In <strong>Reply mode</strong>, upload the letter or decision you received. The AI will read and summarise it as the basis for your reply. Supported formats: <strong>PDF</strong>, <strong>DOCX</strong>, <strong>TXT</strong>. Up to 4 files, max 8 MB each.',
    'step7_p2_html' => '<strong>Convention:</strong> the first file uploaded is treated as the primary received letter. Additional files are treated as supporting attachments (e.g. previous correspondence, evidence).',
    'step7_p3_html' => 'In <strong>Initiate mode</strong>, uploads are optional — use them to provide supporting context (previous letters, medical reports, etc.).',
    'step7_p4' => 'All files are processed in memory and immediately discarded when the session ends. Nothing is written to disk or retained.',

    // Step 8: clarify gate
    'step8_title' => 'The clarify gate',
    'step8_p1_html' => 'Before drafting, the AI runs a quick classification pass (Pass 1) to understand your situation. If it finds gaps that would significantly affect the quality of the draft — a missing decision date, unclear which child is involved, unknown authority — it pauses and shows a <strong>"Before we draft, clarify:"</strong> panel with specific questions.',
    'step8_p2_html' => '<strong>Answer what you can</strong>, then click <em>Continue draft</em>. Your answers are merged into the context before Pass 2 runs. This extra step costs no additional credit.',
    'step8_p3_html' => 'If you\'re in a hurry or simply don\'t know the answers, click <em>Draft anyway</em>. The tool will proceed with what it has and flag uncertainties in the output.',
    'step8_p4_html' => '<strong>Note:</strong> the clarify pass is free. Credits are only deducted when the actual drafting (Pass 2) begins.',

    // Step 9: reading the two-column output
    'step9_title' => 'Understanding the output',
    'step9_intro' => 'The output has two columns:',
    'step9_col1_html' => '<strong>Norsk (bokmål) — canonical:</strong> The legally operative draft. This is what you send. Use the Copy or Download .txt button to get the text.',
    'step9_col2_html' => '<strong>Working language — reference:</strong> A translation into your working language (EN/PL/UK). Use this to understand what you\'re sending — do not send this version to the authority.',
    'step9_note_html' => '<strong>Cited law note:</strong> at the bottom of the output, a note shows how many law sources were retrieved and cited. If it says "No cited law sources — draft is plain-language", it means no statute matched your situation closely enough to cite — the draft will still be useful but won\'t include § references. This is the honest behaviour: no fake citations.',

    // Step 10: Refine pass
    'step10_title' => 'Using the Refine pass',
    'step10_p1_html' => 'The <strong>Refine with formal citations</strong> panel appears after the initial draft. This optional second pass (+1 credit) rewrites the draft with court-ready citation style and appends a Rettskilder (legal sources) block at the end.',
    'step10_scope' => 'Choose your jurisdiction scope:',
    'step10_opt1_html' => '<strong>Norwegian law only:</strong> Rewrites to use <em>jf. forvaltningsloven § 17</em>, <em>jf. opplæringslova § 9 A-4</em> style. Best for most domestic correspondence.',
    'step10_opt2_html' => '<strong>ECHR (EMK + HUDOC):</strong> Adds European Court of Human Rights citations with full case name, application number, date, and paragraph — e.g. <em>Strand Lobben m.fl. mot Norge, EMD-37283/13 (17.09.2019), § 207</em>. Use when arguing family life rights (Art. 8) or fair trial (Art. 6).',
    'step10_opt3_html' => '<strong>Both:</strong> Combines Norwegian statute citations with ECHR case law. Strongest for Barnevernet, Bufdir, or court filings where both domestic and ECHR grounds apply.',

    // Step 11: tips
    'step11_title' => 'Tips & gotchas',
    'tip1_html' => '<strong>Use the Redact tool first.</strong> If your narrative includes full names, fødselsnumre, or addresses, run it through Redact before pasting into Korrespond — especially if you plan to share the output.',
    'tip2_html' => '<strong>Saksnummer helps a lot.</strong> Even a partial case reference from a letter header helps the AI address the reply correctly and cite the right case context.',
    'tip3_html' => '<strong>Adversarial tone is powerful — use it intentionally.</strong> It signals formal conflict escalation. Once sent, it can close cooperative doors. Use Firm first unless you\'ve genuinely exhausted other options.',
    'tip4_html' => '<strong>Goal chips stack with the narrative.</strong> If you pick "Appeal (fvl §28)" but your narrative also mentions access to documents, the AI will address both — you don\'t need to pick just one chip.',
    'tip5_html' => '<strong>Deadline field affects the draft directly.</strong> The AI explicitly states the deadline in the letter and frames the request with urgency where appropriate. Always fill it in if one exists.',
    'tip6_html' => '<strong>For ECHR citations, choose Barnevernet or Bufdir as body.</strong> Those presets load the ECHR + family law corpus slices. The Refine pass will then find the strongest relevant case law (Strand Lobben, Johansen, K.O. and V.M.).',

    // Closing call-to-action
    'cta_title' => 'Ready to try it?',
    'cta_sub' => 'Free for Do Better Norge members.',
    'btn_open' => 'Open Korrespond →',
    'btn_signin_cta' => 'Sign in to use Korrespond →',
    'btn_register' => 'Register free',
    'btn_techlink' => 'How it works under the hood',
];
// ═══════════════════════════════════════════════════════════
// timeline-guide.php
// ═══════════════════════════════════════════════════════════
$pages['timeline-guide'] = [
'nav_about' => 'About',
'nav_guide' => 'User guide',
'nav_howit' => 'How it works',
'nav_opentool' => '← Open the tool',
'nav_signin' => 'Sign in',
'nav_open' => 'Open Timeline →',
// Hero
'hero_kicker' => 'User Guide · Timeline',
'hero_title' => 'How to use Timeline.',
'hero_sub' => 'A step-by-step walkthrough of every control — from choosing your engine and focus filter to reading the output and understanding the evidence trail.',
// TOC
'toc_title' => 'In this guide',
'toc_1' => 'Choose your engine',
'toc_2' => 'Focus filter',
'toc_3' => 'Confidence filter',
'toc_4' => 'Background events',
'toc_5' => 'Date types',
'toc_6' => 'Upload files',
'toc_7' => 'Pasted text & context notes',
'toc_8' => 'Reading the timeline',
'toc_9' => 'Evidence trail, uncertainty & next step',
'toc_10' => 'Tips & gotchas',
// Step 1 — Engine
'step1_title' => 'Choose your engine',
'step1_intro' => 'The engine controls the AI model used for extraction. Azure engines use your BNL Azure credits. The GPU engine runs the LiteLLM proxy on the local cuttlefish server with the dbn-legal-agent fine-tuned model.',
'th_engine' => 'Engine',
'th_speed' => 'Speed',
'th_best' => 'Best for',
'eng1_speed' => 'Fast (~15 s)',
'eng1_best' => 'Default. Everyday extractions, long documents, first pass.',
'eng2_speed' => 'Best quality (~45 s)',
'eng2_best' => 'Complex documents, overlapping events, ambiguous or poorly formatted source text.',
'eng3_speed' => 'Local (~25 s)',
'eng3_best' => 'Maximum privacy. Runs entirely on local GPU with dbn-legal-agent fine-tuned model. Requires cuttlefish server online.',
// Step 2 — Focus filter
'step2_title' => 'Focus filter',
'step2_intro' => 'The focus filter scopes what kinds of events the AI looks for. Found in the Advanced settings panel (click to expand).',
'th_mode' => 'Mode',
'th_extracts' => 'What it extracts',
'th_when' => 'When to use',
'focus1_extracts' => 'Every temporal reference in the document — background, operational, deadlines, narrative',
'focus1_when' => 'Building a complete chronology from scratch',
'focus2_extracts' => 'Filing deadlines, appeal windows, statutory time limits',
'focus2_when' => 'Checking whether you\'ve missed a deadline or when you must act next',
'focus3_extracts' => 'Tribunal sessions, mediation dates, court appearances',
'focus3_when' => 'Preparing for a hearing or reconstructing hearing history',
'focus4_extracts' => 'Barnevernet interventions, akuttplassering, tiltaksplan milestones, Fylkesnemnda proceedings',
'focus4_when' => 'Child welfare cases needing a CPS-specific chronology',
// Step 3 — Confidence filter
'step3_title' => 'Confidence filter',
'step3_intro' => 'Controls whether uncertain events appear in the output. Also in the Advanced settings panel.',
'th_setting' => 'Setting',
'th_does' => 'What it does',
'conf1_does' => 'Returns all extracted events including LOW-confidence ones (shown in grey). Use for a complete picture — decide yourself what to trust.',
'conf2_does' => 'Suppresses any event the model isn\'t reasonably certain about. Use when you need a clean, defensible timeline for court or legal filing.',
'conf_note' => 'LOW-confidence events are typically those where the date is implied rather than stated, relative to an unclear reference point, or extracted from a degraded or ambiguous section of the source.',
// Step 4 — Background events
'step4_title' => 'Background events',
'step4_p1_html' => '<strong>Default: ON (checked).</strong> When checked, historical context dates are included — dates like <em>"born 30.07.2015"</em>, <em>"met around 2011/2012"</em>, <em>"married in 2009"</em>. These establish the narrative and biographical context of a case.',
'step4_p2_html' => '<strong>When to uncheck:</strong> if you only want operational events and decisions — not biographical background. Unchecking substantially reduces event count in case notes that mix history with current proceedings. Useful when you\'re building a deadline tracker or action list rather than a full chronology.',
// Step 5 — Date types
'step5_title' => 'Date types',
'step5_p1_html' => '<strong>Default: ON (checked).</strong> When checked, relative references (<em>"tre uker etter vedtaket"</em>), recurring patterns (<em>"each Monday"</em>, <em>"every 6 months"</em>), and conditional dates (<em>"if no response within 14 days"</em>) are included alongside absolute dates.',
'step5_p2_html' => '<strong>When to uncheck:</strong> if you need only exact calendar dates — for example when exporting to a calendar app, a deadline tracker, or a court submission that requires hard dates only. Unchecking removes all events without a resolvable absolute date.',
// Step 6 — Upload files
'step6_title' => 'Upload files',
'step6_p1_html' => 'Drag files onto the upload zone or click <strong>browse</strong>. A file list appears below with a Clear button to remove files.',
'step6_p2_html' => '<strong>Supported formats:</strong> <strong>PDF</strong>, <strong>DOCX</strong>, <strong>TXT</strong>. Up to 5 files per run.',
'step6_p3' => 'Files are extracted to text in memory. Nothing is written to disk. Nothing is retained after the session ends.',
'step6_p4' => 'Upload and pasted text can be combined — the AI reads all sources together as a single input. If a case spans multiple documents, upload them all and let the tool assemble one unified timeline.',
// Step 7 — Pasted text & context notes
'step7_title' => 'Pasted text & context notes',
'step7_p1_html' => 'Paste up to <strong>128,000 characters</strong> of text into the main text area — approximately 90100 pages of A4. This can include case notes, decision letters, correspondence, or any document containing dates.',
'step7_p2_html' => '<strong>Context notes</strong> (optional, up to 2,000 characters): use this field to guide the AI on ambiguities in your document. These notes are passed directly into the extraction prompt and are not stored. Good examples:',
'ctx1' => '"All dates are in 2024 unless stated otherwise."',
'ctx2' => '"D refers to the defendant, M is the mother, BV is Barnevernet."',
'ctx3' => '"The document is a summary of events, not in chronological order."',
'ctx4' => '"\'Vedtaket\' always refers to the omsorgsovertakelse decision of 12.03.2024."',
'ctx5' => '"Focus on the father\'s actions. Ignore events relating to the sister."',
// Step 8 — Reading the timeline
'step8_title' => 'Reading the timeline',
'step8_intro' => 'Each event card in the timeline contains:',
'card1_html' => '<strong>Date:</strong> the resolved date. Absolute events show a formatted date (e.g. <em>2025-04-09</em>). Relative events show the expressed reference (e.g. <em>"three weeks after the decision"</em>). Periods show startend.',
'card2_html' => '<strong>Date type badge:</strong> one of <code>ABSOLUTE</code> / <code>RELATIVE</code> / <code>RECURRING</code> / <code>CONDITIONAL</code> / <code>PERIOD</code>. Tells you how the date was expressed in the source.',
'card3_html' => '<strong>Confidence badge:</strong> <code>HIGH</code> (green — clearly stated), <code>MEDIUM</code> (amber — inferred), <code>LOW</code> (grey — ambiguous). LOW events appear greyed out when "Show all" is selected.',
'card4_html' => '<strong>Actor:</strong> the person, institution, or body the event is attributed to (named or by role).',
'card5_html' => '<strong>Event description:</strong> a concise one-sentence summary of what happened on that date.',
'card6_html' => '<strong>Source excerpt:</strong> the exact text from your document that produced this event.',
'sort_note_html' => '<strong>Sorting:</strong> use the sort toggle above the timeline to switch between <strong>Document order</strong> (the order events appear in your source) and <strong>Chronological</strong> (oldest to newest).',
'search_note_html' => '<strong>Search:</strong> type any keyword to filter events. The search matches against date, actor, description, and source excerpt simultaneously.',
// Step 9 — Evidence trail
'step9_title' => 'Evidence trail, uncertainty & next step',
'step9_intro' => 'Three sub-sections appear below every timeline:',
'ev1_html' => '<strong>Evidence Trail:</strong> lists every source document (or paste) that contributed events, with a title and excerpt identifying the source. Use this to verify that all your documents were processed and to trace any event back to its origin.',
'ev2_html' => '<strong>What Remains Uncertain:</strong> a bulleted list of dates the AI could not fully resolve — undated events, events with ambiguous years, relative dates with no resolvable anchor. This is what you still need to verify or obtain manually.',
'ev3_html' => '<strong>Next Practical Step:</strong> a single AI-generated recommended action based on the overall timeline — for example: <em>"Verify whether the 6-week appeal window from the 12.03.2024 omsorgsovertakelse has elapsed and document any missed deadline formally."</em>',
'ev_disclaimer' => 'The disclaimer at the bottom confirms that Timeline provides preparation support, not legal advice.',
// Step 10 — Tips
'step10_title' => 'Tips & gotchas',
'tip1_html' => '<strong>Use context notes for abbreviations.</strong> If your document uses internal abbreviations (D, M, BV, BH, the father, the case worker) consistently, naming them in the context notes dramatically improves actor attribution across all events.',
'tip2_html' => '<strong>Combine upload and paste for best coverage.</strong> Upload the main decision letter and paste related case notes into the text area. The AI reads all sources together and produces one unified timeline.',
'tip3_html' => '<strong>Run All events first, then re-run with focus.</strong> Start with the default "All events" mode to see the full picture. Then re-run with a specific focus mode (e.g. Legal deadlines) if you need a filtered view for a submission.',
'tip4_html' => '<strong>Date types off for clean deadline lists.</strong> If you\'re exporting to a calendar or deadline tracker, uncheck "Date types" to return only resolved calendar dates — no relative or recurring entries to sort through.',
'tip5_html' => '<strong>GPU engine for maximum privacy.</strong> The cuttlefish engine processes entirely locally using dbn-legal-agent — nothing leaves your network. Use it for the most sensitive case material.',
'tip6_html' => '<strong>LOW confidence isn\'t wrong — it\'s uncertain.</strong> A LOW confidence event may still be correct and important. Read the source excerpt for each LOW event before dismissing it — the AI may have found a real date that was just expressed ambiguously.',
// CTA
'cta_title' => 'Ready to try it?',
'cta_sub' => 'Free for Do Better Norge members.',
'btn_open' => 'Open Timeline →',
'btn_signin_cta' => 'Sign in to use Timeline →',
'btn_register' => 'Register free',
'btn_techlink' => 'How it works under the hood',
];
// ═══════════════════════════════════════════════════════════
// korrespond-tech.php
// ═══════════════════════════════════════════════════════════
$pages['korrespond-tech'] = [
'nav_about' => 'About',
'nav_guide' => 'User guide',
'nav_howit' => 'How it works',
'nav_opentool' => '← Open the tool',
'nav_signin' => 'Sign in',
'nav_open' => 'Open Korrespond →',
// Hero
'hero_kicker' => 'Technical Showcase · Hard-RAG · Fine-tuned LLM · 3-pass pipeline',
'hero_title' => 'How Korrespond knows what to write.',
'hero_sub' => 'A full walkthrough of the retrieval-augmented generation pipeline, citation verification system, fine-tuned legal model, and the formal citation refine pass that produces court-ready references.',
'stat1' => 'passages indexed',
'stat2' => 'corpus slices',
'stat3' => 'pipeline passes',
'stat4' => 'drafting model',
// Architecture
'arch_eyebrow' => 'Architecture',
'arch_title' => 'Three passes. Each with a distinct job.',
'arch_sub' => 'The pipeline is intentionally sequential — Pass 1 is cheap and fast (gpt-4o-mini); Pass 2 is expensive and only runs if the situation is clear enough; Pass 3 is optional and user-triggered.',
'pass1_title' => 'Classify & gap-check',
'pass1_p1_html' => 'Parses the intake and returns a structured JSON classification:',
'pass1_p2_html' => 'If <code>missing_facts</code> is non-empty → emits <strong>clarify gate</strong>. No credit deducted until Pass 2 starts.',
'pass2_title' => 'Retrieve → draft → check → translate',
'pass2_p1' => 'Four sub-steps, each verified before proceeding:',
'pass2_r1_html' => '<strong>Retrieve:</strong> hybrid dense + BM25 search across the preset corpus slices; top 8 passages returned with source IDs',
'pass2_r2_html' => '<strong>Draft:</strong> gpt-4o generates the letter using <code>[CITE:N]</code> tokens referencing only retrieved source IDs',
'pass2_r3_html' => '<strong>Self-check:</strong> strips any <code>[CITE:N]</code> token whose source ID isn\'t in the retrieved pool; flags deadline/goal/tone compliance',
'pass2_r4_html' => '<strong>Translate:</strong> Norwegian draft → working language (single call)',
'pass3_title' => 'Formal citation refine',
'pass3_p1_html' => 'User-triggered (+1 credit). Jurisdiction-scoped retrieval, then rewrites inline citations to formal style and appends Rettskilder block:',
'pass3_n1_html' => '<strong>Norwegian:</strong> <em>jf. forvaltningsloven § 17</em>',
'pass3_n2_html' => '<strong>ECHR:</strong> full case name, application number, date, paragraph',
'pass3_n3_html' => '<strong>Both:</strong> combined domestic + ECHR grounds',
// Hard-RAG
'rag_eyebrow' => 'Hard-RAG',
'rag_title' => 'Every § citation is verified before it reaches you.',
'rag_sub' => 'Hard-RAG means the model is constrained to only cite what it retrieved. No § number can appear in the final draft unless a corresponding source passage was actually found and fetched.',
'rag_box1' => 'User intake + body preset',
'rag_box2' => 'Corpus slice selection',
'rag_box3' => 'Hybrid search (dense vector + BM25)',
'rag_box4' => 'Top 8 passages with source IDs',
'rag_box5' => 'Passages injected into gpt-4o prompt',
'rag_box6' => 'Draft with [CITE:N] tokens only',
'rag_box7' => 'Self-check: verify each [CITE:N] resolves',
'rag_box8' => 'Strip unverified citations',
'rag_p1_html' => 'The self-check pass parses every <code>[CITE:N]</code> token in the draft and looks up the source ID <code>N</code> in the retrieved pool. If it doesn\'t match — the citation is removed and the paragraph is rewritten without it. The output also flags whether the deadline was addressed, whether the stated goal was achieved, and whether the tone matched the selected chip.',
'rag_h2' => 'What happens when no statute fits?',
'rag_p2_html' => 'If no corpus passage closely matches the situation, the draft is produced in plain language without § references. A note in the output says: <em>"No cited law sources — draft is plain-language (no § references available from corpus)."</em> This is the intentional, honest behaviour — a blank draft is better than one with fake citations.',
// Knowledge base
'kb_eyebrow' => 'Knowledge base',
'kb_title' => '220,000+ passages across 8 corpus slices.',
'kb_sub' => 'The legal corpus is split into named slices. Each recipient body preset maps to a set of slices, so retrieval is always scoped to the right area of law.',
'kb_s1' => 'total indexed passages',
'kb_s2' => 'corpus slices',
'kb_s3' => 'FNV tribunal decisions',
'kb_s4' => 'ECHR Norwegian-family cases',
'kb_s5' => 'AI Search (West Europe)',
'kb_s6' => 'dense vector + BM25',
'kb_h2' => 'Corpus slices',
'kb_h3' => 'Body preset → slice mapping (examples)',
'kb_th1' => 'Recipient body',
'kb_th2' => 'Corpus slices loaded',
// Fine-tuned model
'ft_eyebrow' => 'Fine-tuned model',
'ft_title' => 'dbn-legal-agent: trained on Norwegian legal text.',
'ft_badge' => 'QLoRA fine-tune',
'ft_p1' => 'A QLoRA (Quantized Low-Rank Adaptation) fine-tune trained on Norwegian child-welfare and administrative law text. Unlike a general-purpose LLM, dbn-legal-agent has internalized the procedural vocabulary and reasoning patterns of forvaltningsloven: what triggers a § 17 right to be heard, what a lawful § 24 reasoned decision must contain, how barnevernsloven § 6-3 frames the child\'s best interest standard.',
'ft_p2' => 'In the Korrespond pipeline, dbn-legal-agent runs as a domain adapter alongside Azure gpt-4o. The retrieval prompt is constructed using dbn-legal-agent\'s representation of the intake, while gpt-4o handles the final generation within the Hard-RAG constraint. This separation gives structural clarity (gpt-4o) and domain precision (dbn-legal-agent) in the same pipeline.',
'ft_th1' => 'Pass',
'ft_th2' => 'Model',
'ft_th3' => 'Role',
'ft_r1_role' => 'Fast structured classification + gap detection',
'ft_r2_role' => 'Domain-aware question generation',
'ft_r3_role' => 'Full letter generation within Hard-RAG constraints',
'ft_r4_role' => 'Citation verification + tone/goal/deadline audit',
'ft_r5_role' => 'Norwegian → working language translation',
'ft_r6_role' => 'Formal citation rewrite + Rettskilder block',
// Pass 3
'p3_eyebrow' => 'Pass 3 — Formal citation refine',
'p3_title' => 'Court-ready citations in two styles.',
'p3_sub' => 'The optional third pass does a jurisdiction-scoped retrieval run, then rewrites the draft with formal inline citations and a Rettskilder appendix. Two distinct citation formats are supported:',
'p3_f1_title' => 'Norwegian statute style',
'p3_f1_body_html' => 'Inline citations use <em>jf.</em> (with reference to) and the official statute name + section: <code>jf. forvaltningsloven § 17</code>, <code>jf. opplæringslova § 9 A-4</code>, <code>jf. barnevernsloven § 6-3</code>. Section numbers are verified against the corpus before inclusion.',
'p3_f2_title' => 'ECHR citation style',
'p3_f2_body_html' => 'Full European Court of Human Rights citation format: case name · application number · date · chamber/Grand Chamber · paragraph. Example: <code>Strand Lobben m.fl. mot Norge, EMD-37283/13 (Storkammer, 10.09.2019), § 207</code>. Sources pulled from the ECHR corpus slice and HUDOC.',
'p3_eg_title' => 'Example refined output',
'p3_eg_caption_html' => 'Refined draft (Norwegian + English) with <em>opplæringslova § 9 A-4</em> and <em>EMK artikkel 8</em> inline citations.',
'p3_anchor_title' => 'Anchor queries for ECHR mode',
'p3_anchor_intro' => 'For Barnevernet and Bufdir cases, the ECHR refine pass runs specific anchor queries targeting the most-cited Norwegian family cases in the HUDOC corpus:',
// Privacy
'priv_eyebrow' => 'Privacy & security',
'priv_title' => 'Your documents never leave your session.',
'priv_badge' => 'Privacy by design',
'priv_1_html' => 'All uploaded files are extracted to text <strong>in memory</strong> using PHP\'s in-process file handlers. The raw binary is never written to disk on the server.',
'priv_2' => 'Session context (your narrative, uploaded text, drafts) is scoped to your authenticated session and discarded when the session ends.',
'priv_3_html' => 'Azure OpenAI (gpt-4o, gpt-4o-mini) is configured on the <strong>West Europe</strong> region. Data processed via Azure OpenAI is not used for model training under the default enterprise agreement.',
'priv_4_html' => 'Azure AI Search (<code>bnl-legal-search</code>) stores <strong>only the public legal corpus</strong> — statutes, tribunal decisions, ECHR judgments. None of your case information is stored in the search index.',
'priv_5' => 'Qdrant vector database stores only the public corpus embeddings — no user data.',
'priv_6_html' => 'Telemetry logged: tool name, language, output type, pass count, latency, source count. <strong>No case text, no names, no case references are logged.</strong>',
// CTA
'cta_title' => 'See it work on your case.',
'cta_sub' => 'Free for Do Better Norge members. All 3 passes available to every member.',
'btn_open' => 'Open Korrespond →',
'btn_signin_cta' => 'Sign in to use Korrespond →',
'btn_register' => 'Register free',
'btn_guide' => 'User guide',
];
// ═══════════════════════════════════════════════════════════
// timeline-tech.php
// ═══════════════════════════════════════════════════════════
$pages['timeline-tech'] = [
'nav_about' => 'About',
'nav_guide' => 'User guide',
'nav_howit' => 'How it works',
'nav_opentool' => '← Open the tool',
'nav_signin' => 'Sign in',
'nav_open' => 'Open Timeline →',
// Hero
'hero_kicker' => 'Technical Showcase · How the AI reads time',
'hero_title' => 'How Timeline knows when things happened.',
'hero_sub' => 'A full walkthrough of the 3-pass extraction pipeline, Norwegian date format recognition, event classification schema, multi-engine architecture, and the fine-tuned dbn-legal-agent model.',
'stat1' => 'date formats',
'stat2' => 'event types',
'stat3' => 'pipeline passes',
'stat4' => 'engine options',
// Architecture
'arch_eyebrow' => 'Architecture',
'arch_title' => 'Three passes. Each with a distinct job.',
'arch_sub' => 'The pipeline is intentionally sequential — Pass 1 is rule-based and near-instant; Pass 2 is the LLM extraction; Pass 3 post-processes and scores the output.',
'pass1_title' => 'Detect & normalise known formats',
'pass1_p1_html' => 'A deterministic pattern-matching pass runs before any LLM call. It scans the full input for dates matching 12+ Norwegian formats and normalises them to ISO 8601:',
'pass1_li1_html' => '<code>dd.mm.yyyy</code> → <code>YYYY-MM-DD</code>',
'pass1_li2' => 'd. månedsnavn yyyy → resolved calendar date',
'pass1_li3' => 'Diary-format lines (starting with a date + colon) → auto-tagged as events',
'pass1_li4' => 'Two-digit years → always interpreted as 20YY',
'pass1_p2' => 'Normalised anchors are injected into the LLM prompt to reduce hallucinated or misread dates.',
'pass2_title' => 'Extract, classify & score',
'pass2_p1_html' => 'The LLM reads the full document alongside the pre-pass anchors. For every temporal reference it returns a structured JSON event object:',
'pass2_p2' => 'The prompt explicitly instructs the model not to invent dates or actors not present in the source. Temperature is set to 0.1 for deterministic output.',
'pass3_title' => 'Filter, sort & assemble',
'pass3_p1_html' => 'PHP applies all active filters before returning the result:',
'pass3_f1_html' => '<strong>Focus filter</strong> — strips events not matching the requested focus mode (deadlines / hearings / CPS)',
'pass3_f2_html' => '<strong>Confidence filter</strong> — removes LOW-confidence events if requested',
'pass3_f3_html' => '<strong>Background filter</strong> — strips background/narrative events if unchecked',
'pass3_f4_html' => '<strong>Date-type filter</strong> — strips relative/recurring events if unchecked',
'pass3_p2_html' => 'The post-processor then assembles the <code>what_remains_uncertain</code> list and the <code>next_practical_step</code> recommendation.',
// Date recognition
'date_eyebrow' => 'Date recognition',
'date_title' => '12+ Norwegian date formats, all recognised.',
'date_sub' => 'Norwegian legal documents use a wide variety of date notations. The Pass 1 pre-pass recognises all of these deterministically; the LLM handles the rest in Pass 2.',
'th_format' => 'Format',
'th_example' => 'Example',
'th_notes' => 'Notes',
'dn1' => 'Standard Norwegian numeric',
'dn2' => 'Two-digit year → always 20YY',
'dn3' => 'Written month in bokmål/nynorsk',
'dn4' => 'Year inferred by proximity scanning',
'dn5' => 'ISO 8601',
'dn6' => 'Month + year only',
'dn7' => 'Year-only reference',
'dn8' => 'Seasonal reference → Q3/Q4',
'dn9' => 'Date + colon → auto-tagged as event',
'dn10' => 'Anchored to nearest resolved event',
'dn11' => 'Classified as recurring',
'dn12' => 'Yields start_date + end_date',
// Classification schema
'class_eyebrow' => 'Classification schema',
'class_title' => 'Five event types. Three confidence levels.',
'class_h1' => 'date_type values',
'th_datetype' => 'date_type',
'th_definition' => 'Definition',
'dt1_def' => 'A specific, resolvable calendar date',
'dt2_def' => 'A date expressed relative to another event',
'dt3_def' => 'A pattern that repeats on a schedule',
'dt4_def' => 'A date contingent on a condition being met',
'dt5_def' => 'A date range or duration with start and end',
'class_h2' => 'confidence levels',
'th_conf' => 'confidence',
'th_meaning' => 'Meaning',
'th_visual' => 'Visual in timeline',
'conf1_meaning' => 'Date is explicitly and unambiguously stated in the source text',
'conf1_visual' => 'Green badge',
'conf2_meaning' => 'Date is inferred, approximate, or stated with slight ambiguity',
'conf2_visual' => 'Amber badge',
'conf3_meaning' => 'Date is implied, undated, or extracted from a degraded/ambiguous passage',
'conf3_visual' => 'Grey badge',
'class_h3' => 'Actor attribution rules',
'th_rule' => 'Rule',
'act1_rule' => 'Named entity in the same sentence',
'act2_rule' => 'Role label without a name',
'act3_rule' => 'No clear attribution in sentence',
'act4_rule' => 'Document-level default',
'act4_example' => 'If no per-event actor, defaults to the document sender/issuing body',
// Multi-engine
'eng_eyebrow' => 'Engines',
'eng_title' => 'Three engines, one structured output.',
'eng_sub' => 'All engines return the same JSON schema — the post-processor handles all three identically. Engine choice affects speed, quality, and privacy only.',
'th_model' => 'Model',
'th_latency' => 'Latency',
'th_best' => 'Best for',
'eng1_best' => 'Default. Fast, cost-efficient, handles most legal documents well.',
'eng2_best' => 'Complex documents, overlapping events, poor-quality or dense source text.',
'eng3_best' => 'Maximum privacy. Entirely local. Fine-tuned on Norwegian legal corpus.',
// Fine-tuned model
'ft_eyebrow' => 'Fine-tuned model',
'ft_title' => 'dbn-legal-agent: trained on Norwegian legal text.',
'ft_badge' => 'QLoRA fine-tune',
'ft_p1' => 'A QLoRA (Quantized Low-Rank Adaptation) fine-tune trained on Norwegian child-welfare and administrative law text — case notes, court decisions, Barnevernet correspondence, Fylkesnemnda decisions, and Statsforvalter rulings. The model has internalised the temporal patterns of Norwegian legal proceedings: the procedural sequence of an omsorgsovertakelse, the typical timeline of a tiltaksplan review cycle, what akutt means as a temporal signal, how Fylkesnemnda milestones are ordered.',
'ft_p2' => 'In the Timeline GPU engine, dbn-legal-agent runs as the primary extraction model via the LiteLLM proxy on cuttlefish. The structured JSON output schema is identical to the Azure engines — the same post-processing pipeline applies regardless of which engine produced the extraction. No Azure API calls are made when the GPU engine is selected.',
// Privacy
'priv_eyebrow' => 'Privacy & security',
'priv_title' => 'Your documents never leave your session.',
'priv_badge' => 'Privacy by design',
'priv_1_html' => 'All uploaded files are extracted to text <strong>in memory</strong> using PHP\'s in-process file handlers. The raw binary is never written to disk on the server.',
'priv_2' => 'Session context (pasted text, uploaded content, extracted timeline events) is scoped to your authenticated session and discarded when the session ends.',
'priv_3_html' => 'Azure OpenAI (<code>gpt-4o</code>, <code>gpt-4o-mini</code>) is configured on the <strong>West Europe</strong> region. Data processed via Azure OpenAI is not used for model training under the default enterprise agreement.',
'priv_4_html' => 'The GPU/cuttlefish engine processes entirely locally — no data leaves your network. The LiteLLM proxy on cuttlefish receives your document text and returns structured JSON; nothing is forwarded to an external API.',
'priv_5_html' => 'Telemetry logged: tool name, engine, focus mode, event count, latency. <strong>No document text, case references, actor names, or extracted events are logged.</strong>',
// CTA
'cta_title' => 'See it work on your case.',
'cta_sub' => 'Free for Do Better Norge members. All engines available to every member.',
'btn_open' => 'Open Timeline →',
'btn_signin_cta' => 'Sign in to use Timeline →',
'btn_register' => 'Register free',
'btn_guide' => 'User guide',
];
// ─────────────────────────────────────────────────────────────────────────────
// Translation engine — direct curl (SSL verify off for WAMP CLI dev)
// ─────────────────────────────────────────────────────────────────────────────
// Azure OpenAI connection settings, looked up through the project's dbnToolsEnv()
// helper (the second argument is presumably the fallback value — confirm against
// includes/bootstrap.php). Every value is cast to string. Trailing slashes are
// stripped from the endpoint because azureChat() appends a path starting with '/'.
$azConfig = [];
$azConfig['endpoint'] = rtrim((string)dbnToolsEnv('DBN_AZURE_OPENAI_ENDPOINT', ''), '/');
$azConfig['api_key'] = (string)dbnToolsEnv('DBN_AZURE_OPENAI_API_KEY', '');
$azConfig['api_version'] = (string)dbnToolsEnv('DBN_AZURE_OPENAI_API_VERSION', '2024-02-01');
$azConfig['chat_deployment'] = (string)dbnToolsEnv('DBN_AZURE_OPENAI_CHAT_DEPLOYMENT', 'gpt-4o-mini');
/**
 * Sends one chat-completions request to an Azure OpenAI deployment and returns
 * the decoded JSON response.
 *
 * @param array $config   Connection settings. Required, non-empty: 'endpoint',
 *                        'api_key', 'api_version', 'chat_deployment'.
 *                        Optional: 'verify_ssl' (bool, default false) turns TLS
 *                        certificate/host verification on; 'ca_bundle' (string)
 *                        is a CA file path used when verification is on.
 * @param array $messages Chat messages, e.g. [['role' => 'user', 'content' => '...']].
 * @param array $options  Optional: 'temperature' (default 0.1), 'max_tokens'
 *                        (default 4096), 'json' (truthy requests a JSON-object
 *                        response), 'timeout' in seconds (default 120).
 *
 * @return array The decoded response body.
 *
 * @throws RuntimeException On missing configuration, a payload that cannot be
 *                          JSON-encoded, a transport failure, a non-JSON body,
 *                          or a non-2xx HTTP status.
 */
function azureChat(array $config, array $messages, array $options = []): array
{
    // Fail fast: with an empty endpoint the URL below would be relative and the
    // only symptom would be an opaque curl error.
    foreach (['endpoint', 'api_key', 'api_version', 'chat_deployment'] as $required) {
        if (trim((string)($config[$required] ?? '')) === '') {
            throw new RuntimeException("Azure config is missing '$required'");
        }
    }

    $url = $config['endpoint']
        . '/openai/deployments/'
        . rawurlencode($config['chat_deployment'])
        . '/chat/completions?api-version='
        . rawurlencode($config['api_version']);

    $request = [
        'messages' => $messages,
        'temperature' => $options['temperature'] ?? 0.1,
        'max_tokens' => $options['max_tokens'] ?? 4096,
    ];
    if (!empty($options['json'])) {
        $request['response_format'] = ['type' => 'json_object'];
    }

    // json_encode() returns false on e.g. invalid UTF-8; never post that as the body.
    $payload = json_encode($request, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    if ($payload === false) {
        throw new RuntimeException('Azure request could not be encoded as JSON: ' . json_last_error_msg());
    }

    $ch = curl_init($url);
    if ($ch === false) {
        throw new RuntimeException('Azure curl failed: could not initialise curl handle');
    }

    // NOTE(review): verification stays OFF by default to keep the documented
    // WAMP dev workflow working (no system CA bundle there). With it off the
    // API key travels over an unauthenticated TLS connection, so set
    // 'verify_ssl' => true (and 'ca_bundle' if needed) anywhere but local dev.
    $verifyTls = (bool)($config['verify_ssl'] ?? false);
    $curlOptions = [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => [
            'Content-Type: application/json',
            'api-key: ' . $config['api_key'],
        ],
        CURLOPT_TIMEOUT => (int)($options['timeout'] ?? 120),
        CURLOPT_SSL_VERIFYPEER => $verifyTls,
        CURLOPT_SSL_VERIFYHOST => $verifyTls ? 2 : 0,
    ];
    if ($verifyTls && !empty($config['ca_bundle'])) {
        $curlOptions[CURLOPT_CAINFO] = (string)$config['ca_bundle'];
    }
    curl_setopt_array($ch, $curlOptions);

    $body = curl_exec($ch);
    $code = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
    $err = curl_error($ch);
    curl_close($ch);

    if ($body === false) {
        throw new RuntimeException("Azure curl failed: $err");
    }

    $decoded = json_decode($body, true);
    if (!is_array($decoded)) {
        throw new RuntimeException("Azure returned non-JSON (HTTP $code): " . substr($body, 0, 200));
    }
    if ($code < 200 || $code >= 300) {
        throw new RuntimeException('Azure error: ' . ($decoded['error']['message'] ?? "HTTP $code"));
    }

    return $decoded;
}
// Target languages. The key is the language code under which the translated
// strings are stored in each generated file; the value is the language name
// substituted into the translation prompt.
$languages = [];
$languages['no'] = 'Norwegian (Norsk bokmål)';
$languages['uk'] = 'Ukrainian';
$languages['pl'] = 'Polish';
// System prompt template for every translation request. It is a nowdoc, so
// nothing is interpolated here; translateBatch() replaces the {LANG} placeholder
// with the target language name. The preserve-list keeps Norwegian legal terms,
// product/index names, markup and citation tokens untranslated.
$systemPrompt = <<<'PROMPT'
You are a professional legal translator. Translate all JSON values to {LANG}.
PRESERVE AS-IS (do not translate):
- Norwegian institution/authority names: Barnevernet, Statsforvalteren, Bufdir, Tingretten, Trygderetten, Fylkesnemnda, NAV, SFO, Barnehage, Skole, Kommune, Annet
- Norwegian legal act names and abbreviations: fvl, forvaltningsloven, barnevernsloven, opplæringslova, barnehageloven, trygderettsloven, tvisteloven, NAV-loven, EMK, ECHR, bokmål, nynorsk
- Norwegian legal terms: klage, klageinstans, vedtak, saksnummer, akutt, omsorgsovertakelse, tiltaksplan, Rettskilder, jf., fødselsnumre, akuttplassering
- ECHR case names: Strand Lobben m.fl. mot Norge, Johansen mot Norge, K.O. og V.M. mot Norge, Aune mot Norge
- Technical terms and product names: QLoRA, gpt-4o, gpt-4o-mini, Hard-RAG, Azure, BM25, ISO 8601, HUDOC, LiteLLM, cuttlefish, dbn-legal-agent, bnl-legal-search, PHP, JSON
- HTML tags and entities exactly as they appear: <em>, <strong>, <code>, <br>, <ul>, <li>, &amp;, &lt;, &gt;
- § symbols and all section/article numbers: §§ 17, 18, 24-25, Art. 6, Art. 8, EMD-37283/13
- All proper names, place names, and code snippets in backtick/code elements
- Bracketed citation tokens exactly as written: [CITE:N]
- The word "Do Better Norge" (brand name)
Return a JSON object with EXACTLY the same keys as the input. Translate only the values (not the keys). Preserve the meaning accurately for a legal context.
PROMPT;
/**
 * Translates one batch of key => English string pairs into the target language.
 *
 * The batch is sent to azureChat() as a JSON object together with the system
 * prompt (its {LANG} placeholder replaced by $langName), requesting a JSON-object
 * reply. The reply is then reconciled against the input so the result always
 * has exactly the input's keys, in the input's order, with string values:
 *   - keys the model invented are dropped;
 *   - keys that are missing, empty, or not strings fall back to the English text.
 * If the reply cannot be parsed at all, the whole batch is returned in English.
 *
 * @param array  $azConfig     Connection settings passed through to azureChat().
 * @param string $langName     Human-readable target language name.
 * @param string $systemPrompt Prompt template containing the {LANG} placeholder.
 * @param array  $batch        Map of translation key => English string.
 *
 * @return array Map with the same keys as $batch and translated (or fallback) values.
 *
 * @throws RuntimeException Propagated from azureChat() on transport/API errors.
 */
function translateBatch(
    array $azConfig,
    string $langName,
    string $systemPrompt,
    array $batch
): array {
    // Nothing to translate: skip the API round-trip.
    if ($batch === []) {
        return [];
    }

    $prompt = str_replace('{LANG}', $langName, $systemPrompt);
    $json = json_encode($batch, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    if ($json === false) {
        echo " [ERROR] Could not encode batch as JSON (" . json_last_error_msg() . "), returning English fallback\n";
        return $batch;
    }

    $response = azureChat($azConfig, [
        ['role' => 'system', 'content' => $prompt],
        ['role' => 'user', 'content' => $json],
    ], [
        'json' => true,
        'max_tokens' => 4096,
        'temperature' => 0.1,
        'timeout' => 120,
    ]);

    $choice = $response['choices'][0] ?? [];
    // A reply cut off at max_tokens is usually invalid JSON; say so explicitly
    // so the fallback below is not mistaken for a random parse error.
    if (($choice['finish_reason'] ?? null) === 'length') {
        echo " [WARN] Response hit max_tokens and may be truncated\n";
    }

    $content = trim((string)($choice['message']['content'] ?? ''));
    $decoded = json_decode($content, true);
    if (!is_array($decoded)) {
        echo " [WARN] JSON parse failed, trying extraction...\n";
        // Salvage the outermost {...} span in case the model wrapped the JSON in prose.
        if (preg_match('/\{.*\}/s', $content, $m)) {
            $decoded = json_decode($m[0], true);
        }
    }
    if (!is_array($decoded)) {
        echo " [ERROR] Could not parse response for batch, returning English fallback\n";
        return $batch;
    }

    // Rebuild from the input so the result has exactly the requested keys, in
    // the requested order, and only usable string values.
    $result = [];
    $fallbackKeys = [];
    foreach ($batch as $key => $english) {
        $candidate = $decoded[$key] ?? null;
        if (is_string($candidate) && trim($candidate) !== '') {
            $result[$key] = $candidate;
            continue;
        }
        $result[$key] = $english;
        $fallbackKeys[] = $key;
    }
    if ($fallbackKeys !== []) {
        echo " [WARN] English fallback used for: " . implode(', ', $fallbackKeys) . "\n";
    }

    return $result;
}
/**
 * Writes a translations array to $path as a PHP file that returns the array.
 *
 * The file consists of an "auto-generated" header comment followed by
 * `return <var_export of $data>;`, so it can be loaded with require/include.
 *
 * @param string $path Destination file path.
 * @param array  $data Translations to serialise.
 *
 * @throws RuntimeException When the file cannot be written, or only part of
 *                          the content was written.
 */
function writeTranslationsFile(string $path, array $data): void
{
    $php = "<?php\n// Auto-generated by scripts/generate-page-translations.php\n// DO NOT EDIT MANUALLY — re-run the script to regenerate\nreturn "
        . var_export($data, true)
        . ";\n";

    // file_put_contents() returns false on failure and the byte count on success;
    // a short count means a truncated (and therefore unparseable) PHP file.
    $written = file_put_contents($path, $php);
    if ($written === false || $written !== strlen($php)) {
        throw new RuntimeException("Could not write translations file: $path");
    }
}
// ─────────────────────────────────────────────────────────────────────────────
// Main loop
// ─────────────────────────────────────────────────────────────────────────────
// For every page: keep the English strings under 'en', translate them into each
// target language in chunks, then write one translations/<page>.php file.
foreach ($pages as $pageName => $englishStrings) {
    echo "\n=== Translating: $pageName ===\n";

    // Chunk the string table itself (keys preserved) into groups of 20 to stay
    // within token limits; the same chunks are reused for every language.
    $chunks = array_chunk($englishStrings, 20, true);
    $chunkCount = count($chunks);

    $translations = ['en' => $englishStrings];
    foreach ($languages as $langCode => $langName) {
        echo " Language: $langName ($langCode)\n";

        $perLanguage = [];
        foreach ($chunks as $position => $chunk) {
            $batchNo = $position + 1;
            echo " Batch $batchNo/$chunkCount (" . count($chunk) . " strings)...\n";
            $perLanguage = array_merge(
                $perLanguage,
                translateBatch($azConfig, $langName, $systemPrompt, $chunk)
            );
        }

        $translations[$langCode] = $perLanguage;
    }

    $outFile = $outDir . '/' . $pageName . '.php';
    writeTranslationsFile($outFile, $translations);
    echo " Written: $outFile\n";
}

echo "\n✓ All translations generated.\n";