dobetternorge-tools/includes/LegalTools.php

<?php
declare(strict_types=1);

require_once __DIR__ . '/bootstrap.php';
require_once __DIR__ . '/AzureOpenAiGateway.php';

final class DbnLegalToolsService
{
    /**
     * Size limit for pasted text.
     * NOTE(review): not referenced in the visible part of this class; presumably a
     * character cap enforced by requirePasteText() — confirm against that method.
     */
    private const MAX_PASTE_CHARS = 128000;

    /** Gateway used by the tools for chat calls, JSON decoding and deployment labels. */
    private DbnAzureOpenAiGateway $azure;

    /**
     * @param DbnAzureOpenAiGateway|null $azure Gateway to use; when omitted a new
     *                                          DbnAzureOpenAiGateway is created.
     */
    public function __construct(?DbnAzureOpenAiGateway $azure = null)
    {
        if ($azure === null) {
            $azure = new DbnAzureOpenAiGateway();
        }
        $this->azure = $azure;
    }

    /**
     * Keyword search across the client's private corpus and the family-legal package.
     *
     * Retrieval goes through ClientRagPipeline::searchAll in keyword mode. If the
     * pipeline throws, or returns nothing, fallbackKeywordSearch() is used instead
     * (at most once per request). When $temporalMode is 'legal_conservative' and the
     * LegalTemporalLayer file exists, the pipeline results are reranked; a failure
     * in that optional step keeps the un-reranked pipeline results.
     *
     * @param string      $query        Search text; aborts with 422 when shorter than 3 characters after trimming.
     * @param string      $language     Echoed in the result and used for the disclaimer.
     * @param int         $limit        Maximum number of hits; clamped to 1..10.
     * @param string      $temporalMode 'legal_conservative' or 'disabled'; anything else is treated as 'disabled'.
     * @param string|null $asOfDate     Passed through to LegalTemporalLayer::rerank.
     *
     * @return array Result envelope with 'hits', 'evidence_trail', 'trace', 'trace_metadata' and summary strings.
     */
    public function search(
        string  $query,
        string  $language    = 'en',
        int     $limit       = 6,
        string  $temporalMode = 'disabled',
        ?string $asOfDate    = null
    ): array {
        $query = trim($query);
        if (mb_strlen($query, 'UTF-8') < 3) {
            dbnToolsAbort('Search query must be at least 3 characters.', 422, 'query_too_short');
        }
        $limit = max(1, min(10, $limit));
        $temporalMode = in_array($temporalMode, ['legal_conservative', 'disabled'], true) ? $temporalMode : 'disabled';

        $trace = [
            $this->trace('Query interpretation', 'Searching Do Better Norge private corpus plus the subscribed family-legal package.', 'complete'),
            // Placeholder at index 1; overwritten below once retrieval has finished.
            $this->trace('Search tools used', 'ClientRagPipeline::searchAll with keyword mode, private corpus enabled, shared package filter set to family-legal.', 'running'),
        ];

        $client = dbnToolsRequireClient();
        $clientId = (int)$client['id'];
        $package = $this->requireFamilyPackage($clientId);

        $chunks = [];
        $retrievalNote = 'ClientRagPipeline keyword retrieval';
        $fallbackTried = false;
        try {
            dbnToolsBootCaveau();
            $gatewayUrl = 'http://10.0.1.10:4000';
            try {
                $config = getConfig();
                $configured = trim((string)($config['ai_gateway']['url'] ?? ''));
                if ($configured !== '') {
                    $gatewayUrl = $configured;
                }
            } catch (Throwable) {
                // Retrieval still works in keyword mode without gateway config.
            }

            $rag = new ClientRagPipeline($clientId, $gatewayUrl, 30);
            $chunks = $rag->searchAll($query, $limit, null, [
                'search_private' => true,
                'search_shared' => true,
                'package_ids' => [(int)$package['id']],
                'chunk_limit' => $limit,
                'search_method' => 'keyword',
                'min_private' => 0,
                'include_beta_website' => true,
            ]);

            // Apply temporal reranking after retrieval (optional).
            if ($temporalMode === 'legal_conservative' && !empty($chunks)) {
                $temporalLayerPath = __DIR__ . '/../../ai-portal/platform/includes/LegalTemporalLayer.php';
                if (file_exists($temporalLayerPath)) {
                    try {
                        require_once $temporalLayerPath;
                        $layer = new LegalTemporalLayer(['temporal_mode' => $temporalMode]);
                        $chunks = $layer->rerank($chunks, $query, $asOfDate);
                    } catch (Throwable) {
                        // Reranking is optional: keep the pipeline results instead of
                        // discarding them and mislabelling this as a retrieval failure.
                        $trace[] = $this->trace('Temporal reranking', 'Temporal reranking failed; results are shown in retrieval order without reranking.', 'warning');
                    }
                }
            }
        } catch (Throwable) {
            $retrievalNote = 'SQL keyword fallback after ClientRagPipeline error';
            $trace[] = $this->trace('Search fallback', 'Pipeline retrieval failed; using direct SQL keyword fallback without storing the query.', 'warning');
            $chunks = $this->fallbackKeywordSearch($clientId, $package, $query, $limit);
            $fallbackTried = true;
        }

        // Only run the fallback for an empty pipeline result if it has not already
        // been run in the error path; repeating the identical query is wasted work.
        if (!$chunks && !$fallbackTried) {
            $fallback = $this->fallbackKeywordSearch($clientId, $package, $query, $limit);
            if ($fallback) {
                $chunks = $fallback;
                $retrievalNote = 'SQL keyword fallback';
            }
        }

        $topChunks = array_slice($chunks, 0, $limit);

        // Collect document ids of non-private chunks so their summaries can be fetched in one query.
        $sharedDocIds = [];
        foreach ($topChunks as $chunk) {
            if (($chunk['source_type'] ?? '') !== 'private' && isset($chunk['document_id'])) {
                $sharedDocIds[(int)$chunk['document_id']] = true;
            }
        }
        $docSummaries = $sharedDocIds ? $this->fetchDocSummaries(array_keys($sharedDocIds)) : [];

        $hits = array_map(
            fn(array $chunk): array => $this->sourceFromChunk(
                $chunk,
                ($chunk['source_type'] ?? '') !== 'private'
                    ? ($docSummaries[(int)($chunk['document_id'] ?? 0)] ?? null)
                    : null
            ),
            $topChunks
        );
        $confidence = $this->citationConfidence($hits);

        $trace[1] = $this->trace('Search tools used', $retrievalNote . '; returned ' . count($hits) . ' source hit(s).', 'complete');
        $trace[] = $this->trace('Evidence found', count($hits) ? 'Retrieved source excerpts for review.' : 'No matching source excerpts were found.', count($hits) ? 'complete' : 'warning');
        $trace[] = $this->trace('Citation confidence', ucfirst($confidence) . ' confidence based on source count and retrieval scores.', $confidence === 'low' ? 'warning' : 'complete');

        return [
            'tool' => 'search',
            'language' => $language,
            'what_we_found' => count($hits) ? 'Found source excerpts from the legal corpus.' : 'No matching source excerpts were found.',
            'hits' => $hits,
            'evidence_trail' => $hits,
            'what_remains_uncertain' => count($hits) ? 'Search results still need human review for legal relevance and currentness.' : 'The corpus may not contain enough evidence for this query.',
            'next_practical_step' => count($hits) ? 'Open the strongest sources and confirm the cited sections before relying on them.' : 'Try a narrower query with statutory terms, party names, or dates.',
            'trace' => $trace,
            'trace_metadata' => [
                'chunk_count' => count($chunks),
                'source_count' => count($hits),
                'deployment' => null,
                'citation_confidence' => $confidence,
            ],
            'disclaimer' => dbnToolsDisclaimer($language),
        ];
    }

    /**
     * Answer a question from retrieved source excerpts.
     *
     * Runs search() with a limit of 7. With no hits it returns a fixed
     * "not enough source support" answer without calling the chat model.
     * Otherwise the hits are rendered into a prompt and the model is asked for a
     * JSON object; if the reply cannot be decoded, the raw reply is used as the answer.
     *
     * @param string $question User question; also used as the search query.
     * @param string $language 'no' selects Norwegian wording; any other value selects English.
     *
     * @return array Result envelope with 'answer', 'evidence_trail', 'citation_notes', 'sources', 'trace', etc.
     */
    public function ask(string $question, string $language = 'en'): array
    {
        $retrieval = $this->search($question, $language, 7);
        $sources = $retrieval['hits'];
        $trace = $retrieval['trace'];
        $wantsNorwegian = $language === 'no';

        // Guard: nothing retrieved, so there is nothing to synthesise from.
        if (!$sources) {
            $trace[] = $this->trace('Synthesis', 'Skipped answer synthesis because no evidence was found.', 'warning');

            if ($wantsNorwegian) {
                $noEvidenceAnswer = 'Jeg fant ikke nok kildestøtte i familie-rettskorpuset til å svare sikkert.';
            } else {
                $noEvidenceAnswer = 'I did not find enough source support in the family-law corpus to answer safely.';
            }

            return [
                'tool' => 'ask',
                'language' => $language,
                'answer' => $noEvidenceAnswer,
                'what_we_found' => $retrieval['what_we_found'],
                'evidence_trail' => [],
                'what_remains_uncertain' => $retrieval['what_remains_uncertain'],
                'next_practical_step' => $retrieval['next_practical_step'],
                'trace' => $trace,
                'trace_metadata' => [
                    'chunk_count' => 0,
                    'source_count' => 0,
                    'deployment' => null,
                    'citation_confidence' => 'low',
                ],
                'disclaimer' => dbnToolsDisclaimer($language),
            ];
        }

        $this->azure->requireChat();

        $context = $this->buildEvidenceContext($sources);
        $locale = $wantsNorwegian ? 'Norwegian' : 'English';
        $prompt = <<<PROMPT
Question:
{$question}

Evidence excerpts:
{$context}

Return JSON only with these keys:
{
  "answer": "short direct answer in {$locale}",
  "what_we_found": "plain-language summary of the supported finding",
  "evidence_trail": [{"title":"source title","why_it_matters":"one sentence","citation":"visible source title or section"}],
  "what_remains_uncertain": ["specific gaps or caveats"],
  "next_practical_step": "one concrete next action"
}
PROMPT;

        $messages = [
            ['role' => 'system', 'content' => $this->legalJsonSystemPrompt($language)],
            ['role' => 'user', 'content' => $prompt],
        ];
        $completion = $this->azure->chatText($messages, [
            'json' => true,
            'temperature' => 0.15,
            'max_tokens' => 1300,
        ]);

        $parsed = $this->azure->decodeJsonObject($completion);
        if (!$parsed) {
            // Undecodable reply: surface the raw text and flag it as unvalidated.
            $parsed = [
                'answer' => $completion,
                'what_we_found' => 'Azure returned a plain-text answer based on the retrieved excerpts.',
                'evidence_trail' => [],
                'what_remains_uncertain' => ['The response format could not be validated as structured JSON.'],
                'next_practical_step' => 'Review the source excerpts manually before relying on the answer.',
            ];
        }

        $uncertain = $parsed['what_remains_uncertain'] ?? [];

        $trace[] = $this->trace('Synthesis', 'Azure OpenAI generated an answer using only the retrieved source excerpts.', 'complete');
        $trace[] = $this->trace('Uncertainty / missing evidence', $this->uncertaintySummary($uncertain), 'complete');
        $trace[] = $this->trace('Next practical step', (string)($parsed['next_practical_step'] ?? 'Review the evidence trail.'), 'complete');

        $sourceTotal = count($sources);

        return [
            'tool' => 'ask',
            'language' => $language,
            'answer' => (string)($parsed['answer'] ?? ''),
            'what_we_found' => (string)($parsed['what_we_found'] ?? ''),
            'evidence_trail' => $sources,
            'citation_notes' => $this->normalizeEvidenceTrail($parsed['evidence_trail'] ?? [], $sources),
            'sources' => $sources,
            'what_remains_uncertain' => $uncertain,
            'next_practical_step' => (string)($parsed['next_practical_step'] ?? ''),
            'trace' => $trace,
            'trace_metadata' => [
                'chunk_count' => $sourceTotal,
                'source_count' => $sourceTotal,
                'deployment' => $this->azure->chatDeployment(),
                'citation_confidence' => $retrieval['trace_metadata']['citation_confidence'] ?? 'medium',
            ],
            'disclaimer' => dbnToolsDisclaimer($language),
        ];
    }

    /**
     * Summarise pasted text into structured fields via the chat model.
     *
     * The text is first passed through requirePasteText(), then sent in a single
     * prompt through runJsonTool() (which aborts when the reply is not valid JSON).
     *
     * @param string $text     Pasted case-preparation text.
     * @param string $language 'no' asks for Norwegian output; any other value asks for English.
     *
     * @return array Result envelope with 'what_we_found', 'key_facts', 'dates', 'parties',
     *               'legal_references_detected', 'trace', etc.
     */
    public function summarize(string $text, string $language = 'en'): array
    {
        $text = $this->requirePasteText($text);
        $this->azure->requireChat();

        $locale = 'English';
        if ($language === 'no') {
            $locale = 'Norwegian';
        }

        $prompt = <<<PROMPT
Summarize this pasted case-preparation text in {$locale}. Do not invent missing facts.

Pasted text:
{$text}

Return JSON only:
{
  "what_we_found": "plain-language summary",
  "key_facts": ["fact"],
  "dates": ["date or unknown"],
  "parties": ["party or role"],
  "legal_references_detected": ["reference"],
  "what_remains_uncertain": ["uncertainty"],
  "next_practical_step": "one concrete next action"
}
PROMPT;

        $summary = $this->runJsonTool($prompt, $language, 1300);
        $uncertain = $summary['what_remains_uncertain'] ?? [];

        $trace = [];
        $trace[] = $this->trace('Query interpretation', 'Summarize pasted text without saving the text or output.', 'complete');
        $trace[] = $this->trace('Search tools used', 'No external corpus search; source is the user-pasted text.', 'complete');
        $trace[] = $this->trace('Evidence found', 'Evidence trail is limited to the pasted text supplied in this request.', 'complete');
        $trace[] = $this->trace('Citation confidence', 'Medium confidence for factual extraction; no external legal source verification was performed.', 'warning');
        $trace[] = $this->trace('Uncertainty / missing evidence', $this->uncertaintySummary($uncertain), 'complete');
        $trace[] = $this->trace('Next practical step', (string)($summary['next_practical_step'] ?? 'Review the summary against the original text.'), 'complete');

        return [
            'tool' => 'summarize',
            'language' => $language,
            'what_we_found' => (string)($summary['what_we_found'] ?? ''),
            'key_facts' => $summary['key_facts'] ?? [],
            'dates' => $summary['dates'] ?? [],
            'parties' => $summary['parties'] ?? [],
            'legal_references_detected' => $summary['legal_references_detected'] ?? [],
            'evidence_trail' => [['title' => 'Pasted text', 'excerpt' => 'Processed in-memory only; not stored.']],
            'what_remains_uncertain' => $uncertain,
            'next_practical_step' => (string)($summary['next_practical_step'] ?? ''),
            'trace' => $trace,
            'trace_metadata' => [
                'chunk_count' => 1,
                'source_count' => 1,
                'deployment' => $this->azure->chatDeployment(),
            ],
            'disclaimer' => dbnToolsDisclaimer($language),
        ];
    }

    /**
     * Build a chronological event timeline from pasted text using an LLM.
     *
     * The prompt is sent to one of three engines (default Azure deployment, Azure
     * 'gpt-4o', or the GPU backend via callGpuLlm()). The reply must decode to a
     * JSON object, otherwise the request is aborted. Events can then be post-filtered
     * by confidence and by date type.
     *
     * @param string $text              Pasted text to analyse.
     * @param string $language          'no' asks for Norwegian output; any other value asks for English.
     * @param string $engine            'azure_mini', 'azure_full' or 'gpu'; anything else becomes 'azure_mini'.
     * @param string $focus             'all', 'deadlines', 'hearings' or 'cps'; anything else becomes 'all'.
     * @param string $confidenceFilter  'high_medium' drops events whose confidence is 'low' (or missing).
     * @param bool   $includeRelative   When false, only events with date_type 'absolute' (or no date_type) are kept.
     * @param bool   $includeBackground Selects whether the prompt asks for background/narrative dates.
     *
     * @return array Result envelope with 'events', 'evidence_trail', 'trace', 'trace_metadata', etc.
     */
    public function timeline(
        string $text,
        string $language           = 'en',
        string $engine             = 'azure_mini',
        string $focus              = 'all',
        string $confidenceFilter   = 'all',
        bool   $includeRelative    = true,
        bool   $includeBackground  = true
    ): array {
        $text = $this->requirePasteText($text);
        if (!in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true)) {
            $engine = 'azure_mini';
        }
        if (!in_array($focus, ['all', 'deadlines', 'hearings', 'cps'], true)) {
            $focus = 'all';
        }

        // The GPU engine does not go through the Azure chat configuration check.
        if ($engine !== 'gpu') {
            $this->azure->requireChat();
        }

        $locale = $language === 'no' ? 'Norwegian' : 'English';

        $focusInstructions = [
            'deadlines' => "\nFocus specifically on: legal deadlines, filing dates, response windows, appeal periods, and statutory time limits. Deprioritise narrative events with no legal deadline significance.",
            'hearings'  => "\nFocus specifically on: court hearings, tribunal sessions, mediation sessions, formal meetings, and hearing-related procedural dates.",
            'cps'       => "\nFocus specifically on: CPS (Barnevernet) interventions, home visits, case reviews, acute measures (akuttvedtak), and Fylkesnemnda proceedings.",
        ];
        $focusInstruction = $focusInstructions[$focus] ?? '';

        if ($includeBackground) {
            $backgroundInstruction = "\nAlso extract BACKGROUND and NARRATIVE events: dates embedded in contextual paragraphs, historical facts, year-only references, and approximate years (e.g. \"rundt 2011/2012\", \"David ble født den 30.07.2015\", \"familien i 2015\"). These are valid timeline events even when they appear in introductory or background text — do NOT skip them.";
        } else {
            $backgroundInstruction = "\nDo NOT include purely historical background or narrative context dates. Focus only on operational events, deadlines, and milestones that are directly actionable in the case.";
        }

        $prompt = <<<PROMPT
Build a chronological timeline from the pasted text in {$locale}.

Extract ALL dates, deadlines, milestones, and temporal references.{$focusInstruction}{$backgroundInstruction}

IMPORTANT — Norwegian date formats to recognise:
- DD.MM.YY  (e.g. 18.09.25 = 2025-09-18, 09.04.25 = 2025-04-09)
- D.M.YY    (e.g. 6.1.25 = 2025-01-06)
- DD.MM.    (e.g. 18.09. — day and month without year; infer year from surrounding context)
- D.M.      (e.g. 6.1. — day and month only)
- DD.MM.YYYY (e.g. 18.09.2025)
- Two-digit years: always interpret as 20YY (25 → 2025, 24 → 2024).
- Diary / log format: lines that begin with a date followed by a colon or space are ALWAYS events.
  Example: "18.09.25: Samtale med Davids lærer" → date 2025-09-18, event "Samtale med Davids lærer".
  Example: "6.1. Samtaler med David"             → date unknown-year-01-06, event "Samtaler med David".
- Do NOT skip a line just because the year is ambiguous — record what you can and set confidence accordingly.

For each temporal reference provide:
- "date": ISO 8601 date (YYYY-MM-DD) if determinable, otherwise a human-readable description such as "06 Jan (year unknown)"
- "date_type": one of  absolute | relative | recurring | conditional | period
- "actor": person, institution, or party involved — or "unknown"
- "event": concise description of what happened or is due
- "source_excerpt": the verbatim phrase from the text that grounds this event (≤ 30 words)
- "confidence": high | medium | low

Sort events chronologically (absolute dates first, then relative, then recurring).
Keep uncertain dates explicit — do not invent dates not in the text.
If multiple documents are separated by "--- Document: … ---" markers, note the source document in the event description where helpful.

Pasted text:
{$text}

Return JSON only:
{
  "what_we_found": "short overview",
  "events": [{"date":"...","date_type":"absolute","actor":"...","event":"...","source_excerpt":"...","confidence":"high|medium|low"}],
  "evidence_trail": [{"title":"...","excerpt":"..."}],
  "what_remains_uncertain": ["..."],
  "next_practical_step": "..."
}
PROMPT;

        $messages = [
            ['role' => 'system', 'content' => $this->legalJsonSystemPrompt($language)],
            ['role' => 'user',   'content' => $prompt],
        ];
        $chatOptions = ['json' => true, 'temperature' => 0.1, 'max_tokens' => 4000, 'timeout' => 120];
        $deployLabel = $this->azure->chatDeployment();

        try {
            switch ($engine) {
                case 'gpu':
                    $response    = $this->callGpuLlm($messages, $chatOptions);
                    $deployLabel = 'GPU (cuttlefish)';
                    break;
                case 'azure_full':
                    $response    = $this->azure->withDeployment('gpt-4o')->chat($messages, $chatOptions);
                    $deployLabel = 'gpt-4o';
                    break;
                default:
                    $response    = $this->azure->chat($messages, $chatOptions);
                    $deployLabel = $this->azure->chatDeployment();
                    break;
            }
        } catch (Throwable $failure) {
            dbnToolsAbort('LLM request failed: ' . $failure->getMessage(), 502, 'llm_error');
        }

        $raw    = (string)($response['choices'][0]['message']['content'] ?? '');
        $parsed = $this->azure->decodeJsonObject($raw);
        if (!$parsed) {
            dbnToolsAbort('The selected engine did not return valid structured JSON.', 502, 'llm_invalid_json');
        }

        $events = [];
        if (is_array($parsed['events'] ?? null)) {
            $events = $parsed['events'];
        }

        // Post-filter: drop low-confidence events (a missing confidence counts as low).
        $dropLowConfidence = $confidenceFilter === 'high_medium';
        if ($dropLowConfidence) {
            $kept = [];
            foreach ($events as $ev) {
                if (($ev['confidence'] ?? 'low') !== 'low') {
                    $kept[] = $ev;
                }
            }
            $events = $kept;
        }

        // Post-filter: keep only absolute dates (a missing date_type counts as absolute).
        if (!$includeRelative) {
            $kept = [];
            foreach ($events as $ev) {
                if (($ev['date_type'] ?? 'absolute') === 'absolute') {
                    $kept[] = $ev;
                }
            }
            $events = $kept;
        }

        if ($engine === 'gpu') {
            $engineLabel = 'GPU (cuttlefish)';
        } elseif ($engine === 'azure_full') {
            $engineLabel = 'gpt-4o';
        } else {
            $engineLabel = $deployLabel ?? $this->azure->chatDeployment();
        }

        $focusLabels = [
            'deadlines' => 'legal deadlines',
            'hearings'  => 'court hearings',
            'cps'       => 'CPS milestones',
        ];
        $focusLabel = $focusLabels[$focus] ?? 'all events';

        $eventCount    = count($events);
        $filterSuffix  = $dropLowConfidence ? ' (low-confidence filtered out)' : '';
        $uncertain     = $parsed['what_remains_uncertain'] ?? [];

        $trace = [];
        $trace[] = $this->trace('Query interpretation', "Extract {$focusLabel} from pasted text. Engine: {$engineLabel}. Without saving the text or output.", 'complete');
        $trace[] = $this->trace('Search tools used', 'No external corpus search; source is the user-pasted text.', 'complete');
        $trace[] = $this->trace('Evidence found', $eventCount . ' event(s) identified' . $filterSuffix . '.', $eventCount ? 'complete' : 'warning');
        $trace[] = $this->trace('Citation confidence', 'Confidence is per event and based only on the pasted text.', 'complete');
        $trace[] = $this->trace('Uncertainty / missing evidence', $this->uncertaintySummary($uncertain), 'complete');
        $trace[] = $this->trace('Next practical step', (string)($parsed['next_practical_step'] ?? 'Verify dates against original documents.'), 'complete');

        return [
            'tool'                   => 'timeline',
            'language'               => $language,
            'what_we_found'          => (string)($parsed['what_we_found'] ?? ''),
            'events'                 => $events,
            'evidence_trail'         => $parsed['evidence_trail'] ?? [['title' => 'Pasted text', 'excerpt' => 'Processed in-memory only; not stored.']],
            'what_remains_uncertain' => $uncertain,
            'next_practical_step'    => (string)($parsed['next_practical_step'] ?? ''),
            'trace'                  => $trace,
            'trace_metadata'         => [
                'chunk_count'  => $eventCount,
                'source_count' => 1,
                'deployment'   => $engineLabel,
            ],
            'disclaimer' => dbnToolsDisclaimer($language),
        ];
    }

    /**
     * Redact personal identifiers from pasted text in two passes.
     *
     * Pass 1 applies deterministicRedaction() for the chosen region and mode.
     * Pass 2 asks llmRedactionPass() for additional entities and replaces each
     * reported original string with its tag. Entities are applied longest-first so
     * that a short entity (e.g. a first name) cannot break up a longer one (e.g. the
     * full name) and leave part of it unredacted. Finally the output format is
     * applied and per-category counts from both passes are summed.
     *
     * @param string $text          Pasted text to redact.
     * @param string $mode          'strict' or 'standard' (any other value becomes 'standard').
     * @param string $region        'nordic', 'european', 'echr' or 'global' (fallback 'nordic').
     * @param string $language      Forwarded to the LLM pass.
     * @param array  $aliases       Forwarded to the LLM pass.
     * @param string $engine        'azure_mini', 'azure_full', 'gpu' or 'regex' (fallback 'azure_mini').
     * @param string $outputFormat  'contextual', 'generic' or 'pseudonym' (fallback 'contextual').
     * @param bool   $keepOfficials Forwarded to the LLM pass.
     * @param array  $exemptNames   Forwarded to the LLM pass.
     * @param array  $redactTypes   Flags keyed 'names', 'orgs', 'places', 'dob'; each is on unless strictly false.
     *
     * @return array Result envelope with 'redacted_text', 'entity_counts', 'detected_entity_categories', 'trace', etc.
     */
    public function redact(
        string $text,
        string $mode         = 'standard',
        string $region       = 'nordic',
        string $language     = 'en',
        array  $aliases      = [],
        string $engine       = 'azure_mini',
        string $outputFormat = 'contextual',
        bool   $keepOfficials = false,
        array  $exemptNames  = [],
        array  $redactTypes  = []
    ): array {
        $text         = $this->requirePasteText($text);
        $mode         = $mode === 'strict' ? 'strict' : 'standard';
        $region       = in_array($region, ['nordic', 'european', 'echr', 'global'], true) ? $region : 'nordic';
        $engine       = in_array($engine, ['azure_mini', 'azure_full', 'gpu', 'regex'], true) ? $engine : 'azure_mini';
        $outputFormat = in_array($outputFormat, ['contextual', 'generic', 'pseudonym'], true) ? $outputFormat : 'contextual';

        // Normalise entity-type flags (all on by default)
        $doNames  = ($redactTypes['names']  ?? true) !== false;
        $doOrgs   = ($redactTypes['orgs']   ?? true) !== false;
        $doPlaces = ($redactTypes['places'] ?? true) !== false;
        $doDob    = ($redactTypes['dob']    ?? true) !== false;

        // Pass 1 — deterministic regex
        [$preRedacted, $pass1Counts] = $this->deterministicRedaction($text, $mode, $region);
        $pass1Total  = array_sum($pass1Counts);
        $pass1Hits   = array_filter($pass1Counts, fn($v): bool => $v > 0);
        $pass1Detail = $pass1Total
            ? implode(', ', array_map(fn($k, $v) => "{$k}: {$v}", array_keys($pass1Hits), $pass1Hits))
            : 'none detected';

        $engineLabel = match ($engine) {
            'azure_full' => 'Azure gpt-4o',
            'gpu'        => 'GPU (cuttlefish)',
            'regex'      => 'Regex only',
            default      => 'Azure gpt-4o-mini',
        };

        $trace = [
            $this->trace('Query interpretation', "Redact PII from pasted text. Region: {$region}. Mode: {$mode}. Engine: {$engineLabel}.", 'complete'),
            $this->trace('Pass 1 — Deterministic patterns', "Applied {$region} pattern pack. {$pass1Detail}.", $pass1Total > 0 ? 'complete' : 'warning'),
        ];

        // Pass 2 — LLM semantic scan
        $finalRedacted   = $preRedacted;
        $pass2Counts     = [];
        $llmDeployment   = null;
        $semanticScanRan = false;

        $llmResult = $this->llmRedactionPass(
            $preRedacted, $language, $aliases, $engine,
            $keepOfficials, $exemptNames,
            $doNames, $doOrgs, $doPlaces, $doDob
        );

        if (!empty($llmResult['skipped'])) {
            $trace[] = $this->trace('Pass 2 — LLM semantic scan', 'Skipped: ' . ($llmResult['reason'] ?? 'not configured') . '.', 'warning');
        } elseif (!empty($llmResult['error'])) {
            // Cast: under strict_types a non-string error value would otherwise raise a TypeError.
            $trace[] = $this->trace('Pass 2 — LLM semantic scan', 'Skipped due to error: ' . dbnToolsExcerpt((string)$llmResult['error'], 100) . '.', 'warning');
        } else {
            $semanticScanRan = true;
            $llmDeployment   = $llmResult['deployment'] ?? null;
            $applied         = 0;

            $entities = array_values(array_filter(
                is_array($llmResult['entities'] ?? null) ? $llmResult['entities'] : [],
                static fn($entity): bool => is_array($entity)
            ));

            // Replace the longest originals first. Otherwise "Ola" processed before
            // "Ola Nordmann" turns the text into "[TAG] Nordmann", the full name no
            // longer matches, and the surname stays in the output.
            usort(
                $entities,
                static fn(array $a, array $b): int =>
                    strlen((string)($b['original'] ?? '')) <=> strlen((string)($a['original'] ?? ''))
            );

            foreach ($entities as $entity) {
                $original = (string)($entity['original'] ?? '');
                $type     = (string)($entity['type']     ?? 'other');
                $tag      = (string)($entity['tag']      ?? '[IDENTIFIER]');
                // Skip empty entries and anything that is already a bracket tag.
                if ($original === '' || str_starts_with($original, '[')) {
                    continue;
                }
                // Allow [ROLE: Name] format when keepOfficials is on, else require plain bracket tag
                if (!preg_match('/^\[[A-Za-z0-9_\- ]+(?::\s*[^\]]+)?\]$/', $tag)) {
                    $tag = '[IDENTIFIER]';
                }
                $finalRedacted = str_replace($original, $tag, $finalRedacted, $replacements);
                if ($replacements > 0) {
                    // Counted once per entity, not once per occurrence.
                    $pass2Counts[$type] = ($pass2Counts[$type] ?? 0) + 1;
                    $applied++;
                }
            }

            $pass2Detail = $applied > 0
                ? "{$applied} additional: " . implode(', ', array_map(fn($k, $v) => "{$k}: {$v}", array_keys($pass2Counts), $pass2Counts))
                : 'no additional entities found';

            $trace[] = $this->trace('Pass 2 — LLM semantic scan', "{$engineLabel} reviewed pre-redacted text for names, orgs, and places. {$pass2Detail}.", 'complete');
        }

        // Sum counts per category. array_merge() would let a pass-2 count overwrite
        // the pass-1 count whenever both passes report the same category key.
        $allCounts = $pass1Counts;
        foreach ($pass2Counts as $category => $count) {
            $allCounts[$category] = ($allCounts[$category] ?? 0) + $count;
        }

        // Apply output format post-processing
        if ($outputFormat === 'generic') {
            $finalRedacted = $this->applyGenericTags($finalRedacted);
        } elseif ($outputFormat === 'pseudonym') {
            $finalRedacted = $this->applyPseudonymization($finalRedacted, $allCounts);
        }

        $categories = array_keys(array_filter($allCounts, fn($v): bool => $v > 0));

        $trace[] = $this->trace('Output format', match ($outputFormat) {
            'generic'   => 'All identifiers normalised to generic tags ([PERSON], [ORG], etc.).',
            'pseudonym' => 'Identifiers replaced with plausible pseudonymous values.',
            default     => 'Contextual role tags used (e.g. [FATHER], [JUDGE: Name]).',
        }, 'complete');
        $trace[] = $this->trace('Uncertainty / missing evidence', 'Human review recommended for contextual identification and unusual formatting.', 'warning');
        $trace[] = $this->trace('Next practical step', 'Review the output and rerun in strict mode if the text will be shared broadly.', 'complete');

        return [
            'tool'                       => 'redact',
            'mode'                       => $mode,
            'region'                     => $region,
            'engine_used'                => $engineLabel,
            'output_format'              => $outputFormat,
            // Only mention the semantic scan when pass 2 actually ran.
            'what_we_found'              => "Applied {$region} pattern pack" . ($semanticScanRan ? " and {$engineLabel} semantic scan" : '') . '.',
            'redacted_text'              => $finalRedacted,
            'detected_entity_categories' => $categories,
            'entity_counts'              => $allCounts,
            'evidence_trail'             => [['title' => 'Pasted text', 'excerpt' => 'Processed in-memory only; not stored.']],
            'what_remains_uncertain'     => ['Human review is still recommended for contextual identification.'],
            'next_practical_step'        => 'Review the output and rerun in strict mode if the text will be shared broadly.',
            'trace'                      => $trace,
            'trace_metadata'             => [
                'chunk_count'  => 1,
                'source_count' => 1,
                'deployment'   => $llmDeployment ?? $engineLabel,
            ],
            'disclaimer' => 'Privacy support tool. Review before disclosure.',
        ];
    }

    /**
     * Load the 'family-legal' package and check that the client may use it.
     *
     * Aborts with 503 when the package is missing or inactive, or when
     * dbnToolsHasActiveSubscription() reports no subscription for the client.
     *
     * @param int $clientId Client whose subscription is checked.
     *
     * @return array The package row as returned by dbnToolsFetchPackage().
     */
    private function requireFamilyPackage(int $clientId): array
    {
        $package = dbnToolsFetchPackage('family-legal');

        $isUsable = $package && !empty($package['is_active']);
        if (!$isUsable) {
            dbnToolsAbort('The family-legal corpus package is not active.', 503, 'package_unavailable');
        }

        $packageId = (int)$package['id'];
        $subscribed = dbnToolsHasActiveSubscription($clientId, $packageId);
        if (!$subscribed) {
            dbnToolsAbort('Do Better Norge does not have an active family-legal subscription.', 503, 'subscription_missing');
        }

        return $package;
    }

    /**
     * Send one prompt to the chat model in JSON mode and return the decoded object.
     *
     * Aborts with 502 ('azure_invalid_json') when the reply does not decode to a
     * non-empty JSON object.
     *
     * @param string $prompt    User-role message content.
     * @param string $language  Selects the response language in the system prompt.
     * @param int    $maxTokens Passed as 'max_tokens' to the gateway.
     *
     * @return array Decoded JSON object.
     */
    private function runJsonTool(string $prompt, string $language, int $maxTokens): array
    {
        $messages = [
            ['role' => 'system', 'content' => $this->legalJsonSystemPrompt($language)],
            ['role' => 'user', 'content' => $prompt],
        ];
        $options = [
            'json' => true,
            'temperature' => 0.1,
            'max_tokens' => $maxTokens,
        ];

        $reply = $this->azure->chatText($messages, $options);
        $decoded = $this->azure->decodeJsonObject($reply);

        if ($decoded) {
            return $decoded;
        }

        dbnToolsAbort('Azure OpenAI did not return valid structured JSON.', 502, 'azure_invalid_json');
        return $decoded;
    }

    /**
     * Build the shared system prompt with the DBN guardrails.
     *
     * @param string $language 'no' makes the prompt request Norwegian; any other value requests English.
     *
     * @return string System prompt text.
     */
    private function legalJsonSystemPrompt(string $language): string
    {
        $responseLanguage = 'English';
        if ($language === 'no') {
            $responseLanguage = 'Norwegian';
        }

        return <<<PROMPT
You are Do Better Norge Legal Tools in a source-grounded legal preparation workflow.
Use the DBN legal guardrails:
- Answer only from provided source excerpts or pasted text.
- Treat your role as legal information and issue-spotting, not final legal advice.
- Never invent statutes, paragraph numbers, case names, citations, parties, dates, or sources.
- If evidence is insufficient, say so plainly.
- Respond in {$responseLanguage}.
- Return valid JSON only. No markdown fences.
PROMPT;
    }

    /**
     * Render search hits as numbered plain-text blocks for use in a prompt.
     *
     * Each hit contributes a title line, an optional section line, a corpus/package
     * line and an excerpt line; all lines are joined with "\n".
     *
     * @param array $hits Hits with optional 'title', 'section', 'package_or_corpus' and 'excerpt' keys.
     *
     * @return string The rendered context, or '' when there are no hits.
     */
    private function buildEvidenceContext(array $hits): string
    {
        $blocks = [];
        foreach ($hits as $index => $hit) {
            $ordinal = $index + 1;

            $entry = ["[{$ordinal}] Title: " . ($hit['title'] ?? 'Untitled')];
            if (!empty($hit['section'])) {
                $entry[] = 'Section: ' . $hit['section'];
            }
            $entry[] = 'Corpus/package: ' . ($hit['package_or_corpus'] ?? 'unknown');
            $entry[] = 'Excerpt: ' . ($hit['excerpt'] ?? '');

            $blocks[] = implode("\n", $entry);
        }

        return implode("\n", $blocks);
    }

    /**
     * Normalise the model-supplied evidence trail into a list of arrays.
     *
     * A non-empty array trail is returned with all non-array entries removed and
     * re-indexed. Otherwise a substitute trail is built from the first four hits,
     * using each hit's title and a shortened excerpt.
     *
     * @param mixed $trail Value of 'evidence_trail' from the model reply.
     * @param array $hits  Search hits; 'title' and 'excerpt' are read when building the substitute.
     *
     * @return array List of trail entries.
     */
    private function normalizeEvidenceTrail(mixed $trail, array $hits): array
    {
        if (is_array($trail) && $trail !== []) {
            $onlyArrays = array_filter($trail, static fn($entry): bool => is_array($entry));
            return array_values($onlyArrays);
        }

        $toNote = static fn(array $hit): array => [
            'title' => $hit['title'],
            'citation' => $hit['title'],
            'why_it_matters' => dbnToolsExcerpt($hit['excerpt'], 180),
        ];

        return array_map($toNote, array_slice($hits, 0, 4));
    }

    /**
     * Convert a retrieval chunk into the hit structure returned to callers.
     *
     * @param array       $chunk      Chunk row; all fields are optional and default to null or a placeholder.
     * @param string|null $docSummary When given, used as 'excerpt' instead of the shortened chunk content.
     *
     * @return array Hit with title, excerpt, chunk_text, source, score, ids, section and pass-through metadata.
     */
    private function sourceFromChunk(array $chunk, ?string $docSummary = null): array
    {
        $chunkExcerpt = dbnToolsExcerpt((string)($chunk['content'] ?? ''), 620);

        $similarity = null;
        if (isset($chunk['similarity'])) {
            $similarity = round((float)$chunk['similarity'], 4);
        }

        $source = [
            'title'             => (string)($chunk['document_title'] ?? $chunk['title'] ?? 'Untitled source'),
            'excerpt'           => $docSummary ?? $chunkExcerpt,
            'chunk_text'        => $chunkExcerpt,
            'package_or_corpus' => (string)($chunk['source_name'] ?? $chunk['source_type'] ?? 'Do Better Norge'),
            'score'             => $similarity,
            'document_id'       => isset($chunk['document_id']) ? (int)$chunk['document_id'] : null,
            'chunk_id'          => isset($chunk['id']) ? (int)$chunk['id'] : null,
            'section'           => $chunk['section_title'] ?? null,
        ];

        // Fields copied through unchanged (null when absent). The temporal ones are
        // present when temporal_mode = 'legal_conservative'.
        $passThrough = [
            'authority_type',
            'jurisdiction',
            'temporal_state',
            'temporal_kind',
            'temporal_reason',
            'currentness_warning',
            'valid_from',
            'valid_until',
            'is_current_version',
        ];
        foreach ($passThrough as $field) {
            $source[$field] = $chunk[$field] ?? null;
        }

        return $source;
    }

    /**
     * Load stored summaries for the given documents.
     *
     * This lookup is best-effort: any failure (connection, missing table,
     * query error) is logged and reported as "no summaries" so the caller can
     * carry on without them.
     *
     * @param array $docIds Document ids to look up.
     * @return array Non-empty summaries keyed by document_id; empty on failure
     *               or when no ids are given.
     */
    private function fetchDocSummaries(array $docIds): array
    {
        if (!$docIds) {
            return [];
        }
        try {
            $db = dbnToolsRagDb();
            // One positional placeholder per id.
            $placeholders = implode(',', array_fill(0, count($docIds), '?'));
            $stmt = $db->prepare(
                "SELECT document_id, summary FROM doc_summaries
                 WHERE document_id IN ({$placeholders}) AND summary != ''"
            );
            // array_values() guarantees 0-based keys for positional binding.
            $stmt->execute(array_values($docIds));
            return array_column($stmt->fetchAll(PDO::FETCH_ASSOC), 'summary', 'document_id');
        } catch (Throwable $e) {
            // Still non-fatal, but leave a trace like the other fallbacks do.
            error_log('DBN tools doc summary lookup failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Grade how well the answer is backed by retrieved sources.
     *
     *  - 'low'    : no hits at all
     *  - 'high'   : at least three hits and a best numeric score of 0.35 or more
     *  - 'medium' : every other non-empty result
     *
     * Hits without a numeric 'score' count towards the number of hits but not
     * towards the best score.
     *
     * @param array $hits Source entries, each optionally carrying 'score'.
     * @return string One of 'low', 'medium', 'high'.
     */
    private function citationConfidence(array $hits): string
    {
        if (!$hits) {
            return 'low';
        }

        $scores = array_values(array_filter(
            array_map(fn(array $hit) => $hit['score'] ?? null, $hits),
            'is_numeric'
        ));
        $best = $scores ? max($scores) : 0;

        // $hits is non-empty here, so the outcome is either 'high' or 'medium';
        // the former trailing 'low' branch could never be reached.
        return (count($hits) >= 3 && $best >= 0.35) ? 'high' : 'medium';
    }

    /**
     * Keyword search used as a fallback: private corpus first, then the shared
     * package for whatever room is left.
     *
     * Each source is queried independently; a failure in one is logged and does
     * not prevent the other from contributing results.
     *
     * @param int    $clientId Client whose private corpus is searched.
     * @param array  $package  Shared package row passed on to the shared search.
     * @param string $query    Raw user query.
     * @param int    $limit    Maximum number of rows to return.
     * @return array At most $limit chunk rows, private results first.
     */
    private function fallbackKeywordSearch(int $clientId, array $package, string $query, int $limit): array
    {
        $results = [];
        try {
            $results = array_merge($results, $this->fallbackPrivateSearch($clientId, $query, $limit));
        } catch (Throwable $e) {
            error_log('DBN tools private fallback failed: ' . $e->getMessage());
        }

        // Only query the shared package when there is room left. Previously a
        // one-row query was always issued and its result sliced away again.
        $remaining = $limit - count($results);
        if ($remaining > 0) {
            try {
                $results = array_merge($results, $this->fallbackSharedSearch($package, $query, $remaining));
            } catch (Throwable $e) {
                error_log('DBN tools shared fallback failed: ' . $e->getMessage());
            }
        }

        return array_slice($results, 0, $limit);
    }

    /**
     * LIKE-based keyword search over the client's private documents.
     *
     * A chunk matches when any search term occurs in its content or in the
     * title of its document. Only documents with status 'ready' are searched.
     * Every row is tagged with a fixed similarity of 0.25 and the private
     * corpus source labels.
     *
     * @param int    $clientId Owner of the private corpus.
     * @param string $query    Raw user query; reduced to terms via searchTerms().
     * @param int    $limit    Maximum number of rows.
     * @return array Chunk rows; empty when the query yields no usable terms.
     */
    private function fallbackPrivateSearch(int $clientId, string $query, int $limit): array
    {
        $terms = $this->searchTerms($query);
        if (!$terms) {
            return [];
        }
        $db = dbnToolsDb();

        $clauses = [];
        $params = [':client_id' => $clientId];
        foreach (array_values($terms) as $i => $term) {
            // A named placeholder may only appear once per statement when PDO
            // uses native prepares, so content and title get their own marker.
            $contentKey = ':content' . $i;
            $titleKey = ':title' . $i;
            $clauses[] = "(cc.content LIKE {$contentKey} OR cd.title LIKE {$titleKey})";
            $like = '%' . $term . '%';
            $params[$contentKey] = $like;
            $params[$titleKey] = $like;
        }

        // Single-quoted SQL string literal: double quotes denote identifiers in
        // standard SQL and under MySQL's ANSI_QUOTES mode.
        $sql = "SELECT cc.id, cc.document_id, cc.content, cd.title AS document_title, cd.category
                FROM client_chunks cc
                JOIN client_documents cd ON cc.document_id = cd.id
                WHERE cc.client_id = :client_id AND cd.status = 'ready' AND (" . implode(' OR ', $clauses) . ')
                LIMIT ' . (int)$limit;
        $stmt = $db->prepare($sql);
        $stmt->execute($params);
        $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);

        foreach ($rows as &$row) {
            $row['similarity'] = 0.25;
            $row['source_name'] = 'Do Better Norge private corpus';
            $row['source_type'] = 'private';
        }
        unset($row); // break the reference so the last row is not aliased

        return $rows;
    }

    /**
     * LIKE-based keyword search over the shared package corpus.
     *
     * Restricts to documents with status 'ready' and, when the package defines
     * them, to its corpus id, category filter and language filter. A chunk
     * matches when any search term occurs in its content or document title.
     * Every row is tagged with a fixed similarity of 0.2 and the package name.
     *
     * @param array  $package Package row; reads 'corpus_id', 'category_filter',
     *                        'language_filter' and 'name'.
     * @param string $query   Raw user query; reduced to terms via searchTerms().
     * @param int    $limit   Maximum number of rows.
     * @return array Chunk rows; empty when the query yields no usable terms.
     */
    private function fallbackSharedSearch(array $package, string $query, int $limit): array
    {
        $terms = $this->searchTerms($query);
        if (!$terms) {
            return [];
        }
        $ragDb = dbnToolsRagDb();

        // Filters are stored as JSON lists. Anything that does not decode to an
        // array (invalid JSON, a bare JSON string or number) means "no filter"
        // instead of reaching count() and raising a TypeError. Only scalar
        // entries are kept and keys are discarded so positional binding works.
        // NOTE(review): an already-decoded array is accepted as-is — confirm
        // whether callers ever pass one.
        $decodeList = static function (mixed $raw): array {
            if (is_string($raw)) {
                $raw = json_decode($raw, true);
            }
            if (!is_array($raw)) {
                return [];
            }
            return array_values(array_filter($raw, 'is_scalar'));
        };

        // Single-quoted SQL string literal (double quotes denote identifiers in
        // standard SQL and under MySQL's ANSI_QUOTES mode).
        $where = ["d.status = 'ready'"];
        $params = [];

        if (!empty($package['corpus_id'])) {
            $where[] = 'd.corpus_id = ?';
            $params[] = (int)$package['corpus_id'];
        }

        $cats = $decodeList($package['category_filter'] ?? null);
        if ($cats) {
            $where[] = 'd.category IN (' . implode(',', array_fill(0, count($cats), '?')) . ')';
            $params = array_merge($params, $cats);
        }

        $langs = $decodeList($package['language_filter'] ?? null);
        if ($langs) {
            $where[] = 'd.language IN (' . implode(',', array_fill(0, count($langs), '?')) . ')';
            $params = array_merge($params, $langs);
        }

        $termClauses = [];
        foreach ($terms as $term) {
            $termClauses[] = '(c.content LIKE ? OR d.title LIKE ?)';
            $like = '%' . $term . '%';
            $params[] = $like;
            $params[] = $like;
        }
        $where[] = '(' . implode(' OR ', $termClauses) . ')';

        $sql = 'SELECT c.id, c.document_id, c.content, c.section_title, d.title AS document_title,
                       d.category, d.language
                FROM chunks c
                JOIN documents d ON c.document_id = d.id
                WHERE ' . implode(' AND ', $where) . '
                LIMIT ' . (int)$limit;
        $stmt = $ragDb->prepare($sql);
        $stmt->execute($params);
        $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);

        foreach ($rows as &$row) {
            $row['similarity'] = 0.2;
            $row['source_name'] = (string)($package['name'] ?? 'family-legal');
            $row['source_type'] = 'package';
        }
        unset($row); // break the reference so the last row is not aliased

        return $rows;
    }

    /**
     * Reduce a free-text query to at most six distinct keyword terms.
     *
     * The query is lower-cased and split on anything that is not a letter or a
     * digit. Tokens shorter than three characters and a small set of English
     * and Norwegian stop words are dropped; order of first appearance is kept.
     *
     * @param string $query Raw user query.
     * @return array List of lower-cased terms.
     */
    private function searchTerms(string $query): array
    {
        $stopWords = ['the', 'and', 'for', 'with', 'that', 'this', 'hva', 'har', 'kan', 'jeg', 'som', 'det', 'med', 'til', 'og'];

        $tokens = preg_split('/[^\p{L}\p{N}]+/u', mb_strtolower($query, 'UTF-8'));
        if (!$tokens) {
            // preg_split failed (e.g. malformed UTF-8) or produced nothing.
            return [];
        }

        $isKeyword = static fn(string $token): bool =>
            mb_strlen($token, 'UTF-8') >= 3 && !in_array($token, $stopWords, true);

        $distinct = array_unique(array_filter($tokens, $isKeyword));

        return array_slice(array_values($distinct), 0, 6);
    }

    /**
     * Validate pasted text and return it trimmed.
     *
     * Aborts through dbnToolsAbort() with HTTP 422 when the trimmed text has
     * fewer than 20 characters or more than MAX_PASTE_CHARS characters.
     *
     * @param string $text Raw pasted text.
     * @return string The trimmed text.
     */
    private function requirePasteText(string $text): string
    {
        $trimmed = trim($text);
        $length = mb_strlen($trimmed, 'UTF-8');

        if ($length < 20) {
            dbnToolsAbort('Paste at least 20 characters of text.', 422, 'text_too_short');
        }
        if ($length > self::MAX_PASTE_CHARS) {
            dbnToolsAbort('Pasted text is too long for the MVP limit.', 422, 'text_too_long');
        }

        return $trimmed;
    }

    /**
     * Regex-only redaction pass.
     *
     * Applies, in order: the region's pattern pack, labelled names
     * ("Barn: X", "Mother: X"), child-identifier phrases ("barnet heter X",
     * "child named X") and, in 'strict' mode only, any two consecutive
     * capitalised words.
     *
     * A pattern that fails at runtime leaves the text unchanged for that step
     * (preg_* returns null and the previous text is kept).
     *
     * @param string $text   Text to redact.
     * @param string $mode   'strict' enables the capitalised-word-pair rule.
     * @param string $region Pattern pack selector passed to getPatternPack().
     * @return array Two-element list: [redacted text, replacement counts keyed by type].
     */
    private function deterministicRedaction(string $text, string $mode, string $region = 'nordic'): array
    {
        $counts = [];

        // Replace every match of $pattern with $token and count it under $type.
        $replace = function (string $pattern, string $type, string $token) use (&$text, &$counts): void {
            $text = preg_replace_callback($pattern, function () use (&$counts, $type, $token): string {
                $counts[$type] = ($counts[$type] ?? 0) + 1;
                return $token;
            }, $text) ?? $text;
        };

        foreach ($this->getPatternPack($region) as $entry) {
            $replace($entry['pattern'], $entry['type'], $entry['replacement']);
        }

        // Structured role-label names (Barn: X, Mother: X, etc.) — universal.
        // The label is kept; everything up to the next , . ; or line break is
        // replaced.
        $text = preg_replace_callback(
            '/\b(Barn|Child|Navn|Name|Mor|Far|Mother|Father|Sønn|Datter)\s*:\s*([^\r\n,.;]+)/iu',
            function (array $m) use (&$counts): string {
                $counts['person_or_child_name'] = ($counts['person_or_child_name'] ?? 0) + 1;
                return $m[1] . ': [PERSON]';
            },
            $text
        ) ?? $text;

        // Child-identifier phrases ("barnet heter X", "child named X") — universal.
        // Case-insensitivity is scoped to the keyword and verb only. With a
        // global /i the [A-ZÆØÅ] name test also matched lowercase letters, so
        // ordinary phrases such as "child was" or "barnet har" were redacted.
        $text = preg_replace_callback(
            '/\b(?i:barnet|child|sønn|son|datter|daughter)\s+(?i:heter|named|called)?\s*([A-ZÆØÅ][\p{L}æøåÆØÅ\-]{2,})\b/u',
            function () use (&$counts): string {
                $counts['person_or_child_name'] = ($counts['person_or_child_name'] ?? 0) + 1;
                return '[CHILD_IDENTIFIER]';
            },
            $text
        ) ?? $text;

        if ($mode === 'strict') {
            // Any "Capitalised Capitalised" word pair is treated as a name.
            $replace('/\b[A-ZÆØÅ][\p{L}æøåÆØÅ\-]{2,}\s+[A-ZÆØÅ][\p{L}æøåÆØÅ\-]{2,}\b/u', 'person_or_child_name', '[PERSON]');
        }

        return [$text, $counts];
    }

    /**
     * Return the ordered list of regex redaction rules for a region.
     *
     * Packs are cumulative: 'european' extends 'nordic', 'echr' extends
     * 'european', and any other value yields the full 'global' pack. Rules are
     * meant to be applied in list order, so more specific patterns come before
     * generic numeric ones.
     *
     * @param string $region 'nordic', 'european', 'echr', or anything else for global.
     * @return array List of ['pattern' => regex, 'replacement' => token, 'type' => counter key].
     */
    private function getPatternPack(string $region): array
    {
        $nordic = [
            ['pattern' => '/\b[A-Z0-9._%+\-]+@[A-Z0-9.\-]+\.[A-Z]{2,}\b/i', 'replacement' => '[EMAIL]', 'type' => 'email'],
            ['pattern' => '/(?<!\d)(?:\d{6}[\s\-]?\d{5}|\d{11})(?!\d)/u', 'replacement' => '[FNR]', 'type' => 'fødselsnummer'],
            ['pattern' => '/\b[A-ZÆØÅ][\p{L}æøåÆØÅ\.\- ]{2,40}\s+(?:gate|gata|vei|veien|plass)\s+\d+[A-Za-z]?\b/iu', 'replacement' => '[ADDRESS]', 'type' => 'address'],
            // Dates — must precede generic numeric patterns (the phone rule
            // below would otherwise swallow 30.07.2015 or 2015-07-30 as eight
            // separated digits).
            // Norwegian DD.MM.YYYY and DD/MM/YYYY
            ['pattern' => '/(?<!\d)(?:0?[1-9]|[12]\d|3[01])[.\/](?:0?[1-9]|1[0-2])[.\/](?:19|20)\d{2}(?!\d)/u', 'replacement' => '[DATE]', 'type' => 'date'],
            // ISO YYYY-MM-DD — before the year-range rule, which would otherwise
            // consume "2015-07" and leave "-30" behind
            ['pattern' => '/(?<!\d)(?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])(?!\d)/u', 'replacement' => '[DATE]', 'type' => 'date'],
            // Year range (e.g. 2011/2012, 2018-2019)
            ['pattern' => '/(?<!\d)(?:19|20)\d{2}\s*[\/\-–—]\s*(?:19|20)?\d{2}(?!\d)/u', 'replacement' => '[DATE]', 'type' => 'date'],
            // DD. Month YYYY (e.g. "30. juli 2015") and Month YYYY (Norwegian + English)
            ['pattern' => '/\b(?:\d{1,2}\.?\s+)?(?:januar|februar|mars|april|mai|juni|juli|august|september|oktober|november|desember|January|February|March|April|May|June|July|August|September|October|November|December|Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)\s+(?:19|20)\d{2}\b/iu', 'replacement' => '[DATE]', 'type' => 'date'],
            // Year after Norwegian/English temporal preposition. \K drops the
            // preposition from the replaced span; a lookbehind cannot be used
            // because its alternatives differ in length, which PCRE2 before
            // 10.43 refuses to compile.
            ['pattern' => '/\b(?:i|fra|siden|innen|før|etter|rundt|omkring|cirka|in|from|since|until|before|after|around|circa)\s\K(?:19|20)\d{2}(?![\d\/\-])/iu', 'replacement' => '[DATE]', 'type' => 'date'],
            // Norwegian phone: optional +47 followed by eight digits with
            // optional separators. Generic numeric rule, so it runs last.
            ['pattern' => '/(?<!\d)(?:\+47[\s.\-]?)?(?:\d[\s.\-]?){8}(?!\d)/u', 'replacement' => '[PHONE]', 'type' => 'phone'],
        ];

        if ($region === 'nordic') {
            return $nordic;
        }

        $european = array_merge($nordic, [
            // Swedish personnummer short (YYMMDD-XXXX / YYMMDD+XXXX)
            ['pattern' => '/(?<!\d)\d{6}[-+]\d{4}(?!\d)/u', 'replacement' => '[SE_PERSONNUMMER]', 'type' => 'se_personnummer'],
            // Swedish personnummer full (YYYYMMDD-XXXX)
            ['pattern' => '/(?<!\d)\d{8}[-+]\d{4}(?!\d)/u', 'replacement' => '[SE_PERSONNUMMER]', 'type' => 'se_personnummer'],
            // UK National Insurance number (two prefix letters, six digits, suffix letter A-D)
            ['pattern' => '/\b[A-CEGHJ-PR-TW-Z]{2}\s?\d{2}\s?\d{2}\s?\d{2}\s?[A-D]\b/i', 'replacement' => '[UK_NI]', 'type' => 'uk_ni'],
            // French INSEE (15 digits, not overlapping with 11-digit FNR due to lookarounds)
            ['pattern' => '/(?<!\d)\d{15}(?!\d)/u', 'replacement' => '[FR_INSEE]', 'type' => 'fr_insee'],
            // IBAN (2-letter country code + 2 check digits + up to 30 alphanumeric)
            ['pattern' => '/\b[A-Z]{2}\d{2}[A-Z0-9]{4}\d{7}[A-Z0-9]{0,16}\b/i', 'replacement' => '[IBAN]', 'type' => 'iban'],
            // European phone (international prefix for major EU/EEA country codes)
            ['pattern' => '/(?<!\d)\+(?:44|46|45|358|33|49|34|39|31|32|41|43|30|351|353|48|36|420|421|372|371|370|386|385|356|357|40|359|352)[\s.\-]?(?:\d[\s.\-]?){7,12}(?!\d)/u', 'replacement' => '[PHONE]', 'type' => 'phone'],
            // Street address expanded to European street-type keywords
            ['pattern' => '/\b[A-ZÆØÅ][\p{L}æøåÆØÅ\.\- ]{2,40}\s+(?:gate|gata|vei|veien|plass|street|road|avenue|ave|rue|straße|strasse|straat|gade|calle|via|gatan|vägen)\s+\d+[A-Za-z]?\b/iu', 'replacement' => '[ADDRESS]', 'type' => 'address'],
        ]);

        if ($region === 'european') {
            return $european;
        }

        $echr = array_merge($european, [
            // ECHR application number (requires "Application no." or "App. No." prefix to avoid matching dates/pages)
            ['pattern' => '/\b(?:Application|App\.?)\s+(?:no\.?|nr\.?|#)\s*\d{3,6}\s*\/\s*\d{2,4}\b/i', 'replacement' => '[ECHR_APP_NO]', 'type' => 'echr_app_no'],
            // Date of birth stated in judgment context
            ['pattern' => '/\bborn\s+(?:on\s+)?\d{1,2}[.\s]+(?:January|February|March|April|May|June|July|August|September|October|November|December|januar|februar|mars|april|mai|juni|juli|august|september|oktober|november|desember)[,\s]+\d{4}\b/iu', 'replacement' => '[DOB]', 'type' => 'date_of_birth'],
            ['pattern' => '/\bf\.\s*\d{4}\b/iu', 'replacement' => '[DOB]', 'type' => 'date_of_birth'],
            // National ID label patterns in multiple languages
            ['pattern' => '/\b(?:personal\s+number|numéro\s+national|Personalausweis|personnummer|fødselsnummer|henkilötunnus|CPR-nummer)\s*:\s*[\w\s\-]+/iu', 'replacement' => '[NAT_ID]', 'type' => 'nat_id'],
        ]);

        if ($region === 'echr') {
            return $echr;
        }

        // global
        return array_merge($echr, [
            // US Social Security Number
            ['pattern' => '/(?<!\d)\d{3}-\d{2}-\d{4}(?!\d)/u', 'replacement' => '[SSN]', 'type' => 'ssn'],
            // Document number in context (passport no., ID No., document no.)
            ['pattern' => '/\b(?:passport\s+(?:no\.?|number)|ID\s+(?:no\.?|number)|document\s+(?:no\.?|number))\s*[:\#]?\s*([A-Z0-9]{6,12})\b/iu', 'replacement' => '[DOC_NO]', 'type' => 'doc_no'],
        ]);
    }

    /**
     * Ask an LLM to find identifying information the regex pass left behind.
     *
     * The model receives the pre-redacted text and returns a JSON object with a
     * 'redactions' list; this method does not apply the redactions itself.
     *
     * @param string $preRedacted   Text that already went through mechanical redaction.
     * @param string $language      'no' adds a note about Norwegian or mixed-language content.
     * @param array  $aliases       Entries with 'original' and 'alias' forcing a tag for a given name.
     * @param string $engine        'regex' (skip), 'gpu', 'azure_full' (gpt-4o), anything else: default Azure deployment.
     * @param bool   $keepOfficials Keep names of judges, expert witnesses and caseworkers visible inside their tag.
     * @param array  $exemptNames   Names that must never be redacted (first 20 are used).
     * @param bool   $doNames       Redact person names.
     * @param bool   $doOrgs        Redact organisation names.
     * @param bool   $doPlaces      Redact place names.
     * @param bool   $doDob         Redact dates of birth.
     * @return array One of:
     *               ['skipped' => true, 'reason' => string],
     *               ['skipped' => false, 'entities' => array, 'deployment' => string],
     *               ['skipped' => false, 'entities' => [], 'error' => string].
     */
    private function llmRedactionPass(
        string $preRedacted,
        string $language      = 'en',
        array  $aliases       = [],
        string $engine        = 'azure_mini',
        bool   $keepOfficials = false,
        array  $exemptNames   = [],
        bool   $doNames       = true,
        bool   $doOrgs        = true,
        bool   $doPlaces      = true,
        bool   $doDob         = true
    ): array {
        if ($engine === 'regex') {
            return ['skipped' => true, 'reason' => 'Regex-only mode selected'];
        }

        if ($engine !== 'gpu') {
            $missing = $this->azure->missingChatConfig();
            if ($missing) {
                return ['skipped' => true, 'reason' => 'Azure chat not configured (' . implode(', ', $missing) . ')'];
            }
        }

        $languageNote = $language === 'no' ? "\n  • The document may contain Norwegian or mixed-language content." : '';

        // Build alias block
        $aliasBlock = '';
        if (!empty($aliases)) {
            // Trim, cap at 100 characters and neutralise characters that could
            // break out of the quoted prompt line. mb_substr keeps multibyte
            // characters (æ, ø, å, …) intact; a byte-wise cut could leave
            // invalid UTF-8 in the prompt.
            $clean = static function ($value): string {
                $short = mb_substr(trim((string)$value), 0, 100, 'UTF-8');
                return str_replace(["\n", "\r", '`', '"', '{', '}'], ' ', $short);
            };
            $lines = [];
            foreach ($aliases as $a) {
                $orig = $clean($a['original'] ?? '');
                $lbl  = $clean($a['alias'] ?? '');
                if ($orig !== '' && $lbl !== '') {
                    $lines[] = "  \"{$orig}\" → [{$lbl}]";
                }
            }
            if ($lines) {
                $aliasBlock = "\n\nALIAS OVERRIDES — use these exact replacement tags for these specific names instead of inferring a role:\n" . implode("\n", $lines);
            }
        }

        // Build exempt names block
        $exemptBlock = '';
        if (!empty($exemptNames)) {
            // Cast first: str_replace() rejects non-string subjects under
            // strict_types. Line breaks of either kind are flattened so a name
            // cannot start a new prompt line.
            $quoted = array_map(
                fn($n) => '"' . str_replace(['"', "\n", "\r"], ['\\"', ' ', ' '], (string)$n) . '"',
                array_slice($exemptNames, 0, 20)
            );
            $exemptBlock = "\n\nEXEMPT NAMES — these names must NOT be redacted under any circumstances:\n  " . implode(', ', $quoted);
        }

        // Build entity-type restriction note
        $skipTypes = [];
        if (!$doOrgs)   $skipTypes[] = 'organisation names';
        if (!$doPlaces) $skipTypes[] = 'place names';
        if (!$doDob)    $skipTypes[] = 'dates of birth';
        if (!$doNames)  $skipTypes[] = 'person names';
        $skipNote = $skipTypes ? "\n\nSKIP these entity types — do NOT redact them: " . implode(', ', $skipTypes) . '.' : '';

        // Build officials note
        $officialsNote = '';
        if ($keepOfficials) {
            $officialsNote = "\n\nOFFICIALS — for persons identified as JUDGE, EXPERT_WITNESS, or CASEWORKER in an official capacity: do NOT replace their name with a plain bracket tag. Instead use the format [ROLE: Name], e.g. [JUDGE: Andersen] or [EXPERT_WITNESS: Dr. Larsen]. Their name must remain visible inside the tag.";
        }

        $allowedTypesNote = '';
        if (!$doNames) {
            $allowedTypesNote = "\n\nDo NOT include person_name entries in your output.";
        }

        $system = <<<PROMPT
You are a privacy redaction assistant for legal documents (ECHR judgements, Norwegian family law cases, EU child welfare documents). The text below has already had mechanical identifiers replaced with placeholder tags in [BRACKETS].

Your task: find any remaining identifiable information — person names, organisation names, specific places at city level or below, dates and dates of birth (including soft references like "i 2015", "august 2018", "rundt 2011/2012", "spring of 2019"), and identifying descriptions.

STEP 1 — For person names: identify each individual and infer their role or relationship from context.
Assign each person a consistent contextual tag used for every occurrence of their name:
  • Family roles:       FATHER, MOTHER, CHILD, CHILD_1, CHILD_2, GRANDPARENT, SIBLING
  • Professional roles: ATTORNEY, JUDGE, CASEWORKER, EXPERT_WITNESS
  • Generic fallback:   PERSON_1, PERSON_2  (use only when role cannot be determined)
The same individual MUST receive the same tag every time they appear.{$aliasBlock}{$exemptBlock}{$officialsNote}{$skipNote}{$allowedTypesNote}

Return ONLY a valid JSON object:
{"redactions":[{"original":"exact text as it appears","type":"person_name","tag":"[FATHER]"}]}

Allowed types and their tag format:
  person_name  →  contextual role tag e.g. [FATHER], [CHILD_1], [ATTORNEY]  (or alias tag if provided above)
  org          →  [ORG]
  place        →  [PLACE]
  date_of_birth → [DOB]
  date         →  [DATE]   (years, year ranges, month+year, soft temporal references — e.g. "i 2015" → "i [DATE]", "rundt 2011/2012" → "rundt [DATE]")
  other        →  [IDENTIFIER]

Rules:
  • Include only text that appears verbatim in the input. Do not invent or paraphrase.
  • The same person MUST get the same tag every time they appear.
  • If nothing needs redacting, return {"redactions":[]}.
  • Do not redact text already inside [BRACKETS].
  • Legal citations, statute names, article numbers, and institution names (e.g. "the European Court of Human Rights", "Barnevernloven § 4-12") are NOT PII.
  • Short common words, conjunctions, and prepositions are NOT PII.{$languageNote}
PROMPT;

        $messages = [
            ['role' => 'system', 'content' => $system],
            ['role' => 'user',   'content' => $preRedacted],
        ];
        $chatOptions = ['temperature' => 0.1, 'max_tokens' => 8000, 'json' => true, 'timeout' => 90];

        try {
            if ($engine === 'gpu') {
                $response    = $this->callGpuLlm($messages, $chatOptions);
                $deployLabel = 'GPU (cuttlefish)';
            } elseif ($engine === 'azure_full') {
                $response    = $this->azure->withDeployment('gpt-4o')->chat($messages, $chatOptions);
                $deployLabel = 'gpt-4o';
            } else {
                $response    = $this->azure->chat($messages, $chatOptions);
                $deployLabel = $this->azure->chatDeployment();
            }

            $content = (string)($response['choices'][0]['message']['content'] ?? '');
            $json    = $this->azure->decodeJsonObject($content);

            if (!is_array($json) || !array_key_exists('redactions', $json)) {
                return ['skipped' => false, 'entities' => [], 'error' => 'LLM returned unexpected JSON structure'];
            }

            return [
                'skipped'    => false,
                'entities'   => is_array($json['redactions']) ? $json['redactions'] : [],
                'deployment' => $deployLabel,
            ];
        } catch (Throwable $e) {
            // A failed LLM pass is reported to the caller, not thrown.
            error_log('DBN tools LLM redaction pass failed: ' . $e->getMessage());
            return ['skipped' => false, 'entities' => [], 'error' => $e->getMessage()];
        }
    }

    /**
     * Send a chat request to the GPU-hosted LLM.
     *
     * Thin wrapper that forwards both arguments unchanged to the global
     * dbnToolsCallGpuLlm() helper and returns its result.
     *
     * @param array $messages Chat messages to send.
     * @param array $options  Chat options to forward.
     * @return array Whatever dbnToolsCallGpuLlm() returns.
     */
    private function callGpuLlm(array $messages, array $options = []): array
    {
        return dbnToolsCallGpuLlm($messages, $options);
    }

    /**
     * Flatten every contextual person tag to the generic [PERSON] tag.
     *
     * Covers family and professional role tags, numbered variants such as
     * [CHILD_1] or [PERSON_2], and official tags that carry a name, e.g.
     * [JUDGE: Andersen]. All other tags are left untouched.
     *
     * @param string $text Redacted text.
     * @return string Text with person tags collapsed.
     */
    private function applyGenericTags(string $text): string
    {
        $roleTagPattern = '/\[(?:FATHER|MOTHER|CHILD(?:_\d+)?|GRANDPARENT|SIBLING|ATTORNEY|JUDGE(?::\s*[^\]]+)?|CASEWORKER(?::\s*[^\]]+)?|EXPERT_WITNESS(?::\s*[^\]]+)?|PERSON(?:_\d+)?)\]/u';

        $collapsed = preg_replace($roleTagPattern, '[PERSON]', $text);

        // preg_replace yields null on a PCRE error; keep the input in that case.
        return $collapsed === null ? $text : $collapsed;
    }

    /**
     * Replace redaction tags with readable placeholder values.
     *
     * Person tags map to a fixed list of example names; the same tag always
     * gets the same name, and the list wraps around after ten distinct tags.
     * Phone, e-mail, address and organisation tags receive a fresh numbered
     * value for every occurrence. The remaining identifier tags are replaced
     * by constants. Tags not listed here (e.g. [DATE]) are left as they are.
     *
     * @param string $text      Redacted text containing bracket tags.
     * @param array  $allCounts Unused; kept for interface compatibility.
     * @return string Text with tags replaced by pseudonyms.
     */
    private function applyPseudonymization(string $text, array $allCounts): string
    {
        $norwegianNames = [
            'Ola Nordmann', 'Per Hansen', 'Kari Larsen', 'Anne Berg', 'Erik Dahl',
            'Ingrid Holm', 'Lars Moen', 'Silje Bakke', 'Tor Haugen', 'Eva Strand',
        ];
        $nameCursor  = 0;
        $phoneBase   = 1;
        $emailCursor = 0;
        $addrCursor  = 1;
        $orgCursor   = 1;
        $personMap   = [];

        // Replace named role tags (keeping consistent mapping per unique tag)
        $text = preg_replace_callback(
            '/\[(FATHER|MOTHER|CHILD(?:_\d+)?|GRANDPARENT|SIBLING|ATTORNEY|JUDGE(?::\s*[^\]]+)?|CASEWORKER(?::\s*[^\]]+)?|EXPERT_WITNESS(?::\s*[^\]]+)?|PERSON(?:_\d+)?)\]/u',
            function (array $m) use (&$nameCursor, &$personMap, $norwegianNames): string {
                $key = $m[1];
                if (!isset($personMap[$key])) {
                    $personMap[$key] = $norwegianNames[$nameCursor % count($norwegianNames)];
                    $nameCursor++;
                }
                return $personMap[$key];
            },
            $text
        ) ?? $text;

        $text = preg_replace_callback('/\[PHONE\]/', function () use (&$phoneBase): string {
            return sprintf('+47 400 00 %03d', $phoneBase++);
        }, $text) ?? $text;

        $text = preg_replace_callback('/\[EMAIL\]/', function () use (&$emailCursor): string {
            // a..z, then wraps around
            $letter = chr(ord('a') + ($emailCursor % 26));
            $emailCursor++;
            return "person.{$letter}@example.no";
        }, $text) ?? $text;

        // The address and organisation counters now advance per occurrence.
        // Before, they stayed at 1, so every address and every organisation in
        // a document collapsed into the same pseudonym.
        $text = preg_replace_callback('/\[ADDRESS\]/', function () use (&$addrCursor): string {
            $number = $addrCursor++;
            return "Eksempelveien {$number}, 0001 Oslo";
        }, $text) ?? $text;

        $text = preg_replace_callback('/\[ORG\]/', function () use (&$orgCursor): string {
            $number = $orgCursor++;
            return "Eksempel AS ({$number})";
        }, $text) ?? $text;

        // Tags with a constant replacement.
        $text = str_replace('[FNR]', '010100XXXXX', $text);
        $text = str_replace(
            ['[SE_PERSONNUMMER]', '[FR_INSEE]', '[UK_NI]', '[SSN]', '[NAT_ID]', '[DOC_NO]', '[ECHR_APP_NO]'],
            '[ID-REDACTED]',
            $text
        );
        $text = str_replace('[PLACE]', 'Eksempelby', $text);
        $text = str_replace('[DOB]', '01.01.0000', $text);
        $text = str_replace('[IBAN]', 'NO00 0000 00 00000', $text);

        return $text;
    }

    /**
     * Condense the tool's uncertainty statement into one short string.
     *
     * Array input is joined with single spaces; anything else is cast to
     * string. A blank result is replaced by a fixed default sentence,
     * otherwise the text is shortened through dbnToolsExcerpt() with a limit
     * of 220.
     *
     * @param mixed $uncertainty String, list of strings, or null.
     * @return string Summary sentence.
     */
    private function uncertaintySummary(mixed $uncertainty): string
    {
        $flat = is_array($uncertainty)
            ? implode(' ', array_map('strval', $uncertainty))
            : (string)$uncertainty;
        $flat = trim($flat);

        if ($flat === '') {
            return 'No additional uncertainty was supplied by the tool.';
        }

        return dbnToolsExcerpt($flat, 220);
    }

    /**
     * Build one entry of the processing trace shown to the user.
     *
     * @param string $label  Short step name.
     * @param string $detail Description of what the step did.
     * @param string $status Step status; defaults to 'complete'.
     * @return array Entry with the keys 'label', 'detail' and 'status', in that order.
     */
    private function trace(string $label, string $detail, string $status = 'complete'): array
    {
        // compact() keys the values by variable name, in the order listed.
        return compact('label', 'detail', 'status');
    }
}