diff --git a/assets/js/tools.js b/assets/js/tools.js index babe7c4..07bb41c 100644 --- a/assets/js/tools.js +++ b/assets/js/tools.js @@ -25,7 +25,7 @@ const REDACT_I18N = { redactEntityNames: 'Names', redactEntityOrgs: 'Organisations', redactEntityPlaces: 'Places', - redactEntityDob: 'Dates of birth', + redactEntityDob: 'Dates', redactOfficials: 'Officials', redactKeepOfficials: 'Keep official names (judges, experts)', redactOfficialsHint: 'When checked, judges, expert witnesses and caseworkers keep their names in a labelled tag: [JUDGE: Andersen]. Uncheck to replace all names with generic role tags.', @@ -79,7 +79,7 @@ const REDACT_I18N = { redactEntityNames: 'Navn', redactEntityOrgs: 'Organisasjoner', redactEntityPlaces: 'Steder', - redactEntityDob: 'Fødselsdatoer', + redactEntityDob: 'Datoer', redactOfficials: 'Offisielle', redactKeepOfficials: 'Behold offisielle navn (dommere, sakkyndige)', redactOfficialsHint: 'Når avkrysset beholder dommere, sakkyndige og saksbehandlere sine navn i en merket tagg: [DOMMER: Andersen]. Fjern haken for å erstatte alle navn med generiske rolletaggar.', @@ -133,7 +133,7 @@ const REDACT_I18N = { redactEntityNames: 'Імена', redactEntityOrgs: 'Організації', redactEntityPlaces: 'Місця', - redactEntityDob: 'Дати народження', + redactEntityDob: 'Дати', redactOfficials: 'Офіційні особи', redactKeepOfficials: 'Зберігати офіційні імена (судді, експерти)', redactOfficialsHint: 'Якщо позначено, судді, експерти та соціальні працівники зберігають свої імена у позначеному тезі: [СУДДЯ: Andersen].', @@ -187,7 +187,7 @@ const REDACT_I18N = { redactEntityNames: 'Imiona', redactEntityOrgs: 'Organizacje', redactEntityPlaces: 'Miejsca', - redactEntityDob: 'Daty urodzenia', + redactEntityDob: 'Daty', redactOfficials: 'Urzędnicy', redactKeepOfficials: 'Zachowaj oficjalne nazwy (sędziowie, eksperci)', redactOfficialsHint: 'Gdy zaznaczone, sędziowie, biegli i pracownicy socjalni zachowują swoje nazwiska w oznaczonym tagu: [SĘDZIA: Andersen].', diff --git a/includes/LegalTools.php b/includes/LegalTools.php index cea9515..2b3a24c 100644 --- a/includes/LegalTools.php +++ b/includes/LegalTools.php @@ -281,11 +281,12 @@ PROMPT; public function timeline( string $text, - string $language = 'en', - string $engine = 'azure_mini', - string $focus = 'all', - string $confidenceFilter = 'all', - bool $includeRelative = true + string $language = 'en', + string $engine = 'azure_mini', + string $focus = 'all', + string $confidenceFilter = 'all', + bool $includeRelative = true, + bool $includeBackground = true ): array { $text = $this->requirePasteText($text); $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true) ? $engine : 'azure_mini'; @@ -304,10 +305,14 @@ PROMPT; default => '', }; + $backgroundInstruction = $includeBackground + ? "\nAlso extract BACKGROUND and NARRATIVE events: dates embedded in contextual paragraphs, historical facts, year-only references, and approximate years (e.g. \"rundt 2011/2012\", \"David ble født den 30.07.2015\", \"familien i 2015\"). These are valid timeline events even when they appear in introductory or background text — do NOT skip them." + : "\nDo NOT include purely historical background or narrative context dates. Focus only on operational events, deadlines, and milestones that are directly actionable in the case."; + $prompt = << '/(? '[FNR]', 'type' => 'fødselsnummer'], ['pattern' => '/(? '[PHONE]', 'type' => 'phone'], ['pattern' => '/\b[A-ZÆØÅ][\p{L}æøåÆØÅ\.\- ]{2,40}\s+(?:gate|gata|vei|veien|plass)\s+\d+[A-Za-z]?\b/iu', 'replacement' => '[ADDRESS]', 'type' => 'address'], + // Dates — must precede generic numeric patterns + // Year range (e.g. 2011/2012, 2018-2019) + ['pattern' => '/(? '[DATE]', 'type' => 'date'], + // Norwegian DD.MM.YYYY and DD/MM/YYYY + ['pattern' => '/(? '[DATE]', 'type' => 'date'], + // ISO YYYY-MM-DD + ['pattern' => '/(? '[DATE]', 'type' => 'date'], + // DD. Month YYYY (e.g. "30. juli 2015") and Month YYYY (Norwegian + English) + ['pattern' => '/\b(?:\d{1,2}\.?\s+)?(?:januar|februar|mars|april|mai|juni|juli|august|september|oktober|november|desember|January|February|March|April|May|June|July|August|September|October|November|December|Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)\s+(?:19|20)\d{2}\b/iu', 'replacement' => '[DATE]', 'type' => 'date'], + // Year after Norwegian/English temporal preposition (lookbehind keeps preposition) + ['pattern' => '/(?<=\b(?:i|fra|siden|innen|før|etter|rundt|omkring|cirka|in|from|since|until|before|after|around|circa)\s)(?:19|20)\d{2}(?![\d\/\-])/iu', 'replacement' => '[DATE]', 'type' => 'date'], ]; if ($region === 'nordic') { @@ -991,7 +1007,7 @@ PROMPT; $system = << Names - +