diff --git a/includes/LegalTools.php b/includes/LegalTools.php index b60a46a..ecef6e7 100644 --- a/includes/LegalTools.php +++ b/includes/LegalTools.php @@ -312,45 +312,68 @@ PROMPT; ? "\nAlso extract BACKGROUND and NARRATIVE events: dates embedded in contextual paragraphs, historical facts, year-only references, and approximate years (e.g. \"rundt 2011/2012\", \"David ble født den 30.07.2015\", \"familien i 2015\"). These are valid timeline events even when they appear in introductory or background text — do NOT skip them." : "\nDo NOT include purely historical background or narrative context dates. Focus only on operational events, deadlines, and milestones that are directly actionable in the case."; + $relativeInstruction = $includeRelative + ? '' + : "\nDo NOT extract relative, recurring, or conditional date references — extract only events with determinable absolute dates (date_type=absolute)."; + $prompt = << 'system', 'content' => $system], ['role' => 'user', 'content' => $prompt], ]; - $chatOptions = ['json' => true, 'temperature' => 0.1, 'max_tokens' => 4000, 'timeout' => 120]; + $chatOptions = ['json' => true, 'temperature' => 0.1, 'max_tokens' => ($engine === 'azure_full' ? 8000 : 4000), 'timeout' => 120]; $deployLabel = $this->azure->chatDeployment(); try { @@ -519,7 +542,15 @@ PROMPT; if (!preg_match('/^\[[A-Za-z0-9_\- ]+(?::\s*[^\]]+)?\]$/', $tag)) { $tag = '[IDENTIFIER]'; } - if (str_contains($finalRedacted, $original)) { + // Try word-boundary match first to avoid partial-word substitutions (e.g. "Per" inside "Persson") + $escaped = preg_quote($original, '/'); + $replaced = preg_replace('/\b' . $escaped . '\b/u', $tag, $finalRedacted); + if ($replaced !== null && $replaced !== $finalRedacted) { + $finalRedacted = $replaced; + $pass2Counts[$type] = ($pass2Counts[$type] ?? 0) + 1; + $applied++; + } elseif (str_contains($finalRedacted, $original)) { + // Fallback for names adjacent to punctuation or non-word characters $finalRedacted = str_replace($original, $tag, $finalRedacted); $pass2Counts[$type] = ($pass2Counts[$type] ?? 0) + 1; $applied++; @@ -607,7 +638,8 @@ PROMPT; { $locale = dbnToolsLanguageName($language); return <<