diff --git a/assets/js/tools.js b/assets/js/tools.js
index edbcacf..39b6c1a 100644
--- a/assets/js/tools.js
+++ b/assets/js/tools.js
@@ -388,6 +388,7 @@ const TIMELINE_I18N = {
let lastTimelineEvents = [];
let lastTimelineEventsOriginal = [];
let lastTimelineWhatWeFound = '';
+let lastTimelineInputDateHintCount = null;
let activeActorFilters = new Set();
let timelineSearchTerm = '';
let showSources = true;
@@ -1602,6 +1603,7 @@ function renderMainFinding(data) {
lastTimelineEventsOriginal = data.events || [];
lastTimelineEvents = [...lastTimelineEventsOriginal];
lastTimelineWhatWeFound = data.what_we_found || '';
+ lastTimelineInputDateHintCount = data.trace_metadata?.input_date_hint_count ?? null;
activeActorFilters = new Set();
timelineSearchTerm = '';
showSources = true;
@@ -1728,6 +1730,9 @@ function applyTimelineFilters() {
function renderTimeline(events, grouped = false) {
if (!events.length) {
+ if (lastTimelineInputDateHintCount === 0) {
+ return '
No recognizable dates were found in the extracted text. Check that the upload is text-searchable, or paste the relevant dated section and run again.
';
+ }
return 'No matching events.
';
}
const MONTH_NAMES = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'];
diff --git a/includes/AzureOpenAiGateway.php b/includes/AzureOpenAiGateway.php
index 5fd92fb..275bd13 100644
--- a/includes/AzureOpenAiGateway.php
+++ b/includes/AzureOpenAiGateway.php
@@ -149,13 +149,20 @@ final class DbnAzureOpenAiGateway
public function decodeJsonObject(string $content): ?array
{
$content = trim($content);
+ $content = (string)preg_replace('/^```(?:json)?\s*\n?/i', '', $content);
+ $content = (string)preg_replace('/\n?```\s*$/', '', $content);
+ $content = trim($content);
+
$decoded = json_decode($content, true);
if (is_array($decoded)) {
return $decoded;
}
- if (preg_match('/\{(?:[^{}]|(?R))*\}/s', $content, $match)) {
- $decoded = json_decode($match[0], true);
+ $start = strpos($content, '{');
+ $end = strrpos($content, '}');
+ if ($start !== false && $end !== false && $end > $start) {
+ $candidate = substr($content, $start, $end - $start + 1);
+ $decoded = json_decode($candidate, true);
if (is_array($decoded)) {
return $decoded;
}
diff --git a/includes/LegalTools.php b/includes/LegalTools.php
index 4a112b0..2408d94 100644
--- a/includes/LegalTools.php
+++ b/includes/LegalTools.php
@@ -361,6 +361,7 @@ PROMPT;
$onProgress && $onProgress("Preparing document\u{2026}");
$locale = dbnToolsLanguageName($language);
+ $inputDateHintCount = $this->timelineDateHintCount($text);
$focusInstruction = match ($focus) {
'deadlines' => "\nFocus specifically on: legal deadlines, filing dates, response windows, appeal periods, and statutory time limits. Deprioritise narrative events with no legal deadline significance.",
@@ -445,6 +446,42 @@ Return JSON only:
}
PROMPT;
+ if ($engine === 'nova_lite') {
+ $prompt = <<legalJsonSystemPrompt($language);
$messages = [
['role' => 'system', 'content' => $system],
@@ -464,23 +501,45 @@ PROMPT;
$response = $this->azure->withDeployment('gpt-4o-mini')->chat($messages, $chatOptions);
}
} catch (Throwable $e) {
- dbnToolsAbort('LLM request failed: ' . $e->getMessage(), 502, 'llm_error');
+ $msg = $e->getMessage();
+ if (preg_match('/timed?\s*out|timeout|operation timed out/i', $msg)) {
+ dbnToolsAbort('The model timed out. Try Quick mode, a smaller file, or fewer selected documents.', 504, 'llm_timeout');
+ }
+ dbnToolsAbort('LLM request failed: ' . $msg, 502, 'llm_error');
}
$onProgress && $onProgress("Parsing events\u{2026}");
$raw = (string)($response['choices'][0]['message']['content'] ?? '');
- if ($engine === 'nova_lite') {
- $raw = (string)preg_replace('/^```(?:json)?\s*\n?/m', '', $raw);
- $raw = (string)preg_replace('/\n?```\s*$/m', '', $raw);
- $raw = trim($raw);
- }
$json = $this->azure->decodeJsonObject($raw);
if (!$json) {
dbnToolsAbort('The selected engine did not return valid structured JSON.', 502, 'llm_invalid_json');
}
$events = is_array($json['events'] ?? null) ? $json['events'] : [];
+ $usedFallbackExtractor = false;
+ if (!$events && $inputDateHintCount > 0) {
+ $fallbackEvents = $this->fallbackTimelineEvents($text);
+ if ($fallbackEvents) {
+ $events = $fallbackEvents;
+ $usedFallbackExtractor = true;
+ $uncertain = is_array($json['what_remains_uncertain'] ?? null) ? $json['what_remains_uncertain'] : [];
+ array_unshift($uncertain, 'The selected engine returned no events, so a deterministic date-line fallback extracted visible dated lines. Review these medium-confidence entries against the original file.');
+ $json['what_remains_uncertain'] = $uncertain;
+ $json['what_we_found'] = count($events) . ' date-like event(s) extracted by fallback after the selected engine returned no events.';
+ $json['next_practical_step'] = 'Review each fallback event against the original uploaded document and rerun with Standard or Deep if you need fuller actor/event interpretation.';
+ }
+ }
+ if (!$events && $inputDateHintCount === 0) {
+ $json['what_we_found'] = (string)($json['what_we_found'] ?? 'No recognizable dates were found in the extracted text from this upload.');
+ if (trim((string)$json['what_we_found']) === '') {
+ $json['what_we_found'] = 'No recognizable dates were found in the extracted text from this upload.';
+ }
+ $json['next_practical_step'] = (string)($json['next_practical_step'] ?? 'Check that the file text was extracted correctly, or upload a text-searchable PDF/DOCX.');
+ if (trim((string)$json['next_practical_step']) === '') {
+ $json['next_practical_step'] = 'Check that the file text was extracted correctly, or upload a text-searchable PDF/DOCX.';
+ }
+ }
// Post-filter: confidence
if ($confidenceFilter === 'high_medium') {
@@ -523,11 +582,117 @@ PROMPT;
'chunk_count' => count($events),
'source_count' => 1,
'deployment' => $engineLabel,
+ 'input_date_hint_count' => $inputDateHintCount,
+ 'used_fallback_extractor' => $usedFallbackExtractor,
],
'disclaimer' => dbnToolsDisclaimer($language),
];
}
+ private function timelineDateHintCount(string $text): int
+ {
+ preg_match_all('/(?= 80) {
+ break;
+ }
+ $line = trim((string)preg_replace('/\s+/u', ' ', $line));
+ if ($line === '') {
+ continue;
+ }
+ if (preg_match('/\b(20\d{2}|19\d{2})\b/u', $line, $ym)) {
+ $lastYear = (int)$ym[1];
+ }
+ if (!preg_match_all('/(?= 80) {
+ break 2;
+ }
+ $day = (int)$m[1][0];
+ $month = (int)$m[2][0];
+ if ($day < 1 || $day > 31 || $month < 1 || $month > 12) {
+ continue;
+ }
+ $yearRaw = $m[3][0] ?? '';
+ $year = null;
+ if ($yearRaw !== '') {
+ $year = strlen($yearRaw) === 2 ? 2000 + (int)$yearRaw : (int)$yearRaw;
+ $lastYear = $year;
+ } elseif ($lastYear !== null) {
+ $year = $lastYear;
+ }
+ $date = $year !== null
+ ? sprintf('%04d-%02d-%02d', $year, $month, $day)
+ : sprintf('%02d.%02d. (year unknown)', $day, $month);
+ $time = null;
+ if (preg_match('/\bkl\.?\s*(\d{1,2})[:.](\d{2})\b|\b(\d{1,2}):(\d{2})\b/u', $line, $tm)) {
+ $hour = (int)($tm[1] !== '' ? $tm[1] : $tm[3]);
+ $min = (int)($tm[2] !== '' ? $tm[2] : $tm[4]);
+ if ($hour >= 0 && $hour <= 23 && $min >= 0 && $min <= 59) {
+ $time = sprintf('%02d:%02d', $hour, $min);
+ }
+ }
+ $eventText = trim(preg_replace('/^\s*[-*#\s]*/u', '', $line));
+ $eventText = trim(preg_replace('/^' . preg_quote($m[0][0], '/') . '\s*(?:kl\.?\s*\d{1,2}[:.]\d{2})?\s*[:\-–—]?\s*/u', '', $eventText));
+ if ($eventText === '') {
+ $eventText = 'Dated event found in uploaded text.';
+ }
+ $events[] = [
+ 'date' => $date,
+ 'end_date' => null,
+ 'time' => $time,
+ 'date_type' => $year !== null ? 'absolute' : 'relative',
+ 'actor' => $this->fallbackTimelineActor($line),
+ 'event' => mb_substr($eventText, 0, 240, 'UTF-8'),
+ 'source_excerpt' => mb_substr($line, 0, 300, 'UTF-8'),
+ 'confidence' => 'medium',
+ ];
+ }
+ }
+
+ usort($events, static function (array $a, array $b): int {
+ $ad = (string)($a['date'] ?? '');
+ $bd = (string)($b['date'] ?? '');
+ $ai = preg_match('/^\d{4}-\d{2}-\d{2}$/', $ad) ? $ad : '9999-99-99';
+ $bi = preg_match('/^\d{4}-\d{2}-\d{2}$/', $bd) ? $bd : '9999-99-99';
+ return strcmp($ai, $bi);
+ });
+
+ return $events;
+ }
+
+ /**
+  * Guess which known actor a line of text refers to.
+  *
+  * The patterns are tried in declaration order and the label of the first
+  * one that matches is returned, so earlier entries win when a line mentions
+  * several actors. Matching is case-insensitive and UTF-8 aware.
+  *
+  * @param string $line Text to scan for an actor name or abbreviation.
+  * @return string The matching actor label, or 'unknown' when nothing matches.
+  */
+ private function fallbackTimelineActor(string $line): string
+ {
+     $actors = [
+         // Short abbreviations are anchored with \b on BOTH sides so they only
+         // match as standalone tokens (e.g. "BV", not the tail of "ABV-123").
+         '/barnevern(?:s?tjenesten)?|\bbv\b/iu' => 'Barnevernstjenesten',
+         '/fylkesnemnda/iu' => 'Fylkesnemnda',
+         '/statsforvalter(?:en)?/iu' => 'Statsforvalteren',
+         '/tingrett/iu' => 'Tingrett',
+         '/lagmannsrett/iu' => 'Lagmannsrett',
+         // NOTE(review): both alternatives below look identical as written;
+         // confirm whether a different spelling was intended for the second.
+         '/høyesterett|høyesterett/iu' => 'Høyesterett',
+         '/\bnav\b/iu' => 'NAV',
+         '/\bbup\b/iu' => 'BUP',
+         '/\bppt\b/iu' => 'PPT',
+     ];
+     foreach ($actors as $pattern => $actor) {
+         if (preg_match($pattern, $line)) {
+             return $actor;
+         }
+     }
+     return 'unknown';
+ }
+
public function redact(
string $text,
string $mode = 'standard',