Add chunked timeline routing

This commit is contained in:
2026-05-25 12:34:41 +02:00
parent 75b19f1dcf
commit 17ad54cf36
7 changed files with 521 additions and 31 deletions
+3 -1
View File
@@ -12,7 +12,9 @@ try {
dbnToolsError('No file was uploaded.', 422, 'missing_file');
}
$result = dbnToolsExtractUploadedFile($_FILES['file']);
$tool = (string)($_POST['tool'] ?? '');
$limit = $tool === 'timeline' ? DBN_TOOLS_TIMELINE_EXTRACT_TEXT_LIMIT : DBN_TOOLS_EXTRACT_TEXT_LIMIT;
$result = dbnToolsExtractUploadedFile($_FILES['file'], $limit);
$ftRemaining = dbnToolsFreeTierDeduct($ftUid, 'extract');
if ($ftRemaining >= 0) {
header('X-Credits-Remaining: ' . $ftRemaining);
+12 -2
View File
@@ -9,7 +9,7 @@ dbnToolsRequireAuth();
// Parse input and run credit pre-check BEFORE emitting SSE headers so that
// auth/credit errors can still return JSON (dbnToolsError / dbnToolsAbort).
$input = dbnToolsJsonInput(400000);
$input = dbnToolsJsonInput(1500000);
$language = dbnToolsNormalizeLanguage($input['language'] ?? 'en');
$_validEngines = ['nova_lite', 'azure_mini', 'azure_full'];
@@ -17,7 +17,7 @@ $_requestedEngine = in_array((string)($input['engine'] ?? ''), $_validEngines, t
? (string)$input['engine'] : 'azure_mini';
try {
$text = dbnToolsInjectDocContent($input, dbnToolsString($input, 'text', 128000, false));
$text = dbnToolsInjectDocContent($input, dbnToolsString($input, 'text', ToolModels::TIMELINE_DEEP_MAX_CHARS, false));
if (mb_strlen(trim($text), 'UTF-8') < 10) {
dbnToolsError('Paste text, upload a file, or select a document before running.', 422, 'empty_text');
}
@@ -33,6 +33,7 @@ try {
}
$timelineRoute = ToolModels::timelineRoute($ftUid, $_requestedEngine, $text);
ToolModels::assertTimelineQuoteAccepted($timelineRoute, $input);
$ftUid = dbnToolsFreeTierCheckAmount('timeline', (int)$timelineRoute['credits']);
} catch (DbnToolsHttpException $e) {
dbnToolsError($e->getMessage(), $e->status, $e->errorCode, $e->extra);
@@ -69,6 +70,11 @@ try {
'msg' => 'This input is ' . number_format((int)$timelineRoute['input_char_count']) . " characters, so Timeline is using {$label} for reliability.",
]);
}
if (!empty($timelineRoute['chunked_timeline'])) {
sseEmit('status', [
'msg' => 'Processing ' . (int)$timelineRoute['timeline_chunk_count'] . ' timeline chunk(s).',
]);
}
$validFocus = ['all', 'deadlines', 'hearings', 'cps'];
$focus = in_array((string)($input['focus'] ?? ''), $validFocus, true)
@@ -110,6 +116,10 @@ try {
'auto_upgraded_engine' => $timelineRoute['auto_upgraded_engine'],
'input_char_count' => $timelineRoute['input_char_count'],
'engine_limit_chars' => $timelineRoute['engine_limit_chars'],
'max_char_limit' => $timelineRoute['max_char_limit'],
'chunked_timeline' => $timelineRoute['chunked_timeline'],
'timeline_chunk_count' => $timelineRoute['timeline_chunk_count'],
'estimated_credits' => $timelineRoute['estimated_credits'],
'credits_charged' => $timelineRoute['credits'],
]);
+7 -2
View File
@@ -6,7 +6,7 @@ require_once __DIR__ . '/../includes/ToolModels.php';
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();
$input = dbnToolsJsonInput(400000);
$input = dbnToolsJsonInput(1500000);
$language = dbnToolsNormalizeLanguage($input['language'] ?? 'en');
$_validEngines = ['nova_lite', 'azure_mini', 'azure_full'];
$_requestedEngine = in_array((string)($input['engine'] ?? ''), $_validEngines, true)
@@ -15,7 +15,7 @@ $_requestedEngine = in_array((string)($input['engine'] ?? ''), $_validEngines, t
$start = microtime(true);
try {
$text = dbnToolsInjectDocContent($input, dbnToolsString($input, 'text', 128000, false));
$text = dbnToolsInjectDocContent($input, dbnToolsString($input, 'text', ToolModels::TIMELINE_DEEP_MAX_CHARS, false));
if (mb_strlen(trim($text), 'UTF-8') < 10) {
dbnToolsAbort('Paste text, upload a file, or select a document before running.', 422, 'empty_text');
}
@@ -33,6 +33,7 @@ try {
}
$timelineRoute = ToolModels::timelineRoute($ftUid, $_requestedEngine, $text);
ToolModels::assertTimelineQuoteAccepted($timelineRoute, $input);
$ftUid = dbnToolsFreeTierCheckAmount('timeline', (int)$timelineRoute['credits']);
$validFocus = ['all', 'deadlines', 'hearings', 'cps'];
@@ -71,6 +72,10 @@ try {
'auto_upgraded_engine' => $timelineRoute['auto_upgraded_engine'],
'input_char_count' => $timelineRoute['input_char_count'],
'engine_limit_chars' => $timelineRoute['engine_limit_chars'],
'max_char_limit' => $timelineRoute['max_char_limit'],
'chunked_timeline' => $timelineRoute['chunked_timeline'],
'timeline_chunk_count' => $timelineRoute['timeline_chunk_count'],
'estimated_credits' => $timelineRoute['estimated_credits'],
'credits_charged' => $timelineRoute['credits'],
]);
+108 -12
View File
@@ -400,6 +400,7 @@ let lastOriginalText = '';
let lastRedactPayload = null;
let lastRunEngine = null;
let lastToolPayload = null;
let pendingTimelineQuote = null;
const VOCAB_PRESETS = {
barnerett: 'Barnevernet, Fylkesnemnda, barnevernloven, barneloven, barnets beste, samvær, foreldreansvar, omsorgsovertakelse, sakkyndig, advokat, prosessfullmektig, dommer, vitne, tolk, bistandsadvokat, fosterforeldre, fosterhjem, akuttvedtak, statsforvalter, Bufetat, saksbehandler, rettslig medhold, begjæring, samtykke, tilsynsfører',
@@ -764,10 +765,60 @@ function timelineEngineLabel(engine) {
}
function timelineClientRoute(engine, charCount) {
let effective = engine;
if (charCount > 55000) effective = 'azure_full';
else if (charCount > 25000 && effective === 'nova_lite') effective = 'azure_mini';
return { effective, upgraded: effective !== engine };
return timelineClientQuote(engine, charCount);
}
/**
 * Client-side estimate of how a Timeline run will be routed and priced.
 *
 * @param {string} engine    Requested engine id; anything other than
 *                           'nova_lite', 'azure_mini' or 'azure_full' is treated as 'azure_mini'.
 * @param {number} charCount Size of the input in characters.
 * @returns {object} Either `{ error: true, message }` when the input exceeds
 *                   600,000 characters, or a quote with the requested/effective
 *                   engine, credit cost, chunking estimate and whether the user
 *                   must confirm before running.
 */
function timelineClientQuote(engine, charCount) {
  // Per-engine routing table: `rank` orders the engines, `single` is the largest
  // single-pass input, `max` the largest input the engine is used for, `chunk`
  // the divisor for the chunk estimate, `tiers` the [ceiling, credits] brackets
  // and `over` the price once every bracket is exceeded.
  const table = {
    nova_lite: { rank: 1, single: 25000, max: 100000, chunk: 10000, tiers: [[25000, 1]], over: 2 },
    azure_mini: { rank: 2, single: 55000, max: 300000, chunk: 16000, tiers: [[55000, 1], [180000, 2]], over: 3 },
    azure_full: { rank: 3, single: 128000, max: 600000, chunk: 30000, tiers: [[128000, 2], [350000, 4]], over: 6 },
  };

  const requested = Object.keys(table).includes(engine) ? engine : 'azure_mini';
  const baseCredits = requested === 'azure_full' ? 2 : 1;

  if (charCount > 600000) {
    return {
      error: true,
      message: `This timeline input is ${charCount.toLocaleString()} characters. Split the file or use fewer selected documents; the current maximum is 600,000 characters.`,
    };
  }

  // Escalate to the smallest engine whose maximum still covers the input.
  const tooBigFor = (name) => charCount > table[name].max;
  let effective = requested;
  if (tooBigFor(effective)) {
    effective = charCount <= table.azure_mini.max ? 'azure_mini' : 'azure_full';
  }
  if (tooBigFor(effective)) {
    effective = 'azure_full';
  }

  const chosen = table[effective];
  const bracket = chosen.tiers.find(([ceiling]) => charCount <= ceiling);
  const credits = bracket ? bracket[1] : chosen.over;
  const chunked = charCount > chosen.single;
  const upgraded = chosen.rank > table[requested].rank;

  return {
    requested,
    effective,
    upgraded,
    charCount,
    credits,
    baseCredits,
    chunked,
    chunkCount: chunked ? Math.ceil(charCount / chosen.chunk) : 1,
    requiresConfirmation: credits > baseCredits || upgraded,
  };
}
/**
 * Formats a quote as the multi-line text shown in the confirmation dialog.
 *
 * @param {object} quote Quote with `effective`, `charCount`, `chunked`,
 *                       `chunkCount` and `credits`.
 * @returns {string} Four lines: engine and size, pass/chunk info, cost, prompt.
 */
function timelineQuoteMessage(quote) {
  const engineName = timelineEngineLabel(quote.effective);
  const size = Number(quote.charCount || 0).toLocaleString();
  const passLine = quote.chunked
    ? `It will process about ${quote.chunkCount} chunks.`
    : 'It can run in a single pass.';
  const plural = quote.credits === 1 ? '' : 's';

  return `Timeline will use ${engineName} for ${size} characters.\n${passLine}\nCost: ${quote.credits} credit${plural}.\nContinue?`;
}
function currentTimelineFocus() {
@@ -1122,15 +1173,36 @@ async function runTool(event) {
let timelineRouteNotice = '';
if (state.activeTool === 'timeline') {
payload.engine = currentTimelineEngine();
const clientRoute = timelineClientRoute(payload.engine, text.length);
const clientRoute = timelineClientQuote(payload.engine, text.length);
if (clientRoute.error) {
els.status.textContent = clientRoute.message;
return;
}
const pendingQuoteApplies = pendingTimelineQuote
&& pendingTimelineQuote.text === text
&& pendingTimelineQuote.requested === payload.engine;
if (pendingQuoteApplies) {
payload.accepted_timeline_quote = true;
payload.accepted_credits = pendingTimelineQuote.credits;
payload.accepted_effective_engine = pendingTimelineQuote.effective;
pendingTimelineQuote = null;
} else if (clientRoute.requiresConfirmation) {
if (!window.confirm(timelineQuoteMessage(clientRoute))) {
els.status.textContent = 'Timeline run cancelled before any credits were charged.';
return;
}
payload.accepted_timeline_quote = true;
payload.accepted_credits = clientRoute.credits;
payload.accepted_effective_engine = clientRoute.effective;
}
payload.focus = currentTimelineFocus();
payload.confidence_filter = currentConfidenceFilter();
payload.include_relative = currentIncludeRelative();
payload.include_background = currentIncludeBackground();
payload.user_notes = (document.getElementById('timelineNotes')?.value || '').trim();
payload.use_my_case = (typeof window.dbnGetUseMyCase === 'function') ? window.dbnGetUseMyCase() : false;
timelineRouteNotice = clientRoute.upgraded
? `This input is ${text.length.toLocaleString()} characters, so Timeline will use ${timelineEngineLabel(clientRoute.effective)} for reliability.`
timelineRouteNotice = clientRoute.upgraded || clientRoute.chunked
? `This input is ${text.length.toLocaleString()} characters, so Timeline will use ${timelineEngineLabel(clientRoute.effective)}${clientRoute.chunked ? ` across about ${clientRoute.chunkCount} chunks` : ''}.`
: '';
}
@@ -1157,7 +1229,30 @@ async function runTool(event) {
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload),
});
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
if (!resp.ok) {
const errData = await resp.json().catch(() => ({}));
const quote = errData.timeline_quote;
if (errData.error?.code === 'timeline_quote_required' && quote) {
const confirmQuote = {
effective: quote.effective_engine,
charCount: quote.input_char_count,
credits: quote.credits || quote.estimated_credits,
chunked: Boolean(quote.chunked_timeline),
chunkCount: quote.timeline_chunk_count || 1,
};
if (window.confirm(timelineQuoteMessage(confirmQuote))) {
pendingTimelineQuote = {
text,
requested: payload.engine,
effective: confirmQuote.effective,
credits: Number(confirmQuote.credits || 0),
};
return runTool(event);
}
throw new Error('Timeline run cancelled before any credits were charged.');
}
throw new Error(errData.error?.message || `HTTP ${resp.status}`);
}
const reader = resp.body.getReader();
const dec = new TextDecoder();
let buf = '', event = '';
@@ -1194,8 +1289,8 @@ async function runTool(event) {
renderResults(data);
renderTrace(data.trace || []);
const routeMeta = data.trace_metadata || {};
const serverRouteNotice = state.activeTool === 'timeline' && routeMeta.auto_upgraded_engine
? ` Used ${timelineEngineLabel(routeMeta.effective_engine)} for ${Number(routeMeta.input_char_count || 0).toLocaleString()} characters.`
const serverRouteNotice = state.activeTool === 'timeline' && (routeMeta.auto_upgraded_engine || routeMeta.chunked_timeline || routeMeta.credits_charged)
? ` Used ${timelineEngineLabel(routeMeta.effective_engine)} for ${Number(routeMeta.input_char_count || 0).toLocaleString()} characters${routeMeta.chunked_timeline ? ` across ${routeMeta.timeline_chunk_count || 1} chunks` : ''}; charged ${routeMeta.credits_charged || routeMeta.estimated_credits || 1} credit(s).`
: '';
els.status.textContent = `Done in ${data.latency_ms || 0} ms.${serverRouteNotice}`;
if (['ask', 'redact', 'timeline'].includes(state.activeTool)) {
@@ -1299,6 +1394,7 @@ async function handleFiles(fileList) {
for (const file of files) {
const formData = new FormData();
formData.append('file', file);
formData.append('tool', state.activeTool);
const resp = await fetch('api/extract.php', {
method: 'POST',
@@ -1318,7 +1414,7 @@ async function handleFiles(fileList) {
const combined = parts[0].text;
const MAX_COMBINED = 128000;
const MAX_COMBINED = state.activeTool === 'timeline' ? 600000 : 128000;
const combinedTruncated = combined.length > MAX_COMBINED;
els.input.value = combinedTruncated ? combined.slice(0, MAX_COMBINED) : combined;
@@ -1328,7 +1424,7 @@ async function handleFiles(fileList) {
els.uploadPrompt.classList.add('is-hidden');
els.uploadFileInfo.classList.remove('is-hidden');
const truncNote = (anyTruncated || combinedTruncated) ? ' — truncated to 128000 char limit' : '';
const truncNote = (anyTruncated || combinedTruncated) ? ` - truncated to ${MAX_COMBINED.toLocaleString()} char limit` : '';
els.status.textContent = `Extracted ${totalChars.toLocaleString()} chars from ${parts[0].filename}${truncNote}.`;
} catch (err) {
els.status.textContent = err.message;
+277 -3
View File
@@ -7,6 +7,7 @@ require_once __DIR__ . '/AzureOpenAiGateway.php';
final class DbnLegalToolsService
{
private const MAX_PASTE_CHARS = 128000;
private const MAX_TIMELINE_CHARS = 600000;
private DbnAzureOpenAiGateway $azure;
@@ -353,7 +354,7 @@ PROMPT;
string $userNotes = '',
?callable $onProgress = null
): array {
$text = $this->requirePasteText($text);
$text = $this->requirePasteText($text, self::MAX_TIMELINE_CHARS);
$engine = in_array($engine, ['nova_lite', 'azure_mini', 'azure_full'], true) ? $engine : 'azure_mini';
$focus = in_array($focus, ['all', 'deadlines', 'hearings', 'cps'], true) ? $focus : 'all';
@@ -382,6 +383,23 @@ PROMPT;
? "\n\nUser-provided context notes (use these to resolve ambiguities, not as source events):\n---\n" . $userNotes . "\n---"
: '';
$charCount = mb_strlen($text, 'UTF-8');
$singlePassLimit = $this->timelineSinglePassLimit($engine);
if ($charCount > $singlePassLimit) {
return $this->timelineChunked(
$text,
$language,
$engine,
$focus,
$confidenceFilter,
$includeRelative,
$includeBackground,
$userNotes,
$onProgress,
$inputDateHintCount
);
}
$prompt = <<<PROMPT
Build a chronological timeline from the pasted text in {$locale}.{$userNotesBlock}
@@ -589,6 +607,261 @@ PROMPT;
];
}
/**
 * Builds a timeline for text that is too long for a single pass by splitting it
 * into overlapping chunks, running timeline() on each chunk, and merging the
 * per-chunk events into one result.
 *
 * A chunk whose extraction throws is counted in `chunk_failures` and, where the
 * deterministic fallback yields events, replaced by fallbackTimelineEvents()
 * output; a failing chunk does not abort the run.
 *
 * NOTE(review): if every chunk fails and the fallback finds nothing, this still
 * returns a normal result with zero events — confirm callers handle that case.
 * NOTE(review): `trace_metadata.chunk_count` carries the number of events while
 * `source_count` / `timeline_chunk_count` carry the number of chunks — confirm
 * this matches what consumers of the metadata expect.
 *
 * @param string        $text               Full input text to split.
 * @param string        $language           Passed to timeline() and echoed in the result.
 * @param string        $engine             'nova_lite', 'azure_mini' or 'azure_full'; selects chunk size and deployment label.
 * @param string        $focus              'deadlines', 'hearings', 'cps' or anything else for all events.
 * @param string        $confidenceFilter   'high_medium' drops low-confidence events from the merged list.
 * @param bool          $includeRelative    When false, only events with date_type 'absolute' are kept.
 * @param bool          $includeBackground  Passed through to timeline() unchanged.
 * @param string        $userNotes          Passed through to timeline() unchanged.
 * @param callable|null $onProgress         Optional callback receiving human-readable progress messages.
 * @param int           $inputDateHintCount Reported as-is in trace_metadata.
 * @return array Result payload with keys tool, language, what_we_found, events, evidence_trail,
 *               what_remains_uncertain, next_practical_step, trace, trace_metadata and disclaimer.
 */
private function timelineChunked(
string $text,
string $language,
string $engine,
string $focus,
string $confidenceFilter,
bool $includeRelative,
bool $includeBackground,
string $userNotes,
?callable $onProgress,
int $inputDateHintCount
): array {
$engineLabel = match ($engine) { 'azure_full' => 'gpt-4o', 'nova_lite' => 'nova-lite', default => 'gpt-4o-mini' };
$chunkSize = $this->timelineChunkSize($engine);
// Consecutive chunks share up to 900 characters of overlap.
$chunks = $this->timelineTextChunks($text, $chunkSize, 900);
$chunkCount = count($chunks);
$events = [];
$chunkFailures = 0;
$usedFallbackExtractor = false;
$onProgress && $onProgress('Splitting timeline into ' . $chunkCount . " chunk(s)\u{2026}");
foreach ($chunks as $idx => $chunk) {
$chunkNo = $idx + 1;
$chunkText = trim((string)$chunk['text']);
// Very short chunks are skipped; they still count towards $chunkCount.
if (mb_strlen($chunkText, 'UTF-8') < 20) {
continue;
}
$onProgress && $onProgress("Extracting timeline chunk {$chunkNo}/{$chunkCount}\u{2026}");
try {
// Per-chunk runs get no progress callback; progress is reported by this loop.
$result = $this->timeline(
$chunkText,
$language,
$engine,
$focus,
$confidenceFilter,
$includeRelative,
$includeBackground,
$userNotes,
null
);
$chunkEvents = is_array($result['events'] ?? null) ? $result['events'] : [];
if (!empty($result['trace_metadata']['used_fallback_extractor'])) {
$usedFallbackExtractor = true;
}
} catch (DbnToolsHttpException $e) {
// HTTP-style failure: fall back to deterministic extraction only when the
// chunk contains date hints.
$chunkFailures++;
$chunkEvents = [];
if ($this->timelineDateHintCount($chunkText) > 0) {
$chunkEvents = $this->fallbackTimelineEvents($chunkText);
if ($chunkEvents) {
$usedFallbackExtractor = true;
}
}
// Only server-side (5xx) failures that produced no events are logged.
if (!$chunkEvents && $e->status >= 500) {
error_log('timeline chunk failed: ' . $e->errorCode . ' ' . $e->getMessage());
}
} catch (Throwable $e) {
// Any other failure: always try the fallback extractor and always log.
$chunkFailures++;
$chunkEvents = $this->fallbackTimelineEvents($chunkText);
if ($chunkEvents) {
$usedFallbackExtractor = true;
}
error_log('timeline chunk throwable: ' . $e->getMessage());
}
// Tag every event with the chunk it came from; source_position is the
// chunk's start offset, not the event's own position in the text.
foreach ($chunkEvents as $event) {
if (!is_array($event)) {
continue;
}
$event['chunk_index'] = $chunkNo;
$event['source_position'] = (int)$chunk['start'];
$events[] = $event;
}
}
// De-duplicate and sort, then apply the confidence and relative-date filters
// to the merged list.
$events = $this->mergeTimelineEvents($events);
if ($confidenceFilter === 'high_medium') {
$events = array_values(array_filter($events, fn($ev) => ($ev['confidence'] ?? 'low') !== 'low'));
}
if (!$includeRelative) {
$events = array_values(array_filter($events, fn($ev) => ($ev['date_type'] ?? 'absolute') === 'absolute'));
}
$focusLabel = match ($focus) {
'deadlines' => 'legal deadlines',
'hearings' => 'court hearings',
'cps' => 'CPS milestones',
default => 'all events',
};
// Summary: date range is computed from events whose date is a full YYYY-MM-DD value.
$isoDates = array_values(array_filter(array_map(fn($ev) => (string)($ev['date'] ?? ''), $events), fn($d) => preg_match('/^\d{4}-\d{2}-\d{2}$/', $d)));
sort($isoDates);
$range = $isoDates ? (' from ' . $isoDates[0] . ' to ' . $isoDates[count($isoDates) - 1]) : '';
$actors = array_values(array_unique(array_filter(array_map(fn($ev) => (string)($ev['actor'] ?? ''), $events), fn($a) => $a !== '' && $a !== 'unknown')));
$summary = count($events) . " event(s) extracted from {$chunkCount} chunk(s){$range}.";
if ($actors) {
// At most eight distinct actors are listed.
$summary .= ' Main actors: ' . implode(', ', array_slice($actors, 0, 8)) . '.';
}
$uncertain = [];
if ($chunkFailures > 0) {
$uncertain[] = "{$chunkFailures} chunk(s) needed fallback extraction or could not be fully parsed; review medium-confidence entries against the source.";
}
if ($usedFallbackExtractor) {
$uncertain[] = 'Deterministic fallback extraction was used for at least one chunk.';
}
$trace = [
$this->trace('Query interpretation', "Extract {$focusLabel} from pasted text. Engine: {$engineLabel}. Chunked timeline mode; without saving the text or output.", 'complete'),
$this->trace('Search tools used', 'No external corpus search; source is the user-pasted text split into local chunks.', 'complete'),
$this->trace('Evidence found', count($events) . " event(s) identified across {$chunkCount} chunk(s).", count($events) ? 'complete' : 'warning'),
$this->trace('Citation confidence', 'Confidence is per event and based only on the pasted text. Overlapping chunks were de-duplicated.', 'complete'),
$this->trace('Uncertainty / missing evidence', $this->uncertaintySummary($uncertain), $uncertain ? 'warning' : 'complete'),
$this->trace('Next practical step', 'Review the merged timeline against the original uploaded document, especially duplicated or medium-confidence entries.', 'complete'),
];
return [
'tool' => 'timeline',
'language' => $language,
'what_we_found' => $summary,
'events' => $events,
'evidence_trail' => [['title' => 'Pasted text', 'excerpt' => 'Processed in-memory only; split into local timeline chunks; not stored.']],
'what_remains_uncertain' => $uncertain,
'next_practical_step' => 'Review the merged timeline against the original uploaded document, especially duplicated or medium-confidence entries.',
'trace' => $trace,
'trace_metadata' => [
'chunk_count' => count($events),
'source_count' => $chunkCount,
'deployment' => $engineLabel,
'input_date_hint_count' => $inputDateHintCount,
'used_fallback_extractor' => $usedFallbackExtractor,
'chunked_timeline' => true,
'timeline_chunk_count' => $chunkCount,
'chunk_failures' => $chunkFailures,
],
'disclaimer' => dbnToolsDisclaimer($language),
];
}
/**
 * Largest input, in characters, that the given engine handles in one pass;
 * longer inputs are routed to chunked processing by timeline().
 *
 * @param string $engine Engine id; unknown ids use the largest limit.
 */
private function timelineSinglePassLimit(string $engine): int
{
    $limits = [
        'nova_lite' => 25000,
        'azure_mini' => 55000,
    ];

    return $limits[$engine] ?? 128000;
}
/**
 * Target chunk size, in characters, used when a timeline input is split for
 * the given engine.
 *
 * @param string $engine Engine id; unknown ids use the largest chunk size.
 */
private function timelineChunkSize(string $engine): int
{
    $sizes = [
        'nova_lite' => 10000,
        'azure_mini' => 16000,
    ];

    return $sizes[$engine] ?? 30000;
}
/**
 * Splits text into consecutive, overlapping chunks, preferring to cut at a
 * paragraph break ("\n\n") or, failing that, a line break.
 *
 * A break is only used when it lies past roughly the middle of the window, so
 * chunks never become much shorter than half of $chunkSize because of an early
 * newline. Offsets and lengths are measured in UTF-8 characters.
 *
 * @param string $text      Text to split.
 * @param int    $chunkSize Maximum characters per chunk; values below 1 are treated as 1.
 * @param int    $overlap   Characters of the previous chunk repeated at the start of the
 *                          next one; negative values are treated as 0.
 * @return array<int, array{start: int, text: string}> Chunks in document order. `start` is the
 *         offset at which the chunk window begins; `text` is trimmed, and windows that are
 *         empty after trimming are omitted.
 */
private function timelineTextChunks(string $text, int $chunkSize, int $overlap): array
{
    // With a non-positive chunk size the cursor would never advance and the loop
    // below would not terminate; a negative overlap would silently skip text.
    $chunkSize = max(1, $chunkSize);
    $overlap = max(0, $overlap);

    $len = mb_strlen($text, 'UTF-8');
    $chunks = [];
    $start = 0;
    while ($start < $len) {
        $targetEnd = min($len, $start + $chunkSize);
        $window = mb_substr($text, $start, $targetEnd - $start, 'UTF-8');
        $end = $targetEnd;
        if ($targetEnd < $len) {
            // Prefer the last paragraph break; fall back to the last line break when
            // there is none or it sits too early in the window.
            $breakAt = mb_strrpos($window, "\n\n", 0, 'UTF-8');
            if ($breakAt === false || $breakAt < (int)($chunkSize * 0.55)) {
                $breakAt = mb_strrpos($window, "\n", 0, 'UTF-8');
            }
            if ($breakAt !== false && $breakAt > (int)($chunkSize * 0.45)) {
                $end = $start + $breakAt;
            }
        }
        $chunkText = trim(mb_substr($text, $start, max(1, $end - $start), 'UTF-8'));
        if ($chunkText !== '') {
            $chunks[] = ['start' => $start, 'text' => $chunkText];
        }
        if ($end >= $len) {
            break;
        }
        // Step back by the overlap, but always make forward progress.
        $nextStart = max(0, $end - $overlap);
        if ($nextStart <= $start) {
            $nextStart = $end;
        }
        $start = $nextStart;
    }
    return $chunks;
}
/**
 * De-duplicates timeline events and returns them in chronological order.
 *
 * Events with the same signature (see timelineEventSignature()) are collapsed
 * into one. The copy with the higher confidence rank is kept; the other copy's
 * source excerpt is appended to the kept excerpt (separated by " / ") as long
 * as the kept excerpt is shorter than 260 characters and does not already
 * contain it.
 *
 * Sorting is by date, then time. Dates that are not a full YYYY-MM-DD value
 * sort after all dated events.
 *
 * @param array $events Event arrays; non-array entries are ignored.
 * @return array List of merged events, re-indexed from 0.
 */
private function mergeTimelineEvents(array $events): array
{
    $merged = [];
    foreach ($events as $event) {
        if (!is_array($event)) {
            continue;
        }
        $key = $this->timelineEventSignature($event);
        if (!isset($merged[$key])) {
            $merged[$key] = $event;
            continue;
        }

        // Decide which copy survives; the loser only contributes its excerpt.
        $kept = $merged[$key];
        $extraExcerpt = (string)($event['source_excerpt'] ?? '');
        $eventRank = $this->timelineConfidenceRank((string)($event['confidence'] ?? 'medium'));
        $keptRank = $this->timelineConfidenceRank((string)($kept['confidence'] ?? 'medium'));
        if ($eventRank > $keptRank) {
            $extraExcerpt = (string)($kept['source_excerpt'] ?? '');
            $merged[$key] = $event;
        }

        $keptExcerpt = (string)($merged[$key]['source_excerpt'] ?? '');
        // str_contains() also covers equality, and stops an excerpt that was
        // already appended by an earlier duplicate from being appended again.
        if (
            $extraExcerpt !== ''
            && $keptExcerpt !== ''
            && !str_contains($keptExcerpt, $extraExcerpt)
            && mb_strlen($keptExcerpt, 'UTF-8') < 260
        ) {
            $merged[$key]['source_excerpt'] = $keptExcerpt . ' / ' . $extraExcerpt;
        }
    }

    $events = array_values($merged);
    // Anything that is not a full ISO date gets a sentinel that sorts last.
    $sortableDate = static function (array $event): string {
        $date = (string)($event['date'] ?? '');
        return preg_match('/^\d{4}-\d{2}-\d{2}$/', $date) ? $date : '9999-99-99';
    };
    usort($events, static function (array $a, array $b) use ($sortableDate): int {
        $cmp = strcmp($sortableDate($a), $sortableDate($b));
        if ($cmp !== 0) {
            return $cmp;
        }
        return strcmp((string)($a['time'] ?? ''), (string)($b['time'] ?? ''));
    });
    return $events;
}
/**
 * Builds the key used to recognise the same event reported by different chunks.
 *
 * The key is "date|time|actor|description": each part is trimmed and
 * lower-cased, a missing actor becomes "unknown", and the description has every
 * run of non-letter/non-digit characters collapsed to a single space before
 * being cut to its first 96 characters.
 *
 * @param array $event Event with optional 'date', 'time', 'actor' and 'event' keys.
 */
private function timelineEventSignature(array $event): string
{
    $normalize = static fn ($value): string => mb_strtolower(trim((string)$value), 'UTF-8');

    $description = $normalize($event['event'] ?? '');
    $description = (string)preg_replace('/[^\p{L}\p{N}]+/u', ' ', $description);
    $description = trim((string)preg_replace('/\s+/u', ' ', $description));

    return implode('|', [
        $normalize($event['date'] ?? ''),
        $normalize($event['time'] ?? ''),
        $normalize($event['actor'] ?? 'unknown'),
        mb_substr($description, 0, 96, 'UTF-8'),
    ]);
}
/**
 * Maps a confidence label to a comparable rank: 'high' is 3, 'medium' is 2,
 * and every other value is 1.
 */
private function timelineConfidenceRank(string $confidence): int
{
    if ($confidence === 'high') {
        return 3;
    }
    if ($confidence === 'medium') {
        return 2;
    }

    return 1;
}
private function timelineDateHintCount(string $text): int
{
preg_match_all('/(?<!\d)\d{1,2}\.\d{1,2}\.(?:\d{2,4})?(?!\d)/u', $text, $numeric);
@@ -1106,13 +1379,14 @@ PROMPT;
return array_slice(array_values(array_unique($terms)), 0, 6);
}
private function requirePasteText(string $text): string
private function requirePasteText(string $text, ?int $maxChars = null): string
{
$text = trim($text);
if (mb_strlen($text, 'UTF-8') < 20) {
dbnToolsAbort('Paste at least 20 characters of text.', 422, 'text_too_short');
}
if (mb_strlen($text, 'UTF-8') > self::MAX_PASTE_CHARS) {
$maxChars ??= self::MAX_PASTE_CHARS;
if (mb_strlen($text, 'UTF-8') > $maxChars) {
dbnToolsAbort('Pasted text is too long for the MVP limit.', 422, 'text_too_long');
}
return $text;
+108 -8
View File
@@ -1,6 +1,7 @@
<?php
declare(strict_types=1);
require_once __DIR__ . '/bootstrap.php';
require_once __DIR__ . '/FreeTier.php';
/**
@@ -14,6 +15,9 @@ final class ToolModels
public const TIMELINE_QUICK_CHAR_LIMIT = 25000;
public const TIMELINE_STANDARD_CHAR_LIMIT = 55000;
public const TIMELINE_DEEP_CHAR_LIMIT = 128000;
public const TIMELINE_QUICK_MAX_CHARS = 100000;
public const TIMELINE_STANDARD_MAX_CHARS = 300000;
public const TIMELINE_DEEP_MAX_CHARS = 600000;
public static function engineForUser(int $userId, string $requestedEngine): string
{
@@ -38,22 +42,32 @@ final class ToolModels
$tierEngine = self::engineForUser($userId, $requestedEngine);
$charCount = mb_strlen($text, 'UTF-8');
if ($charCount > self::TIMELINE_DEEP_CHAR_LIMIT) {
if ($charCount > self::TIMELINE_DEEP_MAX_CHARS) {
throw new DbnToolsHttpException(
'This timeline input is too large after selected documents or My Case context were added. Split the file or use fewer selected documents.',
413,
'timeline_input_too_large',
['input_char_count' => $charCount, 'max_chars' => self::TIMELINE_DEEP_CHAR_LIMIT]
['input_char_count' => $charCount, 'max_chars' => self::TIMELINE_DEEP_MAX_CHARS]
);
}
$effectiveEngine = $tierEngine;
if ($charCount > self::TIMELINE_STANDARD_CHAR_LIMIT) {
$effectiveEngine = 'azure_full';
} elseif ($charCount > self::TIMELINE_QUICK_CHAR_LIMIT && $effectiveEngine === 'nova_lite') {
$effectiveEngine = 'azure_mini';
if ($charCount > self::timelineEngineMaxChars($effectiveEngine)) {
$effectiveEngine = $charCount <= self::TIMELINE_STANDARD_MAX_CHARS ? 'azure_mini' : 'azure_full';
} elseif ($charCount > self::TIMELINE_STANDARD_CHAR_LIMIT && $effectiveEngine === 'nova_lite') {
$effectiveEngine = $charCount <= self::TIMELINE_QUICK_MAX_CHARS ? 'nova_lite' : 'azure_mini';
}
if ($charCount > self::timelineEngineMaxChars($effectiveEngine)) {
$effectiveEngine = 'azure_full';
}
$credits = self::timelineCreditsForSize($effectiveEngine, $charCount);
$baseCredits = self::timelineAdvertisedCredits($requestedEngine);
$requiresConfirmation = $credits > $baseCredits
|| self::timelineEngineRank($effectiveEngine) > self::timelineEngineRank($requestedEngine);
$chunked = $charCount > self::timelineEngineLimit($effectiveEngine);
return [
'requested_engine' => $requestedEngine,
'tier_engine' => $tierEngine,
@@ -61,13 +75,48 @@ final class ToolModels
'auto_upgraded_engine' => $effectiveEngine !== $tierEngine,
'input_char_count' => $charCount,
'engine_limit_chars' => self::timelineEngineLimit($effectiveEngine),
'credits' => self::timelineCredits($effectiveEngine),
'max_char_limit' => self::timelineEngineMaxChars($effectiveEngine),
'chunked_timeline' => $chunked,
'timeline_chunk_count' => $chunked ? (int)ceil($charCount / self::timelineChunkSize($effectiveEngine)) : 1,
'estimated_credits' => $credits,
'credits' => $credits,
'base_credits' => $baseCredits,
'requires_confirmation' => $requiresConfirmation,
];
}
/**
 * Ensures the caller has accepted the quoted engine and price when the route
 * requires confirmation.
 *
 * Returns silently when no confirmation is required, or when the request
 * carries `accepted_timeline_quote` together with `accepted_credits` and
 * `accepted_effective_engine` values equal to the route's.
 *
 * @param array $route Route data; reads requires_confirmation, credits, effective_engine,
 *                     input_char_count and timeline_chunk_count.
 * @param array $input Decoded request body.
 * @throws DbnToolsHttpException 409 'timeline_quote_required', with the quote
 *         attached under 'timeline_quote', when the quote has not been accepted.
 */
public static function assertTimelineQuoteAccepted(array $route, array $input): void
{
    if (empty($route['requires_confirmation'])) {
        return;
    }

    $rejected = empty($input['accepted_timeline_quote'])
        || (int)($input['accepted_credits'] ?? 0) !== (int)$route['credits']
        || (string)($input['accepted_effective_engine'] ?? '') !== (string)$route['effective_engine'];
    if (!$rejected) {
        return;
    }

    $engineLabel = self::timelineEngineLabel((string)$route['effective_engine']);
    $quoteMessage = sprintf(
        'Timeline will use %s for %s characters across about %d chunk(s), costing %d credit(s).',
        $engineLabel,
        number_format((int)$route['input_char_count']),
        (int)$route['timeline_chunk_count'],
        (int)$route['credits']
    );
    $quote = array_merge($route, [
        'effective_engine_label' => $engineLabel,
        'message' => $quoteMessage,
    ]);

    throw new DbnToolsHttpException(
        'This timeline is larger than the selected engine can handle at the advertised price. Confirm the quoted engine and credits before running.',
        409,
        'timeline_quote_required',
        ['timeline_quote' => $quote]
    );
}
public static function timelineCredits(string $engine): int
{
return $engine === 'azure_full' ? 2 : 1;
return self::timelineAdvertisedCredits($engine);
}
public static function timelineEngineLimit(string $engine): int
@@ -78,4 +127,55 @@ final class ToolModels
default => self::TIMELINE_DEEP_CHAR_LIMIT,
};
}
/**
 * Chunk size, in characters, used to estimate how many chunks a timeline
 * input will be split into for the given engine.
 *
 * @param string $engine Engine id; unknown ids use the largest chunk size.
 */
public static function timelineChunkSize(string $engine): int
{
    $sizes = [
        'nova_lite' => 10000,
        'azure_mini' => 16000,
    ];

    return $sizes[$engine] ?? 30000;
}
/**
 * Largest input, in characters, that the given engine is used for.
 *
 * @param string $engine Engine id; unknown ids use the Deep maximum.
 */
public static function timelineEngineMaxChars(string $engine): int
{
    if ($engine === 'nova_lite') {
        return self::TIMELINE_QUICK_MAX_CHARS;
    }
    if ($engine === 'azure_mini') {
        return self::TIMELINE_STANDARD_MAX_CHARS;
    }

    return self::TIMELINE_DEEP_MAX_CHARS;
}
/**
 * Credit cost of a timeline run for the given engine and input size.
 *
 * Inputs up to the engine's single-pass limit cost the lowest price; larger
 * inputs move up one or two price steps depending on size.
 *
 * @param string $engine    Engine id; unknown ids are priced like 'azure_full'.
 * @param int    $charCount Input size in characters.
 */
public static function timelineCreditsForSize(string $engine, int $charCount): int
{
    if ($engine === 'nova_lite') {
        return $charCount <= self::TIMELINE_QUICK_CHAR_LIMIT ? 1 : 2;
    }

    if ($engine === 'azure_mini') {
        if ($charCount <= self::TIMELINE_STANDARD_CHAR_LIMIT) {
            return 1;
        }
        return $charCount <= 180000 ? 2 : 3;
    }

    if ($charCount <= self::TIMELINE_DEEP_CHAR_LIMIT) {
        return 2;
    }
    return $charCount <= 350000 ? 4 : 6;
}
/**
 * Base price of a timeline run for the given engine: 2 credits for
 * 'azure_full', 1 credit for every other engine.
 */
public static function timelineAdvertisedCredits(string $engine): int
{
    if ($engine === 'azure_full') {
        return 2;
    }

    return 1;
}
/**
 * User-facing name of an engine: 'Quick' for nova_lite, 'Deep' for azure_full,
 * and 'Standard' for anything else.
 */
public static function timelineEngineLabel(string $engine): string
{
    $labels = [
        'nova_lite' => 'Quick',
        'azure_full' => 'Deep',
    ];

    return $labels[$engine] ?? 'Standard';
}
/**
 * Orders engines from smallest to largest so two engines can be compared:
 * nova_lite is 1, azure_mini is 2, azure_full is 3, and unknown ids are 0.
 */
private static function timelineEngineRank(string $engine): int
{
    $ranks = [
        'nova_lite' => 1,
        'azure_mini' => 2,
        'azure_full' => 3,
    ];

    return $ranks[$engine] ?? 0;
}
}
+6 -3
View File
@@ -890,9 +890,10 @@ function dbnToolsExcerpt(string $text, int $limit = 520): string
const DBN_TOOLS_EXTRACT_MAX_BYTES = 8 * 1024 * 1024;
const DBN_TOOLS_EXTRACT_TEXT_LIMIT = 128000;
const DBN_TOOLS_TIMELINE_EXTRACT_TEXT_LIMIT = 600000;
const DBN_TOOLS_EXTRACT_ALLOWED_EXTS = ['txt', 'pdf', 'docx'];
function dbnToolsExtractUploadedFile(array $file): array
function dbnToolsExtractUploadedFile(array $file, int $textLimit = DBN_TOOLS_EXTRACT_TEXT_LIMIT): array
{
$errCode = (int)($file['error'] ?? UPLOAD_ERR_NO_FILE);
if ($errCode !== UPLOAD_ERR_OK) {
@@ -936,8 +937,9 @@ function dbnToolsExtractUploadedFile(array $file): array
}
$truncated = false;
if (mb_strlen($text, 'UTF-8') > DBN_TOOLS_EXTRACT_TEXT_LIMIT) {
$text = mb_substr($text, 0, DBN_TOOLS_EXTRACT_TEXT_LIMIT, 'UTF-8');
$textLimit = max(1000, min($textLimit, DBN_TOOLS_TIMELINE_EXTRACT_TEXT_LIMIT));
if (mb_strlen($text, 'UTF-8') > $textLimit) {
$text = mb_substr($text, 0, $textLimit, 'UTF-8');
$truncated = true;
}
@@ -947,6 +949,7 @@ function dbnToolsExtractUploadedFile(array $file): array
'filename' => $originalName,
'chars' => mb_strlen($text, 'UTF-8'),
'truncated' => $truncated,
'limit' => $textLimit,
];
}