feat: Corpus Intelligence page + timeline background events

Adds /corpus.php — a data transparency page showing what powers the
legal tools: 9 coverage categories with live doc counts, a full
sources table pulled from the corpus DB, the AI stack (LLMs, Whisper,
Qdrant, Azure AI Search, embeddings, chunking), and a pipeline flow
diagram. Stats are live via a new /api/corpus-stats.php endpoint
(queries dobetter_rag + bnl_admin). The reasoning sidebar is repurposed
as a Corpus health panel on this page.

Also ships the in-progress timeline background events toggle:
API and UI wired together via include_background param.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-15 11:31:24 +02:00
parent 3196c33ebb
commit d2f9831472
7 changed files with 905 additions and 2 deletions
+68
View File
@@ -0,0 +1,68 @@
<?php

declare(strict_types=1);

/**
 * Corpus statistics endpoint (GET, authenticated).
 *
 * Responds with a payload of the form:
 *   {
 *     ok: true,
 *     stats: { total_chunks, total_docs, active_sources, last_updated, by_category },
 *     sources: [ { name, url, category, authority_type, language, schedule,
 *                  is_active, scraper_class }, ... ]
 *   }
 *
 * Document/chunk figures are read from the connection returned by
 * dbnToolsRagDb(); the source list is read from the connection returned by
 * dbnToolsDb(). Any failure while querying is reported as HTTP 500 with the
 * error code "corpus_stats_error".
 */

require_once __DIR__ . '/../includes/bootstrap.php';

dbnToolsRequireMethod('GET');
dbnToolsRequireAuth();

// Every query below is scoped to this single corpus. Keeping the id in one
// place (and binding it) avoids five copies of the same literal in SQL text.
$corpusId = 1;
$scope = ['corpus_id' => $corpusId];

try {
    $bnlDb = dbnToolsDb();
    $ragDb = dbnToolsRagDb();

    // Total documents in the corpus.
    $stmt = $ragDb->prepare('SELECT COUNT(*) FROM documents WHERE corpus_id = :corpus_id');
    $stmt->execute($scope);
    $totalDocs = (int)$stmt->fetchColumn();

    // Total chunks belonging to documents of the corpus.
    $stmt = $ragDb->prepare(
        'SELECT COUNT(*) FROM chunks c
         JOIN documents d ON c.document_id = d.id
         WHERE d.corpus_id = :corpus_id'
    );
    $stmt->execute($scope);
    $totalChunks = (int)$stmt->fetchColumn();

    // Document counts per category, largest first. Rows with a NULL or empty
    // category are excluded from the breakdown.
    $stmt = $ragDb->prepare(
        'SELECT category, COUNT(*) AS doc_count
         FROM documents
         WHERE corpus_id = :corpus_id AND category IS NOT NULL AND category != \'\'
         GROUP BY category
         ORDER BY doc_count DESC'
    );
    $stmt->execute($scope);
    $byCategory = $stmt->fetchAll(PDO::FETCH_ASSOC);

    // Most recent document update. fetchColumn() yields false/NULL when there
    // is nothing to report; normalise any falsy value to null for the payload.
    $stmt = $ragDb->prepare('SELECT MAX(updated_at) FROM documents WHERE corpus_id = :corpus_id');
    $stmt->execute($scope);
    $lastUpdated = $stmt->fetchColumn() ?: null;

    // All configured sources for the corpus (active and inactive). The full
    // list is returned to the client; the active count is derived from it.
    $stmt = $bnlDb->prepare(
        'SELECT name, url, category, authority_type, language, schedule, is_active, scraper_class
         FROM corpus_sources
         WHERE corpus_id = :corpus_id
         ORDER BY category, name'
    );
    $stmt->execute($scope);
    $sources = $stmt->fetchAll(PDO::FETCH_ASSOC);

    $activeSources = count(array_filter(
        $sources,
        static fn (array $source): bool => !empty($source['is_active']),
    ));

    dbnToolsRespond([
        'ok' => true,
        'stats' => [
            'total_chunks' => $totalChunks,
            'total_docs' => $totalDocs,
            'active_sources' => $activeSources,
            'last_updated' => $lastUpdated,
            'by_category' => $byCategory,
        ],
        'sources' => $sources,
    ]);
} catch (Throwable $e) {
    // Keep the underlying error (which may contain SQL text, schema names or
    // connection details) in the server log only; the client receives a
    // generic message with the same status and error code as before.
    error_log('corpus-stats: ' . $e);
    dbnToolsError('Could not load corpus statistics.', 500, 'corpus_stats_error');
}
+3 -2
View File
@@ -22,7 +22,8 @@ dbnToolsWithTelemetry('timeline', $language, function () use ($input, $language)
$confidenceFilter = (string)($input['confidence_filter'] ?? '') === 'high_medium'
? 'high_medium' : 'all';
$includeRelative = ($input['include_relative'] ?? true) !== false;
$includeRelative = ($input['include_relative'] ?? true) !== false;
$includeBackground = ($input['include_background'] ?? true) !== false;
return (new DbnLegalToolsService())->timeline($text, $language, $engine, $focus, $confidenceFilter, $includeRelative);
return (new DbnLegalToolsService())->timeline($text, $language, $engine, $focus, $confidenceFilter, $includeRelative, $includeBackground);
});