feat: Corpus Intelligence page + timeline background events
Adds /corpus.php — a data transparency page showing what powers the legal tools: 9 coverage categories with live doc counts, a full sources table pulled from the corpus DB, the AI stack (LLMs, Whisper, Qdrant, Azure AI Search, embeddings, chunking), and a pipeline flow diagram. Stats are live via a new /api/corpus-stats.php endpoint (queries dobetter_rag + bnl_admin). The reasoning sidebar is repurposed as a Corpus health panel on this page. Also ships the in-progress timeline background events toggle: API and UI wired together via include_background param. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,68 @@
|
||||
<?php

/**
 * Corpus statistics endpoint (GET, authenticated).
 *
 * Reads aggregate figures for one corpus from the RAG database and the list
 * of configured sources from the admin database, then emits them through
 * dbnToolsRespond() in this shape:
 *
 *   ok      => true
 *   stats   => total_chunks, total_docs, active_sources, last_updated, by_category
 *   sources => every corpus_sources row for the corpus (active and inactive)
 *
 * On any failure the details are written to the server error log and the
 * client receives only a generic message with error code
 * 'corpus_stats_error' and HTTP status 500.
 */

declare(strict_types=1);

require_once __DIR__ . '/../includes/bootstrap.php';

dbnToolsRequireMethod('GET');
dbnToolsRequireAuth();

try {
    $bnlDb = dbnToolsDb();
    $ragDb = dbnToolsRagDb();

    // The do-better-legal corpus. Kept in one place and bound as a query
    // parameter so every statement below is guaranteed to target the same corpus.
    $corpusId = 1;

    /*
     * Prepares $sql on $db, binds :corpus_id as an integer, executes the
     * statement and returns it ready for fetching.
     * $db is presumably a PDO instance; it is left untyped so that any
     * PDO-compatible wrapper returned by the bootstrap helpers keeps working.
     */
    $runForCorpus = static function ($db, string $sql) use ($corpusId) {
        $stmt = $db->prepare($sql);
        $stmt->bindValue(':corpus_id', $corpusId, PDO::PARAM_INT);
        $stmt->execute();

        return $stmt;
    };

    // Total documents in the corpus.
    $totalDocs = (int) $runForCorpus(
        $ragDb,
        'SELECT COUNT(*) FROM documents WHERE corpus_id = :corpus_id',
    )->fetchColumn();

    // Total chunks belonging to documents of the corpus.
    $totalChunks = (int) $runForCorpus(
        $ragDb,
        <<<'SQL'
        SELECT COUNT(*) FROM chunks c
        JOIN documents d ON c.document_id = d.id
        WHERE d.corpus_id = :corpus_id
        SQL,
    )->fetchColumn();

    // Document counts per category, largest first; uncategorised documents are skipped.
    $byCategory = $runForCorpus(
        $ragDb,
        <<<'SQL'
        SELECT category, COUNT(*) AS doc_count
        FROM documents
        WHERE corpus_id = :corpus_id AND category IS NOT NULL AND category != ''
        GROUP BY category
        ORDER BY doc_count DESC
        SQL,
    )->fetchAll(PDO::FETCH_ASSOC);

    // Most recent document update. MAX() yields NULL for an empty corpus and
    // fetchColumn() yields false when there is no row; both are reported as null.
    $lastUpdated = $runForCorpus(
        $ragDb,
        'SELECT MAX(updated_at) FROM documents WHERE corpus_id = :corpus_id',
    )->fetchColumn() ?: null;

    // Every configured source for the corpus (inactive ones included) from the admin DB.
    $sources = $runForCorpus(
        $bnlDb,
        <<<'SQL'
        SELECT name, url, category, authority_type, language, schedule, is_active, scraper_class
        FROM corpus_sources
        WHERE corpus_id = :corpus_id
        ORDER BY category, name
        SQL,
    )->fetchAll(PDO::FETCH_ASSOC);

    // A source counts as active when its is_active column is truthy.
    $activeSources = count(
        array_filter($sources, static fn (array $source): bool => !empty($source['is_active'])),
    );

    dbnToolsRespond([
        'ok' => true,
        'stats' => [
            'total_chunks' => $totalChunks,
            'total_docs' => $totalDocs,
            'active_sources' => $activeSources,
            'last_updated' => $lastUpdated,
            'by_category' => $byCategory,
        ],
        'sources' => $sources,
    ]);
} catch (Throwable $e) {
    // Exception text from the database layer can reveal SQL, schema or
    // connection details, so it goes to the server log only and never to the client.
    error_log(sprintf(
        'corpus-stats: %s: %s in %s:%d',
        get_class($e),
        $e->getMessage(),
        $e->getFile(),
        $e->getLine(),
    ));

    dbnToolsError('Could not load corpus statistics.', 500, 'corpus_stats_error');
}
|
||||
Reference in New Issue
Block a user