Add corpus explorer: search bar (Hybrid/BM25/Vector), category drill-down, source row expand
- api/corpus-search.php: new endpoint with three search modes (hybrid RAG, BM25 keyword, Qdrant vector)
- api/corpus-documents.php: paginated document browser by category or source name
- corpus.php: search bar with mode+language pills, Browse docs button on each category card with drill-down panel, expand toggle on each source row showing doc count and scraper class
- tools.css: all new corpus interactive styles appended

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,92 @@
|
||||
<?php

declare(strict_types=1);

/**
 * Paginated document browser for corpus 1.
 *
 * Query parameters (all optional):
 *   - category     Exact match against documents.category.
 *   - source_name  Name of a row in corpus_sources; documents are matched by
 *                  the host part of that source's URL appearing in source_url.
 *   - offset       Zero-based row offset (clamped to >= 0, default 0).
 *   - limit        Page size (clamped to 1..50, default 20).
 *
 * Responds with: ok, documents (each row carries an integer chunk_count),
 * total (row count for the same filter, ignoring pagination), offset, limit,
 * and the effective filter values.
 */

require_once __DIR__ . '/../includes/bootstrap.php';

dbnToolsRequireMethod('GET');
dbnToolsRequireAuth();

try {
    $ragDb = dbnToolsRagDb();
    $bnlDb = dbnToolsDb();

    // Read an optional string filter: trim first, then treat empty as "not
    // given". Non-string input (e.g. ?category[]=x) is ignored instead of
    // being cast to the literal string "Array".
    $readFilter = static function (string $key): ?string {
        $raw = $_GET[$key] ?? null;
        if (!is_string($raw)) {
            return null;
        }
        $value = trim($raw);

        return $value !== '' ? $value : null;
    };

    $category   = $readFilter('category');
    $sourceName = $readFilter('source_name');
    $offset     = max(0, (int)($_GET['offset'] ?? 0));
    $limit      = max(1, min(50, (int)($_GET['limit'] ?? 20)));

    // Build WHERE clause. Only fixed SQL fragments go into $where; every
    // user-supplied value is passed through $params.
    $where  = ["d.corpus_id = 1", "d.status = 'ready'"];
    $params = [];

    if ($category !== null) {
        $where[]  = 'd.category = ?';
        $params[] = $category;
    }

    if ($sourceName !== null) {
        // corpus_sources is reached through a different connection than
        // documents, so a JOIN is not possible. Instead, fetch the source's
        // URL first and match documents whose source_url contains its host.
        $srcStmt = $bnlDb->prepare(
            "SELECT url FROM corpus_sources WHERE corpus_id = 1 AND name = ? LIMIT 1"
        );
        $srcStmt->execute([$sourceName]);
        $sourceUrl = $srcStmt->fetchColumn();

        $host = is_string($sourceUrl) && $sourceUrl !== ''
            ? parse_url($sourceUrl, PHP_URL_HOST)
            : null;

        if (is_string($host) && $host !== '') {
            // Escape LIKE wildcards so a host such as "my_site.example" is
            // matched literally. '!' is used as the escape character to avoid
            // backslash-quoting differences between SQL dialects.
            $escapedHost = strtr($host, ['!' => '!!', '%' => '!%', '_' => '!_']);
            $where[]  = "d.source_url LIKE ? ESCAPE '!'";
            $params[] = '%' . $escapedHost . '%';
        } else {
            // Unknown source, or a source without a usable URL: the requested
            // filter cannot match anything. Return an empty page rather than
            // silently dropping the filter and listing every document.
            $where[] = '1 = 0';
        }
    }

    $whereStr = implode(' AND ', $where);

    // Total count for the same filter, ignoring pagination.
    $countStmt = $ragDb->prepare("SELECT COUNT(*) FROM documents d WHERE $whereStr");
    $countStmt->execute($params);
    $total = (int)$countStmt->fetchColumn();

    // Paginated rows. d.id is a secondary sort key so that rows sharing the
    // same updated_at keep a stable order across pages.
    $dataStmt = $ragDb->prepare(
        "SELECT d.id, d.title, d.category, d.source_url, d.language, d.updated_at,
                COUNT(c.id) AS chunk_count
           FROM documents d
           LEFT JOIN chunks c ON c.document_id = d.id
          WHERE $whereStr
          GROUP BY d.id
          ORDER BY d.updated_at DESC, d.id DESC
          LIMIT ? OFFSET ?"
    );

    // Bind explicitly: execute(array) binds everything as a string, and a
    // quoted LIMIT/OFFSET is rejected by MySQL when prepares are emulated.
    $position = 1;
    foreach ($params as $value) {
        $dataStmt->bindValue($position++, $value, PDO::PARAM_STR);
    }
    $dataStmt->bindValue($position++, $limit, PDO::PARAM_INT);
    $dataStmt->bindValue($position, $offset, PDO::PARAM_INT);
    $dataStmt->execute();

    // Normalise chunk_count to int (drivers may return numeric strings).
    $documents = array_map(
        static function (array $row): array {
            $row['chunk_count'] = (int)$row['chunk_count'];

            return $row;
        },
        $dataStmt->fetchAll(PDO::FETCH_ASSOC),
    );

    dbnToolsRespond([
        'ok'        => true,
        'documents' => $documents,
        'total'     => $total,
        'offset'    => $offset,
        'limit'     => $limit,
        'filter'    => [
            'category'    => $category,
            'source_name' => $sourceName,
        ],
    ]);
} catch (Throwable $e) {
    // NOTE(review): the raw exception message is returned to the client and
    // may expose SQL/driver details — confirm this is acceptable for this
    // authenticated tool, or replace with a generic message plus server log.
    dbnToolsError('Could not load documents: ' . $e->getMessage(), 500, 'documents_error');
}
|
||||
Reference in New Issue
Block a user