d5e61d656a
MariaDB rejects ? placeholders for LIMIT/OFFSET when emulate_prepares=false. Interpolate $limit and $offset as ints directly into SQL strings in both corpus-documents.php and corpus-search.php BM25 paths. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
90 lines
3.0 KiB
PHP
90 lines
3.0 KiB
PHP
<?php

declare(strict_types=1);

/*
 * GET endpoint: paginated listing of documents with corpus_id = 1 and
 * status = 'ready', each with the number of chunks attached to it.
 *
 * Query parameters (all optional):
 *   category     exact match on documents.category
 *   source_name  name of a row in corpus_sources; documents are matched by the
 *                host part of that source's URL appearing in source_url
 *   offset       rows to skip, clamped to >= 0 (default 0)
 *   limit        page size, clamped to 1..50 (default 20)
 *
 * Responds through dbnToolsRespond() with: ok, documents, total, offset,
 * limit and the effective filter. Any Throwable is reported through
 * dbnToolsError() with HTTP status 500 and code 'documents_error'.
 */

require_once __DIR__ . '/../includes/bootstrap.php';

dbnToolsRequireMethod('GET');
dbnToolsRequireAuth();

try {
    $ragDb = dbnToolsRagDb();
    $bnlDb = dbnToolsDb();

    /**
     * Read a query-string value as a trimmed, non-empty string.
     * Missing, array-valued (e.g. ?category[]=x) and blank values all yield null,
     * so they never reach a (string) cast or end up as an empty-string filter.
     *
     * @return string|null
     */
    $queryString = static function (string $key) {
        $raw = $_GET[$key] ?? null;
        if (!is_string($raw)) {
            return null;
        }
        $value = trim($raw);

        return $value === '' ? null : $value;
    };

    /**
     * Read a query-string value as an int, falling back to $default when the
     * parameter is missing or is not a plain string (e.g. an array).
     */
    $queryInt = static function (string $key, int $default): int {
        $raw = $_GET[$key] ?? null;

        return is_string($raw) ? (int)$raw : $default;
    };

    $category   = $queryString('category');
    $sourceName = $queryString('source_name');
    $offset     = max(0, $queryInt('offset', 0));
    $limit      = max(1, min(50, $queryInt('limit', 20)));

    // Build WHERE clause
    $where  = ["d.corpus_id = 1", "d.status = 'ready'"];
    $params = [];

    if ($category !== null) {
        $where[]  = 'd.category = ?';
        $params[] = $category;
    }

    if ($sourceName !== null) {
        // corpus_sources is reached through a different connection ($bnlDb) than
        // documents ($ragDb), so the two tables are not joined in SQL. Instead
        // the source URL is fetched first and documents are matched on its host
        // occurring in source_url.
        $srcStmt = $bnlDb->prepare(
            "SELECT url FROM corpus_sources WHERE corpus_id = 1 AND name = ? LIMIT 1"
        );
        $srcStmt->execute([$sourceName]);
        $srcUrl = $srcStmt->fetchColumn();

        $host = is_string($srcUrl) && $srcUrl !== ''
            ? parse_url($srcUrl, PHP_URL_HOST)
            : null;

        if (is_string($host) && $host !== '') {
            // Escape LIKE metacharacters so '_' or '%' in the host match
            // literally. '!' is declared as the escape character explicitly so
            // the pattern does not depend on the server's backslash handling.
            $escapedHost = strtr($host, ['!' => '!!', '%' => '!%', '_' => '!_']);
            $where[]     = "d.source_url LIKE ? ESCAPE '!'";
            $params[]    = '%' . $escapedHost . '%';
        } else {
            // The requested source is unknown or has no usable URL. Match
            // nothing rather than silently dropping the filter and returning
            // every document while still echoing the filter in the response.
            $where[] = '1 = 0';
        }
    }

    $whereStr = implode(' AND ', $where);

    // Total count (same filters, no pagination)
    $countStmt = $ragDb->prepare("SELECT COUNT(*) FROM documents d WHERE $whereStr");
    $countStmt->execute($params);
    $total = (int)$countStmt->fetchColumn();

    // Paginated rows — LIMIT/OFFSET interpolated as ints (MariaDB rejects bound
    // params here). Both values were cast and clamped to ints above, so the
    // interpolation cannot carry arbitrary input. d.id is a tie-breaker so that
    // rows sharing an updated_at keep a stable order across pages.
    $dataStmt = $ragDb->prepare(
        "SELECT d.id, d.title, d.category, d.source_url, d.language, d.updated_at,
                COUNT(c.id) AS chunk_count
         FROM documents d
         LEFT JOIN chunks c ON c.document_id = d.id
         WHERE $whereStr
         GROUP BY d.id
         ORDER BY d.updated_at DESC, d.id DESC
         LIMIT $limit OFFSET $offset"
    );
    $dataStmt->execute($params);
    $documents = $dataStmt->fetchAll(PDO::FETCH_ASSOC);

    // Normalise chunk_count to int
    foreach ($documents as &$doc) {
        $doc['chunk_count'] = (int)$doc['chunk_count'];
    }
    unset($doc); // drop the dangling reference left by the by-reference loop

    dbnToolsRespond([
        'ok'        => true,
        'documents' => $documents,
        'total'     => $total,
        'offset'    => $offset,
        'limit'     => $limit,
        'filter'    => [
            'category'    => $category,
            'source_name' => $sourceName,
        ],
    ]);
} catch (Throwable $e) {
    // NOTE(review): the raw exception message is sent to the client. For
    // database errors this may expose SQL or schema details — confirm that is
    // acceptable for this authenticated endpoint.
    dbnToolsError('Could not load documents: ' . $e->getMessage(), 500, 'documents_error');
}