Files
dobetternorge-tools/api/corpus-documents.php
T
daveadmin d5e61d656a Fix MariaDB LIMIT/OFFSET bound-parameter error in corpus API
MariaDB rejects ? placeholders for LIMIT/OFFSET when emulate_prepares=false.
Interpolate $limit and $offset as ints directly into SQL strings in both
corpus-documents.php and corpus-search.php BM25 paths.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-15 12:31:20 +02:00

90 lines
3.0 KiB
PHP

<?php
declare(strict_types=1);
require_once __DIR__ . '/../includes/bootstrap.php';
/*
 * GET endpoint: paginated listing of corpus documents.
 *
 * Query parameters (all optional):
 *   category     Exact match on documents.category.
 *   source_name  Name of a row in corpus_sources; documents are matched by the
 *                host of that source's URL appearing in documents.source_url.
 *   offset       Zero-based row offset (values below 0 are clamped to 0).
 *   limit        Page size, clamped to the range 1..50 (default 20).
 *
 * Responds via dbnToolsRespond() with: ok, documents, total, offset, limit and
 * the effective filter. Any Throwable is reported through dbnToolsError()
 * with HTTP status 500 and code 'documents_error'.
 */
dbnToolsRequireMethod('GET');
dbnToolsRequireAuth();

try {
    $ragDb = dbnToolsRagDb();
    $bnlDb = dbnToolsDb();

    // Normalise the optional string filters: ignore non-string input (e.g.
    // ?category[]=x), trim FIRST, and only then treat an empty value as
    // "no filter" so that whitespace-only input does not filter on ''.
    $category = is_string($_GET['category'] ?? null) ? trim($_GET['category']) : '';
    $category = $category !== '' ? $category : null;

    $sourceName = is_string($_GET['source_name'] ?? null) ? trim($_GET['source_name']) : '';
    $sourceName = $sourceName !== '' ? $sourceName : null;

    // Both values are forced to int and clamped here; they are interpolated
    // into the SQL text further down, so they must never be anything else.
    $offset = max(0, (int)($_GET['offset'] ?? 0));
    $limit = max(1, min(50, (int)($_GET['limit'] ?? 20)));

    // Build WHERE clause
    $where = ["d.corpus_id = 1", "d.status = 'ready'"];
    $params = [];

    if ($category !== null) {
        $where[] = 'd.category = ?';
        $params[] = $category;
    }

    if ($sourceName !== null) {
        // corpus_sources is reached through a different connection than
        // documents, so a SQL JOIN is not used. Instead: look up the source's
        // URL first, then filter documents whose source_url contains its host.
        $srcStmt = $bnlDb->prepare(
            "SELECT url FROM corpus_sources WHERE corpus_id = 1 AND name = ? LIMIT 1"
        );
        $srcStmt->execute([$sourceName]);
        $srcUrl = $srcStmt->fetchColumn();

        // parse_url() yields null when there is no host and false on a
        // malformed URL; both are treated as "no usable host".
        $host = is_string($srcUrl) && $srcUrl !== '' ? parse_url($srcUrl, PHP_URL_HOST) : null;

        if (is_string($host) && $host !== '') {
            // Escape LIKE wildcards so the host is matched literally. An
            // explicit ESCAPE character avoids relying on backslash handling.
            $likeHost = strtr($host, ['!' => '!!', '%' => '!%', '_' => '!_']);
            $where[] = "d.source_url LIKE ? ESCAPE '!'";
            $params[] = '%' . $likeHost . '%';
        } else {
            // A source filter was requested but cannot be resolved to a host:
            // match nothing instead of silently returning unfiltered results.
            $where[] = '1 = 0';
        }
    }

    $whereStr = implode(' AND ', $where);

    // Total count (same filters, no pagination)
    $countStmt = $ragDb->prepare("SELECT COUNT(*) FROM documents d WHERE $whereStr");
    $countStmt->execute($params);
    $total = (int)$countStmt->fetchColumn();

    // Paginated rows — LIMIT/OFFSET interpolated as ints (MariaDB rejects bound params here).
    // d.id is a tie-breaker so rows sharing an updated_at keep a stable order
    // across pages.
    $dataStmt = $ragDb->prepare(
        "SELECT d.id, d.title, d.category, d.source_url, d.language, d.updated_at,
                COUNT(c.id) AS chunk_count
           FROM documents d
           LEFT JOIN chunks c ON c.document_id = d.id
          WHERE $whereStr
          GROUP BY d.id
          ORDER BY d.updated_at DESC, d.id DESC
          LIMIT $limit OFFSET $offset"
    );
    $dataStmt->execute($params);
    $documents = $dataStmt->fetchAll(PDO::FETCH_ASSOC);

    // Normalise chunk_count to int
    foreach ($documents as $i => $doc) {
        $documents[$i]['chunk_count'] = (int)$doc['chunk_count'];
    }

    dbnToolsRespond([
        'ok' => true,
        'documents' => $documents,
        'total' => $total,
        'offset' => $offset,
        'limit' => $limit,
        'filter' => [
            'category' => $category,
            'source_name' => $sourceName,
        ],
    ]);
} catch (Throwable $e) {
    dbnToolsError('Could not load documents: ' . $e->getMessage(), 500, 'documents_error');
}