diff --git a/api/corpus-documents.php b/api/corpus-documents.php index 8b7862e..1f6bb98 100644 --- a/api/corpus-documents.php +++ b/api/corpus-documents.php @@ -12,8 +12,15 @@ try { $category = isset($_GET['category']) && $_GET['category'] !== '' ? trim((string)$_GET['category']) : null; $sourceName = isset($_GET['source_name']) && $_GET['source_name'] !== '' ? trim((string)$_GET['source_name']) : null; + $titleFilter = isset($_GET['title']) && $_GET['title'] !== '' ? trim((string)$_GET['title']) : null; $offset = max(0, (int)($_GET['offset'] ?? 0)); $limit = max(1, min(50, (int)($_GET['limit'] ?? 20))); + $orderBy = match($_GET['sort'] ?? 'newest') { + 'oldest' => 'd.updated_at ASC', + 'alpha' => 'd.title ASC', + 'chunks' => 'chunk_count DESC', + default => 'd.updated_at DESC', + }; // Build WHERE clause $where = ["d.corpus_id = 1", "d.status = 'ready'"]; @@ -24,6 +31,11 @@ try { $params[] = $category; } + if ($titleFilter !== null) { + $where[] = 'd.title LIKE ?'; + $params[] = '%' . str_replace(['%', '_'], ['\\%', '\\_'], $titleFilter) . '%'; + } + if ($sourceName !== null) { // Filter by source via a JOIN to corpus_sources on category match // or by matching the scraper's URL pattern in source_url @@ -61,7 +73,7 @@ try { LEFT JOIN chunks c ON c.document_id = d.id WHERE $whereStr GROUP BY d.id - ORDER BY d.updated_at DESC + ORDER BY $orderBy LIMIT $limit OFFSET $offset" ); $dataStmt->execute($params); @@ -82,6 +94,8 @@ try { 'filter' => [ 'category' => $category, 'source_name' => $sourceName, + 'title' => $titleFilter, + 'sort' => $_GET['sort'] ?? 'newest', ], ]); } catch (Throwable $e) { diff --git a/api/corpus-search.php b/api/corpus-search.php index b2ecee1..683f5c0 100644 --- a/api/corpus-search.php +++ b/api/corpus-search.php @@ -29,6 +29,7 @@ try { 'category' => $h['category'] ?? '', 'section' => $h['section'] ?? null, 'excerpt' => $h['excerpt'] ?? ($h['chunk_text'] ?? ''), + 'full_text' => $h['full_text'] ?? $h['chunk_text'] ?? $h['excerpt'] ?? '', 'score' => $h['score'] ?? null, 'document_id' => $h['document_id'] ?? null, 'chunk_id' => $h['chunk_id'] ?? null, @@ -93,6 +94,7 @@ try { 'category' => $r['category'] ?? '', 'section' => $r['section'] ?? null, 'excerpt' => mb_substr((string)($r['excerpt'] ?? ''), 0, 600, 'UTF-8'), + 'full_text' => (string)($r['excerpt'] ?? ''), 'score' => isset($r['score']) ? round((float)$r['score'], 4) : null, 'document_id' => (int)$r['document_id'], 'chunk_id' => isset($r['chunk_id']) ? (int)$r['chunk_id'] : null, @@ -148,6 +150,7 @@ try { 'category' => $p['category'] ?? '', 'section' => $p['section_title'] ?? null, 'excerpt' => mb_substr((string)($p['content'] ?? ''), 0, 600, 'UTF-8'), + 'full_text' => (string)($p['content'] ?? ''), 'score' => round((float)($pt['score'] ?? 0), 4), 'document_id' => isset($p['document_id']) ? (int)$p['document_id'] : null, 'chunk_id' => $pt['id'] ?? null, @@ -234,6 +237,7 @@ try { 'category' => $d['category'] ?? '', 'section' => $d['section_title'] ?? null, 'excerpt' => mb_substr((string)($d['content'] ?? ''), 0, 600, 'UTF-8'), + 'full_text' => (string)($d['content'] ?? ''), 'score' => round((float)($d['@search.rerankerScore'] ?? $d['@search.score'] ?? 0), 4), 'document_id' => null, 'chunk_id' => $d['chunk_id'] ?? $d['id'] ?? null, diff --git a/corpus.php b/corpus.php index f852637..d2b61b3 100644 --- a/corpus.php +++ b/corpus.php @@ -78,6 +78,18 @@ require_once __DIR__ . '/includes/layout.php'; +
+ + + + + + + + + + +
@@ -178,6 +190,19 @@ require_once __DIR__ . '/includes/layout.php'; +
+ +
+ + +
+