diff --git a/api/corpus-documents.php b/api/corpus-documents.php index 8b7862e..1f6bb98 100644 --- a/api/corpus-documents.php +++ b/api/corpus-documents.php @@ -12,8 +12,15 @@ try { $category = isset($_GET['category']) && $_GET['category'] !== '' ? trim((string)$_GET['category']) : null; $sourceName = isset($_GET['source_name']) && $_GET['source_name'] !== '' ? trim((string)$_GET['source_name']) : null; + $titleFilter = isset($_GET['title']) && $_GET['title'] !== '' ? trim((string)$_GET['title']) : null; $offset = max(0, (int)($_GET['offset'] ?? 0)); $limit = max(1, min(50, (int)($_GET['limit'] ?? 20))); + $orderBy = match($_GET['sort'] ?? 'newest') { + 'oldest' => 'd.updated_at ASC', + 'alpha' => 'd.title ASC', + 'chunks' => 'chunk_count DESC', + default => 'd.updated_at DESC', + }; // Build WHERE clause $where = ["d.corpus_id = 1", "d.status = 'ready'"]; @@ -24,6 +31,11 @@ try { $params[] = $category; } + if ($titleFilter !== null) { + $where[] = 'd.title LIKE ?'; + $params[] = '%' . str_replace(['%', '_'], ['\\%', '\\_'], $titleFilter) . '%'; + } + if ($sourceName !== null) { // Filter by source via a JOIN to corpus_sources on category match // or by matching the scraper's URL pattern in source_url @@ -61,7 +73,7 @@ try { LEFT JOIN chunks c ON c.document_id = d.id WHERE $whereStr GROUP BY d.id - ORDER BY d.updated_at DESC + ORDER BY $orderBy LIMIT $limit OFFSET $offset" ); $dataStmt->execute($params); @@ -82,6 +94,8 @@ try { 'filter' => [ 'category' => $category, 'source_name' => $sourceName, + 'title' => $titleFilter, + 'sort' => $_GET['sort'] ?? 'newest', ], ]); } catch (Throwable $e) { diff --git a/api/corpus-search.php b/api/corpus-search.php index b2ecee1..683f5c0 100644 --- a/api/corpus-search.php +++ b/api/corpus-search.php @@ -29,6 +29,7 @@ try { 'category' => $h['category'] ?? '', 'section' => $h['section'] ?? null, 'excerpt' => $h['excerpt'] ?? ($h['chunk_text'] ?? ''), + 'full_text' => $h['full_text'] ?? $h['chunk_text'] ?? $h['excerpt'] ?? '', 'score' => $h['score'] ?? null, 'document_id' => $h['document_id'] ?? null, 'chunk_id' => $h['chunk_id'] ?? null, @@ -93,6 +94,7 @@ try { 'category' => $r['category'] ?? '', 'section' => $r['section'] ?? null, 'excerpt' => mb_substr((string)($r['excerpt'] ?? ''), 0, 600, 'UTF-8'), + 'full_text' => (string)($r['excerpt'] ?? ''), 'score' => isset($r['score']) ? round((float)$r['score'], 4) : null, 'document_id' => (int)$r['document_id'], 'chunk_id' => isset($r['chunk_id']) ? (int)$r['chunk_id'] : null, @@ -148,6 +150,7 @@ try { 'category' => $p['category'] ?? '', 'section' => $p['section_title'] ?? null, 'excerpt' => mb_substr((string)($p['content'] ?? ''), 0, 600, 'UTF-8'), + 'full_text' => (string)($p['content'] ?? ''), 'score' => round((float)($pt['score'] ?? 0), 4), 'document_id' => isset($p['document_id']) ? (int)$p['document_id'] : null, 'chunk_id' => $pt['id'] ?? null, @@ -234,6 +237,7 @@ try { 'category' => $d['category'] ?? '', 'section' => $d['section_title'] ?? null, 'excerpt' => mb_substr((string)($d['content'] ?? ''), 0, 600, 'UTF-8'), + 'full_text' => (string)($d['content'] ?? ''), 'score' => round((float)($d['@search.rerankerScore'] ?? $d['@search.score'] ?? 0), 4), 'document_id' => null, 'chunk_id' => $d['chunk_id'] ?? $d['id'] ?? null, diff --git a/corpus.php b/corpus.php index f852637..d2b61b3 100644 --- a/corpus.php +++ b/corpus.php @@ -78,6 +78,18 @@ require_once __DIR__ . '/includes/layout.php'; +
Loading documents…
'; - drillMoreWrap.hidden = true; - drillPanel.hidden = false; + drillState = { category: cat, sourceName: null, offset: 0, total: 0, limit: 20, sort: 'newest', titleFilter: '' }; + drillEyebrow.textContent = 'Category'; + drillTitle.textContent = catLabels[cat] || cat; + drillDocList.innerHTML = 'Loading documents…
'; + drillMoreWrap.hidden = true; + drillCount.textContent = ''; + if (drillSortSelect) drillSortSelect.value = 'newest'; + if (drillSearchInput) drillSearchInput.value = ''; + drillPanel.hidden = false; drillPanel.scrollIntoView({ behavior: 'smooth', block: 'start' }); + pushHash(); fetchDrillPage(false); } function openDrillBySource(sourceName) { - drillState = { category: null, sourceName: sourceName, offset: 0, total: 0, limit: 20 }; - drillEyebrow.textContent = 'Source'; - drillTitle.textContent = sourceName; - drillDocList.innerHTML = 'Loading documents…
'; - drillMoreWrap.hidden = true; - drillPanel.hidden = false; + drillState = { category: null, sourceName: sourceName, offset: 0, total: 0, limit: 20, sort: 'newest', titleFilter: '' }; + drillEyebrow.textContent = 'Source'; + drillTitle.textContent = sourceName; + drillDocList.innerHTML = 'Loading documents…
'; + drillMoreWrap.hidden = true; + drillCount.textContent = ''; + if (drillSortSelect) drillSortSelect.value = 'newest'; + if (drillSearchInput) drillSearchInput.value = ''; + drillPanel.hidden = false; drillPanel.scrollIntoView({ behavior: 'smooth', block: 'start' }); + pushHash(); fetchDrillPage(false); } function fetchDrillPage(append) { const qs = new URLSearchParams({ offset: drillState.offset, limit: drillState.limit }); - if (drillState.category) qs.set('category', drillState.category); - if (drillState.sourceName) qs.set('source_name', drillState.sourceName); + if (drillState.category) qs.set('category', drillState.category); + if (drillState.sourceName) qs.set('source_name', drillState.sourceName); + if (drillState.sort) qs.set('sort', drillState.sort); + if (drillState.titleFilter) qs.set('title', drillState.titleFilter); fetch('/api/corpus-documents.php?' + qs, { credentials: 'same-origin' }) .then(r => r.json()) @@ -637,6 +675,11 @@ require_once __DIR__ . '/includes/layout.php'; const loaded = drillState.offset + docs.length; drillMoreWrap.hidden = loaded >= drillState.total; drillState.offset = loaded; + if (drillCount) { + drillCount.textContent = drillState.total > 0 + ? 'Showing ' + fmt(loaded) + ' of ' + fmt(drillState.total) + ' documents' + : ''; + } }) .catch(() => { if (!append) drillDocList.innerHTML = 'Network error.
'; @@ -644,17 +687,43 @@ require_once __DIR__ . '/includes/layout.php'; } drillMoreBtn.addEventListener('click', () => fetchDrillPage(true)); - drillCloseBtn.addEventListener('click', () => { drillPanel.hidden = true; }); + drillCloseBtn.addEventListener('click', () => { drillPanel.hidden = true; pushHash(); }); + + if (drillSortSelect) { + drillSortSelect.addEventListener('change', () => { + drillState.sort = drillSortSelect.value; + drillState.offset = 0; + drillDocList.innerHTML = 'Loading documents…
'; + drillCount.textContent = ''; + fetchDrillPage(false); + }); + } + + let drillFilterTimer = null; + if (drillSearchInput) { + drillSearchInput.addEventListener('input', () => { + clearTimeout(drillFilterTimer); + drillFilterTimer = setTimeout(() => { + drillState.titleFilter = drillSearchInput.value.trim(); + drillState.offset = 0; + drillDocList.innerHTML = 'Loading documents…
'; + drillCount.textContent = ''; + fetchDrillPage(false); + }, 300); + }); + } // ── Search bar ──────────────────────────────────────────────────────────── let searchMode = 'hybrid'; let searchLang = 'en'; + let searchCat = ''; document.querySelectorAll('.search-modes .mode-pill').forEach(btn => { btn.addEventListener('click', () => { document.querySelectorAll('.search-modes .mode-pill').forEach(b => b.classList.remove('is-active')); btn.classList.add('is-active'); searchMode = btn.dataset.mode; + pushHash(); }); }); @@ -663,6 +732,16 @@ require_once __DIR__ . '/includes/layout.php'; document.querySelectorAll('.lang-pills .mode-pill').forEach(b => b.classList.remove('is-active')); btn.classList.add('is-active'); searchLang = btn.dataset.lang; + pushHash(); + }); + }); + + document.querySelectorAll('#searchCatPills .mode-pill').forEach(btn => { + btn.addEventListener('click', () => { + document.querySelectorAll('#searchCatPills .mode-pill').forEach(b => b.classList.remove('is-active')); + btn.classList.add('is-active'); + searchCat = btn.dataset.cat; + pushHash(); }); }); @@ -678,6 +757,7 @@ require_once __DIR__ . '/includes/layout.php'; return; } + pushHash(); searchResults.hidden = false; searchResults.innerHTML = `Searching in ${esc(searchMode)} mode…
`; searchBtn.disabled = true; @@ -686,7 +766,7 @@ require_once __DIR__ . '/includes/layout.php'; method: 'POST', credentials: 'same-origin', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ query: q, mode: searchMode, language: searchLang, limit: 8 }), + body: JSON.stringify({ query: q, mode: searchMode, language: searchLang, limit: 8, category: searchCat || null }), }) .then(r => r.json()) .then(data => { @@ -702,29 +782,50 @@ require_once __DIR__ . '/includes/layout.php'; } const modeLabel = { hybrid: 'Hybrid RAG', bm25: 'BM25 keyword', vector: 'Vector semantic' }[data.mode] || data.mode; - let html = `${excerpt}
-${excerpt}
+ ${hasMore ? '' : ''}`; + + if (hasMore) { + const expandBtn = card.querySelector('.passage-expand-btn'); + const fullDiv = card.querySelector('.passage-full-text'); + fullDiv.innerHTML = esc(fullText).replace(/\n/g, 'Network error.
`; }); @@ -732,6 +833,53 @@ require_once __DIR__ . '/includes/layout.php'; searchBtn.addEventListener('click', runSearch); searchInput.addEventListener('keydown', e => { if (e.key === 'Enter') runSearch(); }); + + // ── URL hash state ──────────────────────────────────────────────────────── + function pushHash() { + const p = new URLSearchParams(); + const q = searchInput ? searchInput.value.trim() : ''; + if (q) p.set('q', q); + if (searchMode !== 'hybrid') p.set('mode', searchMode); + if (searchLang !== 'en') p.set('lang', searchLang); + if (searchCat) p.set('cat', searchCat); + if (drillPanel && !drillPanel.hidden) { + if (drillState.category) p.set('drill', drillState.category); + if (drillState.sourceName) p.set('drillsrc', drillState.sourceName); + } + const hash = p.toString(); + history.replaceState(null, '', hash ? '#' + hash : location.pathname + location.search); + } + + function activatePill(group, attr, value) { + document.querySelectorAll(group).forEach(b => { + b.classList.toggle('is-active', b.dataset[attr] === value); + }); + } + + function restoreHash() { + if (!location.hash) return; + const p = new URLSearchParams(location.hash.slice(1)); + if (p.has('mode')) { + searchMode = p.get('mode'); + activatePill('.search-modes .mode-pill', 'mode', searchMode); + } + if (p.has('lang')) { + searchLang = p.get('lang'); + activatePill('.lang-pills .mode-pill', 'lang', searchLang); + } + if (p.has('cat')) { + searchCat = p.get('cat'); + activatePill('#searchCatPills .mode-pill', 'cat', searchCat); + } + if (p.has('drill')) openDrillByCategory(p.get('drill')); + if (p.has('drillsrc')) openDrillBySource(p.get('drillsrc')); + if (p.has('q') && searchInput) { + searchInput.value = p.get('q'); + runSearch(); + } + } + + restoreHash(); })();