feat(corpus): category filter, passage expand, drill enhancements, URL hash state

- Search: category filter pills scope results to a legal domain
- Search: full chunk text returned; click to expand inline beyond 600-char excerpt
- Drill panel: total count label ("Showing X of Y"), sort dropdown, title filter (300ms debounce)
- URL hash: preserves query/mode/lang/category/drill state for bookmarking

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-18 15:47:56 +02:00
parent ffcf887428
commit 2e2dfd7310
3 changed files with 208 additions and 42 deletions
+15 -1
View File
@@ -12,8 +12,15 @@ try {
$category = isset($_GET['category']) && $_GET['category'] !== '' ? trim((string)$_GET['category']) : null;
$sourceName = isset($_GET['source_name']) && $_GET['source_name'] !== '' ? trim((string)$_GET['source_name']) : null;
$titleFilter = isset($_GET['title']) && $_GET['title'] !== '' ? trim((string)$_GET['title']) : null;
$offset = max(0, (int)($_GET['offset'] ?? 0));
$limit = max(1, min(50, (int)($_GET['limit'] ?? 20)));
// Map the public `sort` parameter onto a fixed ORDER BY fragment. Only these
// literal fragments ever reach the SQL string, so the request value itself is
// never interpolated. Unknown or non-string values fall back to newest-first.
//
// Every ordering ends with d.id as a tiebreaker: updated_at, title and
// chunk_count are not unique, and without a total order LIMIT/OFFSET paging
// may repeat or skip documents between "load more" requests.
$orderBy = match($_GET['sort'] ?? 'newest') {
    'oldest' => 'd.updated_at ASC, d.id ASC',
    'alpha'  => 'd.title ASC, d.id ASC',
    'chunks' => 'chunk_count DESC, d.id ASC',
    default  => 'd.updated_at DESC, d.id DESC',
};
// Build WHERE clause
$where = ["d.corpus_id = 1", "d.status = 'ready'"];
@@ -24,6 +31,11 @@ try {
$params[] = $category;
}
if ($titleFilter !== null) {
    // Case/collation handling is left to the database; this is a plain
    // substring match on the document title.
    //
    // LIKE wildcards typed by the user must match literally. The escape
    // character is declared explicitly ('!') instead of relying on an implicit
    // backslash default, and the escape character itself is escaped first so a
    // filter ending in it cannot neutralise the trailing '%'.
    $where[]  = "d.title LIKE ? ESCAPE '!'";
    $params[] = '%' . str_replace(['!', '%', '_'], ['!!', '!%', '!_'], $titleFilter) . '%';
}
if ($sourceName !== null) {
// Filter by source via a JOIN to corpus_sources on category match
// or by matching the scraper's URL pattern in source_url
@@ -61,7 +73,7 @@ try {
LEFT JOIN chunks c ON c.document_id = d.id
WHERE $whereStr
GROUP BY d.id
ORDER BY d.updated_at DESC
ORDER BY $orderBy
LIMIT $limit OFFSET $offset"
);
$dataStmt->execute($params);
@@ -82,6 +94,8 @@ try {
'filter' => [
'category' => $category,
'source_name' => $sourceName,
'title' => $titleFilter,
'sort' => $_GET['sort'] ?? 'newest',
],
]);
} catch (Throwable $e) {
+4
View File
@@ -29,6 +29,7 @@ try {
'category' => $h['category'] ?? '',
'section' => $h['section'] ?? null,
'excerpt' => $h['excerpt'] ?? ($h['chunk_text'] ?? ''),
'full_text' => $h['full_text'] ?? $h['chunk_text'] ?? $h['excerpt'] ?? '',
'score' => $h['score'] ?? null,
'document_id' => $h['document_id'] ?? null,
'chunk_id' => $h['chunk_id'] ?? null,
@@ -93,6 +94,7 @@ try {
'category' => $r['category'] ?? '',
'section' => $r['section'] ?? null,
'excerpt' => mb_substr((string)($r['excerpt'] ?? ''), 0, 600, 'UTF-8'),
'full_text' => (string)($r['excerpt'] ?? ''),
'score' => isset($r['score']) ? round((float)$r['score'], 4) : null,
'document_id' => (int)$r['document_id'],
'chunk_id' => isset($r['chunk_id']) ? (int)$r['chunk_id'] : null,
@@ -148,6 +150,7 @@ try {
'category' => $p['category'] ?? '',
'section' => $p['section_title'] ?? null,
'excerpt' => mb_substr((string)($p['content'] ?? ''), 0, 600, 'UTF-8'),
'full_text' => (string)($p['content'] ?? ''),
'score' => round((float)($pt['score'] ?? 0), 4),
'document_id' => isset($p['document_id']) ? (int)$p['document_id'] : null,
'chunk_id' => $pt['id'] ?? null,
@@ -234,6 +237,7 @@ try {
'category' => $d['category'] ?? '',
'section' => $d['section_title'] ?? null,
'excerpt' => mb_substr((string)($d['content'] ?? ''), 0, 600, 'UTF-8'),
'full_text' => (string)($d['content'] ?? ''),
'score' => round((float)($d['@search.rerankerScore'] ?? $d['@search.score'] ?? 0), 4),
'document_id' => null,
'chunk_id' => $d['chunk_id'] ?? $d['id'] ?? null,
+189 -41
View File
@@ -78,6 +78,18 @@ require_once __DIR__ . '/includes/layout.php';
<button class="mode-pill" data-lang="pl" type="button">PL</button>
</div>
</div>
<!-- Category filter pills. The click handler copies the clicked pill's data-cat
     into searchCat, which is sent as the `category` field of the search request;
     the empty value ("All") is sent as null, i.e. no category filter. -->
<div class="search-cats" role="group" aria-label="Category filter" id="searchCatPills">
<button class="mode-pill is-active" data-cat="" type="button">All</button>
<button class="mode-pill" data-cat="family-law" type="button">Family Law</button>
<button class="mode-pill" data-cat="child-welfare" type="button">Child Welfare</button>
<button class="mode-pill" data-cat="labour-law" type="button">Labour Law</button>
<button class="mode-pill" data-cat="social-welfare" type="button">Social Welfare</button>
<button class="mode-pill" data-cat="tax-law" type="button">Tax Law</button>
<button class="mode-pill" data-cat="administrative-law" type="button">Administrative</button>
<button class="mode-pill" data-cat="consumer-law" type="button">Consumer</button>
<button class="mode-pill" data-cat="immigration-law" type="button">Immigration</button>
<button class="mode-pill" data-cat="government-documents" type="button">Gov Docs</button>
</div>
</div>
<div id="corpusSearchResults" class="corpus-search-results" hidden></div>
@@ -178,6 +190,19 @@ require_once __DIR__ . '/includes/layout.php';
</div>
<button class="drill-close-btn" id="drillCloseBtn" type="button" aria-label="Close">✕</button>
</div>
<!-- Drill panel toolbar: result count on the left, title filter and sort order
     on the right. The <option> values are the `sort` keys sent to
     /api/corpus-documents.php, so they must not be renamed here alone. -->
<div class="drill-controls" id="drillControls">
  <!-- aria-live so the "Showing X of Y" text is announced when a reload finishes -->
  <span class="drill-count" id="drillCount" aria-live="polite"></span>
  <div class="drill-controls-right">
    <!-- A placeholder is not an accessible name; aria-label supplies one -->
    <input type="search" id="drillSearchInput" class="drill-search-input"
           placeholder="Filter by title…" aria-label="Filter documents by title" autocomplete="off">
    <select id="drillSortSelect" class="drill-sort-select" aria-label="Sort documents">
      <option value="newest">Newest first</option>
      <option value="oldest">Oldest first</option>
      <option value="alpha">A–Z</option>
      <option value="chunks">Most passages</option>
    </select>
  </div>
</div>
<div id="drillDocList" class="doc-list"></div>
<div class="doc-list__more-wrap" id="drillMoreWrap" hidden>
<button class="doc-list__more" id="drillMoreBtn" type="button">Load more</button>
@@ -556,45 +581,58 @@ require_once __DIR__ . '/includes/layout.php';
}
// ── Category drill-down ───────────────────────────────────────────────────
let drillState = { category: null, sourceName: null, offset: 0, total: 0, limit: 20 };
const drillPanel = document.getElementById('corpusDrillPanel');
const drillDocList = document.getElementById('drillDocList');
const drillTitle = document.getElementById('drillTitle');
const drillEyebrow = document.getElementById('drillEyebrow');
const drillMoreWrap = document.getElementById('drillMoreWrap');
const drillMoreBtn = document.getElementById('drillMoreBtn');
const drillCloseBtn = document.getElementById('drillCloseBtn');
let drillState = { category: null, sourceName: null, offset: 0, total: 0, limit: 20, sort: 'newest', titleFilter: '' };
const drillPanel = document.getElementById('corpusDrillPanel');
const drillDocList = document.getElementById('drillDocList');
const drillTitle = document.getElementById('drillTitle');
const drillEyebrow = document.getElementById('drillEyebrow');
const drillMoreWrap = document.getElementById('drillMoreWrap');
const drillMoreBtn = document.getElementById('drillMoreBtn');
const drillCloseBtn = document.getElementById('drillCloseBtn');
const drillCount = document.getElementById('drillCount');
const drillSortSelect = document.getElementById('drillSortSelect');
const drillSearchInput = document.getElementById('drillSearchInput');
document.querySelectorAll('.cat-browse-btn').forEach(btn => {
btn.addEventListener('click', () => openDrillByCategory(btn.dataset.cat));
});
/**
 * Reset the drill panel to a fresh first page for one filter, show it, record
 * the new state in the URL hash and request the first page of documents.
 *
 * Exactly one of `category` / `sourceName` is expected to be non-null.
 * Sort order and title filter always restart at their defaults, and the
 * matching controls are reset so the UI agrees with `drillState`.
 *
 * @param {?string} category   Category slug to drill into, or null.
 * @param {?string} sourceName Source name to drill into, or null.
 * @param {string}  eyebrow    Small heading shown above the title.
 * @param {string}  title      Panel title.
 */
function openDrillPanel(category, sourceName, eyebrow, title) {
  drillState = {
    category: category,
    sourceName: sourceName,
    offset: 0,
    total: 0,
    limit: 20,
    sort: 'newest',
    titleFilter: '',
  };
  drillEyebrow.textContent = eyebrow;
  drillTitle.textContent = title;
  drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';
  drillMoreWrap.hidden = true;
  // The count/sort/filter controls are optional elements; guard all three the
  // same way so a page without them still opens the panel.
  if (drillCount) drillCount.textContent = '';
  if (drillSortSelect) drillSortSelect.value = 'newest';
  if (drillSearchInput) drillSearchInput.value = '';
  drillPanel.hidden = false;
  drillPanel.scrollIntoView({ behavior: 'smooth', block: 'start' });
  // The panel must be visible before pushHash(), which only records drill
  // state for an open panel.
  pushHash();
  fetchDrillPage(false);
}

/** Open the drill panel listing the documents of one category slug. */
function openDrillByCategory(cat) {
  openDrillPanel(cat, null, 'Category', catLabels[cat] || cat);
}

/** Open the drill panel listing the documents of one source. */
function openDrillBySource(sourceName) {
  openDrillPanel(null, sourceName, 'Source', sourceName);
}
function fetchDrillPage(append) {
const qs = new URLSearchParams({ offset: drillState.offset, limit: drillState.limit });
if (drillState.category) qs.set('category', drillState.category);
if (drillState.sourceName) qs.set('source_name', drillState.sourceName);
if (drillState.category) qs.set('category', drillState.category);
if (drillState.sourceName) qs.set('source_name', drillState.sourceName);
if (drillState.sort) qs.set('sort', drillState.sort);
if (drillState.titleFilter) qs.set('title', drillState.titleFilter);
fetch('/api/corpus-documents.php?' + qs, { credentials: 'same-origin' })
.then(r => r.json())
@@ -637,6 +675,11 @@ require_once __DIR__ . '/includes/layout.php';
const loaded = drillState.offset + docs.length;
drillMoreWrap.hidden = loaded >= drillState.total;
drillState.offset = loaded;
if (drillCount) {
drillCount.textContent = drillState.total > 0
? 'Showing ' + fmt(loaded) + ' of ' + fmt(drillState.total) + ' documents'
: '';
}
})
.catch(() => {
if (!append) drillDocList.innerHTML = '<p class="drill-error">Network error.</p>';
@@ -644,17 +687,43 @@ require_once __DIR__ . '/includes/layout.php';
}
drillMoreBtn.addEventListener('click', () => fetchDrillPage(true));
drillCloseBtn.addEventListener('click', () => { drillPanel.hidden = true; });
drillCloseBtn.addEventListener('click', () => { drillPanel.hidden = true; pushHash(); });
/**
 * Restart the drill listing at the first page after the sort order or the
 * title filter changed. "Load more" is hidden while the reload is pending so
 * it cannot append a page onto a list that is about to be replaced.
 */
function reloadDrillFromStart() {
  drillState.offset = 0;
  drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';
  drillMoreWrap.hidden = true;
  if (drillCount) drillCount.textContent = '';
  fetchDrillPage(false);
}

// Sort dropdown: apply immediately.
if (drillSortSelect) {
  drillSortSelect.addEventListener('change', () => {
    drillState.sort = drillSortSelect.value;
    reloadDrillFromStart();
  });
}

// Title filter: debounced by 300 ms so a request is not sent per keystroke.
let drillFilterTimer = null;
if (drillSearchInput) {
  drillSearchInput.addEventListener('input', () => {
    clearTimeout(drillFilterTimer);
    drillFilterTimer = setTimeout(() => {
      const nextFilter = drillSearchInput.value.trim();
      // Typing and deleting (or adding only whitespace) ends on the same
      // effective filter; skip the redundant reload.
      if (nextFilter === drillState.titleFilter) return;
      drillState.titleFilter = nextFilter;
      reloadDrillFromStart();
    }, 300);
  });
}
// ── Search bar ────────────────────────────────────────────────────────────
let searchMode = 'hybrid';
let searchLang = 'en';
let searchCat = '';
// Search-mode pills behave like a radio group: the clicked pill becomes the
// only active one, its data-mode becomes the current search mode, and the URL
// hash is refreshed to match.
document.querySelectorAll('.search-modes .mode-pill').forEach(function (pill) {
  pill.addEventListener('click', function onModePillClick() {
    for (const other of document.querySelectorAll('.search-modes .mode-pill')) {
      other.classList.remove('is-active');
    }
    pill.classList.add('is-active');
    searchMode = pill.dataset.mode;
    pushHash();
  });
});
@@ -663,6 +732,16 @@ require_once __DIR__ . '/includes/layout.php';
document.querySelectorAll('.lang-pills .mode-pill').forEach(b => b.classList.remove('is-active'));
btn.classList.add('is-active');
searchLang = btn.dataset.lang;
pushHash();
});
});
// Category pills behave like a radio group: the clicked pill becomes the only
// active one, its data-cat becomes the current category filter ('' for "All"),
// and the URL hash is refreshed to match.
document.querySelectorAll('#searchCatPills .mode-pill').forEach(function (pill) {
  pill.addEventListener('click', function onCategoryPillClick() {
    for (const other of document.querySelectorAll('#searchCatPills .mode-pill')) {
      other.classList.remove('is-active');
    }
    pill.classList.add('is-active');
    searchCat = pill.dataset.cat;
    pushHash();
  });
});
@@ -678,6 +757,7 @@ require_once __DIR__ . '/includes/layout.php';
return;
}
pushHash();
searchResults.hidden = false;
searchResults.innerHTML = `<p class="search-loading">Searching in <strong>${esc(searchMode)}</strong> mode…</p>`;
searchBtn.disabled = true;
@@ -686,7 +766,7 @@ require_once __DIR__ . '/includes/layout.php';
method: 'POST',
credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ query: q, mode: searchMode, language: searchLang, limit: 8 }),
body: JSON.stringify({ query: q, mode: searchMode, language: searchLang, limit: 8, category: searchCat || null }),
})
.then(r => r.json())
.then(data => {
@@ -702,29 +782,50 @@ require_once __DIR__ . '/includes/layout.php';
}
const modeLabel = { hybrid: 'Hybrid RAG', bm25: 'BM25 keyword', vector: 'Vector semantic' }[data.mode] || data.mode;
// Header first, then one card per hit. Cards are built as DOM nodes and
// appended individually so each expand button keeps its click listener;
// assigning one concatenated HTML string afterwards would discard them.
const header = document.createElement('div');
header.className = 'search-results-header';
header.innerHTML = `<span class="eyebrow">${esc(modeLabel)}</span><span class="search-results-count">${hits.length} passage${hits.length !== 1 ? 's' : ''}</span>`;
searchResults.innerHTML = '';
searchResults.appendChild(header);

// esc() keeps the href attribute well-formed but does not constrain the URL
// scheme, so a `javascript:` source_url would still execute on click. Only
// http(s) targets (absolute, or relative to this page) are rendered as links.
const isWebUrl = (raw) => {
  try {
    const scheme = new URL(raw, location.href).protocol;
    return scheme === 'http:' || scheme === 'https:';
  } catch (_err) {
    return false;
  }
};

hits.forEach(hit => {
  const score = hit.score != null ? `<span class="passage-score">${Math.round(hit.score * 100)}%</span>` : '';
  const catAuth = authorityLabels[hit.category] || { label: hit.category || '—', cls: 'badge--muted' };
  const titleText = esc(hit.title || '(Untitled)');
  const titleHtml = hit.source_url && isWebUrl(hit.source_url)
    ? `<a href="${esc(hit.source_url)}" target="_blank" rel="noopener" class="passage-card__title">${titleText}</a>`
    : `<span class="passage-card__title">${titleText}</span>`;
  const section = hit.section ? `<span class="passage-section">§ ${esc(hit.section)}</span>` : '';
  const excerptText = hit.excerpt || '';
  const excerpt = highlight(excerptText, q);
  const fullText = (hit.full_text || '').trim();
  // Compare like with like: both sides trimmed. Otherwise leading or trailing
  // whitespace in the excerpt can hide the button although text is cut off.
  const hasMore = fullText.length > excerptText.trim().length;

  const card = document.createElement('div');
  card.className = 'passage-card';
  card.innerHTML = `
    <div class="passage-card__meta">
      <span class="source-badge ${esc(catAuth.cls)}">${esc(catAuth.label)}</span>
      ${section}
      ${score}
    </div>
    ${titleHtml}
    <p class="passage-card__excerpt">${excerpt}</p>
    ${hasMore ? '<button class="passage-expand-btn" type="button" aria-expanded="false">Show full passage</button><div class="passage-full-text" hidden></div>' : ''}`;

  if (hasMore) {
    const expandBtn = card.querySelector('.passage-expand-btn');
    const fullDiv = card.querySelector('.passage-full-text');
    // Escape first, then turn newlines into <br>, so passage text can never
    // inject markup.
    fullDiv.innerHTML = esc(fullText).replace(/\n/g, '<br>');
    expandBtn.addEventListener('click', function () {
      const isOpen = !fullDiv.hidden;
      fullDiv.hidden = isOpen;
      this.textContent = isOpen ? 'Show full passage' : 'Hide passage';
      this.setAttribute('aria-expanded', String(!isOpen));
    });
  }
  searchResults.appendChild(card);
});
})
.catch(err => {
.catch(() => {
searchBtn.disabled = false;
searchResults.innerHTML = `<p class="search-error">Network error.</p>`;
});
@@ -732,6 +833,53 @@ require_once __DIR__ . '/includes/layout.php';
searchBtn.addEventListener('click', runSearch);
searchInput.addEventListener('keydown', e => { if (e.key === 'Enter') runSearch(); });
// ── URL hash state ────────────────────────────────────────────────────────
/**
 * Mirror the current search and drill state into the URL hash so the view can
 * be bookmarked. Values equal to their defaults (hybrid mode, `en` language,
 * no category, no query) are left out; drill keys are written only while the
 * drill panel is visible. Uses replaceState, so no history entry is added.
 * With nothing to record, the hash is dropped by replacing the URL with the
 * bare path and query string.
 */
function pushHash() {
  const query = searchInput ? searchInput.value.trim() : '';
  const drillOpen = Boolean(drillPanel) && !drillPanel.hidden;

  // [include?, key, value] in the order the keys appear in the hash.
  const fields = [
    [Boolean(query), 'q', query],
    [searchMode !== 'hybrid', 'mode', searchMode],
    [searchLang !== 'en', 'lang', searchLang],
    [Boolean(searchCat), 'cat', searchCat],
    [drillOpen && Boolean(drillState.category), 'drill', drillState.category],
    [drillOpen && Boolean(drillState.sourceName), 'drillsrc', drillState.sourceName],
  ];

  const params = new URLSearchParams();
  for (const [include, key, value] of fields) {
    if (include) params.set(key, value);
  }

  const encoded = params.toString();
  const target = encoded ? '#' + encoded : location.pathname + location.search;
  history.replaceState(null, '', target);
}
/**
 * Mark as active exactly those pills in a group whose data attribute equals
 * `value`; every other pill in the group loses the `is-active` class.
 *
 * @param {string} group CSS selector matching all pills of the group.
 * @param {string} attr  dataset key to compare (e.g. 'mode', 'lang', 'cat').
 * @param {string} value Value the active pill must carry.
 */
function activatePill(group, attr, value) {
  for (const pill of document.querySelectorAll(group)) {
    const isMatch = pill.dataset[attr] === value;
    pill.classList.toggle('is-active', isMatch);
  }
}
/**
 * Restore search and drill state from the URL hash (the inverse of pushHash).
 *
 * - `mode` / `lang` / `cat` are applied only when a pill carrying that value
 *   exists. A hand-edited or stale hash therefore cannot leave a pill group
 *   with nothing selected or send an unknown value to the search API.
 * - The query text is written into the input before a drill panel is opened,
 *   because opening the panel rewrites the hash from the current input value.
 * - At most one drill panel is opened. `drillsrc` takes precedence over
 *   `drill`, so two competing document requests are never started.
 * - The search itself runs last, once every filter is in place.
 */
function restoreHash() {
  if (!location.hash) return;
  const params = new URLSearchParams(location.hash.slice(1));

  // Returns the hash value for `key` if some pill in `group` carries it (and
  // activates that pill); otherwise null, leaving the current selection alone.
  const readPill = (key, group, attr) => {
    if (!params.has(key)) return null;
    const value = params.get(key);
    const known = Array.from(document.querySelectorAll(group))
      .some(pill => pill.dataset[attr] === value);
    if (!known) return null;
    activatePill(group, attr, value);
    return value;
  };

  const mode = readPill('mode', '.search-modes .mode-pill', 'mode');
  if (mode !== null) searchMode = mode;
  const lang = readPill('lang', '.lang-pills .mode-pill', 'lang');
  if (lang !== null) searchLang = lang;
  const cat = readPill('cat', '#searchCatPills .mode-pill', 'cat');
  if (cat !== null) searchCat = cat;

  const hasQuery = params.has('q') && Boolean(searchInput);
  if (hasQuery) searchInput.value = params.get('q');

  const drillSource = params.get('drillsrc');
  const drillCategory = params.get('drill');
  if (drillSource) {
    openDrillBySource(drillSource);
  } else if (drillCategory) {
    openDrillByCategory(drillCategory);
  }

  if (hasQuery) runSearch();
}
restoreHash();
})();
</script>