feat(corpus): category filter, passage expand, drill enhancements, URL hash state
- Search: category filter pills scope results to a legal domain
- Search: full chunk text returned; click to expand inline beyond 600-char excerpt
- Drill panel: total count label ("Showing X of Y"), sort dropdown, title filter (300ms debounce)
- URL hash: preserves query/mode/lang/category/drill state for bookmarking
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -12,8 +12,15 @@ try {
|
||||
|
||||
$category = isset($_GET['category']) && $_GET['category'] !== '' ? trim((string)$_GET['category']) : null;
|
||||
$sourceName = isset($_GET['source_name']) && $_GET['source_name'] !== '' ? trim((string)$_GET['source_name']) : null;
|
||||
$titleFilter = isset($_GET['title']) && $_GET['title'] !== '' ? trim((string)$_GET['title']) : null;
|
||||
$offset = max(0, (int)($_GET['offset'] ?? 0));
|
||||
$limit = max(1, min(50, (int)($_GET['limit'] ?? 20)));
|
||||
$orderBy = match($_GET['sort'] ?? 'newest') {
|
||||
'oldest' => 'd.updated_at ASC',
|
||||
'alpha' => 'd.title ASC',
|
||||
'chunks' => 'chunk_count DESC',
|
||||
default => 'd.updated_at DESC',
|
||||
};
|
||||
|
||||
// Build WHERE clause
|
||||
$where = ["d.corpus_id = 1", "d.status = 'ready'"];
|
||||
@@ -24,6 +31,11 @@ try {
|
||||
$params[] = $category;
|
||||
}
|
||||
|
||||
if ($titleFilter !== null) {
    // Substring match on the document title. The backslash is escaped first,
    // then the LIKE wildcards, so user input containing "\", "%" or "_" is
    // matched literally instead of altering the pattern.
    // NOTE(review): no ESCAPE clause is given, so this relies on the
    // database's default LIKE escape character being a backslash — confirm.
    $where[] = 'd.title LIKE ?';
    $params[] = '%' . str_replace(['\\', '%', '_'], ['\\\\', '\\%', '\\_'], $titleFilter) . '%';
}
|
||||
|
||||
if ($sourceName !== null) {
|
||||
// Filter by source via a JOIN to corpus_sources on category match
|
||||
// or by matching the scraper's URL pattern in source_url
|
||||
@@ -61,7 +73,7 @@ try {
|
||||
LEFT JOIN chunks c ON c.document_id = d.id
|
||||
WHERE $whereStr
|
||||
GROUP BY d.id
|
||||
ORDER BY d.updated_at DESC
|
||||
ORDER BY $orderBy
|
||||
LIMIT $limit OFFSET $offset"
|
||||
);
|
||||
$dataStmt->execute($params);
|
||||
@@ -82,6 +94,8 @@ try {
|
||||
'filter' => [
|
||||
'category' => $category,
|
||||
'source_name' => $sourceName,
|
||||
'title' => $titleFilter,
|
||||
'sort' => $_GET['sort'] ?? 'newest',
|
||||
],
|
||||
]);
|
||||
} catch (Throwable $e) {
|
||||
|
||||
@@ -29,6 +29,7 @@ try {
|
||||
'category' => $h['category'] ?? '',
|
||||
'section' => $h['section'] ?? null,
|
||||
'excerpt' => $h['excerpt'] ?? ($h['chunk_text'] ?? ''),
|
||||
'full_text' => $h['full_text'] ?? $h['chunk_text'] ?? $h['excerpt'] ?? '',
|
||||
'score' => $h['score'] ?? null,
|
||||
'document_id' => $h['document_id'] ?? null,
|
||||
'chunk_id' => $h['chunk_id'] ?? null,
|
||||
@@ -93,6 +94,7 @@ try {
|
||||
'category' => $r['category'] ?? '',
|
||||
'section' => $r['section'] ?? null,
|
||||
'excerpt' => mb_substr((string)($r['excerpt'] ?? ''), 0, 600, 'UTF-8'),
|
||||
'full_text' => (string)($r['excerpt'] ?? ''),
|
||||
'score' => isset($r['score']) ? round((float)$r['score'], 4) : null,
|
||||
'document_id' => (int)$r['document_id'],
|
||||
'chunk_id' => isset($r['chunk_id']) ? (int)$r['chunk_id'] : null,
|
||||
@@ -148,6 +150,7 @@ try {
|
||||
'category' => $p['category'] ?? '',
|
||||
'section' => $p['section_title'] ?? null,
|
||||
'excerpt' => mb_substr((string)($p['content'] ?? ''), 0, 600, 'UTF-8'),
|
||||
'full_text' => (string)($p['content'] ?? ''),
|
||||
'score' => round((float)($pt['score'] ?? 0), 4),
|
||||
'document_id' => isset($p['document_id']) ? (int)$p['document_id'] : null,
|
||||
'chunk_id' => $pt['id'] ?? null,
|
||||
@@ -234,6 +237,7 @@ try {
|
||||
'category' => $d['category'] ?? '',
|
||||
'section' => $d['section_title'] ?? null,
|
||||
'excerpt' => mb_substr((string)($d['content'] ?? ''), 0, 600, 'UTF-8'),
|
||||
'full_text' => (string)($d['content'] ?? ''),
|
||||
'score' => round((float)($d['@search.rerankerScore'] ?? $d['@search.score'] ?? 0), 4),
|
||||
'document_id' => null,
|
||||
'chunk_id' => $d['chunk_id'] ?? $d['id'] ?? null,
|
||||
|
||||
+189
-41
@@ -78,6 +78,18 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
<button class="mode-pill" data-lang="pl" type="button">PL</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="search-cats" role="group" aria-label="Category filter" id="searchCatPills">
|
||||
<button class="mode-pill is-active" data-cat="" type="button">All</button>
|
||||
<button class="mode-pill" data-cat="family-law" type="button">Family Law</button>
|
||||
<button class="mode-pill" data-cat="child-welfare" type="button">Child Welfare</button>
|
||||
<button class="mode-pill" data-cat="labour-law" type="button">Labour Law</button>
|
||||
<button class="mode-pill" data-cat="social-welfare" type="button">Social Welfare</button>
|
||||
<button class="mode-pill" data-cat="tax-law" type="button">Tax Law</button>
|
||||
<button class="mode-pill" data-cat="administrative-law" type="button">Administrative</button>
|
||||
<button class="mode-pill" data-cat="consumer-law" type="button">Consumer</button>
|
||||
<button class="mode-pill" data-cat="immigration-law" type="button">Immigration</button>
|
||||
<button class="mode-pill" data-cat="government-documents" type="button">Gov Docs</button>
|
||||
</div>
|
||||
</div>
|
||||
<div id="corpusSearchResults" class="corpus-search-results" hidden></div>
|
||||
|
||||
@@ -178,6 +190,19 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
</div>
|
||||
<button class="drill-close-btn" id="drillCloseBtn" type="button" aria-label="Close">✕</button>
|
||||
</div>
|
||||
<div class="drill-controls" id="drillControls">
|
||||
<span class="drill-count" id="drillCount"></span>
|
||||
<div class="drill-controls-right">
|
||||
<input type="search" id="drillSearchInput" class="drill-search-input"
|
||||
placeholder="Filter by title…" autocomplete="off">
|
||||
<select id="drillSortSelect" class="drill-sort-select">
|
||||
<option value="newest">Newest first</option>
|
||||
<option value="oldest">Oldest first</option>
|
||||
<option value="alpha">A–Z</option>
|
||||
<option value="chunks">Most passages</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div id="drillDocList" class="doc-list"></div>
|
||||
<div class="doc-list__more-wrap" id="drillMoreWrap" hidden>
|
||||
<button class="doc-list__more" id="drillMoreBtn" type="button">Load more</button>
|
||||
@@ -556,45 +581,58 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
}
|
||||
|
||||
// ── Category drill-down ───────────────────────────────────────────────────
|
||||
// Paging, sort and title-filter state for the drill panel. The object is
// replaced wholesale whenever a drill is opened.
let drillState = { category: null, sourceName: null, offset: 0, total: 0, limit: 20, sort: 'newest', titleFilter: '' };

// Drill panel elements, looked up once by id.
const drillPanel = document.getElementById('corpusDrillPanel');
const drillDocList = document.getElementById('drillDocList');
const drillTitle = document.getElementById('drillTitle');
const drillEyebrow = document.getElementById('drillEyebrow');
const drillMoreWrap = document.getElementById('drillMoreWrap');
const drillMoreBtn = document.getElementById('drillMoreBtn');
const drillCloseBtn = document.getElementById('drillCloseBtn');
const drillCount = document.getElementById('drillCount');
const drillSortSelect = document.getElementById('drillSortSelect');
const drillSearchInput = document.getElementById('drillSearchInput');

// Each category browse button opens the drill panel for its data-cat value.
document.querySelectorAll('.cat-browse-btn').forEach((btn) => {
  btn.addEventListener('click', () => openDrillByCategory(btn.dataset.cat));
});
|
||||
|
||||
/**
 * Reset the drill panel for a new listing and request its first page.
 *
 * Shared by openDrillByCategory and openDrillBySource so both entry points
 * reset exactly the same state: paging, sort, title filter, the count label
 * and the sort/filter controls.
 *
 * @param {string} eyebrow - Small label shown above the title.
 * @param {string} title - Heading text for the panel.
 * @param {?string} category - Category slug to list, or null.
 * @param {?string} sourceName - Source name to list, or null.
 */
function resetAndLoadDrill(eyebrow, title, category, sourceName) {
  drillState = { category, sourceName, offset: 0, total: 0, limit: 20, sort: 'newest', titleFilter: '' };
  drillEyebrow.textContent = eyebrow;
  drillTitle.textContent = title;
  drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';
  drillMoreWrap.hidden = true;
  // The count, sort and filter controls are guarded, matching the null checks
  // used where these elements are wired up and updated.
  if (drillCount) drillCount.textContent = '';
  if (drillSortSelect) drillSortSelect.value = 'newest';
  if (drillSearchInput) drillSearchInput.value = '';
  drillPanel.hidden = false;
  drillPanel.scrollIntoView({ behavior: 'smooth', block: 'start' });
  // Record the open drill in the URL hash before the request goes out.
  pushHash();
  fetchDrillPage(false);
}

/**
 * Open the drill panel listing documents of one category.
 *
 * @param {string} cat - Category slug; its display label comes from catLabels,
 *   falling back to the slug itself.
 */
function openDrillByCategory(cat) {
  resetAndLoadDrill('Category', catLabels[cat] || cat, cat, null);
}

/**
 * Open the drill panel listing documents of one source.
 *
 * @param {string} sourceName - Source name, also used as the panel title.
 */
function openDrillBySource(sourceName) {
  resetAndLoadDrill('Source', sourceName, null, sourceName);
}
|
||||
|
||||
function fetchDrillPage(append) {
|
||||
const qs = new URLSearchParams({ offset: drillState.offset, limit: drillState.limit });
|
||||
if (drillState.category) qs.set('category', drillState.category);
|
||||
if (drillState.sourceName) qs.set('source_name', drillState.sourceName);
|
||||
if (drillState.category) qs.set('category', drillState.category);
|
||||
if (drillState.sourceName) qs.set('source_name', drillState.sourceName);
|
||||
if (drillState.sort) qs.set('sort', drillState.sort);
|
||||
if (drillState.titleFilter) qs.set('title', drillState.titleFilter);
|
||||
|
||||
fetch('/api/corpus-documents.php?' + qs, { credentials: 'same-origin' })
|
||||
.then(r => r.json())
|
||||
@@ -637,6 +675,11 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
const loaded = drillState.offset + docs.length;
|
||||
drillMoreWrap.hidden = loaded >= drillState.total;
|
||||
drillState.offset = loaded;
|
||||
if (drillCount) {
|
||||
drillCount.textContent = drillState.total > 0
|
||||
? 'Showing ' + fmt(loaded) + ' of ' + fmt(drillState.total) + ' documents'
|
||||
: '';
|
||||
}
|
||||
})
|
||||
.catch(() => {
|
||||
if (!append) drillDocList.innerHTML = '<p class="drill-error">Network error.</p>';
|
||||
@@ -644,17 +687,43 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
}
|
||||
|
||||
// "Load more" appends the next page to the current listing.
drillMoreBtn.addEventListener('click', () => fetchDrillPage(true));

// Closing the panel also refreshes the hash; pushHash only records a drill
// while the panel is visible, so the drill entry is dropped from the URL.
drillCloseBtn.addEventListener('click', () => {
  drillPanel.hidden = true;
  pushHash();
});

// Handle of the pending title-filter debounce, or null when none is pending.
let drillFilterTimer = null;

/**
 * Restart the current drill listing from its first page.
 *
 * Cancels any pending filter debounce and applies the filter box's current
 * text straight away, so a sort change made while the user is still typing
 * produces one request instead of two overlapping ones.
 */
function reloadDrillFromStart() {
  clearTimeout(drillFilterTimer);
  drillFilterTimer = null;
  if (drillSearchInput) drillState.titleFilter = drillSearchInput.value.trim();
  drillState.offset = 0;
  drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';
  if (drillCount) drillCount.textContent = '';
  fetchDrillPage(false);
}

if (drillSortSelect) {
  drillSortSelect.addEventListener('change', () => {
    drillState.sort = drillSortSelect.value;
    reloadDrillFromStart();
  });
}

if (drillSearchInput) {
  drillSearchInput.addEventListener('input', () => {
    // Wait for 300 ms without typing before re-querying.
    clearTimeout(drillFilterTimer);
    drillFilterTimer = setTimeout(reloadDrillFromStart, 300);
  });
}
|
||||
|
||||
// ── Search bar ────────────────────────────────────────────────────────────
|
||||
// Current search options. pushHash() leaves each one out of the URL hash
// while it still holds the value assigned here.
let searchMode = 'hybrid';
let searchLang = 'en';
let searchCat = '';

// Search-mode pills: clicking one makes it the only active pill, stores its
// data-mode value and refreshes the URL hash.
document.querySelectorAll('.search-modes .mode-pill').forEach((btn) => {
  btn.addEventListener('click', () => {
    document.querySelectorAll('.search-modes .mode-pill').forEach((other) => {
      other.classList.remove('is-active');
    });
    btn.classList.add('is-active');
    searchMode = btn.dataset.mode;
    pushHash();
  });
});
|
||||
|
||||
@@ -663,6 +732,16 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
document.querySelectorAll('.lang-pills .mode-pill').forEach(b => b.classList.remove('is-active'));
|
||||
btn.classList.add('is-active');
|
||||
searchLang = btn.dataset.lang;
|
||||
pushHash();
|
||||
});
|
||||
});
|
||||
|
||||
// Category pills: clicking one makes it the only active pill, stores its
// data-cat value (an empty string for "All") and refreshes the URL hash.
document.querySelectorAll('#searchCatPills .mode-pill').forEach((btn) => {
  btn.addEventListener('click', () => {
    document.querySelectorAll('#searchCatPills .mode-pill').forEach((other) => {
      other.classList.remove('is-active');
    });
    btn.classList.add('is-active');
    searchCat = btn.dataset.cat;
    pushHash();
  });
});
|
||||
|
||||
@@ -678,6 +757,7 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
return;
|
||||
}
|
||||
|
||||
pushHash();
|
||||
searchResults.hidden = false;
|
||||
searchResults.innerHTML = `<p class="search-loading">Searching in <strong>${esc(searchMode)}</strong> mode…</p>`;
|
||||
searchBtn.disabled = true;
|
||||
@@ -686,7 +766,7 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
method: 'POST',
|
||||
credentials: 'same-origin',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ query: q, mode: searchMode, language: searchLang, limit: 8 }),
|
||||
body: JSON.stringify({ query: q, mode: searchMode, language: searchLang, limit: 8, category: searchCat || null }),
|
||||
})
|
||||
.then(r => r.json())
|
||||
.then(data => {
|
||||
@@ -702,29 +782,50 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
}
|
||||
|
||||
const modeLabel = { hybrid: 'Hybrid RAG', bm25: 'BM25 keyword', vector: 'Vector semantic' }[data.mode] || data.mode;
|
||||
let html = `<div class="search-results-header"><span class="eyebrow">${esc(modeLabel)}</span><span class="search-results-count">${hits.length} passage${hits.length !== 1 ? 's' : ''}</span></div>`;
|
||||
const header = document.createElement('div');
|
||||
header.className = 'search-results-header';
|
||||
header.innerHTML = `<span class="eyebrow">${esc(modeLabel)}</span><span class="search-results-count">${hits.length} passage${hits.length !== 1 ? 's' : ''}</span>`;
|
||||
searchResults.innerHTML = '';
|
||||
searchResults.appendChild(header);
|
||||
|
||||
hits.forEach(hit => {
|
||||
const score = hit.score != null ? `<span class="passage-score">${Math.round(hit.score * 100)}%</span>` : '';
|
||||
const catAuth = authorityLabels[hit.category] || { label: hit.category || '—', cls: 'badge--muted' };
|
||||
const score = hit.score != null ? `<span class="passage-score">${Math.round(hit.score * 100)}%</span>` : '';
|
||||
const catAuth = authorityLabels[hit.category] || { label: hit.category || '—', cls: 'badge--muted' };
|
||||
const titleHtml = hit.source_url
|
||||
? `<a href="${esc(hit.source_url)}" target="_blank" rel="noopener" class="passage-card__title">${esc(hit.title || '(Untitled)')}</a>`
|
||||
: `<span class="passage-card__title">${esc(hit.title || '(Untitled)')}</span>`;
|
||||
const section = hit.section ? `<span class="passage-section">§ ${esc(hit.section)}</span>` : '';
|
||||
const excerpt = highlight(hit.excerpt || '', q);
|
||||
html += `
|
||||
<div class="passage-card">
|
||||
<div class="passage-card__meta">
|
||||
<span class="source-badge ${esc(catAuth.cls)}">${esc(catAuth.label)}</span>
|
||||
${section}
|
||||
${score}
|
||||
</div>
|
||||
${titleHtml}
|
||||
<p class="passage-card__excerpt">${excerpt}</p>
|
||||
</div>`;
|
||||
const section = hit.section ? `<span class="passage-section">§ ${esc(hit.section)}</span>` : '';
|
||||
const excerpt = highlight(hit.excerpt || '', q);
|
||||
const fullText = (hit.full_text || '').trim();
|
||||
const hasMore = fullText.length > (hit.excerpt || '').length;
|
||||
|
||||
const card = document.createElement('div');
|
||||
card.className = 'passage-card';
|
||||
card.innerHTML = `
|
||||
<div class="passage-card__meta">
|
||||
<span class="source-badge ${esc(catAuth.cls)}">${esc(catAuth.label)}</span>
|
||||
${section}
|
||||
${score}
|
||||
</div>
|
||||
${titleHtml}
|
||||
<p class="passage-card__excerpt">${excerpt}</p>
|
||||
${hasMore ? '<button class="passage-expand-btn" type="button" aria-expanded="false">Show full passage</button><div class="passage-full-text" hidden></div>' : ''}`;
|
||||
|
||||
if (hasMore) {
|
||||
const expandBtn = card.querySelector('.passage-expand-btn');
|
||||
const fullDiv = card.querySelector('.passage-full-text');
|
||||
fullDiv.innerHTML = esc(fullText).replace(/\n/g, '<br>');
|
||||
expandBtn.addEventListener('click', function () {
|
||||
const isOpen = !fullDiv.hidden;
|
||||
fullDiv.hidden = isOpen;
|
||||
this.textContent = isOpen ? 'Show full passage' : 'Hide passage';
|
||||
this.setAttribute('aria-expanded', String(!isOpen));
|
||||
});
|
||||
}
|
||||
searchResults.appendChild(card);
|
||||
});
|
||||
searchResults.innerHTML = html;
|
||||
})
|
||||
.catch(err => {
|
||||
.catch(() => {
|
||||
searchBtn.disabled = false;
|
||||
searchResults.innerHTML = `<p class="search-error">Network error.</p>`;
|
||||
});
|
||||
@@ -732,6 +833,53 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
|
||||
searchBtn.addEventListener('click', runSearch);
|
||||
searchInput.addEventListener('keydown', e => { if (e.key === 'Enter') runSearch(); });
|
||||
|
||||
// ── URL hash state ────────────────────────────────────────────────────────
|
||||
/**
 * Write the current search options and any open drill into the URL hash.
 *
 * Keys written: `q` (trimmed search text), `mode`, `lang`, `cat`, and — only
 * while the drill panel is visible — `drill` (category) or `drillsrc`
 * (source name). `mode` and `lang` are left out when they are 'hybrid' and
 * 'en'; `q` and `cat` are left out when empty. Uses history.replaceState, so
 * no new history entry is created. With nothing to record, the URL is set
 * back to the bare path plus query string.
 */
function pushHash() {
  const params = new URLSearchParams();
  const query = searchInput ? searchInput.value.trim() : '';

  if (query) params.set('q', query);
  if (searchMode !== 'hybrid') params.set('mode', searchMode);
  if (searchLang !== 'en') params.set('lang', searchLang);
  if (searchCat) params.set('cat', searchCat);

  if (drillPanel && !drillPanel.hidden) {
    if (drillState.category) params.set('drill', drillState.category);
    if (drillState.sourceName) params.set('drillsrc', drillState.sourceName);
  }

  const hash = params.toString();
  history.replaceState(null, '', hash ? `#${hash}` : location.pathname + location.search);
}
|
||||
|
||||
/**
 * Mark the pill whose data attribute equals `value` as active and clear the
 * `is-active` class from every other pill in the group.
 *
 * @param {string} group - CSS selector matching all pills of the group.
 * @param {string} attr - dataset key to compare (e.g. 'mode', 'lang', 'cat').
 * @param {string} value - Value the active pill must carry.
 * @returns {boolean} true when at least one pill matched; when false, no pill
 *   in the group is left active.
 */
function activatePill(group, attr, value) {
  let matched = false;
  document.querySelectorAll(group).forEach((pill) => {
    const isMatch = pill.dataset[attr] === value;
    pill.classList.toggle('is-active', isMatch);
    if (isMatch) matched = true;
  });
  return matched;
}
|
||||
|
||||
/**
 * Restore search options, an open drill and the search query from the URL
 * hash (the format written by pushHash).
 *
 * The hash can be edited by hand, so `mode`, `lang` and `cat` are accepted
 * only when a pill carrying that value exists; unknown values leave the
 * current selection untouched. When both `drill` and `drillsrc` are present,
 * only the source drill is opened — that is the panel that ended up showing
 * when both were opened in turn, and it avoids two overlapping document
 * requests.
 */
function restoreHash() {
  if (!location.hash) return;
  const params = new URLSearchParams(location.hash.slice(1));

  // True when some pill in `group` has data-<attr> equal to `value`.
  const pillExists = (group, attr, value) =>
    Array.from(document.querySelectorAll(group)).some((pill) => pill.dataset[attr] === value);

  const mode = params.get('mode');
  if (mode !== null && pillExists('.search-modes .mode-pill', 'mode', mode)) {
    searchMode = mode;
    activatePill('.search-modes .mode-pill', 'mode', mode);
  }

  const lang = params.get('lang');
  if (lang !== null && pillExists('.lang-pills .mode-pill', 'lang', lang)) {
    searchLang = lang;
    activatePill('.lang-pills .mode-pill', 'lang', lang);
  }

  const cat = params.get('cat');
  if (cat !== null && pillExists('#searchCatPills .mode-pill', 'cat', cat)) {
    searchCat = cat;
    activatePill('#searchCatPills .mode-pill', 'cat', cat);
  }

  // Put the query text back before opening a drill: opening a drill rewrites
  // the hash from the input's current value, which would otherwise drop `q`.
  const hasQuery = params.has('q') && Boolean(searchInput);
  if (hasQuery) searchInput.value = params.get('q');

  const drillSource = params.get('drillsrc');
  const drillCategory = params.get('drill');
  if (drillSource) {
    openDrillBySource(drillSource);
  } else if (drillCategory) {
    openDrillByCategory(drillCategory);
  }

  if (hasQuery) runSearch();
}
|
||||
|
||||
restoreHash();
|
||||
})();
|
||||
</script>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user