Add corpus explorer: search bar (Hybrid/BM25/Vector), category drill-down, source row expand

- api/corpus-search.php: new endpoint with three search modes (hybrid RAG, BM25 keyword, Qdrant vector)
- api/corpus-documents.php: paginated document browser by category or source name
- corpus.php: search bar with mode+language pills, Browse docs button on each category card with drill-down panel, expand toggle on each source row showing doc count and scraper class
- tools.css: all new corpus interactive styles appended

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-15 11:55:54 +02:00
parent 785de04f05
commit 38255669a9
4 changed files with 962 additions and 42 deletions
+92
View File
@@ -0,0 +1,92 @@
<?php
declare(strict_types=1);
require_once __DIR__ . '/../includes/bootstrap.php';
dbnToolsRequireMethod('GET');
dbnToolsRequireAuth();
try {
    $ragDb = dbnToolsRagDb();
    $bnlDb = dbnToolsDb();

    // Reads an optional query-string filter. Anything that is not a non-blank
    // string (missing, array-valued, whitespace only) counts as "no filter".
    $readFilter = static function (string $key): ?string {
        $raw = $_GET[$key] ?? null;
        if (!is_string($raw)) {
            return null;
        }
        $value = trim($raw);
        return $value === '' ? null : $value;
    };

    $category   = $readFilter('category');
    $sourceName = $readFilter('source_name');
    $offset     = max(0, (int)($_GET['offset'] ?? 0));
    $limit      = max(1, min(50, (int)($_GET['limit'] ?? 20)));

    // Build WHERE clause
    $where  = ["d.corpus_id = 1", "d.status = 'ready'"];
    $params = [];

    if ($category !== null) {
        $where[]  = 'd.category = ?';
        $params[] = $category;
    }

    if ($sourceName !== null) {
        // corpus_sources lives in a different database than documents, so a JOIN
        // is not possible. Instead: look up the source URL, then keep documents
        // whose source_url contains that URL's host.
        $srcStmt = $bnlDb->prepare(
            "SELECT url FROM corpus_sources WHERE corpus_id = 1 AND name = ? LIMIT 1"
        );
        $srcStmt->execute([$sourceName]);
        $srcRow = $srcStmt->fetch(PDO::FETCH_ASSOC);

        $host = '';
        if ($srcRow && !empty($srcRow['url'])) {
            // parse_url() yields null (no host) or false (malformed URL); both mean "no host".
            $host = (string)(parse_url((string)$srcRow['url'], PHP_URL_HOST) ?: '');
        }

        if ($host !== '') {
            // Escape LIKE metacharacters so the host is matched literally.
            $likeHost = str_replace(['\\', '%', '_'], ['\\\\', '\\%', '\\_'], $host);
            $where[]  = 'd.source_url LIKE ?';
            $params[] = '%' . $likeHost . '%';
        } else {
            // A source filter was requested but cannot be resolved. Match nothing
            // rather than silently dropping the filter and returning the whole corpus.
            $where[] = '1 = 0';
        }
    }

    $whereStr = implode(' AND ', $where);

    // Total count
    $countStmt = $ragDb->prepare("SELECT COUNT(*) FROM documents d WHERE $whereStr");
    $countStmt->execute($params);
    $total = (int)$countStmt->fetchColumn();

    // Paginated rows. d.id is a tiebreaker so rows sharing an updated_at value
    // keep a stable order across pages.
    $dataStmt = $ragDb->prepare(
        "SELECT d.id, d.title, d.category, d.source_url, d.language, d.updated_at,
                COUNT(c.id) AS chunk_count
         FROM documents d
         LEFT JOIN chunks c ON c.document_id = d.id
         WHERE $whereStr
         GROUP BY d.id
         ORDER BY d.updated_at DESC, d.id DESC
         LIMIT ? OFFSET ?"
    );

    // execute($array) binds every value as a string; with emulated prepares that
    // renders LIMIT '20', which MySQL rejects. Bind the paging values as integers.
    $position = 1;
    foreach ($params as $value) {
        $dataStmt->bindValue($position++, $value, PDO::PARAM_STR);
    }
    $dataStmt->bindValue($position++, $limit, PDO::PARAM_INT);
    $dataStmt->bindValue($position, $offset, PDO::PARAM_INT);
    $dataStmt->execute();
    $documents = $dataStmt->fetchAll(PDO::FETCH_ASSOC);

    // Normalise chunk_count to int
    foreach ($documents as &$doc) {
        $doc['chunk_count'] = (int)$doc['chunk_count'];
    }
    unset($doc);

    dbnToolsRespond([
        'ok'        => true,
        'documents' => $documents,
        'total'     => $total,
        'offset'    => $offset,
        'limit'     => $limit,
        'filter'    => [
            'category'    => $category,
            'source_name' => $sourceName,
        ],
    ]);
} catch (Throwable $e) {
    dbnToolsError('Could not load documents: ' . $e->getMessage(), 500, 'documents_error');
}
+163
View File
@@ -0,0 +1,163 @@
<?php
declare(strict_types=1);
require_once __DIR__ . '/../includes/LegalTools.php';
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();
// Parse and clamp the JSON request body.
$input    = dbnToolsJsonInput(4000);
$query    = trim(dbnToolsString($input, 'query', 1000));
$rawMode  = $input['mode'] ?? 'hybrid';
// Unknown or non-string modes fall back to hybrid.
$mode     = in_array($rawMode, ['hybrid', 'bm25', 'vector'], true) ? $rawMode : 'hybrid';
$language = dbnToolsNormalizeLanguage($input['language'] ?? 'en');
$limit    = max(1, min(20, (int)($input['limit'] ?? 8)));

// Optional category filter: only scalar, non-blank values count. Arrays/objects
// and whitespace-only strings are treated as "no filter".
$rawCategory  = $input['category'] ?? null;
$trimCategory = is_scalar($rawCategory) ? trim((string)$rawCategory) : '';
$category     = $trimCategory !== '' ? $trimCategory : null;

if (mb_strlen($query, 'UTF-8') < 3) {
    dbnToolsError('Query must be at least 3 characters.', 422, 'query_too_short');
}
try {
// ── HYBRID: delegate to the existing RAG pipeline ──────────────────────
if ($mode === 'hybrid') {
    // Hybrid mode hands the query to DbnLegalToolsService::search() and only
    // reshapes the returned hits into the response format used by this endpoint.
    $service = new DbnLegalToolsService();
    $result  = $service->search($query, $language, $limit, 'disabled', null);

    $toHit = static function ($hit) {
        return [
            'title'       => $hit['title'] ?? '',
            'category'    => $hit['category'] ?? '',
            'section'     => $hit['section'] ?? null,
            // Prefer the prepared excerpt; otherwise use the chunk text.
            'excerpt'     => $hit['excerpt'] ?? $hit['chunk_text'] ?? '',
            'score'       => $hit['score'] ?? null,
            'document_id' => $hit['document_id'] ?? null,
            'chunk_id'    => $hit['chunk_id'] ?? null,
            'source_url'  => $hit['source_url'] ?? null,
            // This branch always reports language as null.
            'language'    => null,
        ];
    };

    $hits = array_map($toHit, $result['hits'] ?? []);

    dbnToolsRespond([
        'ok'    => true,
        'hits'  => $hits,
        'mode'  => 'hybrid',
        'query' => $query,
    ]);
}
$ragDb = dbnToolsRagDb();
// ── BM25: FULLTEXT with LIKE fallback ───────────────────────────────────
if ($mode === 'bm25') {
    $catClause = $category !== null ? ' AND d.category = ?' : '';

    // Runs a positional-parameter query and returns all rows.
    // Integers are bound as PDO::PARAM_INT: execute($array) would bind them as
    // strings, and MySQL rejects a quoted LIMIT value under emulated prepares.
    $fetchRows = static function (string $sql, array $values) use ($ragDb): array {
        $stmt = $ragDb->prepare($sql);
        foreach (array_values($values) as $index => $value) {
            $stmt->bindValue($index + 1, $value, is_int($value) ? PDO::PARAM_INT : PDO::PARAM_STR);
        }
        $stmt->execute();
        return $stmt->fetchAll(PDO::FETCH_ASSOC);
    };

    // Try FULLTEXT index first
    try {
        $params = [$query, 1, $query];
        if ($category !== null) {
            $params[] = $category;
        }
        $params[] = $limit;

        $rows = $fetchRows(
            "SELECT d.id AS document_id, d.title, d.category,
                    d.source_url, c.id AS chunk_id, c.content AS excerpt,
                    c.section_title AS section, d.language,
                    MATCH(c.content) AGAINST (? IN BOOLEAN MODE) AS score
             FROM chunks c
             JOIN documents d ON c.document_id = d.id
             WHERE d.corpus_id = ? AND d.status = 'ready'
               AND MATCH(c.content) AGAINST (? IN BOOLEAN MODE) > 0
               $catClause
             ORDER BY score DESC
             LIMIT ?",
            $params
        );
    } catch (Throwable $e) {
        // Any FULLTEXT failure (e.g. the index is absent) falls back to a
        // substring search. Escape the backslash first, then the LIKE wildcards,
        // so the query text is matched literally.
        $like = '%' . str_replace(['\\', '%', '_'], ['\\\\', '\\%', '\\_'], $query) . '%';

        $params = [1, $like, $like];
        if ($category !== null) {
            $params[] = $category;
        }
        $params[] = $like;
        $params[] = $limit;

        $rows = $fetchRows(
            "SELECT d.id AS document_id, d.title, d.category,
                    d.source_url, c.id AS chunk_id, c.content AS excerpt,
                    c.section_title AS section, d.language,
                    0.25 AS score
             FROM chunks c
             JOIN documents d ON c.document_id = d.id
             WHERE d.corpus_id = ? AND d.status = 'ready'
               AND (c.content LIKE ? OR d.title LIKE ?)
               $catClause
             ORDER BY (d.title LIKE ?) DESC
             LIMIT ?",
            $params
        );
    }

    // Shape rows for the client: excerpts are cut to 600 characters and
    // numeric columns are cast to native types.
    $hits = array_map(static fn($r) => [
        'title'       => $r['title'] ?? '',
        'category'    => $r['category'] ?? '',
        'section'     => $r['section'] ?? null,
        'excerpt'     => mb_substr((string)($r['excerpt'] ?? ''), 0, 600, 'UTF-8'),
        'score'       => isset($r['score']) ? round((float)$r['score'], 4) : null,
        'document_id' => (int)$r['document_id'],
        'chunk_id'    => isset($r['chunk_id']) ? (int)$r['chunk_id'] : null,
        'source_url'  => $r['source_url'] ?? null,
        'language'    => $r['language'] ?? null,
    ], $rows);

    dbnToolsRespond(['ok' => true, 'hits' => $hits, 'mode' => 'bm25', 'query' => $query]);
}
// ── VECTOR: embed → Qdrant ─────────────────────────────────────────────
if ($mode === 'vector') {
    // Embed the query; the first (only) embedding is the search vector.
    $embeddings = dbnToolsLiteLLMEmbedBatch([$query]);
    if (empty($embeddings) || !is_array($embeddings[0] ?? null)) {
        dbnToolsError('Embedding failed — vector search unavailable.', 502, 'embed_error');
    }

    $filter = ['must' => [['key' => 'corpus_id', 'match' => ['value' => 1]]]];
    if ($category !== null) {
        $filter['must'][] = ['key' => 'category', 'match' => ['value' => $category]];
    }

    // JSON_THROW_ON_ERROR: an unencodable vector must surface as an error
    // instead of posting the literal body "false" to Qdrant.
    $qdrantPayload = json_encode([
        'vector'       => $embeddings[0],
        'limit'        => $limit,
        'with_payload' => true,
        'filter'       => $filter,
    ], JSON_THROW_ON_ERROR);

    $ch = curl_init('http://10.0.2.10:6333/collections/bnl_chunks/points/search');
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $qdrantPayload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_TIMEOUT        => 15,
    ]);
    $resp    = curl_exec($ch);
    $curlErr = curl_error($ch);
    $status  = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
    curl_close($ch);

    if ($resp === false) {
        dbnToolsError('Qdrant unreachable: ' . $curlErr, 502, 'qdrant_error');
    }

    // A non-2xx status or a body without a "result" list is a backend failure.
    // Report it rather than answering "ok" with zero hits.
    $qdrantResult = json_decode($resp, true);
    $points       = is_array($qdrantResult) ? ($qdrantResult['result'] ?? null) : null;
    if ($status < 200 || $status >= 300 || !is_array($points)) {
        dbnToolsError('Qdrant search failed (HTTP ' . $status . ').', 502, 'qdrant_error');
    }

    $hits = [];
    foreach ($points as $pt) {
        $p = $pt['payload'] ?? [];
        $hits[] = [
            'title'       => $p['title'] ?? $p['document_title'] ?? '',
            'category'    => $p['category'] ?? '',
            'section'     => $p['section_title'] ?? null,
            'excerpt'     => mb_substr((string)($p['content'] ?? ''), 0, 600, 'UTF-8'),
            'score'       => round((float)($pt['score'] ?? 0), 4),
            'document_id' => isset($p['document_id']) ? (int)$p['document_id'] : null,
            // The Qdrant point id is reported as the chunk id.
            'chunk_id'    => $pt['id'] ?? null,
            'source_url'  => $p['source_url'] ?? null,
            'language'    => $p['language'] ?? null,
        ];
    }

    dbnToolsRespond(['ok' => true, 'hits' => $hits, 'mode' => 'vector', 'query' => $query]);
}
dbnToolsError('Unknown search mode.', 422, 'invalid_mode');
} catch (DbnToolsHttpException $e) {
throw $e;
} catch (Throwable $e) {
dbnToolsError('Corpus search failed: ' . $e->getMessage(), 500, 'search_error');
}
+344
View File
@@ -2708,3 +2708,347 @@ a.dr-source-title-link:hover {
color: var(--teal-dark); color: var(--teal-dark);
text-decoration: underline; text-decoration: underline;
} }
/* ── Corpus Explorer: Search bar ─────────────────────────────────────── */
.corpus-search-box {
padding: 0 0 24px;
border-bottom: 1px solid var(--line);
margin-bottom: 32px;
}
.corpus-search-row {
display: flex;
gap: 8px;
align-items: center;
}
.corpus-search-input {
flex: 1;
height: 40px;
padding: 0 12px;
border: 1px solid var(--line);
border-radius: 6px;
font-size: 0.9rem;
background: var(--panel);
color: var(--ink);
outline: none;
transition: border-color 0.15s;
}
.corpus-search-input:focus { border-color: var(--teal); }
.corpus-search-controls {
display: flex;
align-items: center;
justify-content: space-between;
margin-top: 10px;
gap: 12px;
flex-wrap: wrap;
}
.search-modes,
.lang-pills {
display: flex;
gap: 4px;
}
.mode-pill {
padding: 3px 12px;
border: 1px solid var(--line);
border-radius: 999px;
background: transparent;
color: var(--muted);
font-size: 0.8rem;
cursor: pointer;
transition: background 0.12s, color 0.12s, border-color 0.12s;
}
.mode-pill:hover { border-color: var(--teal); color: var(--teal); }
.mode-pill.is-active {
background: var(--teal);
border-color: var(--teal);
color: #fff;
}
/* ── Search results ───────────────────────────────────────────────────── */
.corpus-search-results {
margin: 0 0 32px;
}
.search-results-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 12px;
}
.search-results-count {
font-size: 0.8rem;
color: var(--muted);
}
.search-loading,
.search-empty,
.search-error,
.search-hint {
font-size: 0.85rem;
color: var(--muted);
padding: 12px 0;
}
.search-error { color: var(--coral); }
.passage-card {
background: var(--panel);
border: 1px solid var(--line);
border-left: 3px solid var(--teal);
border-radius: 6px;
padding: 14px 16px;
margin-bottom: 10px;
}
.passage-card__meta {
display: flex;
align-items: center;
gap: 8px;
margin-bottom: 6px;
flex-wrap: wrap;
}
.passage-section {
font-size: 0.78rem;
color: var(--muted);
font-style: italic;
}
.passage-score {
font-size: 0.75rem;
background: var(--soft-teal);
color: var(--teal-dark);
padding: 1px 7px;
border-radius: 999px;
margin-left: auto;
}
.passage-card__title {
display: block;
font-size: 0.88rem;
font-weight: 600;
color: var(--ink);
text-decoration: none;
margin-bottom: 6px;
}
.passage-card__title:hover { color: var(--teal); text-decoration: underline; }
/* Excerpt text, truncated to four lines with an ellipsis. */
.passage-card__excerpt {
  font-size: 0.82rem;
  color: var(--muted);
  line-height: 1.55;
  margin: 0;
  display: -webkit-box;
  -webkit-line-clamp: 4;
  /* Standard property alongside the prefixed one, for engines that implement it. */
  line-clamp: 4;
  -webkit-box-orient: vertical;
  overflow: hidden;
}
.passage-card mark {
background: var(--soft-teal);
color: var(--teal-dark);
border-radius: 2px;
padding: 0 1px;
}
/* ── Category card browse button ──────────────────────────────────────── */
.cat-browse-btn {
display: inline-block;
margin-top: 10px;
padding: 4px 12px;
background: var(--soft-teal);
border: 1px solid var(--teal);
border-radius: 5px;
color: var(--teal);
font-size: 0.8rem;
cursor: pointer;
transition: background 0.12s, color 0.12s;
}
.cat-browse-btn:hover {
background: var(--teal);
color: #fff;
}
/* ── Drill-down panel ─────────────────────────────────────────────────── */
.corpus-drill-panel {
margin-top: 20px;
background: var(--panel);
border: 1px solid var(--line);
border-top: 3px solid var(--teal);
border-radius: 0 0 8px 8px;
padding: 24px;
}
.drill-header {
display: flex;
justify-content: space-between;
align-items: flex-start;
margin-bottom: 20px;
}
.drill-header h3 {
margin: 4px 0 0;
font-size: 1.05rem;
}
.drill-close-btn {
background: transparent;
border: 1px solid var(--line);
border-radius: 50%;
width: 28px;
height: 28px;
cursor: pointer;
color: var(--muted);
font-size: 0.9rem;
display: flex;
align-items: center;
justify-content: center;
flex-shrink: 0;
}
.drill-close-btn:hover { border-color: var(--teal); color: var(--teal); }
.drill-loading,
.drill-empty,
.drill-error {
font-size: 0.85rem;
color: var(--muted);
padding: 8px 0;
}
.drill-error { color: var(--coral); }
.doc-list { display: flex; flex-direction: column; gap: 8px; }
.doc-list__item {
display: flex;
align-items: flex-start;
justify-content: space-between;
gap: 12px;
padding: 10px 12px;
background: var(--bg);
border: 1px solid var(--line);
border-radius: 5px;
}
.doc-list__item:hover { border-color: var(--teal); }
.doc-list__info { flex: 1; min-width: 0; }
.doc-list__title {
display: block;
font-size: 0.88rem;
font-weight: 500;
color: var(--ink);
text-decoration: none;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.doc-list__title:hover { color: var(--teal); text-decoration: underline; }
.doc-list__meta {
display: flex;
align-items: center;
gap: 8px;
margin-top: 4px;
flex-wrap: wrap;
}
.doc-list__date {
font-size: 0.78rem;
color: var(--muted);
}
.doc-list__chunks {
flex-shrink: 0;
font-size: 0.75rem;
background: var(--soft-teal);
color: var(--teal-dark);
padding: 2px 8px;
border-radius: 999px;
white-space: nowrap;
}
.doc-list__more-wrap {
text-align: center;
margin-top: 16px;
}
.doc-list__more {
padding: 7px 20px;
border: 1px solid var(--teal);
border-radius: 5px;
background: transparent;
color: var(--teal);
font-size: 0.85rem;
cursor: pointer;
transition: background 0.12s, color 0.12s;
}
.doc-list__more:hover { background: var(--teal); color: #fff; }
/* ── Sources table expand column ──────────────────────────────────────── */
.source-expand-cell { width: 32px; padding: 0 4px !important; text-align: center; }
.source-expand-btn {
background: transparent;
border: 1px solid var(--line);
border-radius: 4px;
width: 22px;
height: 22px;
font-size: 0.65rem;
cursor: pointer;
color: var(--muted);
display: inline-flex;
align-items: center;
justify-content: center;
transition: border-color 0.12s, color 0.12s;
}
.source-expand-btn:hover { border-color: var(--teal); color: var(--teal); }
.source-expand-row > td {
padding: 0 !important;
background: var(--soft-teal);
border-top: none;
}
.source-expand-inner {
padding: 16px 20px;
}
.source-expand-loading,
.source-expand-error {
font-size: 0.82rem;
color: var(--muted);
}
.source-expand-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 16px;
}
.source-expand-dl {
display: grid;
grid-template-columns: auto 1fr;
gap: 4px 12px;
font-size: 0.82rem;
margin: 0;
}
.source-expand-dl dt { color: var(--muted); white-space: nowrap; }
.source-expand-dl dd { margin: 0; }
.source-expand-url {
font-size: 0.78rem;
word-break: break-all;
margin: 0 0 12px;
}
.source-expand-url a { color: var(--teal); }
.source-browse-btn {
font-size: 0.82rem !important;
padding: 5px 14px !important;
}
@media (max-width: 760px) {
.source-expand-grid { grid-template-columns: 1fr; }
.corpus-search-controls { flex-direction: column; align-items: flex-start; }
}
+363 -42
View File
@@ -37,6 +37,7 @@ $reasoningPanelOverride = ob_get_clean();
require_once __DIR__ . '/includes/layout.php'; require_once __DIR__ . '/includes/layout.php';
?> ?>
<!-- STATS BAR -->
<div class="corpus-stats-bar" id="corpusStatsBar"> <div class="corpus-stats-bar" id="corpusStatsBar">
<div class="corpus-stat" id="statChunks"> <div class="corpus-stat" id="statChunks">
<span class="corpus-stat__value is-loading">—</span> <span class="corpus-stat__value is-loading">—</span>
@@ -56,6 +57,28 @@ require_once __DIR__ . '/includes/layout.php';
</div> </div>
</div> </div>
<!-- CORPUS SEARCH -->
<div class="corpus-search-box">
<div class="corpus-search-row">
<!-- aria-label gives the field an accessible name; the placeholder alone is not a label. -->
<input type="search" id="corpusSearchInput" class="corpus-search-input"
       aria-label="Search the corpus"
       placeholder="Search 220 K passages — try «samvær», «arbeidsgiver», «barnevernloven»…"
       autocomplete="off" spellcheck="false">
<button id="corpusSearchBtn" class="primary-button" type="button">Search</button>
</div>
<div class="corpus-search-controls">
<div class="search-modes" role="group" aria-label="Search mode">
<button class="mode-pill is-active" data-mode="hybrid" type="button">Hybrid</button>
<button class="mode-pill" data-mode="bm25" type="button">BM25</button>
<button class="mode-pill" data-mode="vector" type="button">Vector</button>
</div>
<div class="lang-pills" role="group" aria-label="Language">
<button class="mode-pill is-active" data-lang="en" type="button">EN</button>
<button class="mode-pill" data-lang="no" type="button">NO</button>
</div>
</div>
</div>
<div id="corpusSearchResults" class="corpus-search-results" hidden></div>
<!-- COVERAGE --> <!-- COVERAGE -->
<div class="corpus-section"> <div class="corpus-section">
<p class="eyebrow">Coverage</p> <p class="eyebrow">Coverage</p>
@@ -68,6 +91,7 @@ require_once __DIR__ . '/includes/layout.php';
</div> </div>
<h4>Family Law</h4> <h4>Family Law</h4>
<p>Barneloven, child custody (foreldreansvar), samvær, mediation (mekling), separation and divorce proceedings.</p> <p>Barneloven, child custody (foreldreansvar), samvær, mediation (mekling), separation and divorce proceedings.</p>
<button class="cat-browse-btn" data-cat="family-law" type="button">Browse docs →</button>
</div> </div>
<div class="category-card" data-category="child-welfare"> <div class="category-card" data-category="child-welfare">
<div class="category-card__top"> <div class="category-card__top">
@@ -76,6 +100,7 @@ require_once __DIR__ . '/includes/layout.php';
</div> </div>
<h4>Child Welfare</h4> <h4>Child Welfare</h4>
<p>Barnevernloven, omsorgsovertakelse, emergency care orders, foster placement, CPS (barnevernet) case law.</p> <p>Barnevernloven, omsorgsovertakelse, emergency care orders, foster placement, CPS (barnevernet) case law.</p>
<button class="cat-browse-btn" data-cat="child-welfare" type="button">Browse docs →</button>
</div> </div>
<div class="category-card" data-category="labour-law"> <div class="category-card" data-category="labour-law">
<div class="category-card__top"> <div class="category-card__top">
@@ -84,6 +109,7 @@ require_once __DIR__ . '/includes/layout.php';
</div> </div>
<h4>Labour Law</h4> <h4>Labour Law</h4>
<p>Arbeidsmiljøloven, collective agreements (tariffavtaler), Arbeidsretten rulings, dismissal, sick leave obligations.</p> <p>Arbeidsmiljøloven, collective agreements (tariffavtaler), Arbeidsretten rulings, dismissal, sick leave obligations.</p>
<button class="cat-browse-btn" data-cat="labour-law" type="button">Browse docs →</button>
</div> </div>
<div class="category-card" data-category="social-welfare"> <div class="category-card" data-category="social-welfare">
<div class="category-card__top"> <div class="category-card__top">
@@ -92,6 +118,7 @@ require_once __DIR__ . '/includes/layout.php';
</div> </div>
<h4>Social Welfare</h4> <h4>Social Welfare</h4>
<p>NAV guidance on sykepenger, dagpenger, AAP, uføretrygd, alderspensjon, yrkesskade and social assistance.</p> <p>NAV guidance on sykepenger, dagpenger, AAP, uføretrygd, alderspensjon, yrkesskade and social assistance.</p>
<button class="cat-browse-btn" data-cat="social-welfare" type="button">Browse docs →</button>
</div> </div>
<div class="category-card" data-category="tax-law"> <div class="category-card" data-category="tax-law">
<div class="category-card__top"> <div class="category-card__top">
@@ -100,6 +127,7 @@ require_once __DIR__ . '/includes/layout.php';
</div> </div>
<h4>Tax Law</h4> <h4>Tax Law</h4>
<p>Skatteetaten's Skatte-ABC, binding advance rulings (BFU), Skatteklagenemnda decisions, income and capital tax.</p> <p>Skatteetaten's Skatte-ABC, binding advance rulings (BFU), Skatteklagenemnda decisions, income and capital tax.</p>
<button class="cat-browse-btn" data-cat="tax-law" type="button">Browse docs →</button>
</div> </div>
<div class="category-card" data-category="administrative-law"> <div class="category-card" data-category="administrative-law">
<div class="category-card__top"> <div class="category-card__top">
@@ -108,6 +136,7 @@ require_once __DIR__ . '/includes/layout.php';
</div> </div>
<h4>Administrative Law</h4> <h4>Administrative Law</h4>
<p>Sivilombudet reports, Forvaltningsloven, procedural rights, official complaints, Stortinget oversight.</p> <p>Sivilombudet reports, Forvaltningsloven, procedural rights, official complaints, Stortinget oversight.</p>
<button class="cat-browse-btn" data-cat="administrative-law" type="button">Browse docs →</button>
</div> </div>
<div class="category-card" data-category="consumer-law"> <div class="category-card" data-category="consumer-law">
<div class="category-card__top"> <div class="category-card__top">
@@ -116,6 +145,7 @@ require_once __DIR__ . '/includes/layout.php';
</div> </div>
<h4>Consumer &amp; Housing</h4> <h4>Consumer &amp; Housing</h4>
<p>HTU (rental disputes), Finansklagenemnda, Forbrukertilsynet, Forbrukerrådet, Pakkereisenemnda decisions.</p> <p>HTU (rental disputes), Finansklagenemnda, Forbrukertilsynet, Forbrukerrådet, Pakkereisenemnda decisions.</p>
<button class="cat-browse-btn" data-cat="consumer-law" type="button">Browse docs →</button>
</div> </div>
<div class="category-card" data-category="immigration-law"> <div class="category-card" data-category="immigration-law">
<div class="category-card__top"> <div class="category-card__top">
@@ -124,6 +154,7 @@ require_once __DIR__ . '/includes/layout.php';
</div> </div>
<h4>Immigration &amp; International</h4> <h4>Immigration &amp; International</h4>
<p>UNE (Utlendingsnemnda) decisions, ECHR Art. 8 family rights, EMD case law, Hague Convention (cross-border child abduction).</p> <p>UNE (Utlendingsnemnda) decisions, ECHR Art. 8 family rights, EMD case law, Hague Convention (cross-border child abduction).</p>
<button class="cat-browse-btn" data-cat="immigration-law" type="button">Browse docs →</button>
</div> </div>
<div class="category-card" data-category="government-documents"> <div class="category-card" data-category="government-documents">
<div class="category-card__top"> <div class="category-card__top">
@@ -132,6 +163,22 @@ require_once __DIR__ . '/includes/layout.php';
</div> </div>
<h4>Government Documents</h4> <h4>Government Documents</h4>
<p>NOUer, Stortingsmeldinger, government white papers and regulatory guidance from Regjeringen.no.</p> <p>NOUer, Stortingsmeldinger, government white papers and regulatory guidance from Regjeringen.no.</p>
<button class="cat-browse-btn" data-cat="government-documents" type="button">Browse docs →</button>
</div>
</div>
<!-- DRILL-DOWN PANEL -->
<div id="corpusDrillPanel" class="corpus-drill-panel" hidden>
<div class="drill-header">
<div>
<p class="eyebrow" id="drillEyebrow">Category</p>
<h3 id="drillTitle">Documents</h3>
</div>
<button class="drill-close-btn" id="drillCloseBtn" type="button" aria-label="Close">✕</button>
</div>
<div id="drillDocList" class="doc-list"></div>
<div class="doc-list__more-wrap" id="drillMoreWrap" hidden>
<button class="doc-list__more" id="drillMoreBtn" type="button">Load more</button>
</div> </div>
</div> </div>
</div> </div>
@@ -144,6 +191,7 @@ require_once __DIR__ . '/includes/layout.php';
<table class="sources-table" id="sourcesTable"> <table class="sources-table" id="sourcesTable">
<thead> <thead>
<tr> <tr>
<th></th>
<th>Source</th> <th>Source</th>
<th>Type</th> <th>Type</th>
<th>Category</th> <th>Category</th>
@@ -153,7 +201,7 @@ require_once __DIR__ . '/includes/layout.php';
</tr> </tr>
</thead> </thead>
<tbody id="sourcesTableBody"> <tbody id="sourcesTableBody">
<tr class="sources-skeleton"><td colspan="6">Loading sources…</td></tr> <tr class="sources-skeleton"><td colspan="7">Loading sources…</td></tr>
</tbody> </tbody>
</table> </table>
</div> </div>
@@ -280,6 +328,13 @@ require_once __DIR__ . '/includes/layout.php';
<script> <script>
(function () { (function () {
'use strict';
// ── Utilities ────────────────────────────────────────────────────────────
/**
 * HTML-escapes a value for use in element content or double-quoted attributes.
 * null/undefined become ''; everything else is stringified first.
 * Escapes & < > " only.
 */
function esc(s) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;' };
  return String(s ?? '').replace(/[&<>"]/g, ch => entities[ch]);
}
function fmt(n) { function fmt(n) {
if (n === null || n === undefined) return '—'; if (n === null || n === undefined) return '—';
return Number(n).toLocaleString('en'); return Number(n).toLocaleString('en');
@@ -293,6 +348,17 @@ require_once __DIR__ . '/includes/layout.php';
} catch (e) { return s; } } catch (e) { return s; }
} }
/**
 * Returns HTML for `text` with each case-insensitive occurrence of `query`
 * wrapped in <mark>. All output is HTML-escaped via esc().
 *
 * Matching runs on the raw text and each piece is escaped afterwards. That way
 * a query can never match inside an entity such as &amp;, queries containing
 * & < > " still match, and nothing is escaped twice.
 */
function highlight(text, query) {
  if (!query) return esc(text);
  const raw = String(text ?? '');
  // Escape regex metacharacters so the query is matched literally.
  const literal = String(query).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Splitting on a capturing group keeps the matches: they sit at odd indices.
  return raw
    .split(new RegExp('(' + literal + ')', 'i'))
    .map((part, i) => (i % 2 === 1 ? '<mark>' + esc(part) + '</mark>' : esc(part)))
    .join('');
}
// Drops the 'is-loading' class from an element once its value has been filled in.
function setLoaded(el) {
  el.classList.remove('is-loading');
}
// ── Authority / schedule label maps ─────────────────────────────────────
const authorityLabels = { const authorityLabels = {
case_law: { label: 'Case law', cls: 'badge--teal' }, case_law: { label: 'Case law', cls: 'badge--teal' },
guidance: { label: 'Guidance', cls: 'badge--amber' }, guidance: { label: 'Guidance', cls: 'badge--amber' },
@@ -305,13 +371,9 @@ require_once __DIR__ . '/includes/layout.php';
}; };
const scheduleLabels = { const scheduleLabels = {
daily: 'Daily', daily: 'Daily', weekly: 'Weekly', monthly: 'Monthly', manual: 'Manual',
weekly: 'Weekly',
monthly: 'Monthly',
manual: 'Manual',
}; };
// Category slug → element id map (for live counts)
const catIds = { const catIds = {
'family-law': 'cat-family-law', 'family-law': 'cat-family-law',
'family_law': 'cat-family-law', 'family_law': 'cat-family-law',
@@ -338,9 +400,20 @@ require_once __DIR__ . '/includes/layout.php';
'procurement-law': 'cat-administrative-law', 'procurement-law': 'cat-administrative-law',
}; };
function setLoaded(el) { const catLabels = {
el.classList.remove('is-loading'); 'family-law': 'Family Law',
} 'child-welfare': 'Child Welfare',
'labour-law': 'Labour Law',
'social-welfare': 'Social Welfare',
'tax-law': 'Tax Law',
'administrative-law': 'Administrative Law',
'consumer-law': 'Consumer & Housing',
'immigration-law': 'Immigration & International',
'government-documents': 'Government Documents',
};
// ── STATS + SOURCES table load ───────────────────────────────────────────
let cachedSources = [];
fetch('/api/corpus-stats.php', { credentials: 'same-origin' }) fetch('/api/corpus-stats.php', { credentials: 'same-origin' })
.then(r => r.json()) .then(r => r.json())
@@ -353,12 +426,11 @@ require_once __DIR__ . '/includes/layout.php';
const elSrc = document.querySelector('#statSources .corpus-stat__value'); const elSrc = document.querySelector('#statSources .corpus-stat__value');
const elUpd = document.querySelector('#statUpdated .corpus-stat__value'); const elUpd = document.querySelector('#statUpdated .corpus-stat__value');
if (elChunks) { elChunks.textContent = fmt(s.total_chunks); setLoaded(elChunks); } if (elChunks) { elChunks.textContent = fmt(s.total_chunks); setLoaded(elChunks); }
if (elDocs) { elDocs.textContent = fmt(s.total_docs); setLoaded(elDocs); } if (elDocs) { elDocs.textContent = fmt(s.total_docs); setLoaded(elDocs); }
if (elSrc) { elSrc.textContent = fmt(s.active_sources); setLoaded(elSrc); } if (elSrc) { elSrc.textContent = fmt(s.active_sources); setLoaded(elSrc); }
if (elUpd) { elUpd.textContent = fmtDate(s.last_updated); setLoaded(elUpd); } if (elUpd) { elUpd.textContent = fmtDate(s.last_updated); setLoaded(elUpd); }
// Category counts
(s.by_category || []).forEach(row => { (s.by_category || []).forEach(row => {
const elId = catIds[row.category]; const elId = catIds[row.category];
if (!elId) return; if (!elId) return;
@@ -368,47 +440,296 @@ require_once __DIR__ . '/includes/layout.php';
el.textContent = fmt(cur + parseInt(row.doc_count, 10)); el.textContent = fmt(cur + parseInt(row.doc_count, 10));
setLoaded(el); setLoaded(el);
}); });
// Zero out remaining loading badges
document.querySelectorAll('.category-card__count.is-loading').forEach(el => { document.querySelectorAll('.category-card__count.is-loading').forEach(el => {
el.textContent = '0'; el.textContent = '0'; setLoaded(el);
setLoaded(el);
}); });
// Sources table // Sources table
const tbody = document.getElementById('sourcesTableBody'); cachedSources = data.sources || [];
if (!tbody) return; renderSourcesTable(cachedSources);
tbody.innerHTML = '';
(data.sources || []).forEach(src => {
const auth = authorityLabels[src.authority_type] || { label: src.authority_type || '—', cls: 'badge--muted' };
const sched = scheduleLabels[src.schedule] || (src.schedule || 'Manual');
const langFlag = src.language === 'no' ? '🇳🇴' : src.language === 'en' ? '🇬🇧' : (src.language || '—');
const statusHtml = src.is_active
? '<span class="status-active">● Active</span>'
: '<span class="status-inactive">○ Inactive</span>';
const nameHtml = src.url
? `<a href="${escHtml(src.url)}" target="_blank" rel="noopener">${escHtml(src.name)}</a>`
: escHtml(src.name);
const tr = document.createElement('tr');
tr.innerHTML = `
<td class="source-name">${nameHtml}</td>
<td><span class="source-badge ${escHtml(auth.cls)}">${escHtml(auth.label)}</span></td>
<td><span class="source-cat">${escHtml(src.category || '—')}</span></td>
<td>${langFlag}</td>
<td>${escHtml(sched)}</td>
<td>${statusHtml}</td>`;
tbody.appendChild(tr);
});
}) })
.catch(() => { .catch(() => {
document.querySelectorAll('.corpus-stat__value').forEach(el => { document.querySelectorAll('.corpus-stat__value').forEach(el => {
el.textContent = '—'; el.textContent = '—'; el.classList.remove('is-loading');
el.classList.remove('is-loading');
}); });
}); });
// Escape &, <, > and double quotes so a value can be placed in HTML text or a double-quoted attribute.
function escHtml(s) {
  return String(s ?? '').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;');
}

// ── Sources table rendering ───────────────────────────────────────────────
/**
 * Rebuild the sources table body from a list of source records.
 *
 * Every source produces two rows: a summary row with an expand button, and a
 * hidden detail row that loadSourceExpand() fills in when the row is opened.
 *
 * @param {Array<Object>} sources Source records. Fields read here: name, url,
 *   authority_type, category, language, schedule, is_active.
 */
function renderSourcesTable(sources) {
  const tbody = document.getElementById('sourcesTableBody');
  if (!tbody) return;
  tbody.innerHTML = '';
  sources.forEach((src, idx) => {
    const auth = authorityLabels[src.authority_type] || { label: src.authority_type || '—', cls: 'badge--muted' };
    const sched = scheduleLabels[src.schedule] || (src.schedule || 'Manual');
    // Known codes map to a flag literal; any other value is record data and is
    // escaped because it is interpolated into innerHTML below.
    const langFlag = src.language === 'no' ? '🇳🇴' : src.language === 'en' ? '🇬🇧' : esc(src.language || '—');
    const statusHtml = src.is_active
      ? '<span class="status-active">● Active</span>'
      : '<span class="status-inactive">○ Inactive</span>';
    const nameHtml = src.url
      ? `<a href="${esc(src.url)}" target="_blank" rel="noopener">${esc(src.name)}</a>`
      : esc(src.name);

    // Summary row
    const tr = document.createElement('tr');
    tr.dataset.idx = idx;
    tr.innerHTML = `
      <td class="source-expand-cell">
        <button class="source-expand-btn" type="button" aria-expanded="false" aria-label="Expand ${esc(src.name)}">▶</button>
      </td>
      <td class="source-name">${nameHtml}</td>
      <td><span class="source-badge ${esc(auth.cls)}">${esc(auth.label)}</span></td>
      <td><span class="source-cat">${esc(src.category || '—')}</span></td>
      <td>${langFlag}</td>
      <td>${esc(sched)}</td>
      <td>${statusHtml}</td>`;
    tbody.appendChild(tr);

    // Expand row (hidden until the button is clicked)
    const expandTr = document.createElement('tr');
    expandTr.className = 'source-expand-row';
    expandTr.hidden = true;
    expandTr.dataset.name = src.name;
    expandTr.innerHTML = `<td colspan="7"><div class="source-expand-inner" id="source-expand-${idx}">
      <div class="source-expand-loading">Loading…</div></div></td>`;
    tbody.appendChild(expandTr);

    // Toggle handler: flips visibility, the arrow glyph and aria-expanded together.
    tr.querySelector('.source-expand-btn').addEventListener('click', function () {
      const isOpen = expandTr.hidden === false;
      if (isOpen) {
        expandTr.hidden = true;
        this.textContent = '▶';
        this.setAttribute('aria-expanded', 'false');
      } else {
        expandTr.hidden = false;
        this.textContent = '▼';
        this.setAttribute('aria-expanded', 'true');
        loadSourceExpand(idx, src, `source-expand-${idx}`);
      }
    });
  });
}
/**
 * Fill the detail panel of a source row with its metadata and document count.
 *
 * The panel is marked as loaded so that re-opening the row does not fetch again.
 * If the request fails the mark is removed, so the next expand retries instead
 * of leaving the error message in place permanently.
 *
 * @param {number} idx         Row index of the source (not used in the body; kept for callers).
 * @param {Object} src         Source record; name, scraper_class, category,
 *                             authority_type, language, schedule and url are read.
 * @param {string} containerId Id of the element that receives the rendered details.
 */
function loadSourceExpand(idx, src, containerId) {
  const container = document.getElementById(containerId);
  if (!container || container.dataset.loaded) return;
  container.dataset.loaded = '1';
  // Only the total is needed here, so request a single document.
  const qs = new URLSearchParams({ source_name: src.name, limit: 1 });
  fetch('/api/corpus-documents.php?' + qs, { credentials: 'same-origin' })
    .then(r => r.json())
    .then(data => {
      // '?' stands in for the count when the API reports a failure.
      const total = data.ok ? data.total : '?';
      container.innerHTML = `
        <div class="source-expand-grid">
          <div>
            <dl class="source-expand-dl">
              <dt>Scraper class</dt>
              <dd><code>${esc(src.scraper_class || '—')}</code></dd>
              <dt>Category</dt>
              <dd>${esc(src.category || '—')}</dd>
              <dt>Authority type</dt>
              <dd>${esc(src.authority_type || '—')}</dd>
              <dt>Language</dt>
              <dd>${src.language === 'no' ? '🇳🇴 Norwegian' : src.language === 'en' ? '🇬🇧 English' : esc(src.language || '—')}</dd>
              <dt>Update schedule</dt>
              <dd>${esc(scheduleLabels[src.schedule] || src.schedule || '—')}</dd>
              <dt>Documents indexed</dt>
              <dd><strong>${fmt(total)}</strong></dd>
            </dl>
          </div>
          <div>
            ${src.url ? `<p class="source-expand-url"><a href="${esc(src.url)}" target="_blank" rel="noopener">${esc(src.url)}</a></p>` : ''}
            ${total > 0 ? `<button class="doc-list__more source-browse-btn" data-source="${esc(src.name)}" type="button">Browse ${fmt(total)} documents →</button>` : ''}
          </div>
        </div>`;
      container.querySelectorAll('.source-browse-btn').forEach(btn => {
        btn.addEventListener('click', () => openDrillBySource(src.name));
      });
    })
    .catch(() => {
      // Drop the loaded mark so the early return above does not block a retry.
      delete container.dataset.loaded;
      container.innerHTML = `<p class="source-expand-error">Could not load source details.</p>`;
    });
}
// ── Category drill-down ───────────────────────────────────────────────────
// Paging state of the drill-down panel. A fresh object is assigned whenever a
// new listing (by category or by source) is opened.
let drillState = { category: null, sourceName: null, offset: 0, total: 0, limit: 20 };

// Drill-down panel elements, resolved once by id in the order listed.
const [
  drillPanel,
  drillDocList,
  drillTitle,
  drillEyebrow,
  drillMoreWrap,
  drillMoreBtn,
  drillCloseBtn,
] = [
  'corpusDrillPanel',
  'drillDocList',
  'drillTitle',
  'drillEyebrow',
  'drillMoreWrap',
  'drillMoreBtn',
  'drillCloseBtn',
].map(id => document.getElementById(id));

// Every .cat-browse-btn opens the panel for the category in its data-cat attribute.
for (const browseBtn of document.querySelectorAll('.cat-browse-btn')) {
  browseBtn.addEventListener('click', () => {
    openDrillByCategory(browseBtn.dataset.cat);
  });
}
/**
 * Open the drill-down panel for one category and load its first page.
 *
 * @param {string} cat Category key; shown via catLabels when a label exists,
 *                     otherwise the key itself is displayed.
 */
function openDrillByCategory(cat) {
  const heading = catLabels[cat] || cat;

  // Start paging from scratch with a brand-new state object.
  drillState = {
    category: cat,
    sourceName: null,
    offset: 0,
    total: 0,
    limit: 20,
  };

  drillEyebrow.textContent = 'Category';
  drillTitle.textContent = heading;
  drillMoreWrap.hidden = true;
  drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';

  // Reveal the panel before scrolling to it.
  drillPanel.hidden = false;
  drillPanel.scrollIntoView({ behavior: 'smooth', block: 'start' });

  fetchDrillPage(false);
}
/**
 * Open the drill-down panel for one source and load its first page.
 *
 * @param {string} sourceName Source name; used both as the panel title and as
 *                            the filter stored in the paging state.
 */
function openDrillBySource(sourceName) {
  // Start paging from scratch with a brand-new state object.
  drillState = {
    category: null,
    sourceName,
    offset: 0,
    total: 0,
    limit: 20,
  };

  drillEyebrow.textContent = 'Source';
  drillTitle.textContent = sourceName;
  drillMoreWrap.hidden = true;
  drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';

  // Reveal the panel before scrolling to it.
  drillPanel.hidden = false;
  drillPanel.scrollIntoView({ behavior: 'smooth', block: 'start' });

  fetchDrillPage(false);
}
/**
 * Fetch one page of documents for the current drill-down listing and render it.
 *
 * Guards against two interleavings:
 *  - a response that arrives after the user opened a different listing is
 *    discarded instead of being rendered into (and advancing the offset of)
 *    the new listing;
 *  - a second call while a request for the same listing is still running is
 *    ignored, so repeated "load more" clicks cannot append the same page twice.
 *
 * @param {boolean} append true to add the page below the rows already shown;
 *                         false to replace the list contents.
 */
function fetchDrillPage(append) {
  // The openDrillBy*() functions assign a new object to drillState, so comparing
  // this snapshot by identity later tells whether the listing has changed.
  const state = drillState;
  if (state.loading) return;
  state.loading = true;
  drillMoreBtn.disabled = true;

  const qs = new URLSearchParams({ offset: state.offset, limit: state.limit });
  if (state.category) qs.set('category', state.category);
  if (state.sourceName) qs.set('source_name', state.sourceName);

  fetch('/api/corpus-documents.php?' + qs, { credentials: 'same-origin' })
    .then(r => r.json())
    .then(data => {
      if (state !== drillState) return; // stale response for a listing no longer shown
      if (!data.ok) {
        if (!append) drillDocList.innerHTML = '<p class="drill-error">Could not load documents.</p>';
        return;
      }
      state.total = data.total;
      const docs = data.documents || [];
      if (!append) drillDocList.innerHTML = '';
      if (docs.length === 0 && !append) {
        drillDocList.innerHTML = state.sourceName
          ? '<p class="drill-empty">No documents found for this source.</p>'
          : '<p class="drill-empty">No documents found in this category.</p>';
        drillMoreWrap.hidden = true;
        return;
      }
      docs.forEach(doc => {
        const item = document.createElement('div');
        item.className = 'doc-list__item';
        const titleHtml = doc.source_url
          ? `<a href="${esc(doc.source_url)}" target="_blank" rel="noopener" class="doc-list__title">${esc(doc.title || '(Untitled)')}</a>`
          : `<span class="doc-list__title">${esc(doc.title || '(Untitled)')}</span>`;
        const langFlag = doc.language === 'no' ? '🇳🇴' : doc.language === 'en' ? '🇬🇧' : '';
        item.innerHTML = `
          <div class="doc-list__info">
            ${titleHtml}
            <div class="doc-list__meta">
              <span class="source-cat">${esc(doc.category || '—')}</span>
              ${langFlag ? `<span>${langFlag}</span>` : ''}
              <span class="doc-list__date">${fmtDate(doc.updated_at)}</span>
            </div>
          </div>
          <span class="doc-list__chunks">${fmt(doc.chunk_count)} passages</span>`;
        drillDocList.appendChild(item);
      });
      const loaded = state.offset + docs.length;
      // An empty page means there is nothing further to fetch, whatever the reported total says.
      drillMoreWrap.hidden = docs.length === 0 || loaded >= state.total;
      state.offset = loaded;
    })
    .catch(() => {
      if (state !== drillState) return;
      if (!append) drillDocList.innerHTML = '<p class="drill-error">Network error.</p>';
    })
    .finally(() => {
      state.loading = false;
      // Only re-enable the button if this request still belongs to the visible listing;
      // otherwise the newer request owns the button state.
      if (state === drillState) drillMoreBtn.disabled = false;
    });
}
// "More" requests the next page in append mode.
drillMoreBtn.addEventListener('click', function () {
  fetchDrillPage(true);
});

// "Close" hides the drill-down panel.
drillCloseBtn.addEventListener('click', function () {
  drillPanel.hidden = true;
});
// ── Search bar ────────────────────────────────────────────────────────────
// Current search mode and language; the pill buttons below update them.
let searchMode = 'hybrid';
let searchLang = 'en';

// Mode pills: the clicked pill becomes the only active one and sets searchMode
// from its data-mode attribute.
for (const pill of document.querySelectorAll('.search-modes .mode-pill')) {
  pill.addEventListener('click', () => {
    searchMode = pill.dataset.mode;
    for (const other of document.querySelectorAll('.search-modes .mode-pill')) {
      other.classList.remove('is-active');
    }
    pill.classList.add('is-active');
  });
}

// Language pills: same behaviour, driven by data-lang.
for (const pill of document.querySelectorAll('.lang-pills .mode-pill')) {
  pill.addEventListener('click', () => {
    searchLang = pill.dataset.lang;
    for (const other of document.querySelectorAll('.lang-pills .mode-pill')) {
      other.classList.remove('is-active');
    }
    pill.classList.add('is-active');
  });
}

// Search bar elements.
const searchInput = document.getElementById('corpusSearchInput');
const searchBtn = document.getElementById('corpusSearchBtn');
const searchResults = document.getElementById('corpusSearchResults');
/**
 * Run a corpus search with the current input text, mode and language, and
 * render the resulting passages into the results container.
 *
 * Queries shorter than 3 characters only show a hint. While a request is in
 * flight the search button is disabled and further calls are ignored, so a
 * second Enter press cannot start an overlapping request whose response might
 * arrive out of order.
 */
function runSearch() {
  // The Enter-key handler calls this directly, bypassing the disabled button.
  if (searchBtn.disabled) return;

  const q = searchInput.value.trim();
  if (q.length < 3) {
    searchResults.innerHTML = '<p class="search-hint">Enter at least 3 characters.</p>';
    searchResults.hidden = false;
    return;
  }

  // Snapshot the selection: the pills may change while the request is running.
  const requestedMode = searchMode;
  searchResults.hidden = false;
  searchResults.innerHTML = `<p class="search-loading">Searching in <strong>${esc(requestedMode)}</strong> mode…</p>`;
  searchBtn.disabled = true;

  fetch('/api/corpus-search.php', {
    method: 'POST',
    credentials: 'same-origin',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ query: q, mode: requestedMode, language: searchLang, limit: 8 }),
  })
    .then(r => r.json())
    .then(data => {
      if (!data.ok) {
        searchResults.innerHTML = `<p class="search-error">Search error: ${esc(data.error?.message || 'Unknown error')}</p>`;
        return;
      }
      // Prefer the mode echoed by the server; fall back to what was requested.
      const mode = data.mode || requestedMode;
      const hits = data.hits || [];
      if (hits.length === 0) {
        searchResults.innerHTML = `<p class="search-empty">No results for <strong>${esc(q)}</strong> in ${esc(mode)} mode.</p>`;
        return;
      }
      const modeLabel = { hybrid: 'Hybrid RAG', bm25: 'BM25 keyword', vector: 'Vector semantic' }[mode] || mode;
      let html = `<div class="search-results-header"><span class="eyebrow">${esc(modeLabel)}</span><span class="search-results-count">${hits.length} passage${hits.length !== 1 ? 's' : ''}</span></div>`;
      hits.forEach(hit => {
        const score = hit.score != null ? `<span class="passage-score">${Math.round(hit.score * 100)}%</span>` : '';
        // NOTE(review): this looks up a category in authorityLabels, which the sources
        // table indexes by authority_type — confirm the keys are meant to overlap.
        const catAuth = authorityLabels[hit.category] || { label: hit.category || '—', cls: 'badge--muted' };
        const titleHtml = hit.source_url
          ? `<a href="${esc(hit.source_url)}" target="_blank" rel="noopener" class="passage-card__title">${esc(hit.title || '(Untitled)')}</a>`
          : `<span class="passage-card__title">${esc(hit.title || '(Untitled)')}</span>`;
        const section = hit.section ? `<span class="passage-section">§ ${esc(hit.section)}</span>` : '';
        // NOTE(review): the excerpt is inserted as HTML; presumably highlight() escapes it — verify.
        const excerpt = highlight(hit.excerpt || '', q);
        html += `
          <div class="passage-card">
            <div class="passage-card__meta">
              <span class="source-badge ${esc(catAuth.cls)}">${esc(catAuth.label)}</span>
              ${section}
              ${score}
            </div>
            ${titleHtml}
            <p class="passage-card__excerpt">${excerpt}</p>
          </div>`;
      });
      searchResults.innerHTML = html;
    })
    .catch(() => {
      searchResults.innerHTML = `<p class="search-error">Network error.</p>`;
    })
    .finally(() => {
      // Single place to release the in-flight guard, whatever the outcome.
      searchBtn.disabled = false;
    });
}
// A search starts from a click on the button or from Enter in the input.
searchBtn.addEventListener('click', runSearch);
searchInput.addEventListener('keydown', (event) => {
  if (event.key !== 'Enter') return;
  runSearch();
});
})(); })();
</script> </script>