Files
dobetternorge-tools/corpus.php
T
daveadmin 38255669a9 Add corpus explorer: search bar (Hybrid/BM25/Vector), category drill-down, source row expand
- api/corpus-search.php: new endpoint with three search modes (hybrid RAG, BM25 keyword, Qdrant vector)
- api/corpus-documents.php: paginated document browser by category or source name
- corpus.php: search bar with mode+language pills, Browse docs button on each category card with drill-down panel, expand toggle on each source row showing doc count and scraper class
- tools.css: all new corpus interactive styles appended

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-15 11:55:54 +02:00

737 lines
36 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
// Corpus explorer page: sets the page metadata, captures a custom sidebar
// panel into a string, then pulls in the shared layout.
declare(strict_types=1);
// Page metadata variables. NOTE(review): presumably read by
// includes/layout.php (not visible here) — confirm before renaming.
$toolName = 'corpus';
$toolTitle = 'Legal Knowledge Base';
$toolKind = 'Corpus Intelligence';
// Hard-coded approximation; the live figures are fetched client-side from
// /api/corpus-stats.php further down this file.
$toolBadge = '~220 K passages';
// Buffer the markup below so it is captured rather than sent to the client.
ob_start();
?>
<div class="reasoning-head">
<p class="eyebrow">Corpus health</p>
<h2 id="reasoningTitle">Vector index</h2>
</div>
<dl class="corpus-health-dl">
<dt>Collection</dt>
<dd><code>bnl_chunks</code></dd>
<dt>Dimensions</dt>
<dd>768 (nomic-embed-text)</dd>
<dt>Similarity</dt>
<dd>Cosine</dd>
<dt>RAG strategy</dt>
<dd>Hybrid vector + keyword<br>Reciprocal rank fusion</dd>
<dt>Private boost</dt>
<dd>1.5×</dd>
<dt>Temporal mode</dt>
<dd>legal_conservative</dd>
<dt>Chunk target</dt>
<dd>600 words · 75 overlap</dd>
<dt>Vector DB</dt>
<dd>Qdrant on Colin Docker<br><code>10.0.2.10:6333</code></dd>
<dt>Hybrid search</dt>
<dd>Azure AI Search<br><code>bnl-legal-search</code><br>West Europe · Basic SKU</dd>
</dl>
<?php
// Take the buffered markup as a string and stop buffering.
// NOTE(review): the variable name suggests layout.php renders this in place
// of a default reasoning panel — confirm against includes/layout.php.
$reasoningPanelOverride = ob_get_clean();
// __DIR__-relative include so the path does not depend on the working directory.
require_once __DIR__ . '/includes/layout.php';
?>
<!-- STATS BAR -->
<div class="corpus-stats-bar" id="corpusStatsBar">
<div class="corpus-stat" id="statChunks">
<span class="corpus-stat__value is-loading">—</span>
<span class="corpus-stat__label">Indexed passages</span>
</div>
<div class="corpus-stat" id="statDocs">
<span class="corpus-stat__value is-loading">—</span>
<span class="corpus-stat__label">Source documents</span>
</div>
<div class="corpus-stat" id="statSources">
<span class="corpus-stat__value is-loading">—</span>
<span class="corpus-stat__label">Active scrapers</span>
</div>
<div class="corpus-stat" id="statUpdated">
<span class="corpus-stat__value is-loading">—</span>
<span class="corpus-stat__label">Last ingested</span>
</div>
</div>
<!-- CORPUS SEARCH -->
<div class="corpus-search-box">
    <div class="corpus-search-row">
        <!-- The placeholder is not an accessible name, so the input carries an explicit aria-label. -->
        <input type="search" id="corpusSearchInput" class="corpus-search-input"
               aria-label="Search the corpus"
               placeholder="Search 220 K passages — try «samvær», «arbeidsgiver», «barnevernloven»…"
               autocomplete="off" spellcheck="false">
        <button id="corpusSearchBtn" class="primary-button" type="button">Search</button>
    </div>
    <div class="corpus-search-controls">
        <div class="search-modes" role="group" aria-label="Search mode">
            <button class="mode-pill is-active" data-mode="hybrid" type="button">Hybrid</button>
            <button class="mode-pill" data-mode="bm25" type="button">BM25</button>
            <button class="mode-pill" data-mode="vector" type="button">Vector</button>
        </div>
        <div class="lang-pills" role="group" aria-label="Language">
            <button class="mode-pill is-active" data-lang="en" type="button">EN</button>
            <button class="mode-pill" data-lang="no" type="button">NO</button>
        </div>
    </div>
</div>
<!-- Results are injected by script; the polite live region lets assistive technology announce them. -->
<div id="corpusSearchResults" class="corpus-search-results" aria-live="polite" hidden></div>
<!-- COVERAGE -->
<div class="corpus-section">
<p class="eyebrow">Coverage</p>
<h3 class="corpus-section__title">Legal categories</h3>
<div class="corpus-categories" id="corpusCategories">
<div class="category-card" data-category="family-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">⚖</span>
<span class="category-card__count is-loading" id="cat-family-law">—</span>
</div>
<h4>Family Law</h4>
<p>Barneloven, child custody (foreldreansvar), samvær, mediation (mekling), separation and divorce proceedings.</p>
<button class="cat-browse-btn" data-cat="family-law" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="child-welfare">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🧒</span>
<span class="category-card__count is-loading" id="cat-child-welfare">—</span>
</div>
<h4>Child Welfare</h4>
<p>Barnevernloven, omsorgsovertakelse, emergency care orders, foster placement, CPS (barnevernet) case law.</p>
<button class="cat-browse-btn" data-cat="child-welfare" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="labour-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🏭</span>
<span class="category-card__count is-loading" id="cat-labour-law">—</span>
</div>
<h4>Labour Law</h4>
<p>Arbeidsmiljøloven, collective agreements (tariffavtaler), Arbeidsretten rulings, dismissal, sick leave obligations.</p>
<button class="cat-browse-btn" data-cat="labour-law" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="social-welfare">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🛡</span>
<span class="category-card__count is-loading" id="cat-social-welfare">—</span>
</div>
<h4>Social Welfare</h4>
<p>NAV guidance on sykepenger, dagpenger, AAP, uføretrygd, alderspensjon, yrkesskade and social assistance.</p>
<button class="cat-browse-btn" data-cat="social-welfare" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="tax-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">📊</span>
<span class="category-card__count is-loading" id="cat-tax-law">—</span>
</div>
<h4>Tax Law</h4>
<p>Skatteetaten's Skatte-ABC, binding advance rulings (BFU), Skatteklagenemnda decisions, income and capital tax.</p>
<button class="cat-browse-btn" data-cat="tax-law" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="administrative-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🏛</span>
<span class="category-card__count is-loading" id="cat-administrative-law">—</span>
</div>
<h4>Administrative Law</h4>
<p>Sivilombudet reports, Forvaltningsloven, procedural rights, official complaints, Stortinget oversight.</p>
<button class="cat-browse-btn" data-cat="administrative-law" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="consumer-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🏠</span>
<span class="category-card__count is-loading" id="cat-consumer-law">—</span>
</div>
<h4>Consumer &amp; Housing</h4>
<p>HTU (rental disputes), Finansklagenemnda, Forbrukertilsynet, Forbrukerrådet, Pakkereisenemnda decisions.</p>
<button class="cat-browse-btn" data-cat="consumer-law" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="immigration-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🌐</span>
<span class="category-card__count is-loading" id="cat-immigration-law">—</span>
</div>
<h4>Immigration &amp; International</h4>
<p>UNE (Utlendingsnemnda) decisions, ECHR Art. 8 family rights, EMD case law, Hague Convention (cross-border child abduction).</p>
<button class="cat-browse-btn" data-cat="immigration-law" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="government-documents">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">📄</span>
<span class="category-card__count is-loading" id="cat-government-documents">—</span>
</div>
<h4>Government Documents</h4>
<p>NOUer, Stortingsmeldinger, government white papers and regulatory guidance from Regjeringen.no.</p>
<button class="cat-browse-btn" data-cat="government-documents" type="button">Browse docs →</button>
</div>
</div>
<!-- DRILL-DOWN PANEL -->
<div id="corpusDrillPanel" class="corpus-drill-panel" hidden>
<div class="drill-header">
<div>
<p class="eyebrow" id="drillEyebrow">Category</p>
<h3 id="drillTitle">Documents</h3>
</div>
<button class="drill-close-btn" id="drillCloseBtn" type="button" aria-label="Close">✕</button>
</div>
<div id="drillDocList" class="doc-list"></div>
<div class="doc-list__more-wrap" id="drillMoreWrap" hidden>
<button class="doc-list__more" id="drillMoreBtn" type="button">Load more</button>
</div>
</div>
</div>
<!-- SOURCES TABLE -->
<div class="corpus-section">
<p class="eyebrow">Data sources</p>
<h3 class="corpus-section__title">Active scrapers</h3>
<div class="corpus-table-wrap">
<table class="sources-table" id="sourcesTable">
<thead>
<tr>
<th></th>
<th>Source</th>
<th>Type</th>
<th>Category</th>
<th>Lang</th>
<th>Schedule</th>
<th>Status</th>
</tr>
</thead>
<tbody id="sourcesTableBody">
<tr class="sources-skeleton"><td colspan="7">Loading sources…</td></tr>
</tbody>
</table>
</div>
</div>
<!-- AI STACK -->
<div class="corpus-section">
<p class="eyebrow">Software</p>
<h3 class="corpus-section__title">AI stack</h3>
<div class="stack-grid">
<div class="stack-card">
<h3>Reasoning LLMs</h3>
<ul class="stack-list">
<li><span class="stack-badge stack-badge--azure">Azure</span> <strong>gpt-4o-mini</strong> <span class="stack-star">★ default</span> — fast, cost-efficient</li>
<li><span class="stack-badge stack-badge--azure">Azure</span> <strong>gpt-4o</strong> — highest quality</li>
<li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>qwen2.5:14b</strong> — local, private</li>
<li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>qwen3:14b</strong> — reasoning mode</li>
<li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>dbn-legal-agent</strong> — Norwegian law fine-tune (QLoRA on qwen2.5:7b, NorwAI-24B distillation)</li>
</ul>
<p class="stack-note">All routed via LiteLLM on Colin · <code>10.0.1.10:4000</code></p>
</div>
<div class="stack-card">
<h3>Transcription</h3>
<ul class="stack-list">
<li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>Whisper large-v3</strong> <span class="stack-star">★ primary</span><br>Cuttlefish · RTX 3060 12 GB VRAM</li>
<li><span class="stack-badge stack-badge--api">API</span> OpenAI Whisper API</li>
<li><span class="stack-badge stack-badge--azure">Azure</span> AI Speech <code>nb-NO</code> (Norway East)</li>
</ul>
<p class="stack-note">Speaker diarization · VAD silence filter · beam size 5 · vocabulary presets (barnerett, mediation)</p>
</div>
<div class="stack-card">
<h3>Embeddings</h3>
<ul class="stack-list">
<li><strong>nomic-embed-text</strong> — 768-dim dense vectors</li>
<li>Ollama on Chloe <code>10.0.1.11:11434</code></li>
<li>Cosine similarity in Qdrant</li>
</ul>
<p class="stack-note">All documents chunked and embedded before indexing; chunks stored in both Qdrant (vector) and MariaDB (keyword fallback)</p>
</div>
<div class="stack-card">
<h3>Vector &amp; Hybrid Search</h3>
<ul class="stack-list">
<li><strong>Qdrant</strong> <code>bnl_chunks</code> · ~220 K vectors<br>Colin Docker · <code>10.0.2.10:6333</code></li>
<li><strong>Azure AI Search</strong> <code>bnl-legal-search</code><br>Basic SKU · West Europe · hybrid keyword + semantic</li>
<li>Reciprocal rank fusion (vector + keyword)</li>
<li>Private corpus boosted 1.5×</li>
</ul>
</div>
<div class="stack-card">
<h3>Chunking pipeline</h3>
<ul class="stack-list">
<li>Heading-aware semantic splitting</li>
<li>600-word target · 75-word overlap</li>
<li>50-word minimum chunk</li>
<li>SHA-256 deduplication</li>
<li>PDF, DOCX, HTML text extraction</li>
<li>Temporal metadata (valid_from / valid_until)</li>
</ul>
<p class="stack-note">Legal temporal reranking: <code>legal_conservative</code> — surfaces current versions first</p>
</div>
</div>
</div>
<!-- DATA PIPELINE -->
<div class="corpus-section">
<p class="eyebrow">How it works</p>
<h3 class="corpus-section__title">Ingestion pipeline</h3>
<div class="pipeline-flow" role="list" aria-label="Data pipeline steps">
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">🌐</span>
<span>Source</span>
<small>gov websites, APIs, PDFs</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">🕷</span>
<span>Scraper</span>
<small>HTTP / API / PDF</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">📝</span>
<span>Text extract</span>
<small>PDF, DOCX, HTML</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">✂</span>
<span>TextChunker</span>
<small>600w · 75w overlap</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">🔢</span>
<span>Embed</span>
<small>nomic · 768-dim</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">⚡</span>
<span>Qdrant</span>
<small>cosine upsert</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">🤖</span>
<span>LiteLLM</span>
<small>RAG + LLM</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step pipeline-step--end" role="listitem">
<span class="pipeline-step__icon">🔍</span>
<span>Your tool</span>
<small>Ask, Search, Research…</small>
</div>
</div>
</div>
<script>
(function () {
'use strict';
// ── Utilities ────────────────────────────────────────────────────────────
/**
 * HTML-escape a value for interpolation into markup.
 * null/undefined become the empty string; everything else is stringified.
 * Escapes & < > and the double quote (attributes in this file are double-quoted).
 */
function esc(s) {
    const entityFor = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;' };
    const value = String(s ?? '');
    // Single pass over the input; equivalent to escaping '&' first and the rest after.
    return value.replace(/[&<>"]/g, ch => entityFor[ch]);
}
/**
 * Format a count with English thousands separators.
 *
 * @param {*} n  Number or numeric string; null/undefined mean "no value".
 * @returns {string} The grouped number, or an em dash when there is no
 *          usable number (missing, NaN, non-numeric text, ±Infinity).
 */
function fmt(n) {
    if (n === null || n === undefined) return '—';
    const num = Number(n);
    // Without this guard, non-numeric input (e.g. a '?' placeholder) renders as "NaN".
    if (!Number.isFinite(num)) return '—';
    return num.toLocaleString('en');
}
/**
 * Format a date-like value as "5 Mar 2024" (en-GB, short month).
 *
 * @param {*} s  Anything `new Date()` accepts; falsy means "no date".
 * @returns {string} The formatted date, or an em dash when the value is
 *          missing or cannot be parsed.
 */
function fmtDate(s) {
    if (!s) return '—';
    const d = new Date(s);
    // `new Date()` never throws on bad input; it yields an Invalid Date whose
    // time value is NaN, so a try/catch cannot detect it. The raw input is
    // deliberately not returned: callers interpolate this result into innerHTML.
    if (Number.isNaN(d.getTime())) return '—';
    return d.toLocaleDateString('en-GB', { day: 'numeric', month: 'short', year: 'numeric' });
}
/**
 * Return `text` as safe HTML with every case-insensitive occurrence of
 * `query` wrapped in <mark>.
 *
 * Matching runs on the RAW text and each piece is escaped afterwards.
 * Matching against already-escaped HTML lets a query such as "amp" or "lt"
 * hit the inside of an entity and corrupt the markup, and a query containing
 * "&" or "<" could never match at all.
 *
 * @param {*} text   Plain-text passage (not HTML).
 * @param {*} query  Literal search string; regex metacharacters are neutralised.
 * @returns {string} Escaped HTML.
 */
function highlight(text, query) {
    const raw = String(text ?? '');
    const needle = String(query ?? '');
    if (!needle) return esc(raw);

    const pattern = new RegExp(needle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'gi');
    let html = '';
    let cursor = 0;
    // The needle is non-empty, so every match has length >= 1 and the cursor always advances.
    for (const match of raw.matchAll(pattern)) {
        html += esc(raw.slice(cursor, match.index)) + '<mark>' + esc(match[0]) + '</mark>';
        cursor = match.index + match[0].length;
    }
    return html + esc(raw.slice(cursor));
}
/** Clear the loading-placeholder state from an element. */
function setLoaded(el) {
    const { classList } = el;
    classList.remove('is-loading');
}
// ── Authority / schedule label maps ─────────────────────────────────────
// Badge text and CSS modifier class for each source authority type.
const authorityLabels = Object.fromEntries([
    ['case_law',   'Case law',   'badge--teal'],
    ['guidance',   'Guidance',   'badge--amber'],
    ['report',     'Report',     'badge--muted'],
    ['ombudsman',  'Ombudsman',  'badge--muted'],
    ['tribunal',   'Tribunal',   'badge--coral'],
    ['regulatory', 'Regulatory', 'badge--coral'],
    ['law',        'Statute',    'badge--teal'],
    ['treaty',     'Treaty',     'badge--muted'],
].map(([key, label, cls]) => [key, { label, cls }]));

// Display text for scraper schedule keys.
const scheduleLabels = {
    daily: 'Daily',
    weekly: 'Weekly',
    monthly: 'Monthly',
    manual: 'Manual',
};

// Category slug → id of the count element on its card. Every card slug is
// accepted in both hyphen and underscore spelling; a few extra slugs are
// folded into an existing card.
const catIds = (() => {
    const cardSlugs = [
        'family-law', 'child-welfare', 'labour-law', 'social-welfare', 'tax-law',
        'administrative-law', 'consumer-law', 'immigration-law', 'government-documents',
    ];
    const ids = {};
    for (const slug of cardSlugs) {
        ids[slug] = 'cat-' + slug;
        ids[slug.replace(/-/g, '_')] = 'cat-' + slug;
    }
    return Object.assign(ids, {
        'tenancy-law':         'cat-consumer-law',
        'financial-law':       'cat-consumer-law',
        'case-law':            'cat-administrative-law',
        'victim-compensation': 'cat-administrative-law',
        'procurement-law':     'cat-administrative-law',
    });
})();

// Human-readable heading for each category card slug.
const catLabels = {
    'family-law':           'Family Law',
    'child-welfare':        'Child Welfare',
    'labour-law':           'Labour Law',
    'social-welfare':       'Social Welfare',
    'tax-law':              'Tax Law',
    'administrative-law':   'Administrative Law',
    'consumer-law':         'Consumer & Housing',
    'immigration-law':      'Immigration & International',
    'government-documents': 'Government Documents',
};
// ── STATS + SOURCES table load ───────────────────────────────────────────
// Most recent source list returned by the stats endpoint.
let cachedSources = [];

// Load headline stats, per-category document counts and the sources table
// in one request. Any failure (network, HTTP status, `ok: false`) takes the
// same path so no placeholder is left stuck in its loading state.
fetch('/api/corpus-stats.php', { credentials: 'same-origin' })
    .then(r => {
        if (!r.ok) throw new Error('HTTP ' + r.status);
        return r.json();
    })
    .then(data => {
        if (!data.ok) throw new Error('corpus-stats reported failure');
        const s = data.stats || {};

        [
            ['#statChunks',  fmt(s.total_chunks)],
            ['#statDocs',    fmt(s.total_docs)],
            ['#statSources', fmt(s.active_sources)],
            ['#statUpdated', fmtDate(s.last_updated)],
        ].forEach(([boxSelector, text]) => {
            const el = document.querySelector(boxSelector + ' .corpus-stat__value');
            if (el) { el.textContent = text; setLoaded(el); }
        });

        // Several category slugs share one card, so sum numerically first and
        // render once. Re-parsing the rendered text is wrong above 999:
        // parseInt("1,234", 10) is 1.
        const totals = new Map();
        (s.by_category || []).forEach(row => {
            const elId = catIds[row.category];
            if (!elId) return;
            const count = parseInt(row.doc_count, 10);
            totals.set(elId, (totals.get(elId) || 0) + (Number.isNaN(count) ? 0 : count));
        });
        totals.forEach((count, elId) => {
            const el = document.getElementById(elId);
            if (!el) return;
            el.textContent = fmt(count);
            setLoaded(el);
        });
        // Cards the endpoint did not mention have no documents.
        document.querySelectorAll('.category-card__count.is-loading').forEach(el => {
            el.textContent = '0'; setLoaded(el);
        });

        // Sources table
        cachedSources = data.sources || [];
        renderSourcesTable(cachedSources);
    })
    .catch(() => {
        document.querySelectorAll('.corpus-stat__value, .category-card__count.is-loading').forEach(el => {
            el.textContent = '—'; el.classList.remove('is-loading');
        });
        // Replace the skeleton row only if the table was never rendered.
        const skeletonCell = document.querySelector('#sourcesTableBody .sources-skeleton td');
        if (skeletonCell) skeletonCell.textContent = 'Could not load sources.';
    });
// ── Sources table rendering ───────────────────────────────────────────────
/**
 * Rebuild the sources table body: one summary row per source plus a hidden
 * detail row that is filled in lazily the first time it is expanded.
 *
 * @param {Array<Object>} sources  Source records from the stats endpoint.
 *        Fields read here: name, url, authority_type, schedule, language,
 *        category, is_active.
 */
function renderSourcesTable(sources) {
    const tbody = document.getElementById('sourcesTableBody');
    if (!tbody) return;
    tbody.innerHTML = '';
    (sources || []).forEach((src, idx) => {
        const auth = authorityLabels[src.authority_type] || { label: src.authority_type || '—', cls: 'badge--muted' };
        const sched = scheduleLabels[src.schedule] || (src.schedule || 'Manual');
        // The fallback is server-supplied text going into innerHTML, so it must be escaped.
        const langFlag = src.language === 'no' ? '🇳🇴'
            : src.language === 'en' ? '🇬🇧'
            : esc(src.language || '—');
        const statusHtml = src.is_active
            ? '<span class="status-active">● Active</span>'
            : '<span class="status-inactive">○ Inactive</span>';
        // Only http(s) URLs become links; esc() alone does not stop a
        // `javascript:` href. NOTE(review): assumes source URLs are absolute.
        const isWebUrl = /^https?:\/\//i.test(String(src.url || ''));
        const nameHtml = isWebUrl
            ? `<a href="${esc(src.url)}" target="_blank" rel="noopener">${esc(src.name)}</a>`
            : esc(src.name);

        const tr = document.createElement('tr');
        tr.dataset.idx = idx;
        tr.innerHTML = `
            <td class="source-expand-cell">
                <button class="source-expand-btn" type="button" aria-expanded="false" aria-label="Expand ${esc(src.name)}">▶</button>
            </td>
            <td class="source-name">${nameHtml}</td>
            <td><span class="source-badge ${esc(auth.cls)}">${esc(auth.label)}</span></td>
            <td><span class="source-cat">${esc(src.category || '—')}</span></td>
            <td>${langFlag}</td>
            <td>${esc(sched)}</td>
            <td>${statusHtml}</td>`;
        tbody.appendChild(tr);

        // Detail row, hidden until the toggle is clicked.
        const expandTr = document.createElement('tr');
        expandTr.className = 'source-expand-row';
        expandTr.hidden = true;
        expandTr.dataset.name = src.name;
        expandTr.innerHTML = `<td colspan="7"><div class="source-expand-inner" id="source-expand-${idx}">
            <div class="source-expand-loading">Loading…</div></div></td>`;
        tbody.appendChild(expandTr);

        // Toggle handler
        tr.querySelector('.source-expand-btn').addEventListener('click', function () {
            const isOpen = expandTr.hidden === false;
            if (isOpen) {
                expandTr.hidden = true;
                this.textContent = '▶';
                this.setAttribute('aria-expanded', 'false');
            } else {
                expandTr.hidden = false;
                this.textContent = '▼';
                this.setAttribute('aria-expanded', 'true');
                loadSourceExpand(idx, src, `source-expand-${idx}`);
            }
        });
    });
}
/**
 * Fill a source's detail panel: static metadata from `src` plus the number of
 * indexed documents, fetched from the documents endpoint with limit=1.
 *
 * @param {number} idx          Row index (unused; kept for the existing call signature).
 * @param {Object} src          Source record (name, url, scraper_class, category,
 *                              authority_type, language, schedule).
 * @param {string} containerId  id of the element that receives the markup.
 */
function loadSourceExpand(idx, src, containerId) {
    const container = document.getElementById(containerId);
    if (!container || container.dataset.loaded) return;
    // Set before the request so rapid toggling cannot start duplicate fetches.
    container.dataset.loaded = '1';
    // Restore the placeholder in case a previous failed attempt left an error message.
    container.innerHTML = '<div class="source-expand-loading">Loading…</div>';

    // Fetch doc count for this source
    const qs = new URLSearchParams({ source_name: src.name, limit: 1 });
    fetch('/api/corpus-documents.php?' + qs, { credentials: 'same-origin' })
        .then(r => r.json())
        .then(data => {
            const total = data.ok ? Number(data.total) : NaN;
            const hasTotal = Number.isFinite(total);
            // Show "?" for an unknown total; formatting a placeholder string yields "NaN".
            const totalText = hasTotal ? fmt(total) : '?';
            // Only http(s) URLs become links; esc() does not stop a `javascript:` href.
            const isWebUrl = /^https?:\/\//i.test(String(src.url || ''));
            const langText = src.language === 'no' ? '🇳🇴 Norwegian'
                : src.language === 'en' ? '🇬🇧 English'
                : esc(src.language || '—');
            container.innerHTML = `
                <div class="source-expand-grid">
                    <div>
                        <dl class="source-expand-dl">
                            <dt>Scraper class</dt>
                            <dd><code>${esc(src.scraper_class || '—')}</code></dd>
                            <dt>Category</dt>
                            <dd>${esc(src.category || '—')}</dd>
                            <dt>Authority type</dt>
                            <dd>${esc(src.authority_type || '—')}</dd>
                            <dt>Language</dt>
                            <dd>${langText}</dd>
                            <dt>Update schedule</dt>
                            <dd>${esc(scheduleLabels[src.schedule] || src.schedule || '—')}</dd>
                            <dt>Documents indexed</dt>
                            <dd><strong>${esc(totalText)}</strong></dd>
                        </dl>
                    </div>
                    <div>
                        ${isWebUrl ? `<p class="source-expand-url"><a href="${esc(src.url)}" target="_blank" rel="noopener">${esc(src.url)}</a></p>` : ''}
                        ${hasTotal && total > 0 ? `<button class="doc-list__more source-browse-btn" data-source="${esc(src.name)}" type="button">Browse ${esc(totalText)} documents →</button>` : ''}
                    </div>
                </div>`;
            container.querySelectorAll('.source-browse-btn').forEach(btn => {
                btn.addEventListener('click', () => openDrillBySource(src.name));
            });
        })
        .catch(() => {
            container.innerHTML = `<p class="source-expand-error">Could not load source details.</p>`;
            // Clear the flag so collapsing and re-expanding the row retries.
            delete container.dataset.loaded;
        });
}
// ── Category drill-down ───────────────────────────────────────────────────
// Pagination state of the open drill-down. Exactly one of category or
// sourceName is set while the panel shows a listing.
let drillState = {
    category: null,
    sourceName: null,
    offset: 0,
    total: 0,
    limit: 20,
};

// Drill-down panel elements, looked up once.
const drillPanel = document.getElementById('corpusDrillPanel');
const drillDocList = document.getElementById('drillDocList');
const drillTitle = document.getElementById('drillTitle');
const drillEyebrow = document.getElementById('drillEyebrow');
const drillMoreWrap = document.getElementById('drillMoreWrap');
const drillMoreBtn = document.getElementById('drillMoreBtn');
const drillCloseBtn = document.getElementById('drillCloseBtn');

// Each category card's "Browse docs" button opens that category's listing.
for (const browseBtn of document.querySelectorAll('.cat-browse-btn')) {
    browseBtn.addEventListener('click', function () {
        openDrillByCategory(browseBtn.dataset.cat);
    });
}
/**
 * Open the drill-down panel on the first page of a category's documents.
 * @param {string} cat  Category slug; shown verbatim if it has no label.
 */
function openDrillByCategory(cat) {
    const heading = catLabels[cat] || cat;

    // A fresh state object restarts pagination from the first page.
    drillState = { category: cat, sourceName: null, offset: 0, total: 0, limit: 20 };

    drillTitle.textContent = heading;
    drillEyebrow.textContent = 'Category';
    drillMoreWrap.hidden = true;
    drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';

    drillPanel.hidden = false;
    drillPanel.scrollIntoView({ block: 'start', behavior: 'smooth' });

    fetchDrillPage(false);
}
/**
 * Open the drill-down panel on the first page of one source's documents.
 * @param {string} sourceName  Source name, used both as filter and as heading.
 */
function openDrillBySource(sourceName) {
    // A fresh state object restarts pagination from the first page.
    drillState = { category: null, sourceName, offset: 0, total: 0, limit: 20 };

    drillTitle.textContent = sourceName;
    drillEyebrow.textContent = 'Source';
    drillMoreWrap.hidden = true;
    drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';

    drillPanel.hidden = false;
    drillPanel.scrollIntoView({ block: 'start', behavior: 'smooth' });

    fetchDrillPage(false);
}
/**
 * Fetch one page of documents for the current drill-down and render it.
 *
 * @param {boolean} append  true adds rows below the existing ones ("Load
 *                          more"); false replaces the list (first page).
 *
 * openDrillBy* replace `drillState` with a new object, so comparing against
 * the object captured at call time detects a response that belongs to a
 * drill-down the user has already left.
 */
function fetchDrillPage(append) {
    const state = drillState;
    const qs = new URLSearchParams({ offset: state.offset, limit: state.limit });
    if (state.category) qs.set('category', state.category);
    if (state.sourceName) qs.set('source_name', state.sourceName);

    // The offset only advances when the response arrives, so a second click
    // in the meantime would fetch and append the same page again.
    drillMoreBtn.disabled = true;

    fetch('/api/corpus-documents.php?' + qs, { credentials: 'same-origin' })
        .then(r => r.json())
        .then(data => {
            if (state !== drillState) return; // stale response, discard
            if (!data.ok) {
                if (!append) drillDocList.innerHTML = '<p class="drill-error">Could not load documents.</p>';
                return;
            }
            state.total = Number(data.total) || 0;
            const docs = data.documents || [];
            if (!append) drillDocList.innerHTML = '';
            if (docs.length === 0 && !append) {
                drillDocList.innerHTML = state.sourceName
                    ? '<p class="drill-empty">No documents found for this source.</p>'
                    : '<p class="drill-empty">No documents found in this category.</p>';
                drillMoreWrap.hidden = true;
                return;
            }
            docs.forEach(doc => {
                const item = document.createElement('div');
                item.className = 'doc-list__item';
                // Only http(s) URLs become links; esc() does not stop a `javascript:` href.
                const isWebUrl = /^https?:\/\//i.test(String(doc.source_url || ''));
                const titleHtml = isWebUrl
                    ? `<a href="${esc(doc.source_url)}" target="_blank" rel="noopener" class="doc-list__title">${esc(doc.title || '(Untitled)')}</a>`
                    : `<span class="doc-list__title">${esc(doc.title || '(Untitled)')}</span>`;
                const langFlag = doc.language === 'no' ? '🇳🇴' : doc.language === 'en' ? '🇬🇧' : '';
                item.innerHTML = `
                    <div class="doc-list__info">
                        ${titleHtml}
                        <div class="doc-list__meta">
                            <span class="source-cat">${esc(doc.category || '—')}</span>
                            ${langFlag ? `<span>${langFlag}</span>` : ''}
                            <span class="doc-list__date">${esc(fmtDate(doc.updated_at))}</span>
                        </div>
                    </div>
                    <span class="doc-list__chunks">${fmt(doc.chunk_count)} passages</span>`;
                drillDocList.appendChild(item);
            });
            const loaded = state.offset + docs.length;
            // An empty page also ends pagination, so a total that overstates
            // the real count cannot leave a "Load more" button that does nothing.
            drillMoreWrap.hidden = loaded >= state.total || docs.length === 0;
            state.offset = loaded;
        })
        .catch(() => {
            if (state !== drillState) return;
            if (!append) drillDocList.innerHTML = '<p class="drill-error">Network error.</p>';
        })
        .finally(() => {
            // A superseded request must not re-enable the button for the newer one.
            if (state === drillState) drillMoreBtn.disabled = false;
        });
}
// "Load more" appends the next page to the open listing.
drillMoreBtn.addEventListener('click', function () {
    fetchDrillPage(true);
});
// The close button only hides the panel; its contents stay in place.
drillCloseBtn.addEventListener('click', function () {
    drillPanel.hidden = true;
});
// ── Search bar ────────────────────────────────────────────────────────────
// Current selections of the two pill groups; the defaults match the pills
// marked is-active in the markup.
let searchMode = 'hybrid';
let searchLang = 'en';

// Make a pill group single-select: clicking a pill moves is-active to it and
// passes the pill's data-* attributes to `onPick`.
function bindPillGroup(selector, onPick) {
    document.querySelectorAll(selector).forEach(pill => {
        pill.addEventListener('click', () => {
            document.querySelectorAll(selector).forEach(other => other.classList.remove('is-active'));
            pill.classList.add('is-active');
            onPick(pill.dataset);
        });
    });
}

bindPillGroup('.search-modes .mode-pill', picked => { searchMode = picked.mode; });
bindPillGroup('.lang-pills .mode-pill', picked => { searchLang = picked.lang; });

const searchInput = document.getElementById('corpusSearchInput');
const searchBtn = document.getElementById('corpusSearchBtn');
const searchResults = document.getElementById('corpusSearchResults');
/**
 * Run the query in the search box against /api/corpus-search.php using the
 * selected mode and language, and render up to 8 passages into the results
 * container. Queries shorter than 3 characters show a hint instead.
 */
function runSearch() {
    // The button is disabled while a request is in flight, but Enter in the
    // input still reaches this function. Without this guard, overlapping
    // requests could finish out of order and show results for an older query.
    if (searchBtn.disabled) return;

    const q = searchInput.value.trim();
    if (q.length < 3) {
        searchResults.innerHTML = '<p class="search-hint">Enter at least 3 characters.</p>';
        searchResults.hidden = false;
        return;
    }
    searchResults.hidden = false;
    searchResults.innerHTML = `<p class="search-loading">Searching in <strong>${esc(searchMode)}</strong> mode…</p>`;
    searchBtn.disabled = true;

    fetch('/api/corpus-search.php', {
        method: 'POST',
        credentials: 'same-origin',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query: q, mode: searchMode, language: searchLang, limit: 8 }),
    })
        .then(r => r.json())
        .then(data => {
            if (!data.ok) {
                searchResults.innerHTML = `<p class="search-error">Search error: ${esc(data.error?.message || 'Unknown error')}</p>`;
                return;
            }
            const hits = data.hits || [];
            if (hits.length === 0) {
                searchResults.innerHTML = `<p class="search-empty">No results for <strong>${esc(q)}</strong> in ${esc(data.mode)} mode.</p>`;
                return;
            }
            const modeLabel = { hybrid: 'Hybrid RAG', bm25: 'BM25 keyword', vector: 'Vector semantic' }[data.mode] || data.mode;
            let html = `<div class="search-results-header"><span class="eyebrow">${esc(modeLabel)}</span><span class="search-results-count">${hits.length} passage${hits.length !== 1 ? 's' : ''}</span></div>`;
            hits.forEach(hit => {
                // Omit the score when it is not a number, rather than showing "NaN%".
                const pct = hit.score != null ? Math.round(Number(hit.score) * 100) : NaN;
                const score = Number.isFinite(pct) ? `<span class="passage-score">${pct}%</span>` : '';
                // NOTE(review): this looks up a *category* in the authority-type
                // map, so it normally falls through to the raw category text.
                // Confirm which field of the hit the badge should show.
                const catAuth = authorityLabels[hit.category] || { label: hit.category || '—', cls: 'badge--muted' };
                // Only http(s) URLs become links; esc() does not stop a `javascript:` href.
                const isWebUrl = /^https?:\/\//i.test(String(hit.source_url || ''));
                const titleHtml = isWebUrl
                    ? `<a href="${esc(hit.source_url)}" target="_blank" rel="noopener" class="passage-card__title">${esc(hit.title || '(Untitled)')}</a>`
                    : `<span class="passage-card__title">${esc(hit.title || '(Untitled)')}</span>`;
                const section = hit.section ? `<span class="passage-section">§ ${esc(hit.section)}</span>` : '';
                const excerpt = highlight(hit.excerpt || '', q);
                html += `
                    <div class="passage-card">
                        <div class="passage-card__meta">
                            <span class="source-badge ${esc(catAuth.cls)}">${esc(catAuth.label)}</span>
                            ${section}
                            ${score}
                        </div>
                        ${titleHtml}
                        <p class="passage-card__excerpt">${excerpt}</p>
                    </div>`;
            });
            searchResults.innerHTML = html;
        })
        .catch(() => {
            searchResults.innerHTML = `<p class="search-error">Network error.</p>`;
        })
        .finally(() => {
            // Re-enable on every path: success, API error, or thrown exception.
            searchBtn.disabled = false;
        });
}
// Run the search from the button or by pressing Enter in the input.
searchBtn.addEventListener('click', runSearch);
searchInput.addEventListener('keydown', function (event) {
    if (event.key !== 'Enter') return;
    runSearch();
});
})();
</script>
<?php /* Shared page footer. NOTE(review): presumably closes the markup opened by includes/layout.php — confirm. */ require_once __DIR__ . '/includes/layout_footer.php'; ?>