2e2dfd7310
- Search: category filter pills scope results to a legal domain
- Search: full chunk text returned; click to expand inline beyond 600-char excerpt
- Drill panel: total count label ("Showing X of Y"), sort dropdown, title filter (300ms debounce)
- URL hash: preserves query/mode/lang/category/drill state for bookmarking
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
887 lines
43 KiB
PHP
887 lines
43 KiB
PHP
<?php
/**
 * Corpus ("Legal Knowledge Base") tool page.
 *
 * Sets the page metadata variables, captures the "Corpus health" side panel
 * markup into $reasoningPanelOverride via output buffering, then pulls in
 * the shared layout include.
 */
declare(strict_types=1);

// Page metadata (presumably read by includes/layout.php; verify there).
$toolName  = 'corpus';
$toolTitle = 'Legal Knowledge Base';
$toolKind  = 'Corpus Intelligence';
$toolBadge = '~220 K passages';

// Everything echoed until ob_get_clean() is captured rather than sent.
ob_start();
?>
<div class="reasoning-head">
  <p class="eyebrow">Corpus health</p>
  <h2 id="reasoningTitle">Vector index</h2>
</div>
<dl class="corpus-health-dl">
  <dt>Collection</dt>
  <dd><code>bnl_chunks</code></dd>

  <dt>Dimensions</dt>
  <dd>768 (nomic-embed-text)</dd>

  <dt>Similarity</dt>
  <dd>Cosine</dd>

  <dt>RAG strategy</dt>
  <dd>Hybrid vector + keyword<br>Reciprocal rank fusion</dd>

  <dt>Private boost</dt>
  <dd>1.5×</dd>

  <dt>Temporal mode</dt>
  <dd>legal_conservative</dd>

  <dt>Chunk target</dt>
  <dd>600 words · 75 overlap</dd>

  <dt>Vector DB</dt>
  <dd>Qdrant on Colin Docker<br><code>10.0.2.10:6333</code></dd>

  <dt>Hybrid search</dt>
  <dd>Azure AI Search<br><code>bnl-legal-search</code><br>West Europe · Basic SKU</dd>
</dl>
<?php
// Hand the buffered panel markup to the layout as a string.
$reasoningPanelOverride = ob_get_clean();

require_once __DIR__ . '/includes/layout.php';
?>
|
||
|
||
<!-- STATS BAR -->
|
||
<!-- Headline numbers; each value starts as a dash placeholder and is filled in by the stats request in the script below. -->
<div class="corpus-stats-bar" id="corpusStatsBar">
  <div class="corpus-stat" id="statChunks">
    <span class="corpus-stat__value is-loading">—</span>
    <span class="corpus-stat__label">Indexed passages</span>
  </div>
  <div class="corpus-stat" id="statDocs">
    <span class="corpus-stat__value is-loading">—</span>
    <span class="corpus-stat__label">Source documents</span>
  </div>
  <div class="corpus-stat" id="statSources">
    <span class="corpus-stat__value is-loading">—</span>
    <span class="corpus-stat__label">Active scrapers</span>
  </div>
  <div class="corpus-stat" id="statUpdated">
    <span class="corpus-stat__value is-loading">—</span>
    <span class="corpus-stat__label">Last ingested</span>
  </div>
</div>
|
||
|
||
<!-- CORPUS SEARCH -->
|
||
<!-- Search box: query field, mode / language pills and category filter pills.
     The input gets an explicit accessible name because a placeholder alone is not a label. -->
<div class="corpus-search-box">
  <div class="corpus-search-row">
    <input type="search" id="corpusSearchInput" class="corpus-search-input"
           placeholder="Search 220 K passages — try «samvær», «arbeidsgiver», «barnevernloven»…"
           autocomplete="off" spellcheck="false"
           aria-label="Search the legal corpus">
    <button id="corpusSearchBtn" class="primary-button" type="button">Search</button>
  </div>
  <div class="corpus-search-controls">
    <div class="search-modes" role="group" aria-label="Search mode">
      <button class="mode-pill is-active" data-mode="hybrid" type="button">Hybrid</button>
      <button class="mode-pill" data-mode="bm25" type="button">BM25</button>
      <button class="mode-pill" data-mode="vector" type="button">Vector</button>
    </div>
    <div class="lang-pills" role="group" aria-label="Language">
      <button class="mode-pill is-active" data-lang="en" type="button">EN</button>
      <button class="mode-pill" data-lang="no" type="button">NO</button>
      <button class="mode-pill" data-lang="uk" type="button">UK</button>
      <button class="mode-pill" data-lang="pl" type="button">PL</button>
    </div>
  </div>
  <div class="search-cats" role="group" aria-label="Category filter" id="searchCatPills">
    <button class="mode-pill is-active" data-cat="" type="button">All</button>
    <button class="mode-pill" data-cat="family-law" type="button">Family Law</button>
    <button class="mode-pill" data-cat="child-welfare" type="button">Child Welfare</button>
    <button class="mode-pill" data-cat="labour-law" type="button">Labour Law</button>
    <button class="mode-pill" data-cat="social-welfare" type="button">Social Welfare</button>
    <button class="mode-pill" data-cat="tax-law" type="button">Tax Law</button>
    <button class="mode-pill" data-cat="administrative-law" type="button">Administrative</button>
    <button class="mode-pill" data-cat="consumer-law" type="button">Consumer</button>
    <button class="mode-pill" data-cat="immigration-law" type="button">Immigration</button>
    <button class="mode-pill" data-cat="government-documents" type="button">Gov Docs</button>
  </div>
</div>
<!-- Search results are rendered into this container by the script; hidden until the first search. -->
<div id="corpusSearchResults" class="corpus-search-results" hidden></div>
|
||
|
||
<!-- COVERAGE -->
|
||
<div class="corpus-section">
|
||
<p class="eyebrow">Coverage</p>
|
||
<h3 class="corpus-section__title">Legal categories</h3>
|
||
<!-- One card per legal category. The count span ids (cat-*) are the targets the
     stats request writes into; data-cat on the button selects the drill-down category.
     Ampersands in headings are written as &amp; entities. -->
<div class="corpus-categories" id="corpusCategories">
  <div class="category-card" data-category="family-law">
    <div class="category-card__top">
      <span class="category-card__icon" aria-hidden="true">⚖</span>
      <span class="category-card__count is-loading" id="cat-family-law">—</span>
    </div>
    <h4>Family Law</h4>
    <p>Barneloven, child custody (foreldreansvar), samvær, mediation (mekling), separation and divorce proceedings.</p>
    <button class="cat-browse-btn" data-cat="family-law" type="button">Browse docs →</button>
  </div>

  <div class="category-card" data-category="child-welfare">
    <div class="category-card__top">
      <span class="category-card__icon" aria-hidden="true">🧒</span>
      <span class="category-card__count is-loading" id="cat-child-welfare">—</span>
    </div>
    <h4>Child Welfare</h4>
    <p>Barnevernloven, omsorgsovertakelse, emergency care orders, foster placement, CPS (barnevernet) case law.</p>
    <button class="cat-browse-btn" data-cat="child-welfare" type="button">Browse docs →</button>
  </div>

  <div class="category-card" data-category="labour-law">
    <div class="category-card__top">
      <span class="category-card__icon" aria-hidden="true">🏭</span>
      <span class="category-card__count is-loading" id="cat-labour-law">—</span>
    </div>
    <h4>Labour Law</h4>
    <p>Arbeidsmiljøloven, collective agreements (tariffavtaler), Arbeidsretten rulings, dismissal, sick leave obligations.</p>
    <button class="cat-browse-btn" data-cat="labour-law" type="button">Browse docs →</button>
  </div>

  <div class="category-card" data-category="social-welfare">
    <div class="category-card__top">
      <span class="category-card__icon" aria-hidden="true">🛡</span>
      <span class="category-card__count is-loading" id="cat-social-welfare">—</span>
    </div>
    <h4>Social Welfare</h4>
    <p>NAV guidance on sykepenger, dagpenger, AAP, uføretrygd, alderspensjon, yrkesskade and social assistance.</p>
    <button class="cat-browse-btn" data-cat="social-welfare" type="button">Browse docs →</button>
  </div>

  <div class="category-card" data-category="tax-law">
    <div class="category-card__top">
      <span class="category-card__icon" aria-hidden="true">📊</span>
      <span class="category-card__count is-loading" id="cat-tax-law">—</span>
    </div>
    <h4>Tax Law</h4>
    <p>Skatteetaten's Skatte-ABC, binding advance rulings (BFU), Skatteklagenemnda decisions, income and capital tax.</p>
    <button class="cat-browse-btn" data-cat="tax-law" type="button">Browse docs →</button>
  </div>

  <div class="category-card" data-category="administrative-law">
    <div class="category-card__top">
      <span class="category-card__icon" aria-hidden="true">🏛</span>
      <span class="category-card__count is-loading" id="cat-administrative-law">—</span>
    </div>
    <h4>Administrative Law</h4>
    <p>Sivilombudet reports, Forvaltningsloven, procedural rights, official complaints, Stortinget oversight.</p>
    <button class="cat-browse-btn" data-cat="administrative-law" type="button">Browse docs →</button>
  </div>

  <div class="category-card" data-category="consumer-law">
    <div class="category-card__top">
      <span class="category-card__icon" aria-hidden="true">🏠</span>
      <span class="category-card__count is-loading" id="cat-consumer-law">—</span>
    </div>
    <h4>Consumer &amp; Housing</h4>
    <p>HTU (rental disputes), Finansklagenemnda, Forbrukertilsynet, Forbrukerrådet, Pakkereisenemnda decisions.</p>
    <button class="cat-browse-btn" data-cat="consumer-law" type="button">Browse docs →</button>
  </div>

  <div class="category-card" data-category="immigration-law">
    <div class="category-card__top">
      <span class="category-card__icon" aria-hidden="true">🌐</span>
      <span class="category-card__count is-loading" id="cat-immigration-law">—</span>
    </div>
    <h4>Immigration &amp; International</h4>
    <p>UNE (Utlendingsnemnda) decisions, ECHR Art. 8 family rights, EMD case law, Hague Convention (cross-border child abduction).</p>
    <button class="cat-browse-btn" data-cat="immigration-law" type="button">Browse docs →</button>
  </div>

  <div class="category-card" data-category="government-documents">
    <div class="category-card__top">
      <span class="category-card__icon" aria-hidden="true">📄</span>
      <span class="category-card__count is-loading" id="cat-government-documents">—</span>
    </div>
    <h4>Government Documents</h4>
    <p>NOUer, Stortingsmeldinger, government white papers and regulatory guidance from Regjeringen.no.</p>
    <button class="cat-browse-btn" data-cat="government-documents" type="button">Browse docs →</button>
  </div>
</div>
|
||
|
||
<!-- DRILL-DOWN PANEL -->
|
||
<!-- Drill-down panel: document list for one category or one source.
     Hidden until a "Browse" button opens it; title, count and list are filled in by the script.
     The filter field and sort dropdown carry aria-labels because they have no visible <label>. -->
<div id="corpusDrillPanel" class="corpus-drill-panel" hidden>
  <div class="drill-header">
    <div>
      <p class="eyebrow" id="drillEyebrow">Category</p>
      <h3 id="drillTitle">Documents</h3>
    </div>
    <button class="drill-close-btn" id="drillCloseBtn" type="button" aria-label="Close">✕</button>
  </div>
  <div class="drill-controls" id="drillControls">
    <span class="drill-count" id="drillCount"></span>
    <div class="drill-controls-right">
      <input type="search" id="drillSearchInput" class="drill-search-input"
             placeholder="Filter by title…" autocomplete="off"
             aria-label="Filter documents by title">
      <select id="drillSortSelect" class="drill-sort-select" aria-label="Sort documents">
        <option value="newest">Newest first</option>
        <option value="oldest">Oldest first</option>
        <option value="alpha">A–Z</option>
        <option value="chunks">Most passages</option>
      </select>
    </div>
  </div>
  <div id="drillDocList" class="doc-list"></div>
  <div class="doc-list__more-wrap" id="drillMoreWrap" hidden>
    <button class="doc-list__more" id="drillMoreBtn" type="button">Load more</button>
  </div>
</div>
|
||
</div>
|
||
|
||
<!-- SOURCES TABLE -->
|
||
<!-- Scraper list. The tbody starts with a single skeleton row and is replaced by the script.
     Header cells declare scope="col"; the first column (expand toggle) has no visible
     heading, so it is named with aria-label. -->
<div class="corpus-section">
  <p class="eyebrow">Data sources</p>
  <h3 class="corpus-section__title">Active scrapers</h3>
  <div class="corpus-table-wrap">
    <table class="sources-table" id="sourcesTable">
      <thead>
        <tr>
          <th scope="col" aria-label="Details"></th>
          <th scope="col">Source</th>
          <th scope="col">Type</th>
          <th scope="col">Category</th>
          <th scope="col">Lang</th>
          <th scope="col">Schedule</th>
          <th scope="col">Status</th>
        </tr>
      </thead>
      <tbody id="sourcesTableBody">
        <tr class="sources-skeleton"><td colspan="7">Loading sources…</td></tr>
      </tbody>
    </table>
  </div>
</div>
|
||
|
||
<!-- AI STACK -->
|
||
<!-- Static description of the AI stack. The ampersand in the "Vector & Hybrid Search"
     heading is written as an &amp; entity. -->
<div class="corpus-section">
  <p class="eyebrow">Software</p>
  <h3 class="corpus-section__title">AI stack</h3>
  <div class="stack-grid">

    <div class="stack-card">
      <h3>Reasoning LLMs</h3>
      <ul class="stack-list">
        <li><span class="stack-badge stack-badge--azure">Azure</span> <strong>gpt-4o-mini</strong> <span class="stack-star">★ default</span> — fast, cost-efficient</li>
        <li><span class="stack-badge stack-badge--azure">Azure</span> <strong>gpt-4o</strong> — highest quality</li>
        <li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>qwen2.5:14b</strong> — local, private</li>
        <li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>qwen3:14b</strong> — reasoning mode</li>
        <li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>dbn-legal-agent</strong> — Norwegian law fine-tune (QLoRA on qwen2.5:7b, NorwAI-24B distillation)</li>
      </ul>
      <p class="stack-note">All routed via LiteLLM on Colin · <code>10.0.1.10:4000</code></p>
    </div>

    <div class="stack-card">
      <h3>Transcription</h3>
      <ul class="stack-list">
        <li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>Whisper large-v3</strong> <span class="stack-star">★ primary</span><br>Cuttlefish · RTX 3060 12 GB VRAM</li>
        <li><span class="stack-badge stack-badge--api">API</span> OpenAI Whisper API</li>
        <li><span class="stack-badge stack-badge--azure">Azure</span> AI Speech <code>nb-NO</code> (Norway East)</li>
      </ul>
      <p class="stack-note">Speaker diarization · VAD silence filter · beam size 5 · vocabulary presets (barnerett, mediation)</p>
    </div>

    <div class="stack-card">
      <h3>Embeddings</h3>
      <ul class="stack-list">
        <li><strong>nomic-embed-text</strong> — 768-dim dense vectors</li>
        <li>Ollama on Chloe <code>10.0.1.11:11434</code></li>
        <li>Cosine similarity in Qdrant</li>
      </ul>
      <p class="stack-note">All documents chunked and embedded before indexing; chunks stored in both Qdrant (vector) and MariaDB (keyword fallback)</p>
    </div>

    <div class="stack-card">
      <h3>Vector &amp; Hybrid Search</h3>
      <ul class="stack-list">
        <li><strong>Qdrant</strong> <code>bnl_chunks</code> · ~220 K vectors<br>Colin Docker · <code>10.0.2.10:6333</code></li>
        <li><strong>Azure AI Search</strong> <code>bnl-legal-search</code><br>Basic SKU · West Europe · hybrid keyword + semantic</li>
        <li>Reciprocal rank fusion (vector + keyword)</li>
        <li>Private corpus boosted 1.5×</li>
      </ul>
    </div>

    <div class="stack-card">
      <h3>Chunking pipeline</h3>
      <ul class="stack-list">
        <li>Heading-aware semantic splitting</li>
        <li>600-word target · 75-word overlap</li>
        <li>50-word minimum chunk</li>
        <li>SHA-256 deduplication</li>
        <li>PDF, DOCX, HTML text extraction</li>
        <li>Temporal metadata (valid_from / valid_until)</li>
      </ul>
      <p class="stack-note">Legal temporal reranking: <code>legal_conservative</code> — surfaces current versions first</p>
    </div>

  </div>
</div>
|
||
|
||
<!-- DATA PIPELINE -->
|
||
<!-- Ingestion pipeline diagram, exposed as a list. The emoji icons are decorative
     (each step has a text name), so they are hidden from assistive technology,
     matching the category-card icons elsewhere on this page. -->
<div class="corpus-section">
  <p class="eyebrow">How it works</p>
  <h3 class="corpus-section__title">Ingestion pipeline</h3>
  <div class="pipeline-flow" role="list" aria-label="Data pipeline steps">
    <div class="pipeline-step" role="listitem">
      <span class="pipeline-step__icon" aria-hidden="true">🌐</span>
      <span>Source</span>
      <small>gov websites, APIs, PDFs</small>
    </div>
    <div class="pipeline-arrow" aria-hidden="true"></div>
    <div class="pipeline-step" role="listitem">
      <span class="pipeline-step__icon" aria-hidden="true">🕷</span>
      <span>Scraper</span>
      <small>HTTP / API / PDF</small>
    </div>
    <div class="pipeline-arrow" aria-hidden="true"></div>
    <div class="pipeline-step" role="listitem">
      <span class="pipeline-step__icon" aria-hidden="true">📝</span>
      <span>Text extract</span>
      <small>PDF, DOCX, HTML</small>
    </div>
    <div class="pipeline-arrow" aria-hidden="true"></div>
    <div class="pipeline-step" role="listitem">
      <span class="pipeline-step__icon" aria-hidden="true">✂</span>
      <span>TextChunker</span>
      <small>600w · 75w overlap</small>
    </div>
    <div class="pipeline-arrow" aria-hidden="true"></div>
    <div class="pipeline-step" role="listitem">
      <span class="pipeline-step__icon" aria-hidden="true">🔢</span>
      <span>Embed</span>
      <small>nomic · 768-dim</small>
    </div>
    <div class="pipeline-arrow" aria-hidden="true"></div>
    <div class="pipeline-step" role="listitem">
      <span class="pipeline-step__icon" aria-hidden="true">⚡</span>
      <span>Qdrant</span>
      <small>cosine upsert</small>
    </div>
    <div class="pipeline-arrow" aria-hidden="true"></div>
    <div class="pipeline-step" role="listitem">
      <span class="pipeline-step__icon" aria-hidden="true">🤖</span>
      <span>LiteLLM</span>
      <small>RAG + LLM</small>
    </div>
    <div class="pipeline-arrow" aria-hidden="true"></div>
    <div class="pipeline-step pipeline-step--end" role="listitem">
      <span class="pipeline-step__icon" aria-hidden="true">🔍</span>
      <span>Your tool</span>
      <small>Ask, Search, Research…</small>
    </div>
  </div>
</div>
|
||
|
||
<script>
|
||
(function () {
|
||
'use strict';
|
||
|
||
// ── Utilities ────────────────────────────────────────────────────────────
|
||
/**
 * Escape a value for interpolation into HTML text or a quoted attribute.
 *
 * The previous body replaced each special character with itself, so nothing
 * was escaped and every innerHTML caller received raw API data.
 *
 * @param {*} s Value to escape; null and undefined become ''.
 * @returns {string} The string form of s with & < > " ' replaced by entities.
 */
function esc(s) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  // Single pass, so an ampersand produced by one replacement is never re-escaped.
  return String(s ?? '').replace(/[&<>"']/g, ch => entities[ch]);
}
|
||
|
||
/**
 * Format a count with English thousands separators.
 *
 * @param {*} n Number or numeric string.
 * @returns {string} e.g. '1,234'; an em dash when n is null, undefined or
 *   not a finite number (previously such input rendered as "NaN").
 */
function fmt(n) {
  if (n === null || n === undefined) return '—';
  const value = Number(n);
  if (!Number.isFinite(value)) return '—';
  return value.toLocaleString('en');
}
|
||
|
||
/**
 * Format a date value as a short en-GB date, e.g. '5 Mar 2024'.
 *
 * @param {*} s Anything the Date constructor accepts.
 * @returns {string} The formatted date, or an em dash when s is empty or
 *   cannot be parsed.
 */
function fmtDate(s) {
  if (!s) return '—';
  try {
    const d = new Date(s);
    // new Date() does not throw on unparseable input; it yields an invalid
    // Date that would otherwise be displayed as "Invalid Date".
    if (Number.isNaN(d.getTime())) return '—';
    return d.toLocaleDateString('en-GB', { day: 'numeric', month: 'short', year: 'numeric' });
  } catch (e) {
    // Return a fixed placeholder rather than the raw input: some callers
    // insert this function's result into innerHTML.
    return '—';
  }
}
|
||
|
||
/**
 * Return text as escaped HTML with every case-insensitive occurrence of
 * query wrapped in <mark>.
 *
 * Matching runs on the raw text and each piece is escaped on its own. The
 * previous version searched the already-escaped string and then escaped each
 * match a second time, which could double-escape and could match inside an
 * entity.
 *
 * @param {*} text Passage text; null and undefined are treated as ''.
 * @param {string} query Literal search string, not a pattern.
 * @returns {string} HTML-safe markup.
 */
function highlight(text, query) {
  const raw = String(text ?? '');
  if (!query) return esc(raw);

  // Escape regex metacharacters so the query is matched literally.
  const literal = String(query).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp(literal, 'gi');

  let html = '';
  let cursor = 0;
  for (const hit of raw.matchAll(pattern)) {
    html += esc(raw.slice(cursor, hit.index)) + '<mark>' + esc(hit[0]) + '</mark>';
    cursor = hit.index + hit[0].length;
  }
  return html + esc(raw.slice(cursor));
}
|
||
|
||
/** Remove the `is-loading` placeholder class from an element. */
function setLoaded(el) {
  el.classList.remove('is-loading');
}
|
||
|
||
// ── Authority / schedule label maps ─────────────────────────────────────
|
||
// authority_type value -> display label and badge CSS class.
const authorityLabels = Object.fromEntries([
  ['case_law',   'Case law',   'badge--teal'],
  ['guidance',   'Guidance',   'badge--amber'],
  ['report',     'Report',     'badge--muted'],
  ['ombudsman',  'Ombudsman',  'badge--muted'],
  ['tribunal',   'Tribunal',   'badge--coral'],
  ['regulatory', 'Regulatory', 'badge--coral'],
  ['law',        'Statute',    'badge--teal'],
  ['treaty',     'Treaty',     'badge--muted'],
].map(([key, label, cls]) => [key, { label, cls }]));

// schedule value -> display label.
const scheduleLabels = {
  daily: 'Daily',
  weekly: 'Weekly',
  monthly: 'Monthly',
  manual: 'Manual',
};
|
||
|
||
// API category slug -> id of the count element on the matching category card.
// Each card slug is accepted in hyphenated and underscored form; a few extra
// slugs are folded into an existing card.
const catIds = (() => {
  const ids = {};

  const cardSlugs = [
    'family-law',
    'child-welfare',
    'labour-law',
    'social-welfare',
    'tax-law',
    'administrative-law',
    'consumer-law',
    'immigration-law',
    'government-documents',
  ];
  for (const slug of cardSlugs) {
    ids[slug] = 'cat-' + slug;
    ids[slug.replace(/-/g, '_')] = 'cat-' + slug;
  }

  // Slugs without a card of their own.
  for (const slug of ['tenancy-law', 'financial-law']) {
    ids[slug] = 'cat-consumer-law';
  }
  for (const slug of ['case-law', 'victim-compensation', 'procurement-law']) {
    ids[slug] = 'cat-administrative-law';
  }

  return ids;
})();
|
||
|
||
// Category slug -> heading shown in the drill-down panel title.
const catLabels = {
  'family-law': 'Family Law',
  'child-welfare': 'Child Welfare',
  'labour-law': 'Labour Law',
  'social-welfare': 'Social Welfare',
  'tax-law': 'Tax Law',
  'administrative-law': 'Administrative Law',
  'consumer-law': 'Consumer & Housing',
  'immigration-law': 'Immigration & International',
  'government-documents': 'Government Documents',
};
|
||
|
||
// ── STATS + SOURCES table load ───────────────────────────────────────────
|
||
// Source rows from the last successful stats response.
let cachedSources = [];

// One request fills the stats bar, the per-category counts and the sources table.
fetch('/api/corpus-stats.php', { credentials: 'same-origin' })
  .then(r => r.json())
  .then(data => {
    // Route an application-level failure to the catch handler so the
    // placeholders are cleared instead of staying in the loading state.
    if (!data || !data.ok) throw new Error('corpus-stats: response not ok');
    const s = data.stats || {};

    [
      ['#statChunks',  fmt(s.total_chunks)],
      ['#statDocs',    fmt(s.total_docs)],
      ['#statSources', fmt(s.active_sources)],
      ['#statUpdated', fmtDate(s.last_updated)],
    ].forEach(([scope, text]) => {
      const el = document.querySelector(scope + ' .corpus-stat__value');
      if (el) { el.textContent = text; setLoaded(el); }
    });

    // Several API categories can map to the same card, so sum numerically
    // first and format once. Re-parsing the displayed text would read
    // "1,234" as 1 and corrupt the running total.
    const totals = new Map();
    (s.by_category || []).forEach(row => {
      const elId = catIds[row.category];
      if (!elId) return;
      const count = parseInt(row.doc_count, 10);
      totals.set(elId, (totals.get(elId) || 0) + (Number.isNaN(count) ? 0 : count));
    });
    totals.forEach((count, elId) => {
      const el = document.getElementById(elId);
      if (!el) return;
      el.textContent = fmt(count);
      setLoaded(el);
    });

    // Cards the response did not mention have no documents.
    document.querySelectorAll('.category-card__count.is-loading').forEach(el => {
      el.textContent = '0'; setLoaded(el);
    });

    // Sources table
    cachedSources = data.sources || [];
    renderSourcesTable(cachedSources);
  })
  .catch(() => {
    // Only touch placeholders that are still loading, so values rendered
    // before a late failure are kept.
    document.querySelectorAll('.corpus-stat__value.is-loading, .category-card__count.is-loading').forEach(el => {
      el.textContent = '—';
      el.classList.remove('is-loading');
    });
    const skeletonCell = document.querySelector('#sourcesTableBody .sources-skeleton td');
    if (skeletonCell) skeletonCell.textContent = 'Could not load sources.';
  });
|
||
|
||
// ── Sources table rendering ───────────────────────────────────────────────
|
||
/**
 * Rebuild the sources table body.
 *
 * Each source produces two rows: a summary row with an expand button and a
 * hidden detail row whose content is loaded on first expansion.
 *
 * @param {Array<Object>} sources Source records from the stats response. Fields
 *   read here: name, url, authority_type, schedule, language, category, is_active.
 */
function renderSourcesTable(sources) {
  const tbody = document.getElementById('sourcesTableBody');
  if (!tbody) return;
  tbody.innerHTML = '';

  sources.forEach((src, idx) => {
    const auth = authorityLabels[src.authority_type] || { label: src.authority_type || '—', cls: 'badge--muted' };
    const sched = scheduleLabels[src.schedule] || (src.schedule || 'Manual');
    // Known languages get a flag. Any other value is API data going into
    // innerHTML, so it must be escaped (previously it was inserted raw).
    const langHtml = src.language === 'no' ? '🇳🇴'
      : src.language === 'en' ? '🇬🇧'
      : esc(src.language || '—');
    const statusHtml = src.is_active
      ? '<span class="status-active">● Active</span>'
      : '<span class="status-inactive">○ Inactive</span>';
    const nameHtml = src.url
      ? `<a href="${esc(src.url)}" target="_blank" rel="noopener">${esc(src.name)}</a>`
      : esc(src.name);
    const detailId = `source-expand-${idx}`;

    const tr = document.createElement('tr');
    tr.dataset.idx = idx;
    tr.innerHTML = `
      <td class="source-expand-cell">
        <button class="source-expand-btn" type="button" aria-expanded="false" aria-controls="${detailId}" aria-label="Expand ${esc(src.name)}">▶</button>
      </td>
      <td class="source-name">${nameHtml}</td>
      <td><span class="source-badge ${esc(auth.cls)}">${esc(auth.label)}</span></td>
      <td><span class="source-cat">${esc(src.category || '—')}</span></td>
      <td>${langHtml}</td>
      <td>${esc(sched)}</td>
      <td>${statusHtml}</td>`;
    tbody.appendChild(tr);

    // Detail row, hidden until the button is used.
    const expandTr = document.createElement('tr');
    expandTr.className = 'source-expand-row';
    expandTr.hidden = true;
    expandTr.dataset.name = src.name;
    expandTr.innerHTML = `<td colspan="7"><div class="source-expand-inner" id="${detailId}">
      <div class="source-expand-loading">Loading…</div></div></td>`;
    tbody.appendChild(expandTr);

    // Toggle handler: keeps the arrow glyph and aria-expanded in sync with the row.
    tr.querySelector('.source-expand-btn').addEventListener('click', function () {
      const isOpen = expandTr.hidden === false;
      if (isOpen) {
        expandTr.hidden = true;
        this.textContent = '▶';
        this.setAttribute('aria-expanded', 'false');
      } else {
        expandTr.hidden = false;
        this.textContent = '▼';
        this.setAttribute('aria-expanded', 'true');
        loadSourceExpand(idx, src, detailId);
      }
    });
  });
}
|
||
|
||
/**
 * Fill a source's detail row: metadata, document count and a browse button.
 *
 * The count comes from a documents request with limit=1, of which only
 * `total` is used. A successful load is cached through data-loaded on the
 * container. A failed load clears that flag so the next expansion retries
 * (previously a failure was permanent).
 *
 * @param {number} idx Row index (unused here; kept for the existing call signature).
 * @param {Object} src Source record.
 * @param {string} containerId Id of the detail container element.
 */
function loadSourceExpand(idx, src, containerId) {
  const container = document.getElementById(containerId);
  if (!container || container.dataset.loaded) return;
  container.dataset.loaded = '1';
  // Replaces the error text with the loading indicator when this is a retry.
  container.innerHTML = '<div class="source-expand-loading">Loading…</div>';

  // Fetch doc count for this source
  const qs = new URLSearchParams({ source_name: src.name, limit: 1 });
  fetch('/api/corpus-documents.php?' + qs, { credentials: 'same-origin' })
    .then(r => r.json())
    .then(data => {
      const known = Boolean(data.ok);
      const total = known ? data.total : null;
      // Show '?' for an unknown count instead of passing it through fmt().
      const totalLabel = known ? fmt(total) : '?';
      const languageHtml = src.language === 'no' ? '🇳🇴 Norwegian'
        : src.language === 'en' ? '🇬🇧 English'
        : esc(src.language || '—');

      container.innerHTML = `
        <div class="source-expand-grid">
          <div>
            <dl class="source-expand-dl">
              <dt>Scraper class</dt>
              <dd><code>${esc(src.scraper_class || '—')}</code></dd>
              <dt>Category</dt>
              <dd>${esc(src.category || '—')}</dd>
              <dt>Authority type</dt>
              <dd>${esc(src.authority_type || '—')}</dd>
              <dt>Language</dt>
              <dd>${languageHtml}</dd>
              <dt>Update schedule</dt>
              <dd>${esc(scheduleLabels[src.schedule] || src.schedule || '—')}</dd>
              <dt>Documents indexed</dt>
              <dd><strong>${totalLabel}</strong></dd>
            </dl>
          </div>
          <div>
            ${src.url ? `<p class="source-expand-url"><a href="${esc(src.url)}" target="_blank" rel="noopener">${esc(src.url)}</a></p>` : ''}
            ${Number(total) > 0 ? `<button class="doc-list__more source-browse-btn" data-source="${esc(src.name)}" type="button">Browse ${fmt(total)} documents →</button>` : ''}
          </div>
        </div>`;

      container.querySelectorAll('.source-browse-btn').forEach(btn => {
        btn.addEventListener('click', () => openDrillBySource(src.name));
      });
    })
    .catch(() => {
      container.innerHTML = `<p class="source-expand-error">Could not load source details.</p>`;
      delete container.dataset.loaded;
    });
}
|
||
|
||
// ── Category drill-down ───────────────────────────────────────────────────
|
||
// Current drill-down query: which category or source is open, the paging
// position, and the active sort / title filter.
let drillState = {
  category: null,
  sourceName: null,
  offset: 0,
  total: 0,
  limit: 20,
  sort: 'newest',
  titleFilter: '',
};

// Drill-down panel elements, looked up once.
const drillPanel       = document.querySelector('#corpusDrillPanel');
const drillDocList     = document.querySelector('#drillDocList');
const drillTitle       = document.querySelector('#drillTitle');
const drillEyebrow     = document.querySelector('#drillEyebrow');
const drillMoreWrap    = document.querySelector('#drillMoreWrap');
const drillMoreBtn     = document.querySelector('#drillMoreBtn');
const drillCloseBtn    = document.querySelector('#drillCloseBtn');
const drillCount       = document.querySelector('#drillCount');
const drillSortSelect  = document.querySelector('#drillSortSelect');
const drillSearchInput = document.querySelector('#drillSearchInput');

// Each card's "Browse docs" button opens the panel for that card's category.
for (const browseBtn of document.querySelectorAll('.cat-browse-btn')) {
  browseBtn.addEventListener('click', () => {
    openDrillByCategory(browseBtn.dataset.cat);
  });
}
|
||
|
||
/**
 * Open the drill-down panel for one category.
 *
 * Replaces drillState with a first-page / default-sort / no-filter state,
 * resets the panel controls, reveals and scrolls to the panel, calls
 * pushHash() (defined elsewhere in this file) and requests the first page.
 *
 * @param {string} cat Category slug; shown as-is when catLabels has no entry.
 */
function openDrillByCategory(cat) {
  drillState = {
    category: cat,
    sourceName: null,
    offset: 0,
    total: 0,
    limit: 20,
    sort: 'newest',
    titleFilter: '',
  };

  const heading = catLabels[cat] || cat;
  drillEyebrow.textContent = 'Category';
  drillTitle.textContent = heading;

  // Placeholder until the first page arrives.
  drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';
  drillMoreWrap.hidden = true;
  drillCount.textContent = '';

  // Put the controls back to their defaults to match the fresh state.
  if (drillSortSelect) {
    drillSortSelect.value = 'newest';
  }
  if (drillSearchInput) {
    drillSearchInput.value = '';
  }

  drillPanel.hidden = false;
  drillPanel.scrollIntoView({ behavior: 'smooth', block: 'start' });

  pushHash();
  fetchDrillPage(false);
}
|
||
|
||
/**
 * Open the drill-down panel for one source (scraper).
 *
 * Same reset sequence as the category variant, but the state selects by
 * source name and the panel is headed with the source name itself.
 *
 * @param {string} sourceName Source name as passed to the documents API.
 */
function openDrillBySource(sourceName) {
  drillState = {
    category: null,
    sourceName: sourceName,
    offset: 0,
    total: 0,
    limit: 20,
    sort: 'newest',
    titleFilter: '',
  };

  drillEyebrow.textContent = 'Source';
  drillTitle.textContent = sourceName;

  // Placeholder until the first page arrives.
  drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';
  drillMoreWrap.hidden = true;
  drillCount.textContent = '';

  // Put the controls back to their defaults to match the fresh state.
  if (drillSortSelect) {
    drillSortSelect.value = 'newest';
  }
  if (drillSearchInput) {
    drillSearchInput.value = '';
  }

  drillPanel.hidden = false;
  drillPanel.scrollIntoView({ behavior: 'smooth', block: 'start' });

  pushHash();
  fetchDrillPage(false);
}
|
||
|
||
/**
 * Load one page of documents for the current drillState and render it.
 *
 * Every call takes a ticket, and only the most recent ticket may touch the
 * DOM or advance the offset. Without this, a slow response from an earlier
 * sort, filter or category could overwrite the current list, and a
 * double-click on "Load more" could append the same page twice.
 *
 * @param {boolean} append true to add to the existing list ("Load more"),
 *   false to replace it.
 */
function fetchDrillPage(append) {
  // The counter lives on the function object so no extra outer variable is needed.
  const ticket = (fetchDrillPage.latestTicket || 0) + 1;
  fetchDrillPage.latestTicket = ticket;
  const isStale = () => ticket !== fetchDrillPage.latestTicket;

  const qs = new URLSearchParams({ offset: drillState.offset, limit: drillState.limit });
  if (drillState.category)    qs.set('category', drillState.category);
  if (drillState.sourceName)  qs.set('source_name', drillState.sourceName);
  if (drillState.sort)        qs.set('sort', drillState.sort);
  if (drillState.titleFilter) qs.set('title', drillState.titleFilter);

  fetch('/api/corpus-documents.php?' + qs, { credentials: 'same-origin' })
    .then(r => r.json())
    .then(data => {
      if (isStale()) return;

      if (!data.ok) {
        if (!append) drillDocList.innerHTML = '<p class="drill-error">Could not load documents.</p>';
        return;
      }
      drillState.total = data.total;
      const docs = data.documents || [];

      if (!append) drillDocList.innerHTML = '';

      if (docs.length === 0 && !append) {
        drillDocList.innerHTML = '<p class="drill-empty">No documents found in this category.</p>';
        drillMoreWrap.hidden = true;
        return;
      }

      docs.forEach(doc => {
        const item = document.createElement('div');
        item.className = 'doc-list__item';
        const titleHtml = doc.source_url
          ? `<a href="${esc(doc.source_url)}" target="_blank" rel="noopener" class="doc-list__title">${esc(doc.title || '(Untitled)')}</a>`
          : `<span class="doc-list__title">${esc(doc.title || '(Untitled)')}</span>`;
        const langFlag = doc.language === 'no' ? '🇳🇴' : doc.language === 'en' ? '🇬🇧' : '';
        item.innerHTML = `
          <div class="doc-list__info">
            ${titleHtml}
            <div class="doc-list__meta">
              <span class="source-cat">${esc(doc.category || '—')}</span>
              ${langFlag ? `<span>${langFlag}</span>` : ''}
              <span class="doc-list__date">${esc(fmtDate(doc.updated_at))}</span>
            </div>
          </div>
          <span class="doc-list__chunks">${fmt(doc.chunk_count)} passages</span>`;
        drillDocList.appendChild(item);
      });

      // Advance paging and show "Load more" only while rows remain.
      const loaded = drillState.offset + docs.length;
      drillMoreWrap.hidden = loaded >= drillState.total;
      drillState.offset = loaded;
      if (drillCount) {
        drillCount.textContent = drillState.total > 0
          ? 'Showing ' + fmt(loaded) + ' of ' + fmt(drillState.total) + ' documents'
          : '';
      }
    })
    .catch(() => {
      if (isStale()) return;
      if (!append) drillDocList.innerHTML = '<p class="drill-error">Network error.</p>';
    });
}
|
||
|
||
// "Load more" appends the next page to the current list.
drillMoreBtn.addEventListener('click', function () {
  fetchDrillPage(true);
});

// Closing hides the panel and calls pushHash() again.
drillCloseBtn.addEventListener('click', function () {
  drillPanel.hidden = true;
  pushHash();
});

// Changing the sort order restarts the listing from the first page.
if (drillSortSelect) {
  drillSortSelect.addEventListener('change', function () {
    drillState.sort = drillSortSelect.value;
    drillState.offset = 0;
    drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';
    drillCount.textContent = '';
    fetchDrillPage(false);
  });
}

// Title filter: wait 300 ms after the last keystroke, then restart the
// listing from the first page with the trimmed filter text.
let drillFilterTimer = null;
if (drillSearchInput) {
  drillSearchInput.addEventListener('input', function () {
    clearTimeout(drillFilterTimer);
    drillFilterTimer = setTimeout(function () {
      drillState.titleFilter = drillSearchInput.value.trim();
      drillState.offset = 0;
      drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';
      drillCount.textContent = '';
      fetchDrillPage(false);
    }, 300);
  });
}
|
||
|
||
// ── Search bar ────────────────────────────────────────────────────────────
|
||
// Current search filters. The defaults are the values pushHash() leaves out of
// the URL hash.
let searchMode = 'hybrid';
let searchLang = 'en';
let searchCat = '';

// Each pill group behaves like a radio set: clicking a pill clears
// `is-active` on its siblings, marks the clicked pill, stores the pill's data
// value in the matching filter variable and refreshes the URL hash.
[
  ['.search-modes .mode-pill', pill => { searchMode = pill.dataset.mode; }],
  ['.lang-pills .mode-pill', pill => { searchLang = pill.dataset.lang; }],
  ['#searchCatPills .mode-pill', pill => { searchCat = pill.dataset.cat; }],
].forEach(([selector, remember]) => {
  document.querySelectorAll(selector).forEach(pill => {
    pill.addEventListener('click', () => {
      document.querySelectorAll(selector).forEach(other => {
        other.classList.remove('is-active');
      });
      pill.classList.add('is-active');
      remember(pill);
      pushHash();
    });
  });
});
|
||
|
||
// DOM handles for the query field, the submit button and the results container.
const [searchInput, searchBtn, searchResults] = [
  'corpusSearchInput',
  'corpusSearchBtn',
  'corpusSearchResults',
].map(id => document.getElementById(id));
|
||
|
||
/**
 * Run a corpus search for the current query and render the result passages.
 *
 * Reads the query from `searchInput` and the active mode / language /
 * category filters, POSTs them as JSON to `/api/corpus-search.php`, and
 * replaces the contents of `searchResults` with a loading note followed by
 * result cards, an empty-state message, or an error message.
 *
 * Queries shorter than 3 characters show a hint and send no request.
 * Takes no parameters and returns nothing; all output goes to the DOM.
 */
function runSearch() {
  // Every call supersedes the previous one. Enter still reaches this function
  // while the button is disabled, so without this counter an older, slower
  // response could overwrite the results of a newer query.
  const requestId = (runSearch.latestRequestId || 0) + 1;
  runSearch.latestRequestId = requestId;
  const isStale = () => runSearch.latestRequestId !== requestId;

  const q = searchInput.value.trim();
  if (q.length < 3) {
    searchResults.innerHTML = '<p class="search-hint">Enter at least 3 characters.</p>';
    searchResults.hidden = false;
    // Any request still in flight has just been superseded and will no longer
    // re-enable the button, so do it here.
    searchBtn.disabled = false;
    return;
  }

  const requestedMode = searchMode;

  pushHash();
  searchResults.hidden = false;
  searchResults.innerHTML = `<p class="search-loading">Searching in <strong>${esc(searchMode)}</strong> mode…</p>`;
  searchBtn.disabled = true;

  // Only http(s) targets become links. Escaping protects the attribute syntax
  // but does not stop a `javascript:` URL from executing on click.
  const linkableUrl = raw => {
    if (!raw) return '';
    try {
      const parsed = new URL(raw, location.href);
      return parsed.protocol === 'http:' || parsed.protocol === 'https:' ? String(raw) : '';
    } catch (e) {
      return '';
    }
  };

  // Build the card element for a single hit.
  const buildPassageCard = hit => {
    // Skip the badge when the score is not numeric rather than showing "NaN%".
    const scoreValue = hit.score != null ? Number(hit.score) : NaN;
    const score = Number.isFinite(scoreValue)
      ? `<span class="passage-score">${Math.round(scoreValue * 100)}%</span>`
      : '';
    const catAuth = authorityLabels[hit.category] || { label: hit.category || '—', cls: 'badge--muted' };
    const href = linkableUrl(hit.source_url);
    const titleHtml = href
      ? `<a href="${esc(href)}" target="_blank" rel="noopener" class="passage-card__title">${esc(hit.title || '(Untitled)')}</a>`
      : `<span class="passage-card__title">${esc(hit.title || '(Untitled)')}</span>`;
    const section = hit.section ? `<span class="passage-section">§ ${esc(hit.section)}</span>` : '';
    const excerpt = highlight(hit.excerpt || '', q);
    const fullText = (hit.full_text || '').trim();
    // Offer the expander only when the full text adds something to the excerpt.
    const hasMore = fullText.length > (hit.excerpt || '').length;

    const card = document.createElement('div');
    card.className = 'passage-card';
    card.innerHTML = `
      <div class="passage-card__meta">
        <span class="source-badge ${esc(catAuth.cls)}">${esc(catAuth.label)}</span>
        ${section}
        ${score}
      </div>
      ${titleHtml}
      <p class="passage-card__excerpt">${excerpt}</p>
      ${hasMore ? '<button class="passage-expand-btn" type="button" aria-expanded="false">Show full passage</button><div class="passage-full-text" hidden></div>' : ''}`;

    if (hasMore) {
      const expandBtn = card.querySelector('.passage-expand-btn');
      const fullDiv = card.querySelector('.passage-full-text');
      fullDiv.innerHTML = esc(fullText).replace(/\n/g, '<br>');
      expandBtn.addEventListener('click', function () {
        const isOpen = !fullDiv.hidden;
        fullDiv.hidden = isOpen;
        this.textContent = isOpen ? 'Show full passage' : 'Hide passage';
        this.setAttribute('aria-expanded', String(!isOpen));
      });
    }
    return card;
  };

  fetch('/api/corpus-search.php', {
    method: 'POST',
    credentials: 'same-origin',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ query: q, mode: searchMode, language: searchLang, limit: 8, category: searchCat || null }),
  })
    // The body is parsed whatever the status, so a JSON error payload still
    // reaches the `data.ok` branch. A body that is not JSON carries the HTTP
    // status into the catch handler instead of looking like a network failure.
    .then(r => r.json().catch(() => {
      const failure = new Error('Search endpoint returned a non-JSON body');
      failure.httpStatus = r.status;
      throw failure;
    }))
    .then(data => {
      if (isStale()) return;
      searchBtn.disabled = false;
      if (!data || !data.ok) {
        searchResults.innerHTML = `<p class="search-error">Search error: ${esc(data?.error?.message || 'Unknown error')}</p>`;
        return;
      }
      // Fall back to the mode that was requested if the response omits it.
      const mode = data.mode || requestedMode;
      const hits = Array.isArray(data.hits) ? data.hits : [];
      if (hits.length === 0) {
        searchResults.innerHTML = `<p class="search-empty">No results for <strong>${esc(q)}</strong> in ${esc(mode)} mode.</p>`;
        return;
      }

      const modeLabel = { hybrid: 'Hybrid RAG', bm25: 'BM25 keyword', vector: 'Vector semantic' }[mode] || mode;
      const header = document.createElement('div');
      header.className = 'search-results-header';
      header.innerHTML = `<span class="eyebrow">${esc(modeLabel)}</span><span class="search-results-count">${hits.length} passage${hits.length !== 1 ? 's' : ''}</span>`;
      searchResults.innerHTML = '';
      searchResults.appendChild(header);

      hits.forEach(hit => {
        searchResults.appendChild(buildPassageCard(hit));
      });
    })
    .catch(err => {
      if (isStale()) return;
      searchBtn.disabled = false;
      // Rendering errors also land here; log them so they can be told apart
      // from real connectivity problems.
      console.error('Corpus search failed:', err);
      searchResults.innerHTML = err && err.httpStatus
        ? `<p class="search-error">Search error: unexpected server response (HTTP ${esc(String(err.httpStatus))}).</p>`
        : `<p class="search-error">Network error.</p>`;
    });
}
|
||
|
||
// A search starts from the button or from pressing Enter in the query field.
searchBtn.addEventListener('click', runSearch);
searchInput.addEventListener('keydown', function (event) {
  if (event.key !== 'Enter') return;
  runSearch();
});
|
||
|
||
// ── URL hash state ────────────────────────────────────────────────────────
|
||
/**
 * Mirror the current search and drill-panel state into the URL hash.
 *
 * Writes `q`, `mode`, `lang` and `cat`, plus `drill` / `drillsrc` while the
 * drill panel is visible. Default values ('hybrid', 'en') and empty values
 * are left out. Uses history.replaceState, so no history entry is added;
 * when nothing needs storing the hash is dropped from the URL.
 */
function pushHash() {
  const params = new URLSearchParams();
  const addIf = (condition, key, value) => {
    if (condition) params.set(key, value);
  };

  const query = searchInput ? searchInput.value.trim() : '';
  addIf(query, 'q', query);
  addIf(searchMode !== 'hybrid', 'mode', searchMode);
  addIf(searchLang !== 'en', 'lang', searchLang);
  addIf(searchCat, 'cat', searchCat);

  // Drill state is only recorded while the panel is open.
  if (drillPanel && !drillPanel.hidden) {
    addIf(drillState.category, 'drill', drillState.category);
    addIf(drillState.sourceName, 'drillsrc', drillState.sourceName);
  }

  const fragment = params.toString();
  const target = fragment ? '#' + fragment : location.pathname + location.search;
  history.replaceState(null, '', target);
}
|
||
|
||
/**
 * Mark the pill whose data attribute equals `value` as the active one.
 *
 * @param {string} group CSS selector matching every pill in the group.
 * @param {string} attr  `dataset` key to compare (e.g. 'mode' for data-mode).
 * @param {string} value Value the active pill must carry; every other pill
 *                       in the group loses the `is-active` class.
 */
function activatePill(group, attr, value) {
  for (const pill of document.querySelectorAll(group)) {
    const isMatch = pill.dataset[attr] === value;
    pill.classList.toggle('is-active', isMatch);
  }
}
|
||
|
||
/**
 * Re-apply the state stored in the URL hash by pushHash().
 *
 * Restores the mode / language / category filters, reopens the drill panel
 * for `drill` / `drillsrc`, and re-runs the search when `q` is present.
 * Does nothing when the URL has no hash.
 *
 * The hash is user-editable and may come from an outdated bookmark, so a
 * filter value is only adopted when a pill carrying that value exists. An
 * unknown value is ignored and the current filter stays in place, rather
 * than leaving the group with no active pill and sending an unsupported
 * value to the search API.
 */
function restoreHash() {
  if (!location.hash) return;
  const p = new URLSearchParams(location.hash.slice(1));

  // Returns the hash value for `key` when some pill in `selector` carries it
  // (activating that pill as a side effect), or null otherwise.
  const pillValue = (key, selector, attr) => {
    if (!p.has(key)) return null;
    const wanted = p.get(key);
    const exists = Array.from(document.querySelectorAll(selector))
      .some(pill => pill.dataset[attr] === wanted);
    if (!exists) return null;
    activatePill(selector, attr, wanted);
    return wanted;
  };

  const mode = pillValue('mode', '.search-modes .mode-pill', 'mode');
  if (mode !== null) searchMode = mode;

  const lang = pillValue('lang', '.lang-pills .mode-pill', 'lang');
  if (lang !== null) searchLang = lang;

  const cat = pillValue('cat', '#searchCatPills .mode-pill', 'cat');
  if (cat !== null) searchCat = cat;

  if (p.has('drill')) openDrillByCategory(p.get('drill'));
  if (p.has('drillsrc')) openDrillBySource(p.get('drillsrc'));

  // Run the search last so it uses the filters restored above.
  if (p.has('q') && searchInput) {
    searchInput.value = p.get('q');
    runSearch();
  }
}
|
||
|
||
// Apply any state encoded in the URL hash once, as the last step of this script.
restoreHash();
|
||
})();
|
||
</script>
|
||
|
||
<?php require_once __DIR__ . '/includes/layout_footer.php'; ?>
|