Files
dobetternorge-tools/corpus.php
T
daveadmin d156f8cf6b feat(tools): persona selector across standalone tools + dashboard chat
Wire the legal-domain persona picker into corpus, deep-research, korrespond and
the dashboard chat. Each endpoint reads the chosen profile, resolves its packages
against client 57, and scopes retrieval via package_ids (falling back to family
when omitted). New dashboard tenants now subscribe to all DBN domain packages so
persona switching survives the subscription intersection.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-01 23:03:31 +02:00

961 lines
47 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
declare(strict_types=1);
// Page metadata for this tool.
// NOTE(review): presumably read by includes/layout.php, which is required further down — confirm there.
$toolName = 'corpus';
$toolTitle = 'Legal Knowledge Base';
$toolKind = 'Corpus Intelligence';
$toolBadge = '~220 K passages';
// Start output buffering so the static markup that follows is captured
// instead of being sent to the client right away.
ob_start();
?>
<div class="reasoning-head">
<p class="eyebrow">Corpus health</p>
<h2 id="reasoningTitle">Vector index</h2>
</div>
<dl class="corpus-health-dl">
<dt>Collection</dt>
<dd><code>bnl_chunks</code></dd>
<dt>Dimensions</dt>
<dd>768 (nomic-embed-text)</dd>
<dt>Similarity</dt>
<dd>Cosine</dd>
<dt>RAG strategy</dt>
<dd>Hybrid vector + keyword<br>Reciprocal rank fusion</dd>
<dt>Private boost</dt>
<dd>1.5×</dd>
<dt>Temporal mode</dt>
<dd>legal_conservative</dd>
<dt>Chunk target</dt>
<dd>600 words · 75 overlap</dd>
<dt>Vector DB</dt>
<dd>Qdrant on Colin Docker<br><code>10.0.2.10:6333</code></dd>
<dt>Hybrid search</dt>
<dd>Azure AI Search<br><code>bnl-legal-search</code><br>West Europe · Basic SKU</dd>
</dl>
<?php
// Fetch everything emitted since ob_start() as a string and end that buffer.
$reasoningPanelOverride = ob_get_clean();
// Pull in the shared layout.
// NOTE(review): presumably it renders the page chrome and uses $reasoningPanelOverride — confirm in layout.php.
require_once __DIR__ . '/includes/layout.php';
?>
<!-- STATS BAR -->
<div class="corpus-stats-bar" id="corpusStatsBar">
<div class="corpus-stat" id="statChunks">
<span class="corpus-stat__value is-loading">—</span>
<span class="corpus-stat__label">Indexed passages</span>
</div>
<div class="corpus-stat" id="statDocs">
<span class="corpus-stat__value is-loading">—</span>
<span class="corpus-stat__label">Source documents</span>
</div>
<div class="corpus-stat" id="statSources">
<span class="corpus-stat__value is-loading">—</span>
<span class="corpus-stat__label">Active scrapers</span>
</div>
<div class="corpus-stat" id="statUpdated">
<span class="corpus-stat__value is-loading">—</span>
<span class="corpus-stat__label">Last ingested</span>
</div>
</div>
<!-- CORPUS SEARCH -->
<div class="corpus-search-box">
<div class="corpus-search-row">
<input type="search" id="corpusSearchInput" class="corpus-search-input"
placeholder="Search 220 K passages — try «samvær», «arbeidsgiver», «barnevernloven»…"
autocomplete="off" spellcheck="false">
<button id="corpusSearchBtn" class="primary-button" type="button">Search</button>
</div>
<div class="corpus-search-controls">
<div class="search-modes" role="group" aria-label="Search mode">
<button class="mode-pill is-active" data-mode="hybrid" type="button">Hybrid</button>
<button class="mode-pill" data-mode="bm25" type="button">BM25</button>
<button class="mode-pill" data-mode="vector" type="button">Vector</button>
</div>
<div class="lang-pills" role="group" aria-label="Language">
<button class="mode-pill is-active" data-lang="en" type="button">EN</button>
<button class="mode-pill" data-lang="no" type="button">NO</button>
<button class="mode-pill" data-lang="uk" type="button">UK</button>
<button class="mode-pill" data-lang="pl" type="button">PL</button>
</div>
<label class="corpus-persona is-hidden" id="corpusPersonaControl" for="corpusPersonaSelect" title="Legal domain — scopes Hybrid search">
<span class="corpus-persona__label">Domain</span>
<select id="corpusPersonaSelect" class="drill-sort-select" aria-label="Legal domain persona"></select>
</label>
</div>
<div class="search-cats" role="group" aria-label="Category filter" id="searchCatPills">
<button class="mode-pill is-active" data-cat="" type="button">All</button>
<button class="mode-pill" data-cat="family-law" type="button">Family Law</button>
<button class="mode-pill" data-cat="child-welfare" type="button">Child Welfare</button>
<button class="mode-pill" data-cat="labour-law" type="button">Labour Law</button>
<button class="mode-pill" data-cat="social-welfare" type="button">Social Welfare</button>
<button class="mode-pill" data-cat="tax-law" type="button">Tax Law</button>
<button class="mode-pill" data-cat="administrative-law" type="button">Administrative</button>
<button class="mode-pill" data-cat="consumer-law" type="button">Consumer</button>
<button class="mode-pill" data-cat="immigration-law" type="button">Immigration</button>
<button class="mode-pill" data-cat="government-documents" type="button">Gov Docs</button>
</div>
</div>
<div id="corpusSearchResults" class="corpus-search-results" hidden></div>
<!-- COVERAGE -->
<div class="corpus-section">
<p class="eyebrow">Coverage</p>
<h3 class="corpus-section__title">Legal categories</h3>
<div class="corpus-categories" id="corpusCategories">
<div class="category-card" data-category="family-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">⚖</span>
<span class="category-card__count is-loading" id="cat-family-law">—</span>
</div>
<h4>Family Law</h4>
<p>Barneloven, child custody (foreldreansvar), samvær, mediation (mekling), separation and divorce proceedings.</p>
<button class="cat-browse-btn" data-cat="family-law" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="child-welfare">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🧒</span>
<span class="category-card__count is-loading" id="cat-child-welfare">—</span>
</div>
<h4>Child Welfare</h4>
<p>Barnevernloven, omsorgsovertakelse, emergency care orders, foster placement, CPS (barnevernet) case law.</p>
<button class="cat-browse-btn" data-cat="child-welfare" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="labour-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🏭</span>
<span class="category-card__count is-loading" id="cat-labour-law">—</span>
</div>
<h4>Labour Law</h4>
<p>Arbeidsmiljøloven, collective agreements (tariffavtaler), Arbeidsretten rulings, dismissal, sick leave obligations.</p>
<button class="cat-browse-btn" data-cat="labour-law" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="social-welfare">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🛡</span>
<span class="category-card__count is-loading" id="cat-social-welfare">—</span>
</div>
<h4>Social Welfare</h4>
<p>NAV guidance on sykepenger, dagpenger, AAP, uføretrygd, alderspensjon, yrkesskade and social assistance.</p>
<button class="cat-browse-btn" data-cat="social-welfare" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="tax-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">📊</span>
<span class="category-card__count is-loading" id="cat-tax-law">—</span>
</div>
<h4>Tax Law</h4>
<p>Skatteetaten's Skatte-ABC, binding advance rulings (BFU), Skatteklagenemnda decisions, income and capital tax.</p>
<button class="cat-browse-btn" data-cat="tax-law" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="administrative-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🏛</span>
<span class="category-card__count is-loading" id="cat-administrative-law">—</span>
</div>
<h4>Administrative Law</h4>
<p>Sivilombudet reports, Forvaltningsloven, procedural rights, official complaints, Stortinget oversight.</p>
<button class="cat-browse-btn" data-cat="administrative-law" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="consumer-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🏠</span>
<span class="category-card__count is-loading" id="cat-consumer-law">—</span>
</div>
<h4>Consumer &amp; Housing</h4>
<p>HTU (rental disputes), Finansklagenemnda, Forbrukertilsynet, Forbrukerrådet, Pakkereisenemnda decisions.</p>
<button class="cat-browse-btn" data-cat="consumer-law" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="immigration-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🌐</span>
<span class="category-card__count is-loading" id="cat-immigration-law">—</span>
</div>
<h4>Immigration &amp; International</h4>
<p>UNE (Utlendingsnemnda) decisions, ECHR Art. 8 family rights, EMD case law, Hague Convention (cross-border child abduction).</p>
<button class="cat-browse-btn" data-cat="immigration-law" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="government-documents">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">📄</span>
<span class="category-card__count is-loading" id="cat-government-documents">—</span>
</div>
<h4>Government Documents</h4>
<p>NOUer, Stortingsmeldinger, government white papers and regulatory guidance from Regjeringen.no.</p>
<button class="cat-browse-btn" data-cat="government-documents" type="button">Browse docs →</button>
</div>
<div class="category-card" data-category="health-law">
<div class="category-card__top">
<span class="category-card__icon" aria-hidden="true">🏥</span>
<span class="category-card__count is-loading" id="cat-health-law">—</span>
</div>
<h4>Health Law</h4>
<p>Helsetilsynet supervision reports, patient rights (pasientrettigheter), NPE compensation claims, mental health law and barnevern health oversight.</p>
<button class="cat-browse-btn" data-cat="health-law" type="button">Browse docs →</button>
</div>
</div>
<!-- DRILL-DOWN PANEL -->
<div id="corpusDrillPanel" class="corpus-drill-panel" hidden>
<div class="drill-header">
<div>
<p class="eyebrow" id="drillEyebrow">Category</p>
<h3 id="drillTitle">Documents</h3>
</div>
<button class="drill-close-btn" id="drillCloseBtn" type="button" aria-label="Close">✕</button>
</div>
<div class="drill-controls" id="drillControls">
<span class="drill-count" id="drillCount"></span>
<div class="drill-controls-right">
<input type="search" id="drillSearchInput" class="drill-search-input"
placeholder="Filter by title…" autocomplete="off">
<select id="drillSortSelect" class="drill-sort-select">
<option value="newest">Newest first</option>
<option value="oldest">Oldest first</option>
<option value="alpha">A–Z</option>
<option value="chunks">Most passages</option>
</select>
</div>
</div>
<div id="drillDocList" class="doc-list"></div>
<div class="doc-list__more-wrap" id="drillMoreWrap" hidden>
<button class="doc-list__more" id="drillMoreBtn" type="button">Load more</button>
</div>
</div>
</div>
<!-- SOURCES TABLE -->
<div class="corpus-section">
<p class="eyebrow">Data sources</p>
<h3 class="corpus-section__title">Active scrapers</h3>
<div class="corpus-table-wrap">
<table class="sources-table" id="sourcesTable">
<thead>
<tr>
<th></th>
<th>Source</th>
<th>Type</th>
<th>Category</th>
<th>Lang</th>
<th>Schedule</th>
<th>Status</th>
</tr>
</thead>
<tbody id="sourcesTableBody">
<tr class="sources-skeleton"><td colspan="7">Loading sources…</td></tr>
</tbody>
</table>
</div>
</div>
<!-- AI STACK -->
<div class="corpus-section">
<p class="eyebrow">Software</p>
<h3 class="corpus-section__title">AI stack</h3>
<div class="stack-grid">
<div class="stack-card">
<h3>Reasoning LLMs</h3>
<ul class="stack-list">
<li><span class="stack-badge stack-badge--azure">Azure</span> <strong>gpt-4o-mini</strong> <span class="stack-star">★ default</span> — fast, cost-efficient</li>
<li><span class="stack-badge stack-badge--azure">Azure</span> <strong>gpt-4o</strong> — highest quality</li>
<li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>qwen2.5:14b</strong> — local, private</li>
<li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>qwen3:14b</strong> — reasoning mode</li>
<li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>dbn-legal-agent</strong> — Norwegian law fine-tune (QLoRA on qwen2.5:7b, NorwAI-24B distillation)</li>
</ul>
<p class="stack-note">All routed via LiteLLM on Colin · <code>10.0.1.10:4000</code></p>
</div>
<div class="stack-card">
<h3>Transcription</h3>
<ul class="stack-list">
<li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>Whisper large-v3</strong> <span class="stack-star">★ primary</span><br>Cuttlefish · RTX 3060 12 GB VRAM</li>
<li><span class="stack-badge stack-badge--api">API</span> OpenAI Whisper API</li>
<li><span class="stack-badge stack-badge--azure">Azure</span> AI Speech <code>nb-NO</code> (Norway East)</li>
</ul>
<p class="stack-note">Speaker diarization · VAD silence filter · beam size 5 · vocabulary presets (barnerett, mediation)</p>
</div>
<div class="stack-card">
<h3>Embeddings</h3>
<ul class="stack-list">
<li><strong>nomic-embed-text</strong> — 768-dim dense vectors</li>
<li>Ollama on Chloe <code>10.0.1.11:11434</code></li>
<li>Cosine similarity in Qdrant</li>
</ul>
<p class="stack-note">All documents chunked and embedded before indexing; chunks stored in both Qdrant (vector) and MariaDB (keyword fallback)</p>
</div>
<div class="stack-card">
<h3>Vector &amp; Hybrid Search</h3>
<ul class="stack-list">
<li><strong>Qdrant</strong> <code>bnl_chunks</code> · ~220 K vectors<br>Colin Docker · <code>10.0.2.10:6333</code></li>
<li><strong>Azure AI Search</strong> <code>bnl-legal-search</code><br>Basic SKU · West Europe · hybrid keyword + semantic</li>
<li>Reciprocal rank fusion (vector + keyword)</li>
<li>Private corpus boosted 1.5×</li>
</ul>
</div>
<div class="stack-card">
<h3>Chunking pipeline</h3>
<ul class="stack-list">
<li>Heading-aware semantic splitting</li>
<li>600-word target · 75-word overlap</li>
<li>50-word minimum chunk</li>
<li>SHA-256 deduplication</li>
<li>PDF, DOCX, HTML text extraction</li>
<li>Temporal metadata (valid_from / valid_until)</li>
</ul>
<p class="stack-note">Legal temporal reranking: <code>legal_conservative</code> — surfaces current versions first</p>
</div>
</div>
</div>
<!-- DATA PIPELINE -->
<div class="corpus-section">
<p class="eyebrow">How it works</p>
<h3 class="corpus-section__title">Ingestion pipeline</h3>
<div class="pipeline-flow" role="list" aria-label="Data pipeline steps">
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">🌐</span>
<span>Source</span>
<small>gov websites, APIs, PDFs</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">🕷</span>
<span>Scraper</span>
<small>HTTP / API / PDF</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">📝</span>
<span>Text extract</span>
<small>PDF, DOCX, HTML</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">✂</span>
<span>TextChunker</span>
<small>600w · 75w overlap</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">🔢</span>
<span>Embed</span>
<small>nomic · 768-dim</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">⚡</span>
<span>Qdrant</span>
<small>cosine upsert</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step" role="listitem">
<span class="pipeline-step__icon">🤖</span>
<span>LiteLLM</span>
<small>RAG + LLM</small>
</div>
<div class="pipeline-arrow" aria-hidden="true"></div>
<div class="pipeline-step pipeline-step--end" role="listitem">
<span class="pipeline-step__icon">🔍</span>
<span>Your tool</span>
<small>Ask, Search, Research…</small>
</div>
</div>
</div>
<script>
(function () {
'use strict';
// ── Utilities ────────────────────────────────────────────────────────────
/**
 * Escape a value for interpolation into HTML text or a quoted attribute.
 *
 * `null` and `undefined` become the empty string; every other value is
 * stringified first. Both quote characters are escaped so the result is safe
 * inside single-quoted as well as double-quoted attributes.
 *
 * @param {*} s Value to escape.
 * @returns {string} HTML-escaped text.
 */
function esc(s) {
  return String(s ?? '')
    .replace(/&/g, '&amp;') // must run first so later entities are not re-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
/**
 * Format a count with English thousands separators.
 *
 * @param {*} n Number or numeric string.
 * @returns {string} Grouped number, or an em dash when `n` is null, undefined
 *   or not numeric (so placeholders such as '?' never render as "NaN").
 */
function fmt(n) {
  if (n === null || n === undefined) return '—';
  const value = Number(n);
  if (Number.isNaN(value)) return '—';
  return value.toLocaleString('en');
}
/**
 * Format a date-like value as "15 Mar 2024" (en-GB, short month).
 *
 * `new Date()` does not throw on unparseable input — it yields an invalid
 * Date whose formatted form is the literal text "Invalid Date" — so validity
 * is checked explicitly. The placeholder is returned instead of the raw input
 * because callers interpolate the result into HTML without escaping.
 *
 * @param {*} s Date string or timestamp.
 * @returns {string} Formatted date, or an em dash when `s` is empty or unparseable.
 */
function fmtDate(s) {
  if (!s) return '—';
  try {
    const d = new Date(s);
    if (Number.isNaN(d.getTime())) return '—';
    return d.toLocaleDateString('en-GB', { day: 'numeric', month: 'short', year: 'numeric' });
  } catch (e) {
    // Values that cannot even be coerced to a Date are treated like a missing date.
    return '—';
  }
}
/**
 * HTML-escape `text` and wrap every case-insensitive occurrence of `query`
 * in `<mark>`.
 *
 * Matching is done on the raw text and each piece is escaped afterwards.
 * Matching against already-escaped text (the previous approach) let queries
 * such as "amp" or "lt" hit the inside of entities like `&amp;`, and
 * re-escaped the matched fragment a second time.
 *
 * @param {*} text  Raw (unescaped) text.
 * @param {string} query Literal search string; regex metacharacters are neutralised.
 * @returns {string} Safe HTML.
 */
function highlight(text, query) {
  const raw = String(text ?? '');
  if (!query) return esc(raw);
  const literal = String(query).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Splitting on a single capture group yields [plain, match, plain, match, …]:
  // odd indices are the matched fragments.
  const parts = raw.split(new RegExp('(' + literal + ')', 'gi'));
  return parts
    .map((part, i) => (i % 2 === 1 ? '<mark>' + esc(part) + '</mark>' : esc(part)))
    .join('');
}
/**
 * Mark a placeholder element as populated by dropping its `is-loading` class.
 *
 * @param {Element} el Element whose loading state should be cleared.
 */
function setLoaded(el) {
  const classes = el.classList;
  classes.remove('is-loading');
}
// ── Authority / schedule label maps ─────────────────────────────────────
// Display data for a source's `authority_type`: `label` is the badge text and
// `cls` the badge CSS class. Values missing from this map fall back to the raw
// value with `badge--muted` in renderSourcesTable.
const authorityLabels = {
case_law: { label: 'Case law', cls: 'badge--teal' },
guidance: { label: 'Guidance', cls: 'badge--amber' },
report: { label: 'Report', cls: 'badge--muted' },
ombudsman: { label: 'Ombudsman', cls: 'badge--muted' },
tribunal: { label: 'Tribunal', cls: 'badge--coral' },
regulatory: { label: 'Regulatory', cls: 'badge--coral' },
law: { label: 'Statute', cls: 'badge--teal' },
treaty: { label: 'Treaty', cls: 'badge--muted' },
};
// Human-readable names for a source's `schedule` value.
const scheduleLabels = {
daily: 'Daily', weekly: 'Weekly', monthly: 'Monthly', manual: 'Manual',
};
// Maps a category value from the stats response (`by_category[].category`) to
// the DOM id of the category card counter that should include it. Several
// keys — hyphen and underscore spellings, plus related categories — point at
// the same counter, so their document counts are added together on one card.
// Categories not listed here are not counted on any card.
const catIds = {
'family-law': 'cat-family-law',
'family_law': 'cat-family-law',
'child-welfare': 'cat-child-welfare',
'child_welfare': 'cat-child-welfare',
'children-rights': 'cat-child-welfare',
'labour-law': 'cat-labour-law',
'labour_law': 'cat-labour-law',
'social-welfare': 'cat-social-welfare',
'social_welfare': 'cat-social-welfare',
'social-services': 'cat-social-welfare',
'tax-law': 'cat-tax-law',
'tax_law': 'cat-tax-law',
'administrative-law': 'cat-administrative-law',
'administrative_law': 'cat-administrative-law',
'anti-discrimination': 'cat-administrative-law',
'legal': 'cat-administrative-law',
'civil-litigation': 'cat-administrative-law',
'ombudsman': 'cat-administrative-law',
'bankruptcy': 'cat-administrative-law',
'consumer-law': 'cat-consumer-law',
'consumer_law': 'cat-consumer-law',
'tenancy-law': 'cat-consumer-law',
'financial-law': 'cat-consumer-law',
'immigration-law': 'cat-immigration-law',
'immigration_law': 'cat-immigration-law',
'immigration': 'cat-immigration-law',
'echr-case-law': 'cat-immigration-law',
'child-abduction': 'cat-immigration-law',
'government-documents':'cat-government-documents',
'government_documents':'cat-government-documents',
'legislation': 'cat-government-documents',
'parliamentary': 'cat-government-documents',
'government-policy': 'cat-government-documents',
'policy-reports': 'cat-government-documents',
'case-law': 'cat-administrative-law',
'victim-compensation': 'cat-administrative-law',
'procurement-law': 'cat-administrative-law',
'health-law': 'cat-health-law',
'patient-rights': 'cat-health-law',
};
// Display titles for the category slugs used by the "Browse docs" buttons
// (`data-cat`). openDrillByCategory uses these for the drill-down panel
// heading and falls back to the raw slug when a category is not listed.
const catLabels = {
'family-law': 'Family Law',
'child-welfare': 'Child Welfare',
'labour-law': 'Labour Law',
'social-welfare': 'Social Welfare',
'tax-law': 'Tax Law',
'administrative-law': 'Administrative Law',
'consumer-law': 'Consumer & Housing',
'immigration-law': 'Immigration & International',
'government-documents': 'Government Documents',
'health-law': 'Health Law',
};
// ── STATS + SOURCES table load ───────────────────────────────────────────
// Sources from the most recent successful stats response.
let cachedSources = [];

/**
 * Put every placeholder that is still in its loading state into a neutral
 * "unavailable" state. Values that were already rendered are left alone.
 */
function showStatsUnavailable() {
  document
    .querySelectorAll('.corpus-stat__value.is-loading, .category-card__count.is-loading')
    .forEach(el => {
      el.textContent = '—';
      el.classList.remove('is-loading');
    });
  // Only replace the table skeleton; a table that was already rendered stays as is.
  const skeletonCell = document.querySelector('#sourcesTableBody .sources-skeleton td');
  if (skeletonCell) skeletonCell.textContent = 'Could not load sources.';
}

// Load the headline stats, the per-category document counts and the source
// list in a single request, then populate the stats bar, the category cards
// and the sources table.
fetch('/api/corpus-stats.php', { credentials: 'same-origin' })
  .then(r => r.json())
  .then(data => {
    if (!data || !data.ok) {
      // An application-level failure previously left the skeletons on screen forever.
      showStatsUnavailable();
      return;
    }
    const s = data.stats || {};
    const elChunks = document.querySelector('#statChunks .corpus-stat__value');
    const elDocs = document.querySelector('#statDocs .corpus-stat__value');
    const elSrc = document.querySelector('#statSources .corpus-stat__value');
    const elUpd = document.querySelector('#statUpdated .corpus-stat__value');
    if (elChunks) { elChunks.textContent = fmt(s.total_chunks); setLoaded(elChunks); }
    if (elDocs) { elDocs.textContent = fmt(s.total_docs); setLoaded(elDocs); }
    if (elSrc) { elSrc.textContent = fmt(s.active_sources); setLoaded(elSrc); }
    if (elUpd) { elUpd.textContent = fmtDate(s.last_updated); setLoaded(elUpd); }

    // Several categories share one card, so sum numerically first and format
    // once. Re-reading the formatted text with parseInt (the previous
    // approach) truncated at the thousands separator: "1,234" parsed as 1.
    const totals = new Map();
    (s.by_category || []).forEach(row => {
      const elId = catIds[row.category];
      if (typeof elId !== 'string') return;
      const count = parseInt(row.doc_count, 10);
      totals.set(elId, (totals.get(elId) || 0) + (Number.isNaN(count) ? 0 : count));
    });
    totals.forEach((sum, elId) => {
      const el = document.getElementById(elId);
      if (!el) return;
      el.textContent = fmt(sum);
      setLoaded(el);
    });
    // Cards that received no rows show zero rather than a perpetual placeholder.
    document.querySelectorAll('.category-card__count.is-loading').forEach(el => {
      el.textContent = '0'; setLoaded(el);
    });

    // Sources table
    cachedSources = data.sources || [];
    renderSourcesTable(cachedSources);
  })
  .catch(() => {
    showStatsUnavailable();
  });
// ── Sources table rendering ───────────────────────────────────────────────
/**
 * Rebuild the body of the sources table.
 *
 * Each source produces two rows: a visible summary row and a hidden detail
 * row. The detail row is revealed by the expand button and its content is
 * requested through loadSourceExpand when it is opened.
 *
 * @param {Array<Object>} sources Source records from the stats response. The
 *   fields read here are `name`, `url`, `authority_type`, `category`,
 *   `language`, `schedule` and `is_active`. Anything that is not an array
 *   leaves the table empty.
 */
function renderSourcesTable(sources) {
  const tbody = document.getElementById('sourcesTableBody');
  if (!tbody) return;
  tbody.innerHTML = '';
  if (!Array.isArray(sources)) return;
  sources.forEach((src, idx) => {
    const auth = authorityLabels[src.authority_type] || { label: src.authority_type || '—', cls: 'badge--muted' };
    const sched = scheduleLabels[src.schedule] || (src.schedule || 'Manual');
    // Language codes without a flag are shown as text and therefore must be
    // escaped like every other server-provided value in this row.
    const langFlag = src.language === 'no' ? '🇳🇴' : src.language === 'en' ? '🇬🇧' : esc(src.language || '—');
    const statusHtml = src.is_active
      ? '<span class="status-active">● Active</span>'
      : '<span class="status-inactive">○ Inactive</span>';
    const nameHtml = src.url
      ? `<a href="${esc(src.url)}" target="_blank" rel="noopener">${esc(src.name)}</a>`
      : esc(src.name);
    const tr = document.createElement('tr');
    tr.dataset.idx = idx;
    tr.innerHTML = `
<td class="source-expand-cell">
<button class="source-expand-btn" type="button" aria-expanded="false" aria-label="Expand ${esc(src.name)}">▶</button>
</td>
<td class="source-name">${nameHtml}</td>
<td><span class="source-badge ${esc(auth.cls)}">${esc(auth.label)}</span></td>
<td><span class="source-cat">${esc(src.category || '—')}</span></td>
<td>${langFlag}</td>
<td>${esc(sched)}</td>
<td>${statusHtml}</td>`;
    tbody.appendChild(tr);
    // Expand row (hidden)
    const expandTr = document.createElement('tr');
    expandTr.className = 'source-expand-row';
    expandTr.hidden = true;
    expandTr.dataset.name = src.name;
    expandTr.innerHTML = `<td colspan="7"><div class="source-expand-inner" id="source-expand-${idx}">
<div class="source-expand-loading">Loading…</div></div></td>`;
    tbody.appendChild(expandTr);
    // Toggle handler
    tr.querySelector('.source-expand-btn').addEventListener('click', function () {
      const isOpen = expandTr.hidden === false;
      if (isOpen) {
        expandTr.hidden = true;
        this.textContent = '▶';
        this.setAttribute('aria-expanded', 'false');
      } else {
        expandTr.hidden = false;
        this.textContent = '▼';
        this.setAttribute('aria-expanded', 'true');
        loadSourceExpand(idx, src, `source-expand-${idx}`);
      }
    });
  });
}
/**
 * Fill the detail panel of one source row.
 *
 * Requests the source's document count and renders its metadata plus, when
 * documents exist, a button that opens the drill-down for that source. The
 * container is flagged as loaded so reopening the row does not refetch. The
 * flag is cleared again on a network failure so the next expand retries
 * instead of showing the error forever.
 *
 * @param {number} idx Row index (part of the call signature; not read here).
 * @param {Object} src Source record backing the row.
 * @param {string} containerId DOM id of the detail container.
 */
function loadSourceExpand(idx, src, containerId) {
  const container = document.getElementById(containerId);
  if (!container || container.dataset.loaded) return;
  container.dataset.loaded = '1';
  // Restore the loading indicator; on a retry this replaces the earlier error text.
  container.innerHTML = '<div class="source-expand-loading">Loading…</div>';
  // Fetch doc count for this source
  const qs = new URLSearchParams({ source_name: src.name, limit: 1 });
  fetch('/api/corpus-documents.php?' + qs, { credentials: 'same-origin' })
    .then(r => r.json())
    .then(data => {
      // Keep "unknown" distinct from a number: formatting a '?' placeholder
      // as a number rendered "NaN".
      const count = data && data.ok ? Number(data.total) : NaN;
      const hasCount = Number.isFinite(count);
      const totalText = hasCount ? fmt(count) : '?';
      container.innerHTML = `
<div class="source-expand-grid">
<div>
<dl class="source-expand-dl">
<dt>Scraper class</dt>
<dd><code>${esc(src.scraper_class || '—')}</code></dd>
<dt>Category</dt>
<dd>${esc(src.category || '—')}</dd>
<dt>Authority type</dt>
<dd>${esc(src.authority_type || '—')}</dd>
<dt>Language</dt>
<dd>${src.language === 'no' ? '🇳🇴 Norwegian' : src.language === 'en' ? '🇬🇧 English' : esc(src.language || '—')}</dd>
<dt>Update schedule</dt>
<dd>${esc(scheduleLabels[src.schedule] || src.schedule || '—')}</dd>
<dt>Documents indexed</dt>
<dd><strong>${totalText}</strong></dd>
</dl>
</div>
<div>
${src.url ? `<p class="source-expand-url"><a href="${esc(src.url)}" target="_blank" rel="noopener">${esc(src.url)}</a></p>` : ''}
${hasCount && count > 0 ? `<button class="doc-list__more source-browse-btn" data-source="${esc(src.name)}" type="button">Browse ${totalText} documents →</button>` : ''}
</div>
</div>`;
      container.querySelectorAll('.source-browse-btn').forEach(btn => {
        btn.addEventListener('click', () => openDrillBySource(src.name));
      });
    })
    .catch(() => {
      container.innerHTML = `<p class="source-expand-error">Could not load source details.</p>`;
      // Allow a retry the next time the row is expanded.
      delete container.dataset.loaded;
    });
}
// ── Category drill-down ───────────────────────────────────────────────────
// Current drill-down query: either a category or a source name, plus paging
// (offset / limit / total), sort order and the title filter.
let drillState = {
  category: null,
  sourceName: null,
  offset: 0,
  total: 0,
  limit: 20,
  sort: 'newest',
  titleFilter: '',
};

// Drill-down panel elements, looked up once.
const drillPanel = document.getElementById('corpusDrillPanel');
const drillDocList = document.getElementById('drillDocList');
const drillTitle = document.getElementById('drillTitle');
const drillEyebrow = document.getElementById('drillEyebrow');
const drillMoreWrap = document.getElementById('drillMoreWrap');
const drillMoreBtn = document.getElementById('drillMoreBtn');
const drillCloseBtn = document.getElementById('drillCloseBtn');
const drillCount = document.getElementById('drillCount');
const drillSortSelect = document.getElementById('drillSortSelect');
const drillSearchInput = document.getElementById('drillSearchInput');

// Every "Browse docs" button opens the drill-down for its own data-cat value.
for (const browseBtn of document.querySelectorAll('.cat-browse-btn')) {
  browseBtn.addEventListener('click', () => {
    openDrillByCategory(browseBtn.dataset.cat);
  });
}
/**
 * Shared open routine for the drill-down panel.
 *
 * Resets the query state to the first page with default sort and no title
 * filter, resets the panel controls, reveals and scrolls to the panel,
 * records the state in the URL hash and requests the first page.
 *
 * @param {?string} category   Category slug, or null when drilling by source.
 * @param {?string} sourceName Source name, or null when drilling by category.
 * @param {string} eyebrowText Small label shown above the title.
 * @param {string} titleText   Panel heading.
 */
function openDrillPanel(category, sourceName, eyebrowText, titleText) {
  drillState = { category: category, sourceName: sourceName, offset: 0, total: 0, limit: 20, sort: 'newest', titleFilter: '' };
  drillEyebrow.textContent = eyebrowText;
  drillTitle.textContent = titleText;
  drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';
  drillMoreWrap.hidden = true;
  // Guarded like the other optional controls (fetchDrillPage guards it too).
  if (drillCount) drillCount.textContent = '';
  if (drillSortSelect) drillSortSelect.value = 'newest';
  if (drillSearchInput) drillSearchInput.value = '';
  drillPanel.hidden = false;
  drillPanel.scrollIntoView({ behavior: 'smooth', block: 'start' });
  pushHash();
  fetchDrillPage(false);
}

/**
 * Open the drill-down panel listing the documents of one category.
 *
 * @param {string} cat Category slug; the heading uses its catLabels entry and
 *   falls back to the slug itself.
 */
function openDrillByCategory(cat) {
  openDrillPanel(cat, null, 'Category', catLabels[cat] || cat);
}

/**
 * Open the drill-down panel listing the documents of one source.
 *
 * @param {string} sourceName Source name, also used as the heading.
 */
function openDrillBySource(sourceName) {
  openDrillPanel(null, sourceName, 'Source', sourceName);
}
// Id of the most recently issued drill-down request. A response carrying an
// older id belongs to a superseded query and is ignored.
let drillRequestSeq = 0;

/**
 * Request one page of documents for the current drill state and render it.
 *
 * Only the response of the latest request is applied. Without that check, a
 * slow response for a previous category, sort or filter could overwrite the
 * current list, and a double click on "Load more" appended the same page
 * twice.
 *
 * @param {boolean} append true to add rows below the existing ones ("Load
 *   more"); false to replace the list.
 */
function fetchDrillPage(append) {
  const requestId = ++drillRequestSeq;
  const qs = new URLSearchParams({ offset: drillState.offset, limit: drillState.limit });
  if (drillState.category) qs.set('category', drillState.category);
  if (drillState.sourceName) qs.set('source_name', drillState.sourceName);
  if (drillState.sort) qs.set('sort', drillState.sort);
  if (drillState.titleFilter) qs.set('title', drillState.titleFilter);
  fetch('/api/corpus-documents.php?' + qs, { credentials: 'same-origin' })
    .then(r => r.json())
    .then(data => {
      if (requestId !== drillRequestSeq) return; // superseded by a newer request
      if (!data.ok) {
        if (!append) drillDocList.innerHTML = '<p class="drill-error">Could not load documents.</p>';
        return;
      }
      drillState.total = data.total;
      const docs = data.documents || [];
      if (!append) drillDocList.innerHTML = '';
      if (docs.length === 0 && !append) {
        // The panel also lists documents per source, so name the right scope.
        drillDocList.innerHTML = drillState.sourceName
          ? '<p class="drill-empty">No documents found for this source.</p>'
          : '<p class="drill-empty">No documents found in this category.</p>';
        drillMoreWrap.hidden = true;
        return;
      }
      docs.forEach(doc => {
        const item = document.createElement('div');
        item.className = 'doc-list__item';
        const titleHtml = doc.source_url
          ? `<a href="${esc(doc.source_url)}" target="_blank" rel="noopener" class="doc-list__title">${esc(doc.title || '(Untitled)')}</a>`
          : `<span class="doc-list__title">${esc(doc.title || '(Untitled)')}</span>`;
        const langFlag = doc.language === 'no' ? '🇳🇴' : doc.language === 'en' ? '🇬🇧' : '';
        item.innerHTML = `
<div class="doc-list__info">
${titleHtml}
<div class="doc-list__meta">
<span class="source-cat">${esc(doc.category || '—')}</span>
${langFlag ? `<span>${langFlag}</span>` : ''}
<span class="doc-list__date">${fmtDate(doc.updated_at)}</span>
</div>
</div>
<span class="doc-list__chunks">${fmt(doc.chunk_count)} passages</span>`;
        drillDocList.appendChild(item);
      });
      // Advance the paging cursor and hide "Load more" once everything is shown.
      const loaded = drillState.offset + docs.length;
      drillMoreWrap.hidden = loaded >= drillState.total;
      drillState.offset = loaded;
      if (drillCount) {
        drillCount.textContent = drillState.total > 0
          ? 'Showing ' + fmt(loaded) + ' of ' + fmt(drillState.total) + ' documents'
          : '';
      }
    })
    .catch(() => {
      if (requestId !== drillRequestSeq) return;
      if (!append) drillDocList.innerHTML = '<p class="drill-error">Network error.</p>';
    });
}
/**
 * Restart the drill-down listing from the first page after the sort order or
 * title filter changed: rewind the offset, show the loading placeholder,
 * clear the counter and request page one.
 */
function restartDrillListing() {
  drillState.offset = 0;
  drillDocList.innerHTML = '<p class="drill-loading">Loading documents…</p>';
  drillCount.textContent = '';
  fetchDrillPage(false);
}

// "Load more" appends the next page.
drillMoreBtn.addEventListener('click', () => {
  fetchDrillPage(true);
});

// Closing hides the panel and records that in the URL hash.
drillCloseBtn.addEventListener('click', () => {
  drillPanel.hidden = true;
  pushHash();
});

if (drillSortSelect) {
  drillSortSelect.addEventListener('change', () => {
    drillState.sort = drillSortSelect.value;
    restartDrillListing();
  });
}

// Handle of the pending debounce timeout for the title filter.
let drillFilterTimer = null;
if (drillSearchInput) {
  drillSearchInput.addEventListener('input', () => {
    // Debounce: only the last keystroke within 300 ms triggers a request.
    clearTimeout(drillFilterTimer);
    drillFilterTimer = setTimeout(() => {
      drillState.titleFilter = drillSearchInput.value.trim();
      restartDrillListing();
    }, 300);
  });
}
// ── Search bar ────────────────────────────────────────────────────────────
// Current search options; each is updated by its control and sent with the query.
let searchMode = 'hybrid';
let searchLang = 'en';
let searchCat = '';
let searchPersona = '';

/**
 * Wire a group of mutually exclusive pills: a click moves `is-active` to the
 * clicked pill, hands its data attributes to `onPick` and updates the URL hash.
 *
 * @param {string} selector CSS selector matching every pill in the group.
 * @param {function(DOMStringMap): void} onPick Receives the clicked pill's dataset.
 */
function bindPillGroup(selector, onPick) {
  for (const pill of document.querySelectorAll(selector)) {
    pill.addEventListener('click', () => {
      for (const other of document.querySelectorAll(selector)) {
        other.classList.remove('is-active');
      }
      pill.classList.add('is-active');
      onPick(pill.dataset);
      pushHash();
    });
  }
}

bindPillGroup('.search-modes .mode-pill', (data) => { searchMode = data.mode; });
bindPillGroup('.lang-pills .mode-pill', (data) => { searchLang = data.lang; });
bindPillGroup('#searchCatPills .mode-pill', (data) => { searchCat = data.cat; });

// Search box elements.
const searchInput = document.getElementById('corpusSearchInput');
const searchBtn = document.getElementById('corpusSearchBtn');
const searchResults = document.getElementById('corpusSearchResults');
// ── Persona (legal domain) selector — scopes Hybrid search ──────────────────
const personaControl = document.getElementById('corpusPersonaControl');
const personaSelect = document.getElementById('corpusPersonaSelect');

/**
 * Populate and reveal the persona selector.
 *
 * Fetches the persona list, fills the <select>, and chooses the initial value
 * in this order: `persona` in the URL hash, the value remembered in
 * sessionStorage, the server's default (or 'family'), then the first persona.
 * The control stays hidden when the list cannot be loaded or is empty.
 *
 * Web Storage access can throw (for example when storage is blocked or
 * full), so reads and writes are wrapped. An unavailable store then costs
 * only the remembered choice instead of hiding the selector or aborting the
 * change handler before the hash is updated.
 *
 * @returns {Promise<void>}
 */
async function loadPersonas() {
  if (!personaSelect) return;
  const readSavedPersona = () => {
    try { return sessionStorage.getItem('dbnPersona'); } catch (_) { return null; }
  };
  const savePersona = (slug) => {
    try { sessionStorage.setItem('dbnPersona', slug); } catch (_) { /* persistence is best-effort */ }
  };
  try {
    const r = await fetch('/api/personas.php', { credentials: 'same-origin', headers: { Accept: 'application/json' } });
    const data = await r.json().catch(() => ({}));
    if (!r.ok || data.ok !== true || !Array.isArray(data.personas) || !data.personas.length) return;
    const fallback = data.default_persona || 'family';
    personaSelect.innerHTML = '';
    for (const p of data.personas) {
      const opt = document.createElement('option');
      opt.value = p.slug;
      opt.textContent = p.name || p.slug;
      personaSelect.appendChild(opt);
    }
    const hashPersona = new URLSearchParams(location.hash.slice(1)).get('persona');
    const saved = hashPersona || readSavedPersona();
    // Ignore a remembered slug that the server no longer offers.
    const initial = (saved && data.personas.some(p => p.slug === saved)) ? saved
      : (data.personas.some(p => p.slug === fallback) ? fallback : data.personas[0].slug);
    searchPersona = initial;
    personaSelect.value = initial;
    personaSelect.addEventListener('change', () => {
      searchPersona = personaSelect.value;
      savePersona(searchPersona);
      pushHash();
    });
    personaControl?.classList.remove('is-hidden');
  } catch (_) { /* personas are optional UI sugar; ignore failures */ }
}
loadPersonas();
/**
 * Run a corpus search with the current query, mode, language, category and
 * persona, then render the returned passages into the results panel.
 *
 * Queries shorter than 3 characters only show a hint. Otherwise the URL hash
 * is synced, a loading message is shown, the search button is disabled and
 * /api/corpus-search.php is called with a JSON body.
 *
 * Each call supersedes any request still in flight. The Enter key can start
 * a new search while the button is disabled, so responses may arrive out of
 * order; only the most recent call is allowed to touch the DOM.
 */
function runSearch() {
  // Ticket for this invocation, kept on the function object so no extra
  // state is needed in the enclosing scope.
  const ticket = (runSearch.ticket || 0) + 1;
  runSearch.ticket = ticket;
  const superseded = () => runSearch.ticket !== ticket;

  const q = searchInput.value.trim();
  if (q.length < 3) {
    searchResults.innerHTML = '<p class="search-hint">Enter at least 3 characters.</p>';
    searchResults.hidden = false;
    // A request superseded by this call will no longer re-enable the button.
    searchBtn.disabled = false;
    return;
  }

  // Return the URL only if it resolves to http(s). HTML-escaping alone does
  // not stop a `javascript:` URL from executing when placed in an href.
  const httpUrl = raw => {
    if (!raw) return '';
    try {
      const { protocol } = new URL(raw, location.href);
      return (protocol === 'http:' || protocol === 'https:') ? raw : '';
    } catch (_) {
      return '';
    }
  };

  pushHash();
  searchResults.hidden = false;
  searchResults.innerHTML = `<p class="search-loading">Searching in <strong>${esc(searchMode)}</strong> mode…</p>`;
  searchBtn.disabled = true;

  fetch('/api/corpus-search.php', {
    method: 'POST',
    credentials: 'same-origin',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ query: q, mode: searchMode, language: searchLang, limit: 8, category: searchCat || null, profile: searchPersona || null }),
  })
    .then(r => r.json())
    .then(data => {
      if (superseded()) return;
      searchBtn.disabled = false;

      // `data` may be null (a literal JSON `null` body); treat it as an error
      // payload rather than letting a TypeError surface as "Network error.".
      if (!data || !data.ok) {
        searchResults.innerHTML = `<p class="search-error">Search error: ${esc(data?.error?.message || 'Unknown error')}</p>`;
        return;
      }

      const hits = data.hits || [];
      if (hits.length === 0) {
        searchResults.innerHTML = `<p class="search-empty">No results for <strong>${esc(q)}</strong> in ${esc(data.mode)} mode.</p>`;
        return;
      }

      const modeLabel = { hybrid: 'Hybrid RAG', bm25: 'BM25 keyword', vector: 'Vector semantic' }[data.mode] || data.mode;
      const header = document.createElement('div');
      header.className = 'search-results-header';
      header.innerHTML = `<span class="eyebrow">${esc(modeLabel)}</span><span class="search-results-count">${hits.length} passage${hits.length !== 1 ? 's' : ''}</span>`;
      searchResults.innerHTML = '';
      searchResults.appendChild(header);

      hits.forEach(hit => {
        const score = hit.score != null ? `<span class="passage-score">${Math.round(hit.score * 100)}%</span>` : '';
        const catAuth = authorityLabels[hit.category] || { label: hit.category || '—', cls: 'badge--muted' };
        const sourceUrl = httpUrl(hit.source_url);
        const titleHtml = sourceUrl
          ? `<a href="${esc(sourceUrl)}" target="_blank" rel="noopener" class="passage-card__title">${esc(hit.title || '(Untitled)')}</a>`
          : `<span class="passage-card__title">${esc(hit.title || '(Untitled)')}</span>`;
        const section = hit.section ? `<span class="passage-section">§ ${esc(hit.section)}</span>` : '';
        const excerpt = highlight(hit.excerpt || '', q);
        const fullText = (hit.full_text || '').trim();
        // Offer the expander only when the full text adds something to the excerpt.
        const hasMore = fullText.length > (hit.excerpt || '').length;
        const sourceLink = sourceUrl
          ? `<a href="${esc(sourceUrl)}" target="_blank" rel="noopener" class="passage-source-link">View source ↗</a>`
          : '';

        const card = document.createElement('div');
        card.className = 'passage-card';
        card.innerHTML = `
<div class="passage-card__meta">
<span class="source-badge ${esc(catAuth.cls)}">${esc(catAuth.label)}</span>
${section}
${score}
</div>
${titleHtml}
<p class="passage-card__excerpt">${excerpt}</p>
${sourceLink}
${hasMore ? '<button class="passage-expand-btn" type="button" aria-expanded="false">Show full passage</button><div class="passage-full-text" hidden></div>' : ''}`;

        if (hasMore) {
          const expandBtn = card.querySelector('.passage-expand-btn');
          const fullDiv = card.querySelector('.passage-full-text');
          fullDiv.innerHTML = esc(fullText).replace(/\n/g, '<br>');
          expandBtn.addEventListener('click', function () {
            const isOpen = !fullDiv.hidden;
            fullDiv.hidden = isOpen;
            this.textContent = isOpen ? 'Show full passage' : 'Hide passage';
            this.setAttribute('aria-expanded', String(!isOpen));
          });
        }
        searchResults.appendChild(card);
      });
    })
    .catch(() => {
      if (superseded()) return;
      searchBtn.disabled = false;
      searchResults.innerHTML = `<p class="search-error">Network error.</p>`;
    });
}
searchBtn.addEventListener('click', runSearch);
searchInput.addEventListener('keydown', e => { if (e.key === 'Enter') runSearch(); });
// ── URL hash state ────────────────────────────────────────────────────────
/**
 * Mirror the current search/drill state into the URL hash without adding a
 * history entry. Values equal to their defaults (mode 'hybrid', language
 * 'en', empty category, empty or 'family' persona) are left out, and drill
 * keys are written only while the drill panel is visible. With nothing to
 * record, the URL is reset to the bare path + query string.
 */
function pushHash() {
  const query = searchInput ? searchInput.value.trim() : '';
  const drillOpen = Boolean(drillPanel) && !drillPanel.hidden;

  // [key, value getter, include?] in the order the keys appear in the hash.
  // Getters keep drillState untouched unless the panel is actually open.
  const fields = [
    ['q', () => query, Boolean(query)],
    ['mode', () => searchMode, searchMode !== 'hybrid'],
    ['lang', () => searchLang, searchLang !== 'en'],
    ['cat', () => searchCat, Boolean(searchCat)],
    ['persona', () => searchPersona, Boolean(searchPersona) && searchPersona !== 'family'],
    ['drill', () => drillState.category, drillOpen && Boolean(drillState.category)],
    ['drillsrc', () => drillState.sourceName, drillOpen && Boolean(drillState.sourceName)],
  ];

  const params = new URLSearchParams();
  for (const [key, read, include] of fields) {
    if (include) params.set(key, read());
  }

  const serialized = params.toString();
  const target = serialized ? '#' + serialized : location.pathname + location.search;
  history.replaceState(null, '', target);
}
/**
 * Mark exactly the pills whose data attribute equals `value` as active.
 *
 * @param {string} group  CSS selector matching every pill in the group.
 * @param {string} attr   dataset key to compare (e.g. 'mode' for data-mode).
 * @param {string} value  Value the active pill must carry.
 */
function activatePill(group, attr, value) {
  for (const pill of document.querySelectorAll(group)) {
    const isSelected = pill.dataset[attr] === value;
    pill.classList.toggle('is-active', isSelected);
  }
}
/**
 * Restore search and drill state from the URL hash on page load.
 *
 * Reads `mode`, `lang`, `cat`, `persona`, `drill`, `drillsrc` and `q`. The
 * pill-backed values (mode, lang, cat) are applied only when a pill in the
 * matching group actually offers that value. A stale or hand-edited hash
 * therefore cannot deselect every pill or push an unsupported value into
 * the search request; the current default is kept instead. If `q` is
 * present the query is put in the input and a search is started at once.
 */
function restoreHash() {
  if (!location.hash) return;
  const p = new URLSearchParams(location.hash.slice(1));

  // True when some pill in `group` carries `value` in its data-`attr`.
  const pillOffers = (group, attr, value) =>
    Array.from(document.querySelectorAll(group)).some(b => b.dataset[attr] === value);

  if (p.has('mode') && pillOffers('.search-modes .mode-pill', 'mode', p.get('mode'))) {
    searchMode = p.get('mode');
    activatePill('.search-modes .mode-pill', 'mode', searchMode);
  }
  if (p.has('lang') && pillOffers('.lang-pills .mode-pill', 'lang', p.get('lang'))) {
    searchLang = p.get('lang');
    activatePill('.lang-pills .mode-pill', 'lang', searchLang);
  }
  if (p.has('cat') && pillOffers('#searchCatPills .mode-pill', 'cat', p.get('cat'))) {
    searchCat = p.get('cat');
    activatePill('#searchCatPills .mode-pill', 'cat', searchCat);
  }
  if (p.has('persona')) {
    // Taken as-is so a search started below is already scoped. The <select>
    // assignment only takes effect if a matching option exists at this point;
    // loadPersonas() re-reads the hash and validates the slug once the
    // persona list has arrived.
    searchPersona = p.get('persona');
    if (personaSelect) personaSelect.value = searchPersona;
  }
  if (p.has('drill')) openDrillByCategory(p.get('drill'));
  if (p.has('drillsrc')) openDrillBySource(p.get('drillsrc'));
  if (p.has('q') && searchInput) {
    searchInput.value = p.get('q');
    runSearch();
  }
}
restoreHash();
})();
</script>
<?php require_once __DIR__ . '/includes/layout_footer.php'; ?>