feat: Corpus Intelligence page + timeline background events
Adds /corpus.php — a data transparency page showing what powers the legal tools: 9 coverage categories with live doc counts, a full sources table pulled from the corpus DB, the AI stack (LLMs, Whisper, Qdrant, Azure AI Search, embeddings, chunking), and a pipeline flow diagram. Stats are live via a new /api/corpus-stats.php endpoint (queries dobetter_rag + bnl_admin). The reasoning sidebar is repurposed as a Corpus health panel on this page. Also ships the in-progress timeline background events toggle: API and UI wired together via include_background param. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,68 @@
|
||||
<?php

declare(strict_types=1);

require_once __DIR__ . '/../includes/bootstrap.php';

dbnToolsRequireMethod('GET');
dbnToolsRequireAuth();

/*
 * GET /api/corpus-stats.php
 *
 * Responds with aggregate statistics for corpus 1 (the do-better-legal corpus):
 *   stats.total_chunks   int          chunks joined to corpus documents
 *   stats.total_docs     int          documents in the corpus
 *   stats.active_sources int          corpus_sources rows flagged active
 *   stats.last_updated   string|null  MAX(documents.updated_at)
 *   stats.by_category    list<array{category: string, doc_count: int}>
 *   sources              list of corpus_sources rows, with is_active as a real boolean
 *
 * Document and chunk figures come from the RAG database; the source list comes
 * from the admin database.
 */
try {
    $bnlDb = dbnToolsDb();
    $ragDb = dbnToolsRagDb();

    // Single place to change the corpus every query below is scoped to.
    $corpusParams = ['corpus_id' => 1];

    // Total documents in the corpus.
    $stmt = $ragDb->prepare('SELECT COUNT(*) FROM documents WHERE corpus_id = :corpus_id');
    $stmt->execute($corpusParams);
    $totalDocs = (int) $stmt->fetchColumn();

    // Total chunks belonging to documents of the corpus.
    $stmt = $ragDb->prepare(
        'SELECT COUNT(*) FROM chunks c
         JOIN documents d ON c.document_id = d.id
         WHERE d.corpus_id = :corpus_id'
    );
    $stmt->execute($corpusParams);
    $totalChunks = (int) $stmt->fetchColumn();

    // Document counts per non-empty category, largest first.
    $stmt = $ragDb->prepare(
        "SELECT category, COUNT(*) AS doc_count
         FROM documents
         WHERE corpus_id = :corpus_id AND category IS NOT NULL AND category != ''
         GROUP BY category
         ORDER BY doc_count DESC"
    );
    $stmt->execute($corpusParams);
    // The driver may hand back numeric columns as strings; emit real integers
    // so the JSON payload has one stable shape.
    $byCategory = array_map(
        static fn (array $row): array => [
            'category'  => (string) $row['category'],
            'doc_count' => (int) $row['doc_count'],
        ],
        $stmt->fetchAll(PDO::FETCH_ASSOC)
    );

    // Most recent document update; null when the corpus has no documents.
    $stmt = $ragDb->prepare('SELECT MAX(updated_at) FROM documents WHERE corpus_id = :corpus_id');
    $stmt->execute($corpusParams);
    $lastUpdated = $stmt->fetchColumn() ?: null;

    // Configured sources (active and inactive) from the admin database.
    $stmt = $bnlDb->prepare(
        'SELECT name, url, category, authority_type, language, schedule, is_active, scraper_class
         FROM corpus_sources
         WHERE corpus_id = :corpus_id
         ORDER BY category, name'
    );
    $stmt->execute($corpusParams);
    // Normalise is_active to a boolean: a string "0" would otherwise be truthy
    // for JavaScript consumers and show an inactive source as active.
    $sources = array_map(
        static function (array $row): array {
            $row['is_active'] = !empty($row['is_active']);

            return $row;
        },
        $stmt->fetchAll(PDO::FETCH_ASSOC)
    );

    $activeSources = count(array_filter(
        $sources,
        static fn (array $source): bool => $source['is_active']
    ));

    dbnToolsRespond([
        'ok' => true,
        'stats' => [
            'total_chunks'   => $totalChunks,
            'total_docs'     => $totalDocs,
            'active_sources' => $activeSources,
            'last_updated'   => $lastUpdated,
            'by_category'    => $byCategory,
        ],
        'sources' => $sources,
    ]);
} catch (Throwable $e) {
    // Keep the diagnostic detail in the server log only: exception messages
    // from the database layer can reveal hostnames, schema and SQL fragments.
    error_log('corpus-stats: ' . get_class($e) . ': ' . $e->getMessage());
    dbnToolsError('Could not load corpus statistics.', 500, 'corpus_stats_error');
}
|
||||
+2
-1
@@ -23,6 +23,7 @@ dbnToolsWithTelemetry('timeline', $language, function () use ($input, $language)
|
||||
? 'high_medium' : 'all';
|
||||
|
||||
$includeRelative = ($input['include_relative'] ?? true) !== false;
|
||||
$includeBackground = ($input['include_background'] ?? true) !== false;
|
||||
|
||||
return (new DbnLegalToolsService())->timeline($text, $language, $engine, $focus, $confidenceFilter, $includeRelative);
|
||||
return (new DbnLegalToolsService())->timeline($text, $language, $engine, $focus, $confidenceFilter, $includeRelative, $includeBackground);
|
||||
});
|
||||
|
||||
@@ -2013,6 +2013,414 @@ p {
|
||||
font-size: 0.92rem;
|
||||
}
|
||||
|
||||
/* ============================================================
|
||||
CORPUS PAGE
|
||||
============================================================ */
|
||||
|
||||
/* Stats bar */
|
||||
.corpus-stats-bar {
|
||||
display: flex;
|
||||
gap: 0;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
margin-bottom: 36px;
|
||||
background: var(--panel);
|
||||
}
|
||||
|
||||
.corpus-stat {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
padding: 20px 16px;
|
||||
border-right: 1px solid var(--line);
|
||||
text-align: center;
|
||||
}
|
||||
.corpus-stat:last-child { border-right: 0; }
|
||||
|
||||
.corpus-stat__value {
|
||||
display: block;
|
||||
font-size: 2rem;
|
||||
font-weight: 800;
|
||||
color: var(--teal);
|
||||
line-height: 1;
|
||||
font-variant-numeric: tabular-nums;
|
||||
letter-spacing: -0.02em;
|
||||
}
|
||||
|
||||
/* Skeleton placeholder shown while the stat value is still loading. */
.corpus-stat__value.is-loading {
  color: var(--line);
  background: linear-gradient(90deg, var(--line) 25%, #e8ecf2 50%, var(--line) 75%);
  background-size: 200% 100%;
  animation: shimmer 1.4s infinite;
  border-radius: 4px;
  min-width: 60px;
}

/* Sweeps the gradient highlight across the placeholder from right to left. */
@keyframes shimmer {
  0%   { background-position: 200% 0; }
  100% { background-position: -200% 0; }
}

/* Users who ask for reduced motion get a static placeholder instead of an
   endlessly looping animation. */
@media (prefers-reduced-motion: reduce) {
  .corpus-stat__value.is-loading {
    animation: none;
  }
}
|
||||
|
||||
.corpus-stat__label {
|
||||
display: block;
|
||||
font-size: 0.72rem;
|
||||
font-weight: 700;
|
||||
text-transform: uppercase;
|
||||
color: var(--muted);
|
||||
margin-top: 6px;
|
||||
letter-spacing: 0.04em;
|
||||
}
|
||||
|
||||
/* Section headings */
|
||||
.corpus-section {
|
||||
margin-bottom: 40px;
|
||||
}
|
||||
|
||||
.corpus-section__title {
|
||||
font-size: 1.05rem;
|
||||
font-weight: 700;
|
||||
color: var(--ink);
|
||||
margin: 4px 0 18px;
|
||||
}
|
||||
|
||||
/* Category cards */
|
||||
.corpus-categories {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(3, 1fr);
|
||||
gap: 14px;
|
||||
}
|
||||
|
||||
.category-card {
|
||||
background: var(--panel);
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
padding: 18px;
|
||||
transition: border-color 0.15s, box-shadow 0.15s;
|
||||
}
|
||||
|
||||
.category-card:hover {
|
||||
border-color: var(--teal);
|
||||
box-shadow: 0 4px 16px rgba(15, 118, 110, 0.1);
|
||||
}
|
||||
|
||||
.category-card__top {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: flex-start;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
.category-card__icon {
|
||||
font-size: 1.5rem;
|
||||
line-height: 1;
|
||||
}
|
||||
|
||||
.category-card__count {
|
||||
font-size: 0.72rem;
|
||||
font-weight: 800;
|
||||
background: var(--soft-teal);
|
||||
color: var(--teal-dark);
|
||||
border-radius: 999px;
|
||||
padding: 2px 10px;
|
||||
font-variant-numeric: tabular-nums;
|
||||
min-width: 28px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
/* Skeleton placeholder for a category badge: the text is hidden and the pill
   shows the shared shimmer gradient until the live count arrives. */
.category-card__count.is-loading {
  color: transparent;
  background: linear-gradient(90deg, var(--line) 25%, #e8ecf2 50%, var(--line) 75%);
  background-size: 200% 100%;
  animation: shimmer 1.4s infinite;
}

/* Users who ask for reduced motion get a static placeholder instead of an
   endlessly looping animation. */
@media (prefers-reduced-motion: reduce) {
  .category-card__count.is-loading {
    animation: none;
  }
}
|
||||
|
||||
.category-card h4 {
|
||||
font-size: 0.88rem;
|
||||
font-weight: 700;
|
||||
color: var(--ink);
|
||||
margin: 0 0 6px;
|
||||
}
|
||||
|
||||
.category-card p {
|
||||
font-size: 0.80rem;
|
||||
color: var(--muted);
|
||||
line-height: 1.5;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
/* Sources table */
|
||||
.corpus-table-wrap {
|
||||
overflow-x: auto;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
.sources-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
font-size: 0.84rem;
|
||||
background: var(--panel);
|
||||
}
|
||||
|
||||
.sources-table th {
|
||||
background: var(--bg);
|
||||
font-size: 0.70rem;
|
||||
font-weight: 700;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
color: var(--muted);
|
||||
padding: 10px 14px;
|
||||
text-align: left;
|
||||
border-bottom: 1px solid var(--line);
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.sources-table td {
|
||||
padding: 10px 14px;
|
||||
border-bottom: 1px solid var(--line);
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.sources-table tbody tr:last-child td { border-bottom: 0; }
|
||||
|
||||
.sources-table tbody tr:hover td {
|
||||
background: var(--soft-teal);
|
||||
}
|
||||
|
||||
.sources-table a {
|
||||
color: var(--teal);
|
||||
text-decoration: none;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.sources-table a:hover { text-decoration: underline; }
|
||||
|
||||
.source-name { max-width: 260px; }
|
||||
|
||||
.source-badge {
|
||||
display: inline-block;
|
||||
font-size: 0.68rem;
|
||||
font-weight: 800;
|
||||
padding: 2px 8px;
|
||||
border-radius: 999px;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.03em;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.badge--teal { background: var(--soft-teal); color: var(--teal-dark); }
|
||||
.badge--amber { background: #fef3cd; color: var(--amber); }
|
||||
.badge--coral { background: var(--soft-coral); color: var(--coral); }
|
||||
.badge--muted { background: #eef0f5; color: var(--muted); }
|
||||
|
||||
.source-cat {
|
||||
font-size: 0.75rem;
|
||||
color: var(--muted);
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.status-active { color: #15803d; font-size: 0.78rem; font-weight: 700; white-space: nowrap; }
|
||||
.status-inactive { color: var(--muted); font-size: 0.78rem; white-space: nowrap; }
|
||||
|
||||
.sources-skeleton td {
|
||||
color: var(--muted);
|
||||
font-style: italic;
|
||||
padding: 16px 14px;
|
||||
}
|
||||
|
||||
/* AI Stack */
|
||||
.stack-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
|
||||
gap: 14px;
|
||||
}
|
||||
|
||||
.stack-card {
|
||||
background: var(--panel);
|
||||
border: 1px solid var(--line);
|
||||
border-left: 3px solid var(--teal);
|
||||
border-radius: 0 8px 8px 0;
|
||||
padding: 18px 20px;
|
||||
}
|
||||
|
||||
.stack-card h3 {
|
||||
font-size: 0.70rem;
|
||||
font-weight: 700;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.06em;
|
||||
color: var(--muted);
|
||||
margin: 0 0 12px;
|
||||
}
|
||||
|
||||
.stack-list {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
margin: 0 0 10px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 7px;
|
||||
font-size: 0.84rem;
|
||||
color: var(--ink);
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.stack-badge {
|
||||
display: inline-block;
|
||||
font-size: 0.62rem;
|
||||
font-weight: 800;
|
||||
padding: 1px 6px;
|
||||
border-radius: 4px;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.04em;
|
||||
vertical-align: middle;
|
||||
margin-right: 4px;
|
||||
}
|
||||
|
||||
.stack-badge--azure { background: #dbeafe; color: #1d4ed8; }
|
||||
.stack-badge--gpu { background: #f3e8ff; color: #7c3aed; }
|
||||
.stack-badge--api { background: #fef3cd; color: var(--amber); }
|
||||
|
||||
.stack-star {
|
||||
font-size: 0.70rem;
|
||||
font-weight: 700;
|
||||
color: var(--amber);
|
||||
}
|
||||
|
||||
.stack-note {
|
||||
font-size: 0.75rem;
|
||||
color: var(--muted);
|
||||
margin: 8px 0 0;
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.stack-note code {
|
||||
background: var(--bg);
|
||||
padding: 1px 5px;
|
||||
border-radius: 3px;
|
||||
font-size: 0.72rem;
|
||||
}
|
||||
|
||||
/* Pipeline flow */
|
||||
.pipeline-flow {
|
||||
display: flex;
|
||||
align-items: stretch;
|
||||
flex-wrap: wrap;
|
||||
gap: 0;
|
||||
background: var(--panel);
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.pipeline-step {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 18px 14px;
|
||||
text-align: center;
|
||||
min-width: 90px;
|
||||
flex: 1;
|
||||
background: var(--panel);
|
||||
}
|
||||
|
||||
.pipeline-step--end {
|
||||
background: var(--soft-teal);
|
||||
}
|
||||
|
||||
.pipeline-step__icon {
|
||||
font-size: 1.4rem;
|
||||
margin-bottom: 6px;
|
||||
display: block;
|
||||
}
|
||||
|
||||
.pipeline-step span:not(.pipeline-step__icon) {
|
||||
font-size: 0.78rem;
|
||||
font-weight: 700;
|
||||
color: var(--ink);
|
||||
display: block;
|
||||
}
|
||||
|
||||
.pipeline-step small {
|
||||
font-size: 0.67rem;
|
||||
color: var(--muted);
|
||||
display: block;
|
||||
margin-top: 3px;
|
||||
}
|
||||
|
||||
.pipeline-arrow {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
padding: 0 2px;
|
||||
color: var(--muted);
|
||||
font-size: 1.1rem;
|
||||
background: var(--bg);
|
||||
border-left: 1px solid var(--line);
|
||||
border-right: 1px solid var(--line);
|
||||
}
|
||||
|
||||
.pipeline-arrow::after {
|
||||
content: '›';
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
/* Corpus health sidebar */
|
||||
.corpus-health-dl {
|
||||
margin: 0;
|
||||
padding: 16px;
|
||||
display: grid;
|
||||
grid-template-columns: auto 1fr;
|
||||
gap: 6px 12px;
|
||||
font-size: 0.80rem;
|
||||
}
|
||||
|
||||
.corpus-health-dl dt {
|
||||
font-weight: 700;
|
||||
color: var(--muted);
|
||||
font-size: 0.70rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.04em;
|
||||
padding-top: 2px;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.corpus-health-dl dd {
|
||||
color: var(--ink);
|
||||
margin: 0;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.corpus-health-dl code {
|
||||
background: var(--bg);
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 3px;
|
||||
font-size: 0.70rem;
|
||||
padding: 1px 5px;
|
||||
}
|
||||
|
||||
/* Responsive */
|
||||
@media (max-width: 760px) {
|
||||
.corpus-stats-bar {
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.corpus-stat {
|
||||
flex: 1 1 50%;
|
||||
border-right: 0;
|
||||
border-bottom: 1px solid var(--line);
|
||||
}
|
||||
.corpus-categories {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
.pipeline-flow {
|
||||
flex-direction: column;
|
||||
}
|
||||
.pipeline-arrow {
|
||||
display: none;
|
||||
}
|
||||
}
|
||||
|
||||
/* Method trace — overrides for #traceList rendered in rich mode */
|
||||
.trace-list.is-rich {
|
||||
display: grid;
|
||||
|
||||
+415
@@ -0,0 +1,415 @@
|
||||
<?php
|
||||
declare(strict_types=1);
|
||||
$toolName = 'corpus';
|
||||
$toolTitle = 'Legal Knowledge Base';
|
||||
$toolKind = 'Corpus Intelligence';
|
||||
$toolBadge = '~220 K passages';
|
||||
|
||||
ob_start();
|
||||
?>
|
||||
<div class="reasoning-head">
|
||||
<p class="eyebrow">Corpus health</p>
|
||||
<h2 id="reasoningTitle">Vector index</h2>
|
||||
</div>
|
||||
<dl class="corpus-health-dl">
|
||||
<dt>Collection</dt>
|
||||
<dd><code>bnl_chunks</code></dd>
|
||||
<dt>Dimensions</dt>
|
||||
<dd>768 (nomic-embed-text)</dd>
|
||||
<dt>Similarity</dt>
|
||||
<dd>Cosine</dd>
|
||||
<dt>RAG strategy</dt>
|
||||
<dd>Hybrid vector + keyword<br>Reciprocal rank fusion</dd>
|
||||
<dt>Private boost</dt>
|
||||
<dd>1.5×</dd>
|
||||
<dt>Temporal mode</dt>
|
||||
<dd>legal_conservative</dd>
|
||||
<dt>Chunk target</dt>
|
||||
<dd>600 words · 75 overlap</dd>
|
||||
<dt>Vector DB</dt>
|
||||
<dd>Qdrant on Colin Docker<br><code>10.0.2.10:6333</code></dd>
|
||||
<dt>Hybrid search</dt>
|
||||
<dd>Azure AI Search<br><code>bnl-legal-search</code><br>West Europe · Basic SKU</dd>
|
||||
</dl>
|
||||
<?php
|
||||
$reasoningPanelOverride = ob_get_clean();
|
||||
|
||||
require_once __DIR__ . '/includes/layout.php';
|
||||
?>
|
||||
|
||||
<div class="corpus-stats-bar" id="corpusStatsBar">
|
||||
<div class="corpus-stat" id="statChunks">
|
||||
<span class="corpus-stat__value is-loading">—</span>
|
||||
<span class="corpus-stat__label">Indexed passages</span>
|
||||
</div>
|
||||
<div class="corpus-stat" id="statDocs">
|
||||
<span class="corpus-stat__value is-loading">—</span>
|
||||
<span class="corpus-stat__label">Source documents</span>
|
||||
</div>
|
||||
<div class="corpus-stat" id="statSources">
|
||||
<span class="corpus-stat__value is-loading">—</span>
|
||||
<span class="corpus-stat__label">Active scrapers</span>
|
||||
</div>
|
||||
<div class="corpus-stat" id="statUpdated">
|
||||
<span class="corpus-stat__value is-loading">—</span>
|
||||
<span class="corpus-stat__label">Last ingested</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- COVERAGE -->
|
||||
<div class="corpus-section">
|
||||
<p class="eyebrow">Coverage</p>
|
||||
<h3 class="corpus-section__title">Legal categories</h3>
|
||||
<div class="corpus-categories" id="corpusCategories">
|
||||
<div class="category-card" data-category="family-law">
|
||||
<div class="category-card__top">
|
||||
<span class="category-card__icon" aria-hidden="true">⚖</span>
|
||||
<span class="category-card__count is-loading" id="cat-family-law">—</span>
|
||||
</div>
|
||||
<h4>Family Law</h4>
|
||||
<p>Barneloven, child custody (foreldreansvar), samvær, mediation (mekling), separation and divorce proceedings.</p>
|
||||
</div>
|
||||
<div class="category-card" data-category="child-welfare">
|
||||
<div class="category-card__top">
|
||||
<span class="category-card__icon" aria-hidden="true">🧒</span>
|
||||
<span class="category-card__count is-loading" id="cat-child-welfare">—</span>
|
||||
</div>
|
||||
<h4>Child Welfare</h4>
|
||||
<p>Barnevernloven, omsorgsovertakelse, emergency care orders, foster placement, CPS (barnevernet) case law.</p>
|
||||
</div>
|
||||
<div class="category-card" data-category="labour-law">
|
||||
<div class="category-card__top">
|
||||
<span class="category-card__icon" aria-hidden="true">🏭</span>
|
||||
<span class="category-card__count is-loading" id="cat-labour-law">—</span>
|
||||
</div>
|
||||
<h4>Labour Law</h4>
|
||||
<p>Arbeidsmiljøloven, collective agreements (tariffavtaler), Arbeidsretten rulings, dismissal, sick leave obligations.</p>
|
||||
</div>
|
||||
<div class="category-card" data-category="social-welfare">
|
||||
<div class="category-card__top">
|
||||
<span class="category-card__icon" aria-hidden="true">🛡</span>
|
||||
<span class="category-card__count is-loading" id="cat-social-welfare">—</span>
|
||||
</div>
|
||||
<h4>Social Welfare</h4>
|
||||
<p>NAV guidance on sykepenger, dagpenger, AAP, uføretrygd, alderspensjon, yrkesskade and social assistance.</p>
|
||||
</div>
|
||||
<div class="category-card" data-category="tax-law">
|
||||
<div class="category-card__top">
|
||||
<span class="category-card__icon" aria-hidden="true">📊</span>
|
||||
<span class="category-card__count is-loading" id="cat-tax-law">—</span>
|
||||
</div>
|
||||
<h4>Tax Law</h4>
|
||||
<p>Skatteetaten's Skatte-ABC, binding advance rulings (BFU), Skatteklagenemnda decisions, income and capital tax.</p>
|
||||
</div>
|
||||
<div class="category-card" data-category="administrative-law">
|
||||
<div class="category-card__top">
|
||||
<span class="category-card__icon" aria-hidden="true">🏛</span>
|
||||
<span class="category-card__count is-loading" id="cat-administrative-law">—</span>
|
||||
</div>
|
||||
<h4>Administrative Law</h4>
|
||||
<p>Sivilombudet reports, Forvaltningsloven, procedural rights, official complaints, Stortinget oversight.</p>
|
||||
</div>
|
||||
<div class="category-card" data-category="consumer-law">
|
||||
<div class="category-card__top">
|
||||
<span class="category-card__icon" aria-hidden="true">🏠</span>
|
||||
<span class="category-card__count is-loading" id="cat-consumer-law">—</span>
|
||||
</div>
|
||||
<h4>Consumer & Housing</h4>
|
||||
<p>HTU (rental disputes), Finansklagenemnda, Forbrukertilsynet, Forbrukerrådet, Pakkereisenemnda decisions.</p>
|
||||
</div>
|
||||
<div class="category-card" data-category="immigration-law">
|
||||
<div class="category-card__top">
|
||||
<span class="category-card__icon" aria-hidden="true">🌐</span>
|
||||
<span class="category-card__count is-loading" id="cat-immigration-law">—</span>
|
||||
</div>
|
||||
<h4>Immigration & International</h4>
|
||||
<p>UNE (Utlendingsnemnda) decisions, ECHR Art. 8 family rights, EMD case law, Hague Convention (cross-border child abduction).</p>
|
||||
</div>
|
||||
<div class="category-card" data-category="government-documents">
|
||||
<div class="category-card__top">
|
||||
<span class="category-card__icon" aria-hidden="true">📄</span>
|
||||
<span class="category-card__count is-loading" id="cat-government-documents">—</span>
|
||||
</div>
|
||||
<h4>Government Documents</h4>
|
||||
<p>NOUer, Stortingsmeldinger, government white papers and regulatory guidance from Regjeringen.no.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- SOURCES TABLE -->
|
||||
<div class="corpus-section">
|
||||
<p class="eyebrow">Data sources</p>
|
||||
<h3 class="corpus-section__title">Active scrapers</h3>
|
||||
<div class="corpus-table-wrap">
|
||||
<table class="sources-table" id="sourcesTable">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Source</th>
|
||||
<th>Type</th>
|
||||
<th>Category</th>
|
||||
<th>Lang</th>
|
||||
<th>Schedule</th>
|
||||
<th>Status</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="sourcesTableBody">
|
||||
<tr class="sources-skeleton"><td colspan="6">Loading sources…</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- AI STACK -->
|
||||
<div class="corpus-section">
|
||||
<p class="eyebrow">Software</p>
|
||||
<h3 class="corpus-section__title">AI stack</h3>
|
||||
<div class="stack-grid">
|
||||
|
||||
<div class="stack-card">
|
||||
<h3>Reasoning LLMs</h3>
|
||||
<ul class="stack-list">
|
||||
<li><span class="stack-badge stack-badge--azure">Azure</span> <strong>gpt-4o-mini</strong> <span class="stack-star">★ default</span> — fast, cost-efficient</li>
|
||||
<li><span class="stack-badge stack-badge--azure">Azure</span> <strong>gpt-4o</strong> — highest quality</li>
|
||||
<li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>qwen2.5:14b</strong> — local, private</li>
|
||||
<li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>qwen3:14b</strong> — reasoning mode</li>
|
||||
<li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>dbn-legal-agent</strong> — Norwegian law fine-tune (QLoRA on qwen2.5:7b, NorwAI-24B distillation)</li>
|
||||
</ul>
|
||||
<p class="stack-note">All routed via LiteLLM on Colin · <code>10.0.1.10:4000</code></p>
|
||||
</div>
|
||||
|
||||
<div class="stack-card">
|
||||
<h3>Transcription</h3>
|
||||
<ul class="stack-list">
|
||||
<li><span class="stack-badge stack-badge--gpu">GPU</span> <strong>Whisper large-v3</strong> <span class="stack-star">★ primary</span><br>Cuttlefish · RTX 3060 12 GB VRAM</li>
|
||||
<li><span class="stack-badge stack-badge--api">API</span> OpenAI Whisper API</li>
|
||||
<li><span class="stack-badge stack-badge--azure">Azure</span> AI Speech <code>nb-NO</code> (Norway East)</li>
|
||||
</ul>
|
||||
<p class="stack-note">Speaker diarization · VAD silence filter · beam size 5 · vocabulary presets (barnerett, mediation)</p>
|
||||
</div>
|
||||
|
||||
<div class="stack-card">
|
||||
<h3>Embeddings</h3>
|
||||
<ul class="stack-list">
|
||||
<li><strong>nomic-embed-text</strong> — 768-dim dense vectors</li>
|
||||
<li>Ollama on Chloe <code>10.0.1.11:11434</code></li>
|
||||
<li>Cosine similarity in Qdrant</li>
|
||||
</ul>
|
||||
<p class="stack-note">All documents chunked and embedded before indexing; chunks stored in both Qdrant (vector) and MariaDB (keyword fallback)</p>
|
||||
</div>
|
||||
|
||||
<div class="stack-card">
|
||||
<h3>Vector & Hybrid Search</h3>
|
||||
<ul class="stack-list">
|
||||
<li><strong>Qdrant</strong> <code>bnl_chunks</code> · ~220 K vectors<br>Colin Docker · <code>10.0.2.10:6333</code></li>
|
||||
<li><strong>Azure AI Search</strong> <code>bnl-legal-search</code><br>Basic SKU · West Europe · hybrid keyword + semantic</li>
|
||||
<li>Reciprocal rank fusion (vector + keyword)</li>
|
||||
<li>Private corpus boosted 1.5×</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="stack-card">
|
||||
<h3>Chunking pipeline</h3>
|
||||
<ul class="stack-list">
|
||||
<li>Heading-aware semantic splitting</li>
|
||||
<li>600-word target · 75-word overlap</li>
|
||||
<li>50-word minimum chunk</li>
|
||||
<li>SHA-256 deduplication</li>
|
||||
<li>PDF, DOCX, HTML text extraction</li>
|
||||
<li>Temporal metadata (valid_from / valid_until)</li>
|
||||
</ul>
|
||||
<p class="stack-note">Legal temporal reranking: <code>legal_conservative</code> — surfaces current versions first</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- DATA PIPELINE -->
|
||||
<div class="corpus-section">
|
||||
<p class="eyebrow">How it works</p>
|
||||
<h3 class="corpus-section__title">Ingestion pipeline</h3>
|
||||
<div class="pipeline-flow" role="list" aria-label="Data pipeline steps">
|
||||
<div class="pipeline-step" role="listitem">
|
||||
<span class="pipeline-step__icon">🌐</span>
|
||||
<span>Source</span>
|
||||
<small>gov websites, APIs, PDFs</small>
|
||||
</div>
|
||||
<div class="pipeline-arrow" aria-hidden="true"></div>
|
||||
<div class="pipeline-step" role="listitem">
|
||||
<span class="pipeline-step__icon">🕷</span>
|
||||
<span>Scraper</span>
|
||||
<small>HTTP / API / PDF</small>
|
||||
</div>
|
||||
<div class="pipeline-arrow" aria-hidden="true"></div>
|
||||
<div class="pipeline-step" role="listitem">
|
||||
<span class="pipeline-step__icon">📝</span>
|
||||
<span>Text extract</span>
|
||||
<small>PDF, DOCX, HTML</small>
|
||||
</div>
|
||||
<div class="pipeline-arrow" aria-hidden="true"></div>
|
||||
<div class="pipeline-step" role="listitem">
|
||||
<span class="pipeline-step__icon">✂</span>
|
||||
<span>TextChunker</span>
|
||||
<small>600w · 75w overlap</small>
|
||||
</div>
|
||||
<div class="pipeline-arrow" aria-hidden="true"></div>
|
||||
<div class="pipeline-step" role="listitem">
|
||||
<span class="pipeline-step__icon">🔢</span>
|
||||
<span>Embed</span>
|
||||
<small>nomic · 768-dim</small>
|
||||
</div>
|
||||
<div class="pipeline-arrow" aria-hidden="true"></div>
|
||||
<div class="pipeline-step" role="listitem">
|
||||
<span class="pipeline-step__icon">⚡</span>
|
||||
<span>Qdrant</span>
|
||||
<small>cosine upsert</small>
|
||||
</div>
|
||||
<div class="pipeline-arrow" aria-hidden="true"></div>
|
||||
<div class="pipeline-step" role="listitem">
|
||||
<span class="pipeline-step__icon">🤖</span>
|
||||
<span>LiteLLM</span>
|
||||
<small>RAG + LLM</small>
|
||||
</div>
|
||||
<div class="pipeline-arrow" aria-hidden="true"></div>
|
||||
<div class="pipeline-step pipeline-step--end" role="listitem">
|
||||
<span class="pipeline-step__icon">🔍</span>
|
||||
<span>Your tool</span>
|
||||
<small>Ask, Search, Research…</small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
/*
 * Corpus page bootstrap.
 *
 * Fetches /api/corpus-stats.php once and fills in the stats bar, the per-category
 * document badges and the sources table. Every value that comes from the API is
 * written through textContent / DOM properties, never through innerHTML, so a
 * source name, category or language containing markup cannot inject HTML.
 */
(function () {
  const DASH = '—';
  const SOURCE_COLUMNS = 6;

  const authorityLabels = {
    case_law:   { label: 'Case law',   cls: 'badge--teal'  },
    guidance:   { label: 'Guidance',   cls: 'badge--amber' },
    report:     { label: 'Report',     cls: 'badge--muted' },
    ombudsman:  { label: 'Ombudsman',  cls: 'badge--muted' },
    tribunal:   { label: 'Tribunal',   cls: 'badge--coral' },
    regulatory: { label: 'Regulatory', cls: 'badge--coral' },
    law:        { label: 'Statute',    cls: 'badge--teal'  },
    treaty:     { label: 'Treaty',     cls: 'badge--muted' },
  };

  const scheduleLabels = {
    daily:   'Daily',
    weekly:  'Weekly',
    monthly: 'Monthly',
    manual:  'Manual',
  };

  // Category slug → badge element id. Several slugs share one card, so their
  // counts are summed before rendering.
  const catIds = {
    'family-law':           'cat-family-law',
    'family_law':           'cat-family-law',
    'child-welfare':        'cat-child-welfare',
    'child_welfare':        'cat-child-welfare',
    'labour-law':           'cat-labour-law',
    'labour_law':           'cat-labour-law',
    'social-welfare':       'cat-social-welfare',
    'social_welfare':       'cat-social-welfare',
    'tax-law':              'cat-tax-law',
    'tax_law':              'cat-tax-law',
    'administrative-law':   'cat-administrative-law',
    'administrative_law':   'cat-administrative-law',
    'consumer-law':         'cat-consumer-law',
    'consumer_law':         'cat-consumer-law',
    'tenancy-law':          'cat-consumer-law',
    'financial-law':        'cat-consumer-law',
    'immigration-law':      'cat-immigration-law',
    'immigration_law':      'cat-immigration-law',
    'government-documents': 'cat-government-documents',
    'government_documents': 'cat-government-documents',
    'case-law':             'cat-administrative-law',
    'victim-compensation':  'cat-administrative-law',
    'procurement-law':      'cat-administrative-law',
  };

  // Own-property lookup, so API values such as "constructor" never resolve to
  // something inherited from Object.prototype.
  function own(map, key) {
    return Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined;
  }

  // Formats a count with thousands separators; a dash for missing or
  // non-numeric values.
  function fmt(n) {
    if (n === null || n === undefined) return DASH;
    const num = Number(n);
    return Number.isFinite(num) ? num.toLocaleString('en') : DASH;
  }

  // Formats a timestamp as e.g. "31 Jan 2025". Falls back to the raw string
  // when it cannot be parsed.
  function fmtDate(s) {
    if (!s) return DASH;
    // SQL-style "YYYY-MM-DD HH:MM:SS" is not a format Date is required to
    // parse; swapping the space for "T" makes it ISO 8601.
    const d = new Date(String(s).replace(' ', 'T'));
    // An unparseable string produces an Invalid Date; it does not throw.
    if (Number.isNaN(d.getTime())) return String(s);
    return d.toLocaleDateString('en-GB', { day: 'numeric', month: 'short', year: 'numeric' });
  }

  // The flag may arrive as a boolean, a number or a numeric string; the string
  // "0" must count as inactive even though it is truthy.
  function isActive(v) {
    return v === true || Number(v) > 0;
  }

  // Returns an absolute http(s) URL, or null for anything else (for example
  // a javascript: or data: URL, or a string that is not a URL at all).
  function safeHref(url) {
    try {
      const parsed = new URL(String(url), window.location.href);
      return parsed.protocol === 'http:' || parsed.protocol === 'https:' ? parsed.href : null;
    } catch (e) {
      return null;
    }
  }

  function setLoaded(el) {
    el.classList.remove('is-loading');
  }

  function span(text, className) {
    const el = document.createElement('span');
    el.className = className;
    el.textContent = String(text);
    return el;
  }

  function td(content, className) {
    const cell = document.createElement('td');
    if (className) cell.className = className;
    if (content instanceof Node) {
      cell.appendChild(content);
    } else {
      cell.textContent = String(content);
    }
    return cell;
  }

  // Single full-width row used for "empty" and "failed" table states.
  function messageRow(text) {
    const tr = document.createElement('tr');
    tr.className = 'sources-skeleton';
    const cell = td(text);
    cell.colSpan = SOURCE_COLUMNS;
    tr.appendChild(cell);
    return tr;
  }

  function buildSourceRow(src) {
    const auth = own(authorityLabels, src.authority_type)
      || { label: src.authority_type || DASH, cls: 'badge--muted' };
    const sched = own(scheduleLabels, src.schedule) || src.schedule || 'Manual';
    const lang = src.language === 'no' ? '🇳🇴'
      : src.language === 'en' ? '🇬🇧'
      : (src.language || DASH);

    const name = String(src.name ?? '');
    const href = src.url ? safeHref(src.url) : null;
    let nameNode;
    if (href) {
      nameNode = document.createElement('a');
      nameNode.href = href;
      nameNode.target = '_blank';
      nameNode.rel = 'noopener noreferrer';
      nameNode.textContent = name;
    } else {
      nameNode = document.createTextNode(name);
    }

    const status = isActive(src.is_active)
      ? span('● Active', 'status-active')
      : span('○ Inactive', 'status-inactive');

    const tr = document.createElement('tr');
    tr.append(
      td(nameNode, 'source-name'),
      td(span(auth.label, 'source-badge ' + auth.cls)),
      td(span(src.category || DASH, 'source-cat')),
      td(lang),
      td(sched),
      td(status)
    );
    return tr;
  }

  function renderStats(stats) {
    const targets = [
      ['#statChunks',  fmt(stats.total_chunks)],
      ['#statDocs',    fmt(stats.total_docs)],
      ['#statSources', fmt(stats.active_sources)],
      ['#statUpdated', fmtDate(stats.last_updated)],
    ];
    targets.forEach(([selector, text]) => {
      const el = document.querySelector(selector + ' .corpus-stat__value');
      if (!el) return;
      el.textContent = text;
      setLoaded(el);
    });
  }

  function renderCategories(rows) {
    // Sum as numbers first. Reading a running total back out of the badge text
    // breaks once the text contains a thousands separator ("1,234" parses as 1).
    const totals = new Map();
    (rows || []).forEach(row => {
      const elId = own(catIds, row.category);
      const count = parseInt(row.doc_count, 10);
      if (!elId || Number.isNaN(count)) return;
      totals.set(elId, (totals.get(elId) || 0) + count);
    });

    totals.forEach((total, elId) => {
      const el = document.getElementById(elId);
      if (!el) return;
      el.textContent = fmt(total);
      setLoaded(el);
    });

    // Any badge the API returned nothing for shows zero.
    document.querySelectorAll('.category-card__count.is-loading').forEach(el => {
      el.textContent = '0';
      setLoaded(el);
    });
  }

  function renderSources(sources) {
    const tbody = document.getElementById('sourcesTableBody');
    if (!tbody) return;
    const rows = (sources || []).map(buildSourceRow);
    tbody.replaceChildren(...(rows.length ? rows : [messageRow('No sources configured.')]));
  }

  // Ends every loading state on the page so no placeholder keeps animating
  // after a failed or rejected request.
  function showFailure() {
    document.querySelectorAll('.corpus-stat__value, .category-card__count').forEach(el => {
      el.textContent = DASH;
      setLoaded(el);
    });
    const tbody = document.getElementById('sourcesTableBody');
    if (tbody) tbody.replaceChildren(messageRow('Could not load sources.'));
  }

  fetch('/api/corpus-stats.php', { credentials: 'same-origin' })
    .then(r => r.json())
    .then(data => {
      if (!data || !data.ok) {
        showFailure();
        return;
      }
      const stats = data.stats || {};
      renderStats(stats);
      renderCategories(stats.by_category);
      renderSources(data.sources);
    })
    .catch(showFailure);
})();
|
||||
</script>
|
||||
|
||||
<?php require_once __DIR__ . '/includes/layout_footer.php'; ?>
|
||||
@@ -16,6 +16,7 @@ $navItems = [
|
||||
'timeline' => ['Timeline', 'Events'],
|
||||
'redact' => ['Redact', 'Privacy'],
|
||||
'transcribe' => ['Transcribe', 'Audio'],
|
||||
'corpus' => ['Corpus', 'Data & stack'],
|
||||
];
|
||||
$toolName = $toolName ?? 'ask';
|
||||
$toolTitle = $toolTitle ?? 'Legal Tools';
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
</section><!-- /tool-panel -->
|
||||
|
||||
<aside class="reasoning-panel" aria-labelledby="reasoningTitle">
|
||||
<?php if (!empty($reasoningPanelOverride)): ?>
|
||||
<?= $reasoningPanelOverride ?>
|
||||
<?php else: ?>
|
||||
<div class="reasoning-head">
|
||||
<p class="eyebrow">Evidence trail</p>
|
||||
<h2 id="reasoningTitle">Reasoning</h2>
|
||||
@@ -14,6 +17,7 @@
|
||||
</div>
|
||||
</li>
|
||||
</ol>
|
||||
<?php endif; ?>
|
||||
</aside>
|
||||
</section><!-- /workspace -->
|
||||
</main><!-- /appShell -->
|
||||
|
||||
@@ -42,6 +42,12 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
</div>
|
||||
<p class="upload-hint" data-i18n="timelineConfidenceHint">Show all: includes events the model is uncertain about (shown in grey). Hide low-confidence: only returns events the model is reasonably sure of.</p>
|
||||
|
||||
<div class="control-row" id="timelineBackgroundControl">
|
||||
<span class="control-label" data-i18n="timelineBackground">Background events</span>
|
||||
<label><input type="checkbox" id="includeBackgroundCheck" name="include_background" checked> <span data-i18n="timelineIncludeBackground">Include narrative / background dates</span></label>
|
||||
</div>
|
||||
<p class="upload-hint" data-i18n="timelineBackgroundHint">When checked, historical context dates are included (e.g. "born 30.07.2015", "met around 2011/2012"). Uncheck to extract only operational events and deadlines.</p>
|
||||
|
||||
<div class="control-row" id="timelineDatesControl">
|
||||
<span class="control-label" data-i18n="timelineDates">Date types</span>
|
||||
<label><input type="checkbox" id="includeRelativeCheck" name="include_relative" checked> <span data-i18n="timelineIncludeRelative">Include relative / recurring dates</span></label>
|
||||
|
||||
Reference in New Issue
Block a user