diff --git a/api/corpus-stats.php b/api/corpus-stats.php new file mode 100644 index 0000000..5c07800 --- /dev/null +++ b/api/corpus-stats.php @@ -0,0 +1,68 @@ +prepare('SELECT COUNT(*) FROM documents WHERE corpus_id = 1'); + $stmt->execute(); + $totalDocs = (int)$stmt->fetchColumn(); + + // Total chunks for corpus_id=1 documents + $stmt = $ragDb->prepare( + 'SELECT COUNT(*) FROM chunks c + JOIN documents d ON c.document_id = d.id + WHERE d.corpus_id = 1' + ); + $stmt->execute(); + $totalChunks = (int)$stmt->fetchColumn(); + + // Doc counts by category + $stmt = $ragDb->prepare( + 'SELECT category, COUNT(*) AS doc_count + FROM documents + WHERE corpus_id = 1 AND category IS NOT NULL AND category != \'\' + GROUP BY category + ORDER BY doc_count DESC' + ); + $stmt->execute(); + $byCategory = $stmt->fetchAll(PDO::FETCH_ASSOC); + + // Last updated timestamp + $stmt = $ragDb->prepare('SELECT MAX(updated_at) FROM documents WHERE corpus_id = 1'); + $stmt->execute(); + $lastUpdated = $stmt->fetchColumn() ?: null; + + // Active sources from bnl_admin + $stmt = $bnlDb->prepare( + 'SELECT name, url, category, authority_type, language, schedule, is_active, scraper_class + FROM corpus_sources + WHERE corpus_id = 1 + ORDER BY category, name' + ); + $stmt->execute(); + $sources = $stmt->fetchAll(PDO::FETCH_ASSOC); + + $activeSources = count(array_filter($sources, fn($s) => !empty($s['is_active']))); + + dbnToolsRespond([ + 'ok' => true, + 'stats' => [ + 'total_chunks' => $totalChunks, + 'total_docs' => $totalDocs, + 'active_sources' => $activeSources, + 'last_updated' => $lastUpdated, + 'by_category' => $byCategory, + ], + 'sources' => $sources, + ]); +} catch (Throwable $e) { + dbnToolsError('Could not load corpus statistics: ' . 
$e->getMessage(), 500, 'corpus_stats_error'); +} diff --git a/api/timeline.php b/api/timeline.php index 84f9a71..7a10cdc 100644 --- a/api/timeline.php +++ b/api/timeline.php @@ -22,7 +22,8 @@ dbnToolsWithTelemetry('timeline', $language, function () use ($input, $language) $confidenceFilter = (string)($input['confidence_filter'] ?? '') === 'high_medium' ? 'high_medium' : 'all'; - $includeRelative = ($input['include_relative'] ?? true) !== false; + $includeRelative = ($input['include_relative'] ?? true) !== false; + $includeBackground = ($input['include_background'] ?? true) !== false; - return (new DbnLegalToolsService())->timeline($text, $language, $engine, $focus, $confidenceFilter, $includeRelative); + return (new DbnLegalToolsService())->timeline($text, $language, $engine, $focus, $confidenceFilter, $includeRelative, $includeBackground); }); diff --git a/assets/css/tools.css b/assets/css/tools.css index 3513758..4d57a75 100644 --- a/assets/css/tools.css +++ b/assets/css/tools.css @@ -2013,6 +2013,414 @@ p { font-size: 0.92rem; } +/* ============================================================ + CORPUS PAGE + ============================================================ */ + +/* Stats bar */ +.corpus-stats-bar { + display: flex; + gap: 0; + border: 1px solid var(--line); + border-radius: 8px; + overflow: hidden; + margin-bottom: 36px; + background: var(--panel); +} + +.corpus-stat { + flex: 1; + display: flex; + flex-direction: column; + align-items: center; + padding: 20px 16px; + border-right: 1px solid var(--line); + text-align: center; +} +.corpus-stat:last-child { border-right: 0; } + +.corpus-stat__value { + display: block; + font-size: 2rem; + font-weight: 800; + color: var(--teal); + line-height: 1; + font-variant-numeric: tabular-nums; + letter-spacing: -0.02em; +} + +.corpus-stat__value.is-loading { + color: var(--line); + background: linear-gradient(90deg, var(--line) 25%, #e8ecf2 50%, var(--line) 75%); + background-size: 200% 100%; + animation: shimmer 
1.4s infinite; + border-radius: 4px; + min-width: 60px; +} + +@keyframes shimmer { + 0% { background-position: 200% 0; } + 100% { background-position: -200% 0; } +} + +.corpus-stat__label { + display: block; + font-size: 0.72rem; + font-weight: 700; + text-transform: uppercase; + color: var(--muted); + margin-top: 6px; + letter-spacing: 0.04em; +} + +/* Section headings */ +.corpus-section { + margin-bottom: 40px; +} + +.corpus-section__title { + font-size: 1.05rem; + font-weight: 700; + color: var(--ink); + margin: 4px 0 18px; +} + +/* Category cards */ +.corpus-categories { + display: grid; + grid-template-columns: repeat(3, 1fr); + gap: 14px; +} + +.category-card { + background: var(--panel); + border: 1px solid var(--line); + border-radius: 8px; + padding: 18px; + transition: border-color 0.15s, box-shadow 0.15s; +} + +.category-card:hover { + border-color: var(--teal); + box-shadow: 0 4px 16px rgba(15, 118, 110, 0.1); +} + +.category-card__top { + display: flex; + justify-content: space-between; + align-items: flex-start; + margin-bottom: 10px; +} + +.category-card__icon { + font-size: 1.5rem; + line-height: 1; +} + +.category-card__count { + font-size: 0.72rem; + font-weight: 800; + background: var(--soft-teal); + color: var(--teal-dark); + border-radius: 999px; + padding: 2px 10px; + font-variant-numeric: tabular-nums; + min-width: 28px; + text-align: center; +} + +.category-card__count.is-loading { + color: transparent; + background: linear-gradient(90deg, var(--line) 25%, #e8ecf2 50%, var(--line) 75%); + background-size: 200% 100%; + animation: shimmer 1.4s infinite; +} + +.category-card h4 { + font-size: 0.88rem; + font-weight: 700; + color: var(--ink); + margin: 0 0 6px; +} + +.category-card p { + font-size: 0.80rem; + color: var(--muted); + line-height: 1.5; + margin: 0; +} + +/* Sources table */ +.corpus-table-wrap { + overflow-x: auto; + border: 1px solid var(--line); + border-radius: 8px; +} + +.sources-table { + width: 100%; + border-collapse: 
collapse; + font-size: 0.84rem; + background: var(--panel); +} + +.sources-table th { + background: var(--bg); + font-size: 0.70rem; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--muted); + padding: 10px 14px; + text-align: left; + border-bottom: 1px solid var(--line); + white-space: nowrap; +} + +.sources-table td { + padding: 10px 14px; + border-bottom: 1px solid var(--line); + vertical-align: middle; +} + +.sources-table tbody tr:last-child td { border-bottom: 0; } + +.sources-table tbody tr:hover td { + background: var(--soft-teal); +} + +.sources-table a { + color: var(--teal); + text-decoration: none; + font-weight: 600; +} + +.sources-table a:hover { text-decoration: underline; } + +.source-name { max-width: 260px; } + +.source-badge { + display: inline-block; + font-size: 0.68rem; + font-weight: 800; + padding: 2px 8px; + border-radius: 999px; + text-transform: uppercase; + letter-spacing: 0.03em; + white-space: nowrap; +} + +.badge--teal { background: var(--soft-teal); color: var(--teal-dark); } +.badge--amber { background: #fef3cd; color: var(--amber); } +.badge--coral { background: var(--soft-coral); color: var(--coral); } +.badge--muted { background: #eef0f5; color: var(--muted); } + +.source-cat { + font-size: 0.75rem; + color: var(--muted); + white-space: nowrap; +} + +.status-active { color: #15803d; font-size: 0.78rem; font-weight: 700; white-space: nowrap; } +.status-inactive { color: var(--muted); font-size: 0.78rem; white-space: nowrap; } + +.sources-skeleton td { + color: var(--muted); + font-style: italic; + padding: 16px 14px; +} + +/* AI Stack */ +.stack-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); + gap: 14px; +} + +.stack-card { + background: var(--panel); + border: 1px solid var(--line); + border-left: 3px solid var(--teal); + border-radius: 0 8px 8px 0; + padding: 18px 20px; +} + +.stack-card h3 { + font-size: 0.70rem; + font-weight: 700; + 
text-transform: uppercase; + letter-spacing: 0.06em; + color: var(--muted); + margin: 0 0 12px; +} + +.stack-list { + list-style: none; + padding: 0; + margin: 0 0 10px; + display: flex; + flex-direction: column; + gap: 7px; + font-size: 0.84rem; + color: var(--ink); + line-height: 1.4; +} + +.stack-badge { + display: inline-block; + font-size: 0.62rem; + font-weight: 800; + padding: 1px 6px; + border-radius: 4px; + text-transform: uppercase; + letter-spacing: 0.04em; + vertical-align: middle; + margin-right: 4px; +} + +.stack-badge--azure { background: #dbeafe; color: #1d4ed8; } +.stack-badge--gpu { background: #f3e8ff; color: #7c3aed; } +.stack-badge--api { background: #fef3cd; color: var(--amber); } + +.stack-star { + font-size: 0.70rem; + font-weight: 700; + color: var(--amber); +} + +.stack-note { + font-size: 0.75rem; + color: var(--muted); + margin: 8px 0 0; + line-height: 1.4; +} + +.stack-note code { + background: var(--bg); + padding: 1px 5px; + border-radius: 3px; + font-size: 0.72rem; +} + +/* Pipeline flow */ +.pipeline-flow { + display: flex; + align-items: stretch; + flex-wrap: wrap; + gap: 0; + background: var(--panel); + border: 1px solid var(--line); + border-radius: 8px; + overflow: hidden; +} + +.pipeline-step { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + padding: 18px 14px; + text-align: center; + min-width: 90px; + flex: 1; + background: var(--panel); +} + +.pipeline-step--end { + background: var(--soft-teal); +} + +.pipeline-step__icon { + font-size: 1.4rem; + margin-bottom: 6px; + display: block; +} + +.pipeline-step span:not(.pipeline-step__icon) { + font-size: 0.78rem; + font-weight: 700; + color: var(--ink); + display: block; +} + +.pipeline-step small { + font-size: 0.67rem; + color: var(--muted); + display: block; + margin-top: 3px; +} + +.pipeline-arrow { + display: flex; + align-items: center; + padding: 0 2px; + color: var(--muted); + font-size: 1.1rem; + background: var(--bg); + 
border-left: 1px solid var(--line); + border-right: 1px solid var(--line); +} + +.pipeline-arrow::after { + content: '›'; + font-weight: 700; +} + +/* Corpus health sidebar */ +.corpus-health-dl { + margin: 0; + padding: 16px; + display: grid; + grid-template-columns: auto 1fr; + gap: 6px 12px; + font-size: 0.80rem; +} + +.corpus-health-dl dt { + font-weight: 700; + color: var(--muted); + font-size: 0.70rem; + text-transform: uppercase; + letter-spacing: 0.04em; + padding-top: 2px; + white-space: nowrap; +} + +.corpus-health-dl dd { + color: var(--ink); + margin: 0; + line-height: 1.5; +} + +.corpus-health-dl code { + background: var(--bg); + border: 1px solid var(--line); + border-radius: 3px; + font-size: 0.70rem; + padding: 1px 5px; +} + +/* Responsive */ +@media (max-width: 760px) { + .corpus-stats-bar { + flex-wrap: wrap; + } + .corpus-stat { + flex: 1 1 50%; + border-right: 0; + border-bottom: 1px solid var(--line); + } + .corpus-categories { + grid-template-columns: 1fr; + } + .pipeline-flow { + flex-direction: column; + } + .pipeline-arrow { + display: none; + } +} + /* Method trace — overrides for #traceList rendered in rich mode */ .trace-list.is-rich { display: grid; diff --git a/corpus.php b/corpus.php new file mode 100644 index 0000000..e1cfa68 --- /dev/null +++ b/corpus.php @@ -0,0 +1,415 @@ + +
Corpus health
+bnl_chunks · 10.0.2.10:6333 · bnl-legal-search
+Coverage
+Barneloven, child custody (foreldreansvar), samvær, mediation (mekling), separation and divorce proceedings.
+Barnevernloven, omsorgsovertakelse, emergency care orders, foster placement, CPS (barnevernet) case law.
+Arbeidsmiljøloven, collective agreements (tariffavtaler), Arbeidsretten rulings, dismissal, sick leave obligations.
+NAV guidance on sykepenger, dagpenger, AAP, uføretrygd, alderspensjon, yrkesskade and social assistance.
+Skatteetaten's Skatte-ABC, binding advance rulings (BFU), Skatteklagenemnda decisions, income and capital tax.
+Sivilombudet reports, Forvaltningsloven, procedural rights, official complaints, Stortinget oversight.
+HTU (rental disputes), Finansklagenemnda, Forbrukertilsynet, Forbrukerrådet, Pakkereisenemnda decisions.
+UNE (Utlendingsnemnda) decisions, ECHR Art. 8 family rights, EMD case law, Hague Convention (cross-border child abduction).
+NOUer, Stortingsmeldinger, government white papers and regulatory guidance from Regjeringen.no.
+Data sources
| Source | Type | Category | Lang | Schedule | Status |
|---|---|---|---|---|---|
| Loading sources… | | | | | |
Software
+All routed via LiteLLM on Colin · 10.0.1.10:4000
nb-NO (Norway East) · Speaker diarization · VAD silence filter · beam size 5 · vocabulary presets (barnerett, mediation)
+10.0.1.11:11434 · All documents chunked and embedded before indexing; chunks stored in both Qdrant (vector) and MariaDB (keyword fallback)
+bnl_chunks · ~220 K vectors · 10.0.2.10:6333 · bnl-legal-search · Legal temporal reranking: legal_conservative — surfaces current versions first
How it works
+Show all: includes events the model is uncertain about (shown in grey). Hide low-confidence: only returns events the model is reasonably sure of.
+When checked, historical context dates are included (e.g. "born 30.07.2015", "met around 2011/2012"). Uncheck to extract only operational events and deadlines.
+