Expand corpus slices to 8: split ECHR/Hague, add Norwegian Courts, Bufdir, DBN Resources
- Replace combined echr_hague slice with echr (Art.8+9, HUDOC, NIM) and hague (INCADAT, cross-border abduction) as separate toggles; echr defaults ON, hague defaults OFF
- Add norwegian_courts slice: Domstol (src 5,26) + Rettspraksis.no (src 33, 482 docs)
- Add bufdir_guidance slice: Barneombudet (19), Bufdir (20), Statsforvalteren (31)
- Add dbn_resources slice: DBN website pages (flashcards, resource directory), defaults OFF
- Replace isWebsiteChunk() with slice-aware shouldExcludeChunk(): always strips EU AI Act chunks (EUR-Lex source 7 leaks through when Qdrant runs unconstrained) and DBN website pages unless dbn_resources slice is explicitly ON
- Update SLICE_DEFS in advocate.js and deep-research.js to match all 8 slices
- Backward compat: echr_hague key in incoming requests fans out to echr+hague

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+32
-4
@@ -44,7 +44,7 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
|
||||
<div class="dr-slice-section">
|
||||
<p class="control-label">Corpus slices</p>
|
||||
<p class="upload-hint">Select which slices the agent searches when building your case. All three legal slices are on by default.</p>
|
||||
<p class="upload-hint">Three core legal slices are on by default. Enable Hague Convention, Norwegian Courts, Bufdir guidance, or DBN Resources for more targeted research.</p>
|
||||
<div class="dr-slice-grid">
|
||||
<button type="button" class="adv-slice is-on" data-slice="family_core" aria-pressed="true">
|
||||
<div class="dr-slice__head">
|
||||
@@ -60,12 +60,33 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Barnevern, omsorgsovertakelse, foster care</p>
|
||||
</button>
|
||||
<button type="button" class="adv-slice is-on" data-slice="echr_hague" aria-pressed="true">
|
||||
<button type="button" class="adv-slice is-on" data-slice="echr" aria-pressed="true">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">ECHR and Hague</span>
|
||||
<span class="dr-slice__title">ECHR</span>
|
||||
<span class="dr-slice__badge">on</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Article 8, EMD, HCCH, cross-border family</p>
|
||||
<p class="dr-slice__tagline">Art. 8 family life, Art. 9 religion, HUDOC vs Norway</p>
|
||||
</button>
|
||||
<button type="button" class="adv-slice" data-slice="hague" aria-pressed="false">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">Hague Convention</span>
|
||||
<span class="dr-slice__badge">off</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">INCADAT, cross-border abduction, wrongful removal</p>
|
||||
</button>
|
||||
<button type="button" class="adv-slice" data-slice="norwegian_courts" aria-pressed="false">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">Norwegian Courts</span>
|
||||
<span class="dr-slice__badge">off</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Høyesterett + Lagmannsrett family decisions</p>
|
||||
</button>
|
||||
<button type="button" class="adv-slice" data-slice="bufdir_guidance" aria-pressed="false">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">Bufdir Guidance</span>
|
||||
<span class="dr-slice__badge">off</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Bufdir, Barneombudet, Statsforvalteren guidance</p>
|
||||
</button>
|
||||
<button type="button" class="adv-slice" data-slice="broader_legal" aria-pressed="false">
|
||||
<div class="dr-slice__head">
|
||||
@@ -74,6 +95,13 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Arbeidsmiljøloven, NOUer, statutes, government background</p>
|
||||
</button>
|
||||
<button type="button" class="adv-slice" data-slice="dbn_resources" aria-pressed="false">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">DBN Resources</span>
|
||||
<span class="dr-slice__badge">off</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Do Better Norge guides, flashcards, resource directory</p>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -10,8 +10,12 @@
|
||||
const SLICE_DEFS = [
|
||||
{ id: 'family_core', label: 'Family Law Core' },
|
||||
{ id: 'child_welfare', label: 'Child Welfare' },
|
||||
{ id: 'echr_hague', label: 'ECHR and Hague' },
|
||||
{ id: 'echr', label: 'ECHR' },
|
||||
{ id: 'hague', label: 'Hague Convention' },
|
||||
{ id: 'norwegian_courts', label: 'Norwegian Courts' },
|
||||
{ id: 'bufdir_guidance', label: 'Bufdir Guidance' },
|
||||
{ id: 'broader_legal', label: 'Broader Legal Support' },
|
||||
{ id: 'dbn_resources', label: 'DBN Resources' },
|
||||
];
|
||||
|
||||
const STEP_LABELS = [
|
||||
|
||||
@@ -10,8 +10,12 @@
|
||||
const SLICE_DEFS = [
|
||||
{ id: 'family_core', label: 'Family Law Core' },
|
||||
{ id: 'child_welfare', label: 'Child Welfare' },
|
||||
{ id: 'echr_hague', label: 'ECHR and Hague' },
|
||||
{ id: 'echr', label: 'ECHR' },
|
||||
{ id: 'hague', label: 'Hague Convention' },
|
||||
{ id: 'norwegian_courts', label: 'Norwegian Courts' },
|
||||
{ id: 'bufdir_guidance', label: 'Bufdir Guidance' },
|
||||
{ id: 'broader_legal', label: 'Broader Legal Support' },
|
||||
{ id: 'dbn_resources', label: 'DBN Resources' },
|
||||
];
|
||||
|
||||
const STEP_LABELS = [
|
||||
|
||||
+32
-4
@@ -24,7 +24,7 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
|
||||
<div class="dr-slice-section">
|
||||
<p class="control-label">Corpus slices</p>
|
||||
<p class="upload-hint">Select which slices of the Do Better Norge legal corpus the agent searches. Toggle Broader Legal on when the question reaches beyond family law.</p>
|
||||
<p class="upload-hint">Three core legal slices are on by default. Enable Hague Convention, Norwegian Courts, Bufdir guidance, or DBN Resources for more targeted research.</p>
|
||||
<div class="dr-slice-grid">
|
||||
<button type="button" class="dr-slice is-on" data-slice="family_core" aria-pressed="true">
|
||||
<div class="dr-slice__head">
|
||||
@@ -40,12 +40,33 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Barnevern, omsorgsovertakelse, foster care</p>
|
||||
</button>
|
||||
<button type="button" class="dr-slice is-on" data-slice="echr_hague" aria-pressed="true">
|
||||
<button type="button" class="dr-slice is-on" data-slice="echr" aria-pressed="true">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">ECHR and Hague</span>
|
||||
<span class="dr-slice__title">ECHR</span>
|
||||
<span class="dr-slice__badge">on</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Article 8, EMD, HCCH, cross-border family</p>
|
||||
<p class="dr-slice__tagline">Art. 8 family life, Art. 9 religion, HUDOC vs Norway</p>
|
||||
</button>
|
||||
<button type="button" class="dr-slice" data-slice="hague" aria-pressed="false">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">Hague Convention</span>
|
||||
<span class="dr-slice__badge">off</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">INCADAT, cross-border abduction, wrongful removal</p>
|
||||
</button>
|
||||
<button type="button" class="dr-slice" data-slice="norwegian_courts" aria-pressed="false">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">Norwegian Courts</span>
|
||||
<span class="dr-slice__badge">off</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Høyesterett + Lagmannsrett family decisions</p>
|
||||
</button>
|
||||
<button type="button" class="dr-slice" data-slice="bufdir_guidance" aria-pressed="false">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">Bufdir Guidance</span>
|
||||
<span class="dr-slice__badge">off</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Bufdir, Barneombudet, Statsforvalteren guidance</p>
|
||||
</button>
|
||||
<button type="button" class="dr-slice" data-slice="broader_legal" aria-pressed="false">
|
||||
<div class="dr-slice__head">
|
||||
@@ -54,6 +75,13 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Arbeidsmiljøloven, NOUer, statutes, government background</p>
|
||||
</button>
|
||||
<button type="button" class="dr-slice" data-slice="dbn_resources" aria-pressed="false">
|
||||
<div class="dr-slice__head">
|
||||
<span class="dr-slice__title">DBN Resources</span>
|
||||
<span class="dr-slice__badge">off</span>
|
||||
</div>
|
||||
<p class="dr-slice__tagline">Do Better Norge guides, flashcards, resource directory</p>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -232,7 +232,7 @@ final class DbnDeepResearchAgent
|
||||
}
|
||||
$rawCorpusCount += count($corpusChunks);
|
||||
foreach ($corpusChunks as $chunk) {
|
||||
if ($this->isWebsiteChunk($chunk)) {
|
||||
if ($this->shouldExcludeChunk($chunk, $sliceSelectionNormalized)) {
|
||||
$filteredOutCount++;
|
||||
continue;
|
||||
}
|
||||
@@ -666,20 +666,40 @@ PROMPT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Defensive post-filter: drop any chunk that smells like a marketing-website hit
|
||||
* (dobetternorge.no marketing pages have source_group 'website-primary'/'website-beta'
|
||||
* but the chunk payload only carries `source_name` — use a name+title regex check).
|
||||
* Post-retrieval filter: drop chunks that don't belong in a family-law research pass.
|
||||
*
|
||||
* EU AI Act (source_id=7, EUR-Lex) leaks through when the Qdrant search runs
|
||||
* unconstrained (e.g. empty shared_doc_ids on slice-resolution failure). It is
|
||||
* never relevant to Norwegian family law and is always excluded.
|
||||
*
|
||||
* DBN website pages (Resource Directory, Flashcards, etc.) are indexed with
|
||||
* NULL source_id and score artificially high on broad queries. They are excluded
|
||||
* unless the dbn_resources slice is explicitly ON.
|
||||
*/
|
||||
private function isWebsiteChunk(array $chunk): bool
|
||||
private function shouldExcludeChunk(array $chunk, array $activeSlices): bool
|
||||
{
|
||||
$name = strtolower((string)($chunk['source_name'] ?? ''));
|
||||
$title = strtolower((string)($chunk['document_title'] ?? $chunk['title'] ?? ''));
|
||||
if ($name === '') return false;
|
||||
// Trusted shared-corpus packages do not contain the word 'website'. Marketing
|
||||
// sources are explicitly labelled with source_group=website-primary/beta upstream.
|
||||
if (str_contains($name, 'website')) return true;
|
||||
if (str_contains($title, 'dobetternorge.no')) return true;
|
||||
if (preg_match('/^(homepage|landing|about |contact )/i', $title)) return true;
|
||||
$url = strtolower((string)($chunk['source_url'] ?? ''));
|
||||
|
||||
// EU AI Act — never relevant to family law research
|
||||
if (preg_match('/eu\s+ai\s+act|2024[\/.]1689|regulation.*\bai\b.*act/i', $title)) return true;
|
||||
if (str_contains($url, 'eur-lex') && preg_match('/2024.1689|ai.act/i', $url)) return true;
|
||||
|
||||
// DBN website pages — allow through only when dbn_resources slice is ON
|
||||
$isDbnPage = (
|
||||
str_contains($name, 'website')
|
||||
|| str_contains($title, 'dobetternorge.no')
|
||||
|| preg_match('/^(homepage|landing|about |contact )/i', $title)
|
||||
|| str_contains($title, 'resource directory')
|
||||
|| preg_match('/^flashcards?\s*[-–|]/i', $title)
|
||||
|| preg_match('/\|\s*do better norge\s*$/i', $title)
|
||||
|| preg_match('/[-–]\s*do better norge\s*$/i', $title)
|
||||
);
|
||||
if ($isDbnPage) {
|
||||
return !($activeSlices['dbn_resources'] ?? false);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user