e977bbb6b3
8-step NDJSON-streaming pipeline that compares two Barnevernet documents: classifies each doc, extracts parties and timelines, cross-references both for contradictions/deletions/additions, retrieves corpus legal context, and synthesises a full discrepancy report with tabbed UI. New files: DiscrepancyAgent.php, api/discrepancy.php, discrepancy.php, discrepancy.js. Modified: FreeTier.php (cost=4), i18n.php (all 4 langs), tool-svgs.php (DC icon), tools.css (dc-* component styles). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1048 lines
47 KiB
PHP
1048 lines
47 KiB
PHP
<?php
|
|
declare(strict_types=1);
|
|
|
|
require_once __DIR__ . '/bootstrap.php';
|
|
require_once __DIR__ . '/AzureOpenAiGateway.php';
|
|
|
|
/**
 * Document Discrepancy Finder Agent
 *
 * 8-step pipeline comparing two Barnevernet document versions:
 * 1. Classify Document A
 * 2. Classify Document B
 * 3. Extract parties from both documents
 * 4. Build timelines from both documents
 * 5. Cross-reference parties (added / removed / changed)
 * 6. Cross-reference timelines (contradictions / deletions / additions)
 * 7. Generate legal research sub-questions from discrepancies
 * 8. Corpus retrieval + synthesis of discrepancy report
 *
 * Steps 1-7 always use azure_mini. Only the synthesis part of step 8 uses the user's chosen engine.
 */
|
|
final class DbnDiscrepancyAgent
|
|
{
|
|
private const MAX_DOC_CHARS = 64000;
|
|
private const POOL_CAP = 20;
|
|
|
|
private DbnAzureOpenAiGateway $azure;
|
|
private array $stepTimings = [];
|
|
|
|
/**
 * Creates the agent around an Azure OpenAI gateway.
 *
 * @param DbnAzureOpenAiGateway|null $azure Gateway to use; when omitted a
 *                                          default-constructed gateway is created.
 */
public function __construct(?DbnAzureOpenAiGateway $azure = null)
{
    if (!$azure) {
        $azure = new DbnAzureOpenAiGateway();
    }

    $this->azure = $azure;
}
|
|
|
|
/**
 * Runs the full comparison pipeline over two uploaded documents.
 *
 * Only the first MAX_DOC_CHARS characters of each document are analysed.
 * When either document has no usable text (empty or whitespace only) the
 * request is handed to dbnToolsAbort() with HTTP status 422.
 *
 * Progress is reported through $emit (when given) using the events `step`,
 * `progress`, `doc_a_meta`, `doc_b_meta`, `parties_a`, `parties_b`,
 * `timeline_a`, `timeline_b`, `parties_diff`, `timeline_diff` and `subq`.
 * Every finished step is also appended to the returned `trace`.
 *
 * @param array         $fileA          {filename, text, chars, truncated}
 * @param array         $fileB          {filename, text, chars, truncated}
 * @param string        $engine         'azure_mini'|'azure_full'|'gpu'; any other value falls back to 'azure_mini'
 * @param string        $language       'en'|'no'|'uk'|'pl' (normalised with dbnToolsNormalizeUiLanguage)
 * @param array         $sliceSelection Corpus slice toggles; if none is enabled a default set of four slices is used
 * @param callable|null $emit           function(string $event, array $payload): void
 *
 * @return array Report payload: document names and metadata, per-document
 *               parties and timelines, both diffs, the synthesis fields,
 *               numbered sources, sub-questions, trace, trace metadata and
 *               the disclaimer.
 */
public function run(
    array $fileA,
    array $fileB,
    string $engine,
    string $language,
    array $sliceSelection,
    ?callable $emit = null
): array {
    $engine = in_array($engine, ['azure_mini', 'azure_full', 'gpu'], true) ? $engine : 'azure_mini';
    $language = dbnToolsNormalizeUiLanguage($language);

    $textA = mb_substr((string)($fileA['text'] ?? ''), 0, self::MAX_DOC_CHARS, 'UTF-8');
    $textB = mb_substr((string)($fileB['text'] ?? ''), 0, self::MAX_DOC_CHARS, 'UTF-8');

    // A whitespace-only extraction is as unusable as an empty one; reject it
    // before any model call is made.
    if (trim($textA) === '' || trim($textB) === '') {
        dbnToolsAbort('Could not extract text from one or both uploaded files.', 422, 'empty_document');
    }

    $nameA = (string)($fileA['filename'] ?? 'Document A');
    $nameB = (string)($fileB['filename'] ?? 'Document B');

    $client = dbnToolsRequireClient();
    $package = $this->requireFamilyPackage((int)$client['id']);

    dbnToolsBootCaveau();
    $aiPortalRoot = dbnToolsAiPortalRoot();
    require_once $aiPortalRoot . '/platform/includes/dbn_v6.php';

    $this->stepTimings = [];
    $trace = [];

    // Forwards an event to the caller's emitter, if one was supplied.
    $send = static function (string $event, array $payload) use ($emit): void {
        if ($emit) {
            $emit($event, $payload);
        }
    };
    // Records a finished step in the trace and announces it.
    $emitStep = static function (string $stepId, string $label, string $detail, string $status) use (&$trace, $send): void {
        $trace[] = ['label' => $label, 'detail' => $detail, 'status' => $status];
        $send('step', ['step' => $stepId, 'label' => $label, 'detail' => $detail, 'status' => $status]);
    };
    // Announces that a step has started; running steps are not added to the trace.
    $emitRunning = static function (string $stepId, string $label, string $detail = 'Running…') use ($send): void {
        $send('step', ['step' => $stepId, 'label' => $label, 'detail' => $detail, 'status' => 'running']);
    };
    // The diff buckets originate from model output; count() throws a TypeError
    // on non-countable values in PHP 8, so anything that is not an array counts as 0.
    $countList = static fn ($value): int => is_array($value) ? count($value) : 0;

    // ── STEP 1+2: Classify both documents ─────────────────────────────────
    $emitRunning('doc_classify', 'Classify documents', "Classifying {$nameA}…");
    $stepStart = microtime(true);
    $metaA = $this->classifyDoc($textA, $nameA, $language);
    $send('doc_a_meta', ['result' => $metaA]);
    $send('progress', ['detail' => "Classifying {$nameB}…"]);
    $metaB = $this->classifyDoc($textB, $nameB, $language);
    $send('doc_b_meta', ['result' => $metaB]);
    $this->stepTimings['doc_classify'] = $this->elapsedMs($stepStart);
    $emitStep(
        'doc_classify',
        'Classify documents',
        sprintf(
            '%s (%s) → %s (%s)',
            $metaA['doc_type'] ?? 'Document A',
            $metaA['doc_date'] ?? '?',
            $metaB['doc_type'] ?? 'Document B',
            $metaB['doc_date'] ?? '?'
        ),
        'complete'
    );

    // ── STEP 3: Extract parties from both documents ─────────────────────────
    $emitRunning('party_extract', 'Extract parties', "Extracting parties from {$nameA}…");
    $stepStart = microtime(true);
    $partiesA = $this->extractPartiesDoc($textA, $nameA, $language);
    $send('parties_a', ['parties' => $partiesA]);
    $send('progress', ['detail' => "Extracting parties from {$nameB}…"]);
    $partiesB = $this->extractPartiesDoc($textB, $nameB, $language);
    $send('parties_b', ['parties' => $partiesB]);
    $this->stepTimings['party_extract'] = $this->elapsedMs($stepStart);
    $emitStep(
        'party_extract',
        'Extract parties',
        sprintf('%d in %s · %d in %s', count($partiesA), $nameA, count($partiesB), $nameB),
        'complete'
    );

    // ── STEP 4: Build timelines from both documents ─────────────────────────
    $emitRunning('timeline_extract', 'Build timelines', "Building timeline from {$nameA}…");
    $stepStart = microtime(true);
    $timelineA = $this->extractTimelineDoc($textA, $nameA, $language);
    $send('timeline_a', ['events' => $timelineA]);
    $send('progress', ['detail' => "Building timeline from {$nameB}…"]);
    $timelineB = $this->extractTimelineDoc($textB, $nameB, $language);
    $send('timeline_b', ['events' => $timelineB]);
    $this->stepTimings['timeline_extract'] = $this->elapsedMs($stepStart);
    $emitStep(
        'timeline_extract',
        'Build timelines',
        sprintf(
            '%d events in %s · %d events in %s',
            count($timelineA),
            $nameA,
            count($timelineB),
            $nameB
        ),
        'complete'
    );

    // ── STEP 5: Cross-reference parties ────────────────────────────────────
    $emitRunning('cross_parties', 'Cross-reference parties', 'Comparing parties across both documents…');
    $stepStart = microtime(true);
    $partiesDiff = $this->crossReferenceParties($partiesA, $partiesB, $nameA, $nameB, $language);
    $send('parties_diff', ['result' => $partiesDiff]);
    $this->stepTimings['cross_parties'] = $this->elapsedMs($stepStart);
    $pRemoved = $countList($partiesDiff['in_a_only'] ?? null);
    $pAdded = $countList($partiesDiff['in_b_only'] ?? null);
    $pChanged = $countList($partiesDiff['changed_between'] ?? null);
    $emitStep(
        'cross_parties',
        'Cross-reference parties',
        sprintf('%d removed · %d added · %d changed', $pRemoved, $pAdded, $pChanged),
        'complete'
    );

    // ── STEP 6: Cross-reference timelines ─────────────────────────────────
    $emitRunning(
        'cross_timelines',
        'Cross-reference timelines',
        'Scanning for contradictions, deletions, and new events…'
    );
    $stepStart = microtime(true);
    $timelineDiff = $this->crossReferenceTimelines(
        $timelineA,
        $timelineB,
        $textA,
        $textB,
        $nameA,
        $nameB,
        $language
    );
    $send('timeline_diff', ['result' => $timelineDiff]);
    $this->stepTimings['cross_timelines'] = $this->elapsedMs($stepStart);
    $conflictCount = $countList($timelineDiff['conflicts'] ?? null);
    $deletedCount = $countList($timelineDiff['in_a_only'] ?? null);
    $addedCount = $countList($timelineDiff['in_b_only'] ?? null);
    $emitStep(
        'cross_timelines',
        'Cross-reference timelines',
        sprintf(
            '%d contradictions · %d deleted events · %d new events',
            $conflictCount,
            $deletedCount,
            $addedCount
        ),
        'complete'
    );

    // ── STEP 7: Generate research sub-questions ────────────────────────────
    $emitRunning(
        'sub_question_gen',
        'Research questions',
        'Generating legal research questions from discrepancies…'
    );
    $stepStart = microtime(true);
    $subQuestions = $this->generateDiscrepancySubQ(
        $partiesDiff,
        $timelineDiff,
        $metaA,
        $metaB,
        $language
    );
    $this->stepTimings['sub_question_gen'] = $this->elapsedMs($stepStart);
    $emitStep(
        'sub_question_gen',
        'Research questions',
        sprintf('%d legal research question(s) generated.', count($subQuestions)),
        'complete'
    );

    // ── STEP 8a: Corpus retrieval ───────────────────────────────────────────
    $emitRunning(
        'retrieval',
        'Retrieve legal context',
        sprintf('Hybrid vector + keyword search across %d question(s)…', count($subQuestions))
    );
    $stepStart = microtime(true);

    $sliceSelectionNormalized = dbnV6NormalizeSliceSelection($sliceSelection);
    if (!array_filter($sliceSelectionNormalized)) {
        // Nothing enabled: fall back to the default set of slices.
        $sliceSelectionNormalized = [
            'child_welfare' => true,
            'echr' => true,
            'family_core' => true,
            'bufdir_guidance' => true,
        ];
    }

    $ragDb = dbnToolsRagDb();
    try {
        $sharedDocIds = dbnV6ResolveSelectedDocIds($ragDb, $sliceSelectionNormalized);
    } catch (Throwable $e) {
        // Best effort: retrieval continues with an empty shared-document filter.
        error_log('Discrepancy slice resolve failed: ' . $e->getMessage());
        $sharedDocIds = [];
    }

    try {
        $rag = new ClientRagPipeline((int)$client['id'], 'http://10.0.1.10:4000', 60);
    } catch (Throwable $e) {
        // Log the cause like every other failure path so the abort is diagnosable.
        error_log('Discrepancy RAG init failed: ' . $e->getMessage());
        dbnToolsAbort('Could not initialise the retrieval pipeline.', 503, 'rag_init_failed');
    }

    $retrievalQueries = $subQuestions ?: [[
        'id' => 'q1',
        'question' => 'ECHR procedural requirements when Barnevernet changes facts between document versions',
        'rationale' => 'Fallback query',
    ]];

    $rawPool = [];
    $retrievalWarnings = 0;
    $rawCorpusCount = 0;

    foreach ($retrievalQueries as $idx => $sq) {
        $send('subq', [
            'index' => $idx + 1,
            'total' => count($retrievalQueries),
            'id' => $sq['id'],
            'question' => $sq['question'],
        ]);
        try {
            $corpusChunks = $rag->searchAll(
                $sq['question'],
                6,
                null,
                [
                    'search_private' => false,
                    'search_shared' => true,
                    'package_ids' => [(int)$package['id']],
                    'shared_doc_ids' => $sharedDocIds,
                    'chunk_limit' => 6,
                    'search_method' => 'hybrid',
                    'reranker_enabled' => true,
                    'include_beta_website' => false,
                    'include_primary_website' => false,
                ]
            );
        } catch (Throwable $e) {
            // A failed sub-question only downgrades the step to "warning".
            error_log('Discrepancy sub-Q retrieval failed: ' . $e->getMessage());
            $corpusChunks = [];
            $retrievalWarnings++;
        }
        $rawCorpusCount += count($corpusChunks);
        foreach ($corpusChunks as $chunk) {
            $rawPool[] = $this->normalizeCorpusChunk($chunk, $sq['id']);
        }
    }

    $merged = $this->mergeAndDedupe($rawPool, self::POOL_CAP);
    $this->hydrateSourceUrls($merged);
    // Only the first 12 merged chunks are numbered and passed to synthesis.
    $numberedSources = $this->numberSources(array_slice($merged, 0, 12));
    $this->stepTimings['retrieval'] = $this->elapsedMs($stepStart);
    $retrievalStatus = $retrievalWarnings > 0 ? 'warning' : 'complete';
    $emitStep(
        'retrieval',
        'Retrieve legal context',
        sprintf(
            '%d sub-Q(s) → %d corpus chunks → %d unique sources.',
            count($retrievalQueries),
            $rawCorpusCount,
            count($numberedSources)
        ),
        $retrievalStatus
    );

    // ── STEP 8b: Synthesis ──────────────────────────────────────────────────
    $engineLabel = match ($engine) {
        'azure_full' => 'Azure gpt-4o',
        'gpu' => 'GPU qwen2.5:14b',
        default => 'Azure gpt-4o-mini',
    };
    $emitRunning(
        'synthesis',
        'Synthesize report',
        sprintf('Synthesising discrepancy report with %s…', $engineLabel)
    );
    $stepStart = microtime(true);
    $synthesis = $this->synthesize(
        $metaA,
        $metaB,
        $nameA,
        $nameB,
        $partiesDiff,
        $timelineDiff,
        $numberedSources,
        $engine,
        $language
    );
    $this->stepTimings['synthesis'] = $this->elapsedMs($stepStart);
    $confidence = $this->citationConfidence($numberedSources);
    $emitStep(
        'synthesis',
        'Synthesize report',
        sprintf(
            'Report complete · %d source(s) · %s confidence.',
            count($numberedSources),
            $confidence
        ),
        'complete'
    );

    $synJson = $synthesis['json'];
    return [
        'tool' => 'discrepancy',
        'language' => $language,
        'doc_a_name' => $nameA,
        'doc_b_name' => $nameB,
        'doc_a_meta' => $metaA,
        'doc_b_meta' => $metaB,
        'parties_a' => $partiesA,
        'parties_b' => $partiesB,
        'timeline_a' => $timelineA,
        'timeline_b' => $timelineB,
        'parties_diff' => $partiesDiff,
        'timeline_diff' => $timelineDiff,
        'headline_finding' => (string)($synJson['headline_finding'] ?? ''),
        'critical_discrepancies' => is_array($synJson['critical_discrepancies'] ?? null)
            ? $synJson['critical_discrepancies'] : [],
        'recommended_actions' => is_array($synJson['recommended_actions'] ?? null)
            ? $synJson['recommended_actions'] : [],
        'what_remains_uncertain' => is_array($synJson['what_remains_uncertain'] ?? null)
            ? $synJson['what_remains_uncertain'] : [],
        'sources' => $numberedSources,
        'sub_questions' => $subQuestions,
        'citation_confidence' => $confidence,
        'trace' => $trace,
        'trace_metadata' => [
            'source_count' => count($numberedSources),
            'sub_question_count' => count($retrievalQueries),
            'conflict_count' => $conflictCount,
            'deleted_count' => $deletedCount,
            'added_count' => $addedCount,
            'deployment' => $synthesis['deploy_label'],
            'engine_used' => $engine,
            'citation_confidence' => $confidence,
            'elapsed_ms_per_step' => $this->stepTimings,
        ],
        'disclaimer' => dbnToolsDisclaimer($language),
    ];
}
|
|
|
|
// ── Per-document classification ────────────────────────────────────────────
|
|
|
|
/**
 * Asks the model for basic metadata about one document.
 *
 * Only the first 6000 characters of the document are sent. The result always
 * contains exactly the keys doc_type, doc_date, issuing_authority,
 * reference_number and child_info. Model output is filtered to those keys and
 * to non-empty string/number values, so unexpected keys or nested structures
 * can never reach the callers that interpolate these fields into text.
 *
 * @param string $docText  Document text.
 * @param string $label    Label used in the prompt, in the log message and as fallback doc_type.
 * @param string $language UI language code, resolved with dbnToolsLanguageName().
 *
 * @return array Metadata; fields the model did not provide keep their default
 *               (doc_type = $label, everything else null). On any failure the
 *               defaults are returned and the error is logged.
 */
private function classifyDoc(string $docText, string $label, string $language): array
{
    $locale = dbnToolsLanguageName($language);
    $excerpt = mb_substr($docText, 0, 6000, 'UTF-8');

    $prompt = <<<PROMPT
    You are analysing a Norwegian child welfare (Barnevernet) document labelled "{$label}".
    Extract metadata. Return JSON only in {$locale}:
    {
    "doc_type": "Document type, e.g. Bekymringsmelding, Vedtak, Rapport, Omsorgsovertakelse, Fylkesnemnda-kjennelse",
    "doc_date": "Primary date ISO 8601 (YYYY-MM-DD) or null",
    "issuing_authority": "Issuing authority name or null",
    "reference_number": "Case/reference number or null",
    "child_info": "Brief description of child(ren) — anonymise if redacted"
    }
    Use null for missing fields. Do not invent information.

    Document text (first 6000 chars):
    {$excerpt}
    PROMPT;

    $default = [
        'doc_type' => $label,
        'doc_date' => null,
        'issuing_authority' => null,
        'reference_number' => null,
        'child_info' => null,
    ];

    try {
        $raw = $this->azure->chatText([
            ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
            ['role' => 'user', 'content' => $prompt],
        ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 400, 'timeout' => 30]);
        $json = $this->azure->decodeJsonObject($raw);

        if (is_array($json)) {
            $meta = $default;
            foreach (array_keys($default) as $field) {
                $value = $json[$field] ?? null;
                // Arrays, objects, booleans and null are not usable metadata values.
                if (!is_string($value) && !is_int($value) && !is_float($value)) {
                    continue;
                }
                $value = trim((string)$value);
                if ($value !== '') {
                    $meta[$field] = $value;
                }
            }

            return $meta;
        }
    } catch (Throwable $e) {
        // Classification is best effort; the pipeline continues with defaults.
        error_log('Discrepancy classifyDoc failed (' . $label . '): ' . $e->getMessage());
    }

    return $default;
}
|
|
|
|
// ── Per-document party extraction ──────────────────────────────────────────
|
|
|
|
/**
 * Asks the model to list the named parties of one document.
 *
 * The first 20000 characters of the document are sent. Two reply shapes are
 * accepted: an object with a "parties" array, or a bare list whose first
 * element has a "name". At most 20 entries are returned.
 *
 * @param string $docText  Document text.
 * @param string $label    Label used in the prompt and in the log message.
 * @param string $language UI language code, resolved with dbnToolsLanguageName().
 *
 * @return array Party entries as returned by the model; empty when the reply
 *               is unusable or the call fails (the failure is logged).
 */
private function extractPartiesDoc(string $docText, string $label, string $language): array
{
    $languageName = dbnToolsLanguageName($language);
    $body = mb_substr($docText, 0, 20000, 'UTF-8');

    $instructions = <<<PROMPT
    You are analysing a Norwegian child welfare (Barnevernet) document labelled "{$label}".
    Identify ALL named parties — every person or institution referred to by name or title.

    Respond in {$languageName}. Return JSON with key "parties" containing an array. Each object:
    - "name": full name or institution name
    - "role": e.g. Biological mother, Caseworker, Leder, Barnevernvakta, Politi, Sakkyndig, Talsperson
    - "organization": employer/institution or null
    - "relationship_to_child": relationship to the child or null

    Rules: Include all named people and institutions. Maximum 20 parties.

    Document text:
    {$body}
    PROMPT;

    $messages = [
        ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
        ['role' => 'user', 'content' => $instructions],
    ];
    $options = ['json' => true, 'temperature' => 0.05, 'max_tokens' => 2000, 'timeout' => 45];

    $found = [];
    try {
        $decoded = $this->azure->decodeJsonObject($this->azure->chatText($messages, $options));
        if (is_array($decoded)) {
            if (is_array($decoded['parties'] ?? null)) {
                // Expected shape: {"parties": [...]}.
                $found = array_slice($decoded['parties'], 0, 20);
            } elseif (isset($decoded[0]['name'])) {
                // Tolerated shape: the model returned the list itself.
                $found = array_slice($decoded, 0, 20);
            }
        }
    } catch (Throwable $error) {
        error_log('Discrepancy extractPartiesDoc failed (' . $label . '): ' . $error->getMessage());
    }

    return $found;
}
|
|
|
|
// ── Per-document timeline extraction ───────────────────────────────────────
|
|
|
|
/**
 * Asks the model to build a chronological event list for one document.
 *
 * The first 20000 characters of the document are sent. The reply must be an
 * object with an "events" array; at most 40 events are returned.
 *
 * @param string $docText  Document text.
 * @param string $label    Label used in the prompt and in the log message.
 * @param string $language UI language code, resolved with dbnToolsLanguageName().
 *
 * @return array Event entries as returned by the model; empty when the reply
 *               is unusable or the call fails (the failure is logged).
 */
private function extractTimelineDoc(string $docText, string $label, string $language): array
{
    $languageName = dbnToolsLanguageName($language);
    $body = mb_substr($docText, 0, 20000, 'UTF-8');

    $instructions = <<<PROMPT
    Build a chronological timeline from this Norwegian Barnevernet document labelled "{$label}" in {$languageName}.

    Extract ALL dates and temporal references — visits, meetings, decisions, phone calls, assessments.

    Norwegian date formats to recognise:
    - DD.MM.YYYY, DD.MM.YY, D.M.YY, DD.MM. (infer year from context)
    - Times: kl. HH:MM, klokken HH:MM
    - Two-digit years: 20YY

    Barnevernet events of HIGH significance:
    - Akuttvedtak (§4-6, §4-25), Omsorgsovertakelse (§4-12), police involvement
    - Formal vedtak or kjennelse, Fylkesnemnda hearing, Forhandlingsmøte
    - Contact (samvær) reduced or denied, foster/institution placement
    - Deadline breaches (§4-2 not processed within 7 days, investigation not opened within 6 weeks)

    For each event:
    - "date": ISO 8601 if determinable, else best-effort description
    - "time_of_day": HH:MM or null
    - "actor": person/institution involved
    - "action": ≤ 80 chars describing what happened
    - "significance": "high"|"medium"|"low"

    Sort chronologically. Maximum 40 events. Return JSON: {"events":[...]}

    Document text:
    {$body}
    PROMPT;

    $conversation = [
        ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
        ['role' => 'user', 'content' => $instructions],
    ];
    $callOptions = ['json' => true, 'temperature' => 0.05, 'max_tokens' => 4000, 'timeout' => 55];

    try {
        $reply = $this->azure->chatText($conversation, $callOptions);
        $parsed = $this->azure->decodeJsonObject($reply);
        $events = is_array($parsed) ? ($parsed['events'] ?? null) : null;
        if (is_array($events)) {
            return array_slice($events, 0, 40);
        }
    } catch (Throwable $error) {
        error_log('Discrepancy extractTimelineDoc failed (' . $label . '): ' . $error->getMessage());
    }

    return [];
}
|
|
|
|
// ── Cross-reference: parties ───────────────────────────────────────────────
|
|
|
|
/**
 * Asks the model to compare the party lists of both documents.
 *
 * Both lists are JSON-encoded into the prompt. The result always has the keys
 * in_a_only, in_b_only and changed_between, and each of them is always a list
 * of arrays: any bucket the model returns in another shape is replaced by an
 * empty list and non-array entries are dropped, so callers can count() and
 * index the buckets safely.
 *
 * @param array  $partiesA Parties extracted from document A.
 * @param array  $partiesB Parties extracted from document B.
 * @param string $nameA    Display name of document A (used in the prompt).
 * @param string $nameB    Display name of document B (used in the prompt).
 * @param string $language UI language code, resolved with dbnToolsLanguageName().
 *
 * @return array {in_a_only: array[], in_b_only: array[], changed_between: array[]};
 *               all buckets are empty when there is nothing to compare, when
 *               the reply is unusable or when the call fails (logged).
 */
private function crossReferenceParties(
    array $partiesA,
    array $partiesB,
    string $nameA,
    string $nameB,
    string $language
): array {
    $default = ['in_a_only' => [], 'in_b_only' => [], 'changed_between' => []];

    // With no parties on either side there is nothing to compare, and the
    // prompt forbids inventing parties, so skip the model call entirely.
    if ($partiesA === [] && $partiesB === []) {
        return $default;
    }

    $locale = dbnToolsLanguageName($language);
    $partiesAJson = json_encode($partiesA, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT);
    $partiesBJson = json_encode($partiesB, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT);

    $prompt = <<<PROMPT
    You are comparing parties across two versions of Norwegian Barnevernet documents.

    Document A ({$nameA}) parties:
    {$partiesAJson}

    Document B ({$nameB}) parties:
    {$partiesBJson}

    Compare the two party lists and find:
    1. Parties in A but absent from B — people/institutions removed from the later version
    2. New parties in B not in A — new people/institutions introduced in the later version
    3. The same person appearing in both but with a changed role, description, or relationship

    For each entry explain the potential legal significance in a Barnevernet case context.

    Return JSON only in {$locale}:
    {
    "in_a_only": [
    {"name":"...","role_in_a":"...","significance":"One sentence why their removal may matter (≤ 130 chars)"}
    ],
    "in_b_only": [
    {"name":"...","role_in_b":"...","significance":"One sentence why their addition may matter (≤ 130 chars)"}
    ],
    "changed_between": [
    {"name":"...","in_a":"Role/details in A","in_b":"Role/details in B","significance":"One sentence on the change (≤ 130 chars)"}
    ]
    }

    Rules:
    - Only flag genuine discrepancies. Match the same person with minor name spelling variations.
    - Do not invent parties not present in the data above.
    - If no discrepancies of a type exist, return an empty array.
    PROMPT;

    try {
        $raw = $this->azure->chatText([
            ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
            ['role' => 'user', 'content' => $prompt],
        ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 2000, 'timeout' => 50]);
        $json = $this->azure->decodeJsonObject($raw);

        if (is_array($json)) {
            $result = $default;
            foreach (array_keys($default) as $bucket) {
                if (!is_array($json[$bucket] ?? null)) {
                    // Missing, null or scalar bucket: keep the empty default.
                    continue;
                }
                $result[$bucket] = array_values(array_filter(
                    $json[$bucket],
                    static fn ($entry): bool => is_array($entry)
                ));
            }

            return $result;
        }
    } catch (Throwable $e) {
        // Best effort: the pipeline continues with an empty diff.
        error_log('Discrepancy crossReferenceParties failed: ' . $e->getMessage());
    }

    return $default;
}
|
|
|
|
// ── Cross-reference: timelines ─────────────────────────────────────────────
|
|
|
|
/**
 * Asks the model to compare the timelines of both documents.
 *
 * The prompt contains the first 30 events of each timeline and the first 3000
 * characters of each document. The result always has the keys conflicts,
 * in_a_only, in_b_only, procedural_gaps and narrative_shifts:
 *  - the four list buckets are always lists of arrays (wrongly shaped buckets
 *    become empty, non-array entries are dropped);
 *  - the limits stated in the prompt (15 conflicts, 10 in_a_only,
 *    10 in_b_only) are enforced here as well;
 *  - narrative_shifts always carries summary (string), new_in_b (list) and
 *    removed_from_b (list), even when the model omits some of them.
 *
 * @param array  $timelineA Events extracted from document A.
 * @param array  $timelineB Events extracted from document B.
 * @param string $textA     Text of document A.
 * @param string $textB     Text of document B.
 * @param string $nameA     Display name of document A (used in the prompt).
 * @param string $nameB     Display name of document B (used in the prompt).
 * @param string $language  UI language code, resolved with dbnToolsLanguageName().
 *
 * @return array Normalised diff; the empty default structure is returned when
 *               the reply is unusable or the call fails (logged).
 */
private function crossReferenceTimelines(
    array $timelineA,
    array $timelineB,
    string $textA,
    string $textB,
    string $nameA,
    string $nameB,
    string $language
): array {
    $locale = dbnToolsLanguageName($language);
    $tlAJson = json_encode(array_slice($timelineA, 0, 30), JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT);
    $tlBJson = json_encode(array_slice($timelineB, 0, 30), JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT);
    $excerptA = mb_substr($textA, 0, 3000, 'UTF-8');
    $excerptB = mb_substr($textB, 0, 3000, 'UTF-8');

    $prompt = <<<PROMPT
    You are comparing timelines from two versions of Norwegian Barnevernet documents to find legally significant discrepancies.

    Document A ({$nameA}) timeline:
    {$tlAJson}

    Document B ({$nameB}) timeline:
    {$tlBJson}

    Source excerpt from Document A:
    {$excerptA}

    Source excerpt from Document B:
    {$excerptB}

    Find all discrepancies:
    1. CONTRADICTIONS — same date/event described differently between A and B
    2. DELETIONS — events in A that are absent or missing from B (removed facts)
    3. ADDITIONS — events in B not present in A (new allegations or narrative elements)
    4. DATE SHIFTS — same event but with a different date in A vs B
    5. PROCEDURAL GAPS — actions referenced but not documented in either version

    Also identify overall NARRATIVE SHIFTS — how the framing changed between A and B.

    For significance: "high" (changes facts central to the decision), "medium" (changes context or procedure), "low" (minor wording).

    Return JSON only in {$locale}:
    {
    "conflicts": [
    {
    "date_a": "YYYY-MM-DD or description or null",
    "date_b": "YYYY-MM-DD or description or null",
    "doc_a_says": "What Document A says about this event",
    "doc_b_says": "What Document B says about this event",
    "conflict_type": "contradiction|deletion|addition|date_shift",
    "significance": "high|medium|low",
    "legal_significance": "One sentence why this matters legally (≤ 150 chars)"
    }
    ],
    "in_a_only": [
    {
    "date": "...",
    "actor": "...",
    "description": "Event in A not present in B",
    "significance": "high|medium|low",
    "legal_significance": "..."
    }
    ],
    "in_b_only": [
    {
    "date": "...",
    "actor": "...",
    "description": "New event in B not present in A",
    "significance": "high|medium|low",
    "legal_significance": "..."
    }
    ],
    "procedural_gaps": [
    {"gap": "Description of the gap", "significance": "high|medium|low"}
    ],
    "narrative_shifts": {
    "summary": "1-2 sentence description of how the overall narrative changed between A and B",
    "new_in_b": ["Key new allegation or narrative element added in B"],
    "removed_from_b": ["Key fact or narrative element present in A but absent in B"]
    }
    }

    Rules:
    - Only report genuine discrepancies grounded in the data above. Do not invent events.
    - If no discrepancies of a type exist, return an empty array.
    - Maximum 15 conflicts, 10 in_a_only, 10 in_b_only.
    PROMPT;

    $default = [
        'conflicts' => [],
        'in_a_only' => [],
        'in_b_only' => [],
        'procedural_gaps' => [],
        'narrative_shifts' => ['summary' => '', 'new_in_b' => [], 'removed_from_b' => []],
    ];
    // Per-bucket limits from the prompt; null means "no limit stated".
    $caps = ['conflicts' => 15, 'in_a_only' => 10, 'in_b_only' => 10, 'procedural_gaps' => null];

    try {
        $raw = $this->azure->chatText([
            ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
            ['role' => 'user', 'content' => $prompt],
        ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 4000, 'timeout' => 90]);
        $json = $this->azure->decodeJsonObject($raw);

        if (is_array($json)) {
            $result = $default;

            foreach ($caps as $bucket => $cap) {
                if (!is_array($json[$bucket] ?? null)) {
                    // Missing, null or scalar bucket: keep the empty default.
                    continue;
                }
                $rows = array_values(array_filter(
                    $json[$bucket],
                    static fn ($row): bool => is_array($row)
                ));
                $result[$bucket] = $cap === null ? $rows : array_slice($rows, 0, $cap);
            }

            $shifts = $json['narrative_shifts'] ?? null;
            if (is_array($shifts)) {
                // The three known keys are type-checked; the array union keeps
                // any additional keys the model supplied.
                $result['narrative_shifts'] = [
                    'summary' => is_string($shifts['summary'] ?? null) ? $shifts['summary'] : '',
                    'new_in_b' => is_array($shifts['new_in_b'] ?? null)
                        ? array_values($shifts['new_in_b']) : [],
                    'removed_from_b' => is_array($shifts['removed_from_b'] ?? null)
                        ? array_values($shifts['removed_from_b']) : [],
                ] + $shifts;
            }

            return $result;
        }
    } catch (Throwable $e) {
        // Best effort: the pipeline continues with an empty diff.
        error_log('Discrepancy crossReferenceTimelines failed: ' . $e->getMessage());
    }

    return $default;
}
|
|
|
|
// ── Sub-question generation ────────────────────────────────────────────────
|
|
|
|
/**
 * Turns the detected discrepancies into legal research questions.
 *
 * A short summary (counts per discrepancy type) and one example per type are
 * placed in the prompt. Both diffs come from model output, so every bucket is
 * first reduced to a list of arrays and every field is read with a fallback;
 * malformed diffs therefore cannot raise warnings or errors while the prompt
 * is built (that code runs outside the try block).
 *
 * @param array  $partiesDiff  Result of crossReferenceParties().
 * @param array  $timelineDiff Result of crossReferenceTimelines().
 * @param array  $metaA        Metadata of document A (doc_type, issuing_authority are used).
 * @param array  $metaB        Metadata of document B (doc_type is used).
 * @param string $language     UI language code, resolved with dbnToolsLanguageName().
 *
 * @return array Up to 5 entries of {id, question, rationale} (all strings).
 *               When the model yields no usable question, or the call fails
 *               (logged), four fixed fallback questions are returned, so the
 *               result is never empty.
 */
private function generateDiscrepancySubQ(
    array $partiesDiff,
    array $timelineDiff,
    array $metaA,
    array $metaB,
    string $language
): array {
    $locale = dbnToolsLanguageName($language);

    // Reduces a model-supplied bucket to a zero-indexed list of arrays.
    $rows = static fn ($value): array => is_array($value)
        ? array_values(array_filter($value, static fn ($row): bool => is_array($row)))
        : [];
    // Reads a scalar field as string, or the fallback when it is missing or not scalar.
    $text = static fn (array $row, string $key, string $fallback = ''): string =>
        is_scalar($row[$key] ?? null) ? (string)$row[$key] : $fallback;

    $removedParties = $rows($partiesDiff['in_a_only'] ?? null);
    $addedParties = $rows($partiesDiff['in_b_only'] ?? null);
    $changedParties = $rows($partiesDiff['changed_between'] ?? null);

    $parts = [];
    $pRemove = count($removedParties);
    $pAdd = count($addedParties);
    $pChange = count($changedParties);
    if ($pRemove) {
        $parts[] = "{$pRemove} parties removed between versions";
    }
    if ($pAdd) {
        $parts[] = "{$pAdd} new parties added in later version";
    }
    if ($pChange) {
        $parts[] = "{$pChange} parties changed between versions";
    }

    $conflicts = $rows($timelineDiff['conflicts'] ?? null);
    $deleted = $rows($timelineDiff['in_a_only'] ?? null);
    $added = $rows($timelineDiff['in_b_only'] ?? null);
    $procGaps = $rows($timelineDiff['procedural_gaps'] ?? null);
    if ($conflicts) {
        $parts[] = count($conflicts) . ' timeline contradictions';
    }
    if ($deleted) {
        $parts[] = count($deleted) . ' events deleted from later version';
    }
    if ($added) {
        $parts[] = count($added) . ' new events added in later version';
    }
    if ($procGaps) {
        $parts[] = count($procGaps) . ' procedural gaps identified';
    }

    $summary = $parts ? implode(', ', $parts) . '.' : 'Some discrepancies found.';
    $docTypeA = $text($metaA, 'doc_type', 'Document A');
    $docTypeB = $text($metaB, 'doc_type', 'Document B');
    $authA = $text($metaA, 'issuing_authority', 'the municipality');

    // One example per discrepancy type, taken from the first entry of each bucket.
    $exampleFacts = '';
    if (!empty($conflicts[0])) {
        $saysA = $text($conflicts[0], 'doc_a_says');
        $saysB = $text($conflicts[0], 'doc_b_says');
        $exampleFacts .= "- Contradiction: A says '{$saysA}', B says '{$saysB}'\n";
    }
    if (!empty($deleted[0])) {
        $description = $text($deleted[0], 'description');
        $exampleFacts .= "- Deleted from B: '{$description}'\n";
    }
    if (!empty($added[0])) {
        $description = $text($added[0], 'description');
        $exampleFacts .= "- New in B: '{$description}'\n";
    }
    if (!empty($procGaps[0])) {
        $gap = $text($procGaps[0], 'gap');
        $exampleFacts .= "- Procedural gap: '{$gap}'\n";
    }
    if (!empty($changedParties[0])) {
        $partyName = $text($changedParties[0], 'name');
        $inA = $text($changedParties[0], 'in_a');
        $inB = $text($changedParties[0], 'in_b');
        $exampleFacts .= "- Party change: {$partyName}: '{$inA}' → '{$inB}'\n";
    }

    $prompt = <<<PROMPT
    A family uploaded two Barnevernet documents for comparison:
    - Document A: {$docTypeA} from {$authA}
    - Document B: {$docTypeB}
    - Discrepancies found: {$summary}

    Most significant examples:
    {$exampleFacts}

    Generate exactly 4 specific legal research questions targeting the legal significance of these discrepancies.

    Focus areas:
    1. ECHR Article 8 procedural fairness when Barnevernet changes factual narrative between document versions
    2. Barnevernloven requirements for changing the stated basis for an intervention
    3. Procedural obligations when new allegations are introduced after initial filing
    4. Documentation and evidence standards (Bufdir/Statsforvalter guidance)

    Make each question specific to the discrepancies above — embed actual details.

    Return JSON only in {$locale}:
    {
    "sub_questions": [
    {"id":"q1","question":"...","rationale":"Why this angle matters (≤ 100 chars)"}
    ]
    }
    PROMPT;

    try {
        $raw = $this->azure->chatText([
            ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
            ['role' => 'user', 'content' => $prompt],
        ], ['json' => true, 'temperature' => 0.15, 'max_tokens' => 1000, 'timeout' => 40]);
        $json = $this->azure->decodeJsonObject($raw);

        if (is_array($json) && is_array($json['sub_questions'] ?? null)) {
            $sqs = [];
            foreach (array_slice($json['sub_questions'], 0, 5) as $sq) {
                if (!is_array($sq) || empty($sq['id']) || empty($sq['question'])) {
                    continue;
                }
                // Nested values cannot be used as an id or a search query.
                if (!is_scalar($sq['id']) || !is_scalar($sq['question'])) {
                    continue;
                }
                $sqs[] = [
                    'id' => (string)$sq['id'],
                    'question' => (string)$sq['question'],
                    'rationale' => $text($sq, 'rationale'),
                ];
            }
            if ($sqs) {
                return $sqs;
            }
        }
    } catch (Throwable $e) {
        // Best effort: fall through to the fixed questions below.
        error_log('Discrepancy generateDiscrepancySubQ failed: ' . $e->getMessage());
    }

    return [
        ['id' => 'q1', 'question' => 'What does ECHR Article 8 require when Barnevernet changes the factual basis of an intervention between document versions?', 'rationale' => 'ECHR procedural fairness'],
        ['id' => 'q2', 'question' => 'Under Barnevernloven, can new allegations be introduced after the initial care order application has been filed?', 'rationale' => 'New allegations validity'],
        ['id' => 'q3', 'question' => 'What are Barnevernloven documentation requirements for home visits and assessments?', 'rationale' => 'Documentation obligations'],
        ['id' => 'q4', 'question' => 'What Bufdir guidance exists on evidence standards and investigation quality for Barnevernet interventions?', 'rationale' => 'Evidence standards'],
    ];
}
|
|
|
|
// ── Synthesis ──────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Final pipeline step: turn the computed diffs plus numbered corpus sources into the
 * structured discrepancy report.
 *
 * When no corpus sources are supplied, a canned report is returned and no LLM is called.
 * Otherwise one prompt is assembled from document metadata, the (size-capped) diffs and
 * the numbered sources, and sent to the engine selected by $engine.
 *
 * If the reply cannot be decoded into an object carrying a non-empty `headline_finding`,
 * the raw reply text is returned as the headline with empty lists for the other fields.
 *
 * @param array  $metaA           Classification of document A (doc_type, doc_date, issuing_authority are read).
 * @param array  $metaB           Classification of document B (same keys).
 * @param string $nameA           Filename of document A; used when doc_type is missing.
 * @param string $nameB           Filename of document B; used when doc_type is missing.
 * @param array  $partiesDiff     Reads in_a_only, in_b_only, changed_between.
 * @param array  $timelineDiff    Reads conflicts, in_a_only, in_b_only, procedural_gaps, narrative_shifts.
 * @param array  $numberedSources Sources carrying n, title, package_or_corpus, excerpt (plus optional
 *                                section, authority_label, authority_type).
 * @param string $engine          'gpu', 'azure_full', or anything else for the default Azure deployment.
 * @param string $language        Language code, resolved to a display name via dbnToolsLanguageName().
 *
 * @return array{json: array, deploy_label: string}
 */
private function synthesize(
    array $metaA,
    array $metaB,
    string $nameA,
    string $nameB,
    array $partiesDiff,
    array $timelineDiff,
    array $numberedSources,
    string $engine,
    string $language
): array {
    $locale = dbnToolsLanguageName($language);
    $sourceCount = count($numberedSources);

    // Human-readable engine label handed back with the report.
    if ($engine === 'gpu') {
        $deployLabel = 'GPU (cuttlefish)';
    } elseif ($engine === 'azure_full') {
        $deployLabel = 'gpt-4o';
    } else {
        $deployLabel = $this->azure->chatDeployment();
    }

    // Without corpus sources there is nothing to cite, so skip the LLM entirely.
    if ($sourceCount === 0) {
        $cannedReport = [
            'headline_finding' => 'No corpus sources retrieved. Discrepancies were identified but could not be cross-referenced with the legal corpus for legal significance assessment.',
            'critical_discrepancies' => [],
            'recommended_actions' => ['Enable corpus slices (Child Welfare, ECHR, Family Core, Bufdir Guidance) and re-run for legal significance mapping.'],
            'what_remains_uncertain' => ['Legal significance of each discrepancy — re-run with corpus slices enabled.'],
        ];

        return ['json' => $cannedReport, 'deploy_label' => $deployLabel];
    }

    // One text block per numbered source, separated by blank lines.
    $sourcesText = implode("\n\n", array_map(
        static fn ($src): string => sprintf(
            "[%d] %s%s\n Corpus: %s | Authority: %s\n Excerpt: %s",
            $src['n'],
            $src['title'],
            !empty($src['section']) ? ' — ' . $src['section'] : '',
            $src['package_or_corpus'],
            $src['authority_label'] ?? ($src['authority_type'] ?? 'n/a'),
            $src['excerpt']
        ),
        $numberedSources
    ));

    // Timeline lists are capped to bound the prompt size; the remaining lists are passed whole.
    $discrepancyPayload = [
        'timeline_conflicts' => array_slice($timelineDiff['conflicts'] ?? [], 0, 10),
        'events_deleted_from_b' => array_slice($timelineDiff['in_a_only'] ?? [], 0, 8),
        'events_added_in_b' => array_slice($timelineDiff['in_b_only'] ?? [], 0, 8),
        'procedural_gaps' => array_slice($timelineDiff['procedural_gaps'] ?? [], 0, 5),
        'narrative_shifts' => $timelineDiff['narrative_shifts'] ?? [],
        'parties_removed' => $partiesDiff['in_a_only'] ?? [],
        'parties_added' => $partiesDiff['in_b_only'] ?? [],
        'parties_changed' => $partiesDiff['changed_between'] ?? [],
    ];
    $discrepancyJson = json_encode($discrepancyPayload, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT);

    $docTypeA = $metaA['doc_type'] ?? $nameA;
    $docDateA = $metaA['doc_date'] ?? '?';
    $docTypeB = $metaB['doc_type'] ?? $nameB;
    $docDateB = $metaB['doc_date'] ?? '?';
    $authority = $metaA['issuing_authority'] ?? $metaB['issuing_authority'] ?? 'the authority';

    $prompt = <<<PROMPT
        You are Do Better Norge Legal Tools evaluating discrepancies between two Barnevernet document versions.

        HALLUCINATION RULES:
        - Only cite statute sections (§), ECHR articles, and case names that appear verbatim in the corpus sources below.
        - Do not cite from training memory. Every legal citation must use [n] notation.

        == DOCUMENTS ==
        Document A: {$docTypeA} · {$docDateA} · {$authority}
        Document B: {$docTypeB} · {$docDateB}

        == DISCREPANCIES IDENTIFIED ==
        {$discrepancyJson}

        == CORPUS SOURCES ({$sourceCount} numbered — cite as [n]) ==
        {$sourcesText}

        == OUTPUT ==
        Return valid JSON only. No markdown fences.

        {
        "headline_finding": "2-3 sentence plain-language summary of the most significant discrepancy and its legal implication.",

        "critical_discrepancies": [
        {
        "category": "timeline_conflict|narrative_shift|party_discrepancy|procedural_gap",
        "title": "Short title ≤ 60 chars",
        "document_a_says": "What Document A says",
        "document_b_says": "What Document B says or what is missing",
        "significance": "high|medium|low",
        "legal_relevance": "How this may affect the case — cite [n] if corpus supports",
        "citations": ["[1]", "[3]"]
        }
        ],

        "recommended_actions": [
        "2-5 specific concrete actions for the family or their lawyer"
        ],

        "what_remains_uncertain": [
        "2-4 specific questions needing legal professional verification"
        ]
        }

        Rules:
        - critical_discrepancies: max 10 items, ordered high → low significance.
        - Only include genuine discrepancies from the data provided.
        - High-significance items must cite at least one [n] if corpus evidence exists.
        - recommended_actions must be concrete, not generic.
        - Respond in {$locale}.
        PROMPT;

    $messages = [
        [
            'role' => 'system',
            'content' => 'You return valid JSON only. No markdown fences. Only cite legal sources from the provided corpus, not training memory.',
        ],
        ['role' => 'user', 'content' => $prompt],
    ];
    $llmOptions = ['json' => true, 'temperature' => 0.15, 'max_tokens' => 4000, 'timeout' => 240];

    $reply = '';
    try {
        $reply = match ($engine) {
            // The GPU helper returns a chat-completions style payload; pull out the first message.
            'gpu' => (string)(dbnToolsCallGpuLlm($messages, $llmOptions)['choices'][0]['message']['content'] ?? ''),
            'azure_full' => $this->azure->withDeployment('gpt-4o')->chatText($messages, $llmOptions),
            default => $this->azure->chatText($messages, $llmOptions),
        };
    } catch (Throwable $e) {
        // NOTE(review): dbnToolsAbort presumably ends the request; if it returns, $reply stays ''.
        dbnToolsAbort('Synthesis LLM request failed: ' . $e->getMessage(), 502, 'llm_error');
    }

    $decoded = $this->azure->decodeJsonObject($reply);
    $usable = is_array($decoded) && !empty($decoded['headline_finding']);
    if (!$usable) {
        // Fall back to surfacing the raw reply so the caller still gets some output.
        $decoded = [
            'headline_finding' => $reply,
            'critical_discrepancies' => [],
            'recommended_actions' => [],
            'what_remains_uncertain' => [],
        ];
    }

    return ['json' => $decoded, 'deploy_label' => $deployLabel];
}
|
|
|
|
// ── Corpus helpers ─────────────────────────────────────────────────────────
|
|
|
|
/**
 * Map one raw retrieval row onto the source shape used by the rest of the pipeline.
 *
 * Missing ids and scores become null, scores are rounded to four decimals, and
 * source_url / deep_link / authority_label start out as null (hydrateSourceUrls()
 * assigns them later from document metadata).
 *
 * @param array  $chunk  Raw retrieval row.
 * @param string $subQId Id of the sub-question that retrieved this chunk.
 *
 * @return array Normalised source entry.
 */
private function normalizeCorpusChunk(array $chunk, string $subQId): array
{
    $body = (string)($chunk['content'] ?? '');
    $intOrNull = static fn (string $key): ?int => isset($chunk[$key]) ? (int)$chunk[$key] : null;
    $scoreOrNull = static fn (string $key): ?float => isset($chunk[$key]) ? round((float)$chunk[$key], 4) : null;

    $title = $chunk['document_title'] ?? $chunk['title'] ?? 'Untitled source';
    $origin = $chunk['source_name'] ?? $chunk['source_type'] ?? 'Do Better Legal';

    return [
        'chunk_id' => $intOrNull('id'),
        'title' => (string)$title,
        'section' => $chunk['section_title'] ?? null,
        'package_or_corpus' => (string)$origin,
        'excerpt' => dbnToolsExcerpt($body, 620),
        'chunk_text' => $body,
        'similarity' => $scoreOrNull('similarity'),
        'reranker_score' => $scoreOrNull('reranker_score'),
        'document_id' => $intOrNull('document_id'),
        'source_origin' => 'corpus',
        'authority_type' => $chunk['authority_type'] ?? null,
        'jurisdiction' => $chunk['jurisdiction'] ?? null,
        'source_url' => null,
        'deep_link' => null,
        'authority_label' => null,
        'matched_sub_questions' => [$subQId],
    ];
}
|
|
|
|
/**
 * Collapse the chunks retrieved for all sub-questions into one ranked, de-duplicated pool.
 *
 * Chunks sharing a chunk_id are merged into the first occurrence: matched_sub_questions
 * becomes the union across duplicates, and reranker_score / similarity become the highest
 * value seen. A missing (null) score means "no score" rather than 0, so a duplicate with a
 * zero or negative score still replaces a missing one and the result does not depend on
 * the order in which duplicates arrive.
 *
 * Chunks without a chunk_id are never merged with anything; each receives its own
 * counter-based key, which is deterministic and cannot collide.
 *
 * The pool is sorted descending by reranker_score, falling back to similarity and then 0,
 * and truncated to $cap entries.
 *
 * @param array $rawPool Source entries carrying chunk_id, matched_sub_questions, reranker_score, similarity.
 * @param int   $cap     Maximum number of entries to return; values below 0 are treated as 0.
 *
 * @return array Re-indexed list of merged entries, best first.
 */
private function mergeAndDedupe(array $rawPool, int $cap): array
{
    $byKey = [];
    $anonymous = 0;

    foreach ($rawPool as $chunk) {
        // Separate key namespaces guarantee an id-less chunk never lands on a real id.
        $key = isset($chunk['chunk_id'])
            ? 'corpus:' . $chunk['chunk_id']
            : 'anon:' . $anonymous++;

        if (!isset($byKey[$key])) {
            $byKey[$key] = $chunk;
            continue;
        }

        $existing = $byKey[$key];
        $existing['matched_sub_questions'] = array_values(array_unique(array_merge(
            $existing['matched_sub_questions'] ?? [],
            $chunk['matched_sub_questions'] ?? []
        )));

        // Keep the best score per metric; null never beats, and never blocks, a real value.
        foreach (['reranker_score', 'similarity'] as $metric) {
            $incoming = $chunk[$metric] ?? null;
            if ($incoming === null) {
                continue;
            }
            $current = $existing[$metric] ?? null;
            if ($current === null || $incoming > $current) {
                $existing[$metric] = $incoming;
            }
        }

        $byKey[$key] = $existing;
    }

    $merged = array_values($byKey);
    usort($merged, static function (array $a, array $b): int {
        $aScore = $a['reranker_score'] ?? $a['similarity'] ?? 0;
        $bScore = $b['reranker_score'] ?? $b['similarity'] ?? 0;

        return $bScore <=> $aScore;
    });

    // A negative length would make array_slice trim from the end instead of capping.
    return array_slice($merged, 0, max(0, $cap));
}
|
|
|
|
/**
 * Assign each source its 1-based citation number `n`, in input order.
 *
 * Numbering uses a running counter rather than the input's array keys, so the result is
 * always 1..N without gaps even when the input has sparse or string keys.
 *
 * @param array $chunks Source entries, already in their final order.
 *
 * @return array Re-indexed list of the same entries, each with an added `n` key.
 */
private function numberSources(array $chunks): array
{
    $numbered = [];
    $n = 0;
    foreach ($chunks as $chunk) {
        $chunk['n'] = ++$n;
        $numbered[] = $chunk;
    }

    return $numbered;
}
|
|
|
|
/**
 * Grade how well the retrieved sources can back citations.
 *
 * Each source contributes its reranker_score, or its similarity when no reranker score
 * is set; non-numeric values are ignored. The grade combines the number of sources with
 * the best score found:
 *   - 'high'   : at least 5 sources and best score >= 0.5
 *   - 'medium' : at least 3 sources and best score >= 0.35
 *   - 'low'    : everything else, including an empty list
 *
 * @param array $sources Source entries.
 *
 * @return string 'high', 'medium' or 'low'.
 */
private function citationConfidence(array $sources): string
{
    $total = count($sources);
    if ($total === 0) {
        return 'low';
    }

    $candidates = array_map(
        static fn (array $source) => $source['reranker_score'] ?? $source['similarity'] ?? null,
        $sources
    );
    $numeric = array_filter($candidates, static fn ($value): bool => is_numeric($value));
    $best = $numeric === [] ? 0 : max($numeric);

    return match (true) {
        $total >= 5 && $best >= 0.5 => 'high',
        $total >= 3 && $best >= 0.35 => 'medium',
        default => 'low',
    };
}
|
|
|
|
/**
 * Enrich pool entries in place with document-level metadata.
 *
 * Looks up every distinct positive document_id in the `documents` table (via dbnToolsRagDb())
 * and, for documents that have a source_id, the source name in `corpus_sources`
 * (via dbnToolsDb()). Each matching pool entry then gets source_url, deep_link (same value as
 * source_url), authority_label, corpus_source_name and publication_date.
 *
 * Best effort: any Throwable raised during the lookups is logged and the pool is left untouched.
 *
 * @param array $pool Source entries, modified by reference.
 */
private function hydrateSourceUrls(array &$pool): void
{
    // Distinct positive document ids referenced by the pool.
    $wanted = [];
    foreach ($pool as $entry) {
        $id = (int)($entry['document_id'] ?? 0);
        if ($id > 0) {
            $wanted[$id] = true;
        }
    }
    if ($wanted === []) {
        return;
    }

    try {
        $ragDb = dbnToolsRagDb();
        $ids = array_keys($wanted);
        $placeholders = implode(',', array_fill(0, count($ids), '?'));
        $docQuery = $ragDb->prepare(
            "SELECT d.id, d.source_url, d.authority_type, d.publication_date, d.source_id, d.title
 FROM documents d WHERE d.id IN ({$placeholders})"
        );
        $docQuery->execute($ids);

        $docMeta = [];
        $sourceIdSet = [];
        foreach ($docQuery as $row) {
            $sourceId = isset($row['source_id']) ? (int)$row['source_id'] : null;
            if ($sourceId) {
                // Keyed set: keeps first-seen order while dropping repeats.
                $sourceIdSet[$sourceId] = true;
            }
            $docMeta[(int)$row['id']] = [
                'source_url' => $row['source_url'] ?? null,
                'authority_label' => dbnV6AuthorityLabel($row['authority_type'] ?? null),
                'publication_date' => $row['publication_date'] ?? null,
                'source_id' => $sourceId,
            ];
        }

        if ($sourceIdSet) {
            $sourceIds = array_keys($sourceIdSet);
            $sourcePlaceholders = implode(',', array_fill(0, count($sourceIds), '?'));
            $nameQuery = dbnToolsDb()->prepare("SELECT id, name FROM corpus_sources WHERE id IN ({$sourcePlaceholders})");
            $nameQuery->execute($sourceIds);

            $namesById = [];
            foreach ($nameQuery as $row) {
                $namesById[(int)$row['id']] = dbnV6RepairText((string)($row['name'] ?? 'Do Better Legal'));
            }

            foreach ($docMeta as $docId => $meta) {
                $sid = $meta['source_id'];
                if ($sid && isset($namesById[$sid])) {
                    $docMeta[$docId]['corpus_source_name'] = $namesById[$sid];
                }
            }
        }
    } catch (Throwable $e) {
        error_log('Discrepancy hydrateSourceUrls failed: ' . $e->getMessage());
        return;
    }

    // Copy the looked-up metadata onto every pool entry whose document was found.
    foreach ($pool as $index => $entry) {
        $docId = (int)($entry['document_id'] ?? 0);
        if (!$docId || !isset($docMeta[$docId])) {
            continue;
        }
        $meta = $docMeta[$docId];
        $url = $meta['source_url'] ?? null;

        $pool[$index]['source_url'] = $url;
        $pool[$index]['deep_link'] = $url;
        $pool[$index]['authority_label'] = $meta['authority_label'] ?? $entry['authority_label'];
        $pool[$index]['corpus_source_name'] = $meta['corpus_source_name'] ?? null;
        $pool[$index]['publication_date'] = $meta['publication_date'] ?? null;
    }
}
|
|
|
|
/**
 * Fetch the `family-legal` corpus package and verify it is usable for this client.
 *
 * Calls dbnToolsAbort() with HTTP status 503 when the package is missing or inactive,
 * or when the client has no active subscription to it.
 * NOTE(review): the code after each abort assumes dbnToolsAbort() does not return — confirm.
 *
 * @param int $clientId Client whose subscription is checked.
 *
 * @return array The package record.
 */
private function requireFamilyPackage(int $clientId): array
{
    $package = dbnToolsFetchPackage('family-legal');

    $isActive = $package && !empty($package['is_active']);
    if (!$isActive) {
        dbnToolsAbort('The family-legal corpus package is not active.', 503, 'package_unavailable');
    }

    $packageId = (int)$package['id'];
    $subscribed = dbnToolsHasActiveSubscription($clientId, $packageId);
    if (!$subscribed) {
        dbnToolsAbort('Do Better Norge does not have an active family-legal subscription.', 503, 'subscription_missing');
    }

    return $package;
}
|
|
|
|
/**
 * Milliseconds elapsed since a microtime(true) timestamp, rounded to the nearest integer.
 *
 * @param float $start Start time as returned by microtime(true).
 *
 * @return int Elapsed wall-clock time in milliseconds.
 */
private function elapsedMs(float $start): int
{
    $elapsedSeconds = microtime(true) - $start;
    $elapsedMillis = round($elapsedSeconds * 1000);

    return (int)$elapsedMillis;
}
|
|
}
|