Files
dobetternorge-tools/api/discrepancy.php
T
daveadmin e977bbb6b3 Add Document Discrepancy Finder tool
8-step NDJSON-streaming pipeline that compares two Barnevernet documents:
classifies each doc, extracts parties and timelines, cross-references both
for contradictions/deletions/additions, retrieves corpus legal context, and
synthesises a full discrepancy report with tabbed UI.

New files: DiscrepancyAgent.php, api/discrepancy.php, discrepancy.php,
discrepancy.js. Modified: FreeTier.php (cost=4), i18n.php (all 4 langs),
tool-svgs.php (DC icon), tools.css (dc-* component styles).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-18 19:30:38 +02:00

162 lines
6.1 KiB
PHP

<?php
declare(strict_types=1);
require_once __DIR__ . '/../includes/bootstrap.php';
require_once __DIR__ . '/../includes/DiscrepancyAgent.php';
// Request guards (project helpers). NOTE(review): presumably these terminate
// the request themselves on failure — confirm in their definitions.
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

// Free-tier accounting for the 'discrepancy' tool: check first, then deduct.
// The value returned by the deduct call is reported below as remaining credits.
$ftUid = dbnToolsFreeTierCheck('discrepancy');
$ftRemaining = dbnToolsFreeTierDeduct($ftUid, 'discrepancy');

// Turn off every buffering layer PHP controls so each NDJSON line can leave
// the process as soon as it is echoed.
$streamIniSettings = [
    'output_buffering' => '0',
    'zlib.output_compression' => '0',
    'implicit_flush' => '1',
];
foreach ($streamIniSettings as $iniName => $iniValue) {
    @ini_set($iniName, $iniValue);
}

// Discard any output buffers that are already open.
while (ob_get_level() > 0) {
    @ob_end_clean();
}
ob_implicit_flush(true);

// Response headers for an uncached NDJSON stream; X-Accel-Buffering asks a
// fronting proxy not to buffer the response.
$streamHeaders = [
    'Content-Type' => 'application/x-ndjson; charset=utf-8',
    'Cache-Control' => 'no-store',
    'X-Accel-Buffering' => 'no',
];
// Only advertise the remaining credits when the value is non-negative.
if ($ftRemaining >= 0) {
    $streamHeaders['X-Credits-Remaining'] = (string)$ftRemaining;
}
foreach ($streamHeaders as $headerName => $headerValue) {
    header($headerName . ': ' . $headerValue);
}
// Default language used for logging if the request fails before the payload
// language has been parsed.
$language = 'en';

// Reference point for the t_ms timestamps and latency figures.
$startTime = microtime(true);

/**
 * Write one NDJSON record to the response and flush it.
 *
 * The record is $payload plus two keys set here: 'event' (the event name) and
 * 't_ms' (milliseconds elapsed since $startTime). Both overwrite any keys of
 * the same name already present in $payload.
 *
 * Encoding is hardened: invalid UTF-8 byte sequences are replaced with U+FFFD
 * and other unencodable values are substituted instead of making
 * json_encode() return false. Without this, a single bad byte would turn the
 * whole record (including the 'final' event) into an empty line.
 *
 * @param string $event   Event name, e.g. 'progress', 'start', 'final', 'error'.
 * @param array  $payload Additional fields for the record.
 */
$emit = function (string $event, array $payload = []) use ($startTime): void {
    $payload['event'] = $event;
    $payload['t_ms'] = (int)round((microtime(true) - $startTime) * 1000);

    $flags = JSON_UNESCAPED_UNICODE
        | JSON_UNESCAPED_SLASHES
        | JSON_INVALID_UTF8_SUBSTITUTE
        | JSON_PARTIAL_OUTPUT_ON_ERROR;

    $line = json_encode($payload, $flags);
    if ($line === false) {
        // Last resort: keep the stream well-formed by sending a minimal
        // record that still identifies the event and reports why it was cut.
        $line = json_encode([
            'event' => $event,
            't_ms' => $payload['t_ms'],
            'encode_error' => json_last_error_msg(),
        ], $flags);
    }
    if ($line === false) {
        // Nothing encodable at all; skip rather than emit a blank line.
        return;
    }

    echo $line . "\n";
    @flush();
};
/**
 * Validate one uploaded document, extract its text and stream progress events.
 *
 * Emits a 'progress' event before reading and another after extraction that
 * reports the filename, character count and whether the text was truncated.
 *
 * @param string $field Key in $_FILES ('file_a' or 'file_b'); also used as the
 *                      suffix of the 'missing_*' error code.
 * @param string $label Label used in user-facing messages ('A' or 'B').
 * @return array Document with the keys 'filename', 'text', 'chars' and
 *               'truncated', copied from dbnToolsExtractUploadedFile()'s result.
 * @throws DbnToolsHttpException 422 when the upload is absent or its error
 *                               code is not UPLOAD_ERR_OK.
 */
$readDocument = function (string $field, string $label) use ($emit): array {
    $emit('progress', ['detail' => 'Reading Document ' . $label . '…']);

    $upload = $_FILES[$field] ?? null;
    if (!$upload || ($upload['error'] ?? UPLOAD_ERR_NO_FILE) !== UPLOAD_ERR_OK) {
        throw new DbnToolsHttpException(
            'Document ' . $label . ' is required. Upload a PDF, DOCX, or TXT file.',
            422,
            'missing_' . $field
        );
    }

    // Pass a normalised copy of the upload entry so missing keys get defaults.
    $extracted = dbnToolsExtractUploadedFile([
        'name' => $upload['name'] ?? '',
        'type' => $upload['type'] ?? '',
        'tmp_name' => $upload['tmp_name'] ?? '',
        'error' => $upload['error'] ?? UPLOAD_ERR_NO_FILE,
        'size' => $upload['size'] ?? 0,
    ]);

    $emit('progress', ['detail' => sprintf(
        'Document %s extracted: %s (%d chars%s)',
        $label,
        $extracted['filename'],
        $extracted['chars'],
        !empty($extracted['truncated']) ? ', truncated' : ''
    )]);

    return [
        'filename' => $extracted['filename'],
        'text' => $extracted['text'],
        'chars' => $extracted['chars'],
        'truncated' => $extracted['truncated'],
    ];
};

try {
    // Parse payload (always multipart — two files required). The JSON options
    // travel in the 'payload' form field next to the two file parts.
    $payloadRaw = (string)($_POST['payload'] ?? '');
    if ($payloadRaw === '') {
        throw new DbnToolsHttpException('Missing payload field.', 422, 'missing_payload');
    }
    $input = json_decode($payloadRaw, true);
    if (!is_array($input)) {
        throw new DbnToolsHttpException('Invalid payload JSON.', 422, 'invalid_payload_json');
    }

    $language = dbnToolsNormalizeLanguage($input['language'] ?? 'en');
    $engine = (string)($input['engine'] ?? 'azure_mini');
    $sliceInput = $input['slices'] ?? [];

    // Both documents are read (and their progress events streamed) before the
    // emptiness checks, so the client sees extraction results for A and B.
    $fileA = $readDocument('file_a', 'A');
    $fileB = $readDocument('file_b', 'B');

    // Whitespace-only text is as unusable as empty text: reject it here rather
    // than running the agent on a blank document.
    if (trim((string)($fileA['text'] ?? '')) === '') {
        throw new DbnToolsHttpException('Could not extract text from Document A.', 422, 'empty_file_a');
    }
    if (trim((string)($fileB['text'] ?? '')) === '') {
        throw new DbnToolsHttpException('Could not extract text from Document B.', 422, 'empty_file_b');
    }

    $emit('start', [
        'engine' => $engine,
        'language' => $language,
        'file_a' => $fileA['filename'],
        'file_b' => $fileB['filename'],
    ]);

    // The agent receives $emit so it can stream its own events while it runs.
    $result = (new DbnDiscrepancyAgent())->run(
        $fileA,
        $fileB,
        $engine,
        $language,
        is_array($sliceInput) ? $sliceInput : [],
        $emit
    );
    $result['ok'] = true;
    $result['latency_ms'] = (int)round((microtime(true) - $startTime) * 1000);

    // Log counters taken from the agent's trace metadata, defaulting to 0.
    $trace = $result['trace_metadata'] ?? [];
    dbnToolsLogMetadata([
        'tool' => 'discrepancy',
        'language' => $language,
        'ok' => true,
        'latency_ms' => $result['latency_ms'],
        'source_count' => (int)($trace['source_count'] ?? 0),
        'conflict_count' => (int)($trace['conflict_count'] ?? 0),
        'deleted_count' => (int)($trace['deleted_count'] ?? 0),
        'added_count' => (int)($trace['added_count'] ?? 0),
        'deployment' => $trace['deployment'] ?? null,
    ]);

    $emit('final', ['result' => $result]);
} catch (DbnToolsHttpException $e) {
    // Expected failures: log the error code and forward code, message and
    // intended status to the client as an 'error' event in the stream.
    $latency = (int)round((microtime(true) - $startTime) * 1000);
    dbnToolsLogMetadata([
        'tool' => 'discrepancy',
        'language' => $language,
        'ok' => false,
        'latency_ms' => $latency,
        'error_code' => $e->errorCode,
    ]);
    $emit('error', ['code' => $e->errorCode, 'message' => $e->getMessage(), 'status' => $e->status]);
} catch (Throwable $e) {
    // Unexpected failures: details go to the server log only; the client gets
    // a generic message.
    error_log('DBN discrepancy fatal: ' . $e->getMessage());
    $latency = (int)round((microtime(true) - $startTime) * 1000);
    dbnToolsLogMetadata([
        'tool' => 'discrepancy',
        'language' => $language,
        'ok' => false,
        'latency_ms' => $latency,
        'error_code' => 'internal_error',
    ]);
    $emit('error', ['code' => 'internal_error', 'message' => 'The discrepancy finder could not complete this request.']);
}