Add Document Discrepancy Finder tool
8-step NDJSON-streaming pipeline that compares two Barnevernet documents: classifies each doc, extracts parties and timelines, cross-references both for contradictions/deletions/additions, retrieves corpus legal context, and synthesises a full discrepancy report with tabbed UI. New files: DiscrepancyAgent.php, api/discrepancy.php, discrepancy.php, discrepancy.js. Modified: FreeTier.php (cost=4), i18n.php (all 4 langs), tool-svgs.php (DC icon), tools.css (dc-* component styles). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,161 @@
|
||||
<?php
|
||||
declare(strict_types=1);
|
||||
|
||||
require_once __DIR__ . '/../includes/bootstrap.php';
|
||||
require_once __DIR__ . '/../includes/DiscrepancyAgent.php';
|
||||
|
||||
// Request gating via the project helpers. Judging by their names they enforce
// the HTTP method, require an authenticated caller, and check/deduct free-tier
// credit for the 'discrepancy' tool — their definitions are not in this file.
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();
$ftUid = dbnToolsFreeTierCheck('discrepancy');
// NOTE(review): the deduction happens before the payload and uploads are
// validated below, so a request that is later rejected with a 422 event has
// presumably already been charged — confirm this is intended.
$ftRemaining = dbnToolsFreeTierDeduct($ftUid, 'discrepancy');

// Turn off every PHP-level buffering layer so each NDJSON line reaches the
// client as soon as it is echoed. ini_set() is best-effort here (some of
// these directives cannot be changed at runtime), hence the suppression.
@ini_set('output_buffering', '0');
@ini_set('zlib.output_compression', '0');
@ini_set('implicit_flush', '1');

// Discard any output buffers that are already open. ob_end_clean() returns
// false when a buffer cannot be removed; stop in that case, otherwise
// ob_get_level() never decreases and this loop would spin forever.
while (ob_get_level() > 0) {
    if (!@ob_end_clean()) {
        break;
    }
}
ob_implicit_flush(true);

// Streaming response headers: NDJSON body, no caching, and no buffering by
// an nginx-style reverse proxy.
header('Content-Type: application/x-ndjson; charset=utf-8');
header('Cache-Control: no-store');
header('X-Accel-Buffering: no');
// A negative balance is not reported to the client (presumably it means
// "not metered" — verify against dbnToolsFreeTierDeduct).
if ($ftRemaining >= 0) {
    header('X-Credits-Remaining: ' . $ftRemaining);
}
|
||||
|
||||
// Default language for failure logging until the payload has been parsed.
$language = 'en';
// Reference point for the t_ms / latency_ms values reported to the client.
$startTime = microtime(true);

/**
 * Writes one NDJSON event line to the response and flushes it.
 *
 * The payload is extended with `event` (the event name) and `t_ms`
 * (milliseconds elapsed since $startTime) before being encoded.
 *
 * Payloads can carry text taken from uploaded documents, which may contain
 * invalid UTF-8. json_encode() returns false for such input, and the old
 * code then echoed a blank line and silently dropped the event. Invalid
 * bytes are now replaced with U+FFFD; any other encoding failure is logged
 * and retried with partial output, and a blank line is never written.
 *
 * @param string $event   Event name stored under the `event` key.
 * @param array  $payload Additional fields for the event line.
 */
$emit = function (string $event, array $payload = []) use ($startTime): void {
    $payload['event'] = $event;
    $payload['t_ms'] = (int)round((microtime(true) - $startTime) * 1000);

    $flags = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE;
    $line = json_encode($payload, $flags);
    if ($line === false) {
        // e.g. NAN/INF floats or excessive nesting: keep the event, degrade the bad values.
        error_log('DBN discrepancy emit failed (' . $event . '): ' . json_last_error_msg());
        $line = json_encode($payload, $flags | JSON_PARTIAL_OUTPUT_ON_ERROR);
    }
    if ($line === false) {
        // Nothing encodable; skip rather than corrupt the stream with an empty line.
        return;
    }

    echo $line . "\n";
    flush();
};
|
||||
|
||||
/**
 * Reads, validates and text-extracts one uploaded document.
 *
 * Looks up $_FILES['file_a'] / $_FILES['file_b'] (derived from $label), hands
 * the upload to dbnToolsExtractUploadedFile(), reports progress through $emit
 * and fails fast when the upload is missing or yields no text, so the second
 * document is not processed when the first one is already unusable.
 *
 * @param string $label Document label shown to the user: 'A' or 'B'.
 * @return array{filename: mixed, text: mixed, chars: mixed, truncated: mixed}
 * @throws DbnToolsHttpException 422 missing_file_{a|b} or empty_file_{a|b}.
 */
$readDocument = function (string $label) use ($emit): array {
    $slug = strtolower($label);
    $emit('progress', ['detail' => sprintf('Reading Document %s…', $label)]);

    $upload = $_FILES['file_' . $slug] ?? null;
    if (!is_array($upload) || ($upload['error'] ?? UPLOAD_ERR_NO_FILE) !== UPLOAD_ERR_OK) {
        throw new DbnToolsHttpException(
            sprintf('Document %s is required. Upload a PDF, DOCX, or TXT file.', $label),
            422,
            'missing_file_' . $slug
        );
    }

    $extracted = dbnToolsExtractUploadedFile([
        'name'     => $upload['name'] ?? '',
        'type'     => $upload['type'] ?? '',
        'tmp_name' => $upload['tmp_name'] ?? '',
        'error'    => $upload['error'] ?? UPLOAD_ERR_NO_FILE,
        'size'     => $upload['size'] ?? 0,
    ]);

    $emit('progress', ['detail' => sprintf(
        'Document %s extracted: %s (%d chars%s)',
        $label,
        $extracted['filename'],
        $extracted['chars'],
        !empty($extracted['truncated']) ? ', truncated' : ''
    )]);

    if (($extracted['text'] ?? '') === '') {
        throw new DbnToolsHttpException(
            sprintf('Could not extract text from Document %s.', $label),
            422,
            'empty_file_' . $slug
        );
    }

    return [
        'filename'  => $extracted['filename'],
        'text'      => $extracted['text'],
        'chars'     => $extracted['chars'],
        'truncated' => $extracted['truncated'],
    ];
};

try {
    // Parse payload (always multipart — two files required)
    $payloadRaw = (string)($_POST['payload'] ?? '');
    if ($payloadRaw === '') {
        throw new DbnToolsHttpException('Missing payload field.', 422, 'missing_payload');
    }
    $input = json_decode($payloadRaw, true);
    if (!is_array($input)) {
        throw new DbnToolsHttpException('Invalid payload JSON.', 422, 'invalid_payload_json');
    }

    $language = dbnToolsNormalizeLanguage($input['language'] ?? 'en');
    // Client-controlled value: casting an array/object to string would raise a
    // warning and produce "Array", so anything non-scalar falls back to the default.
    $engineInput = $input['engine'] ?? 'azure_mini';
    $engine = is_scalar($engineInput) ? (string)$engineInput : 'azure_mini';
    $sliceInput = $input['slices'] ?? [];

    $fileA = $readDocument('A');
    $fileB = $readDocument('B');

    $emit('start', [
        'engine'   => $engine,
        'language' => $language,
        'file_a'   => $fileA['filename'],
        'file_b'   => $fileB['filename'],
    ]);

    // The agent receives $emit so it can stream its own events while it runs.
    $result = (new DbnDiscrepancyAgent())->run(
        $fileA,
        $fileB,
        $engine,
        $language,
        is_array($sliceInput) ? $sliceInput : [],
        $emit
    );

    $result['ok'] = true;
    $result['latency_ms'] = (int)round((microtime(true) - $startTime) * 1000);

    dbnToolsLogMetadata([
        'tool'           => 'discrepancy',
        'language'       => $language,
        'ok'             => true,
        'latency_ms'     => $result['latency_ms'],
        'source_count'   => (int)($result['trace_metadata']['source_count'] ?? 0),
        'conflict_count' => (int)($result['trace_metadata']['conflict_count'] ?? 0),
        'deleted_count'  => (int)($result['trace_metadata']['deleted_count'] ?? 0),
        'added_count'    => (int)($result['trace_metadata']['added_count'] ?? 0),
        'deployment'     => $result['trace_metadata']['deployment'] ?? null,
    ]);

    $emit('final', ['result' => $result]);
} catch (DbnToolsHttpException $e) {
    // Expected, client-facing failure. The streaming headers are already sent,
    // so the HTTP-style status travels inside the error event.
    dbnToolsLogMetadata([
        'tool'       => 'discrepancy',
        'language'   => $language,
        'ok'         => false,
        'latency_ms' => (int)round((microtime(true) - $startTime) * 1000),
        'error_code' => $e->errorCode,
    ]);
    $emit('error', ['code' => $e->errorCode, 'message' => $e->getMessage(), 'status' => $e->status]);
} catch (Throwable $e) {
    // Unexpected failure: record class and location for debugging, but send the
    // client only a generic message.
    error_log(sprintf(
        'DBN discrepancy fatal: %s: %s in %s:%d',
        get_class($e),
        $e->getMessage(),
        $e->getFile(),
        $e->getLine()
    ));
    dbnToolsLogMetadata([
        'tool'       => 'discrepancy',
        'language'   => $language,
        'ok'         => false,
        'latency_ms' => (int)round((microtime(true) - $startTime) * 1000),
        'error_code' => 'internal_error',
    ]);
    $emit('error', ['code' => 'internal_error', 'message' => 'The discrepancy finder could not complete this request.']);
}
|
||||
Reference in New Issue
Block a user