Initial release: Do Better Norge Legal Tools Hub
Five MVP tools (Ask, Search, Summarize, Timeline, Redact) with email+password auth, Azure OpenAI gateway, evidence trail panel, and process-and-forget privacy default. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,223 @@
|
||||
<?php
|
||||
declare(strict_types=1);
|
||||
|
||||
require_once __DIR__ . '/bootstrap.php';
|
||||
|
||||
/**
 * HTTP gateway for Azure OpenAI chat-completion and embedding deployments.
 *
 * Configuration is taken from the array passed to the constructor or, when
 * none (or an empty one) is given, from the DBN_AZURE_OPENAI_* values read
 * through dbnToolsEnv(). Requests are sent with cURL when the extension is
 * loaded and with an HTTP stream context otherwise.
 */
final class DbnAzureOpenAiGateway
{
    /** Configuration keys read by this class; each is normalised to a trimmed string. */
    private const CONFIG_KEYS = [
        'endpoint',
        'api_key',
        'api_version',
        'chat_deployment',
        'embedding_deployment',
    ];

    /** Keys every request needs, whichever deployment is targeted. */
    private const SHARED_KEYS = ['endpoint', 'api_key', 'api_version'];

    private array $config;

    /**
     * @param array|null $config Optional explicit configuration. Missing keys are
     *                           treated as empty and reported by the missing*Config() methods.
     */
    public function __construct(?array $config = null)
    {
        $source = $config ?: [
            'endpoint' => dbnToolsEnv('DBN_AZURE_OPENAI_ENDPOINT', ''),
            'api_key' => dbnToolsEnv('DBN_AZURE_OPENAI_API_KEY', ''),
            'api_version' => dbnToolsEnv('DBN_AZURE_OPENAI_API_VERSION', ''),
            'chat_deployment' => dbnToolsEnv('DBN_AZURE_OPENAI_CHAT_DEPLOYMENT', ''),
            'embedding_deployment' => dbnToolsEnv('DBN_AZURE_OPENAI_EMBEDDING_DEPLOYMENT', ''),
        ];

        // Normalise both env-derived and injected values the same way: stray
        // whitespace would corrupt the api-key header, and a trailing slash on
        // the endpoint would produce "//openai/..." URLs.
        $normalized = $source;
        foreach (self::CONFIG_KEYS as $key) {
            $normalized[$key] = trim((string)($source[$key] ?? ''));
        }
        $normalized['endpoint'] = rtrim($normalized['endpoint'], '/');

        $this->config = $normalized;
    }

    /**
     * @return list<string> Names of the settings required for chat that are empty.
     */
    public function missingChatConfig(): array
    {
        return $this->missingKeys([...self::SHARED_KEYS, 'chat_deployment']);
    }

    /**
     * @return list<string> Names of the settings required for embeddings that are empty.
     */
    public function missingEmbeddingConfig(): array
    {
        return $this->missingKeys([...self::SHARED_KEYS, 'embedding_deployment']);
    }

    /** Name of the configured chat deployment ('' when not configured). */
    public function chatDeployment(): string
    {
        return (string)($this->config['chat_deployment'] ?? '');
    }

    /** Name of the configured embedding deployment ('' when not configured). */
    public function embeddingDeployment(): string
    {
        return (string)($this->config['embedding_deployment'] ?? '');
    }

    /**
     * Reports incomplete chat configuration through dbnToolsAbort() (HTTP 503).
     */
    public function requireChat(): void
    {
        $missing = $this->missingChatConfig();
        if ($missing) {
            dbnToolsAbort(
                'Azure OpenAI chat gateway is missing configuration: ' . implode(', ', $missing) . '.',
                503,
                'azure_config_missing',
                ['missing' => $missing]
            );
        }
    }

    /**
     * Reports incomplete embedding configuration through dbnToolsAbort() (HTTP 503).
     */
    public function requireEmbedding(): void
    {
        $missing = $this->missingEmbeddingConfig();
        if ($missing) {
            dbnToolsAbort(
                'Azure OpenAI embedding gateway is missing configuration: ' . implode(', ', $missing) . '.',
                503,
                'azure_embedding_config_missing',
                ['missing' => $missing]
            );
        }
    }

    /**
     * Requests embeddings for one string or a list of inputs.
     *
     * @param array|string $input   Sent unchanged as the "input" field.
     * @param array        $options Supports 'timeout' in seconds (default 30).
     * @return array Decoded JSON response.
     * @throws RuntimeException On transport failure, non-2xx status or non-JSON body.
     */
    public function embeddings(array|string $input, array $options = []): array
    {
        $this->requireEmbedding();

        return $this->postJson(
            $this->deploymentUrl($this->embeddingDeployment(), 'embeddings'),
            ['input' => $input],
            (int)($options['timeout'] ?? 30)
        );
    }

    /**
     * Runs a chat completion and returns the trimmed text of the first choice.
     *
     * @throws RuntimeException When the first choice has no non-blank string content.
     */
    public function chatText(array $messages, array $options = []): string
    {
        $response = $this->chat($messages, $options);
        $content = $response['choices'][0]['message']['content'] ?? '';
        if (!is_string($content) || trim($content) === '') {
            throw new RuntimeException('Azure OpenAI returned an empty chat response.');
        }

        return trim($content);
    }

    /**
     * Sends a chat-completion request.
     *
     * @param array $messages Chat messages, forwarded as-is.
     * @param array $options  'temperature' (default 0.2), 'max_tokens' (default 1200),
     *                        'json' (truthy requests a JSON-object response format),
     *                        'timeout' in seconds (default 45).
     * @return array Decoded JSON response.
     * @throws RuntimeException On transport failure, non-2xx status or non-JSON body.
     */
    public function chat(array $messages, array $options = []): array
    {
        $this->requireChat();

        $payload = [
            'messages' => $messages,
            'temperature' => $options['temperature'] ?? 0.2,
            'max_tokens' => $options['max_tokens'] ?? 1200,
        ];
        if (!empty($options['json'])) {
            $payload['response_format'] = ['type' => 'json_object'];
        }

        return $this->postJson(
            $this->deploymentUrl($this->chatDeployment(), 'chat/completions'),
            $payload,
            (int)($options['timeout'] ?? 45)
        );
    }

    /**
     * Health check: true when a minimal chat round-trip yields non-empty text.
     * Failures are logged and reported as false instead of propagating.
     */
    public function ping(int $timeout = 8): bool
    {
        // Check configuration here so a health probe reports "down" rather than
        // going through requireChat()'s abort path.
        if ($this->missingChatConfig() !== []) {
            error_log('DBN Azure health check failed: chat configuration is incomplete.');
            return false;
        }

        try {
            $text = $this->chatText([
                ['role' => 'system', 'content' => 'Return one word only: ok'],
                ['role' => 'user', 'content' => 'health'],
            ], [
                'temperature' => 0,
                'max_tokens' => 5,
                'timeout' => $timeout,
            ]);
            return trim($text) !== '';
        } catch (Throwable $e) {
            error_log('DBN Azure health check failed: ' . $e->getMessage());
            return false;
        }
    }

    /**
     * Decodes model output that should be JSON.
     *
     * Tries the whole string first; if that fails, decodes the first balanced
     * {...} span found in the text (e.g. JSON wrapped in prose or code fences).
     *
     * @return array|null Decoded array, or null when nothing decodable is found.
     */
    public function decodeJsonObject(string $content): ?array
    {
        $content = trim($content);
        $decoded = json_decode($content, true);
        if (is_array($decoded)) {
            return $decoded;
        }

        // Recursive pattern: an opening brace, any mix of non-brace characters
        // and nested balanced groups, then the closing brace.
        if (preg_match('/\{(?:[^{}]|(?R))*\}/s', $content, $match) === 1) {
            $decoded = json_decode($match[0], true);
            if (is_array($decoded)) {
                return $decoded;
            }
        }

        return null;
    }

    /**
     * @param list<string> $keys Configuration keys to check.
     * @return list<string> Those keys whose value is empty after trimming.
     */
    private function missingKeys(array $keys): array
    {
        $missing = [];
        foreach ($keys as $key) {
            if (trim((string)($this->config[$key] ?? '')) === '') {
                $missing[] = $key;
            }
        }

        return $missing;
    }

    /**
     * Builds the request URL for an operation on a named deployment.
     *
     * @param string $deployment Deployment name (URL-encoded here).
     * @param string $operation  Path after the deployment, e.g. 'chat/completions'.
     */
    private function deploymentUrl(string $deployment, string $operation): string
    {
        return $this->config['endpoint']
            . '/openai/deployments/'
            . rawurlencode($deployment)
            . '/' . $operation . '?api-version='
            . rawurlencode((string)$this->config['api_version']);
    }

    /**
     * POSTs a JSON payload and returns the decoded JSON response.
     *
     * @throws RuntimeException When encoding or the transfer fails, or the response is rejected.
     */
    private function postJson(string $url, array $payload, int $timeout): array
    {
        $body = json_encode($payload, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
        if ($body === false) {
            throw new RuntimeException('Unable to encode Azure OpenAI request: ' . json_last_error_msg());
        }

        $headers = [
            'Content-Type: application/json',
            'api-key: ' . $this->config['api_key'],
        ];

        if (function_exists('curl_init')) {
            $ch = curl_init($url);
            if ($ch === false) {
                throw new RuntimeException('Azure OpenAI request failed: unable to initialise cURL.');
            }
            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_POST => true,
                CURLOPT_POSTFIELDS => $body,
                CURLOPT_HTTPHEADER => $headers,
                CURLOPT_TIMEOUT => $timeout,
            ]);
            $response = curl_exec($ch);
            $code = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
            $error = curl_error($ch);
            // No curl_close(): it has had no effect since PHP 8.0; the handle is
            // released when $ch goes out of scope.
            unset($ch);

            if ($response === false) {
                throw new RuntimeException('Azure OpenAI request failed: ' . $error);
            }

            return $this->decodeResponse($response, $code);
        }

        $context = stream_context_create([
            'http' => [
                'method' => 'POST',
                'header' => implode("\r\n", $headers),
                'content' => $body,
                'timeout' => $timeout,
                // Keep the body of 4xx/5xx responses so the error message can be decoded.
                'ignore_errors' => true,
            ],
        ]);
        $response = @file_get_contents($url, false, $context);
        if ($response === false) {
            throw new RuntimeException('Azure OpenAI request failed.');
        }

        $code = 0;
        // The status line may have no reason phrase (e.g. "HTTP/2 200"), so the
        // code may be followed by whitespace or by the end of the line.
        if (isset($http_response_header[0]) && preg_match('/\s(\d{3})(?:\s|$)/', $http_response_header[0], $m)) {
            $code = (int)$m[1];
        }

        return $this->decodeResponse($response, $code);
    }

    /**
     * Validates status code and body of a response.
     *
     * @throws RuntimeException When the body is not a JSON array/object or the status is not 2xx.
     */
    private function decodeResponse(string $response, int $code): array
    {
        $decoded = json_decode($response, true);
        $failed = $code < 200 || $code >= 300;

        if (!is_array($decoded)) {
            // Gateways and proxies often answer errors with HTML; keep the status visible.
            throw new RuntimeException(
                $failed
                    ? 'Azure OpenAI request failed: HTTP ' . $code . ' with non-JSON response.'
                    : 'Azure OpenAI returned non-JSON response.'
            );
        }

        if ($failed) {
            $message = $decoded['error']['message'] ?? null;
            if (!is_string($message) || $message === '') {
                $message = 'HTTP ' . $code;
            }
            throw new RuntimeException('Azure OpenAI request failed: ' . $message);
        }

        return $decoded;
    }
}
|
||||
@@ -0,0 +1,631 @@
|
||||
<?php
|
||||
declare(strict_types=1);
|
||||
|
||||
require_once __DIR__ . '/bootstrap.php';
|
||||
require_once __DIR__ . '/AzureOpenAiGateway.php';
|
||||
|
||||
final class DbnLegalToolsService
|
||||
{
|
||||
private const MAX_PASTE_CHARS = 32000;
|
||||
|
||||
private DbnAzureOpenAiGateway $azure;
|
||||
|
||||
/**
 * @param DbnAzureOpenAiGateway|null $azure Gateway to use; when omitted a
 *                                          gateway with default configuration is created.
 */
public function __construct(?DbnAzureOpenAiGateway $azure = null)
{
    if ($azure === null) {
        $azure = new DbnAzureOpenAiGateway();
    }

    $this->azure = $azure;
}
|
||||
|
||||
/**
 * Keyword search over the client's private corpus plus the family-legal package.
 *
 * Retrieval goes through ClientRagPipeline::searchAll() first. If the pipeline
 * throws, or returns nothing, a direct SQL keyword fallback is used. The
 * fallback runs at most once per call.
 *
 * @param string $query    Search text; must be at least 3 characters after trimming.
 * @param string $language Language code echoed in the result and passed to dbnToolsDisclaimer().
 * @param int    $limit    Maximum number of hits; clamped to 1..10.
 * @return array Result with 'hits', 'evidence_trail', 'trace', 'trace_metadata' and summary texts.
 */
public function search(string $query, string $language = 'en', int $limit = 6): array
{
    $query = trim($query);
    if (mb_strlen($query, 'UTF-8') < 3) {
        dbnToolsAbort('Search query must be at least 3 characters.', 422, 'query_too_short');
    }
    $limit = max(1, min(10, $limit));

    $trace = [
        $this->trace('Query interpretation', 'Searching Dave Jr Legal private corpus plus the subscribed family-legal package.', 'complete'),
        $this->trace('Search tools used', 'ClientRagPipeline::searchAll with keyword mode, private corpus enabled, shared package filter set to family-legal.', 'running'),
    ];

    $client = dbnToolsRequireClient();
    $clientId = (int)$client['id'];
    $package = $this->requireFamilyPackage($clientId);

    $chunks = [];
    $retrievalNote = 'ClientRagPipeline keyword retrieval';
    // Tracks whether the SQL fallback already ran, so an empty fallback result
    // does not trigger the identical queries a second time.
    $fallbackAttempted = false;

    try {
        dbnToolsBootCaveau();
        $gatewayUrl = 'http://10.0.1.10:4000';
        try {
            $config = getConfig();
            $configured = trim((string)($config['ai_gateway']['url'] ?? ''));
            if ($configured !== '') {
                $gatewayUrl = $configured;
            }
        } catch (Throwable $e) {
            // Retrieval still works in keyword mode without gateway config.
        }

        $rag = new ClientRagPipeline($clientId, $gatewayUrl, 30);
        $found = $rag->searchAll($query, $limit, null, [
            'search_private' => true,
            'search_shared' => true,
            'package_ids' => [(int)$package['id']],
            'chunk_limit' => $limit,
            'search_method' => 'keyword',
            'min_private' => 0,
            'include_beta_website' => true,
        ]);
        // Guard against a non-array result so array_slice()/count() below stay safe.
        $chunks = is_array($found) ? $found : [];
    } catch (Throwable $e) {
        // Record why the pipeline failed; the user only sees the fallback notice.
        error_log('DBN tools pipeline retrieval failed: ' . get_class($e) . ': ' . $e->getMessage());
        $retrievalNote = 'SQL keyword fallback after ClientRagPipeline error';
        $trace[] = $this->trace('Search fallback', 'Pipeline retrieval failed; using direct SQL keyword fallback without storing the query.', 'warning');
        $chunks = $this->fallbackKeywordSearch($clientId, $package, $query, $limit);
        $fallbackAttempted = true;
    }

    if (!$chunks && !$fallbackAttempted) {
        $fallback = $this->fallbackKeywordSearch($clientId, $package, $query, $limit);
        if ($fallback) {
            $chunks = $fallback;
            $retrievalNote = 'SQL keyword fallback';
        }
    }

    $hits = array_map(fn(array $chunk): array => $this->sourceFromChunk($chunk), array_slice($chunks, 0, $limit));
    $hitCount = count($hits);
    $confidence = $this->citationConfidence($hits);

    // Index 1 is the "Search tools used" step created above; replace its
    // provisional "running" entry with the final outcome.
    $trace[1] = $this->trace('Search tools used', $retrievalNote . '; returned ' . $hitCount . ' source hit(s).', 'complete');
    $trace[] = $this->trace('Evidence found', $hitCount ? 'Retrieved source excerpts for review.' : 'No matching source excerpts were found.', $hitCount ? 'complete' : 'warning');
    $trace[] = $this->trace('Citation confidence', ucfirst($confidence) . ' confidence based on source count and retrieval scores.', $confidence === 'low' ? 'warning' : 'complete');

    return [
        'tool' => 'search',
        'language' => $language,
        'what_we_found' => $hitCount ? 'Found source excerpts from the legal corpus.' : 'No matching source excerpts were found.',
        'hits' => $hits,
        'evidence_trail' => $hits,
        'what_remains_uncertain' => $hitCount ? 'Search results still need human review for legal relevance and currentness.' : 'The corpus may not contain enough evidence for this query.',
        'next_practical_step' => $hitCount ? 'Open the strongest sources and confirm the cited sections before relying on them.' : 'Try a narrower query with statutory terms, party names, or dates.',
        'trace' => $trace,
        'trace_metadata' => [
            'chunk_count' => count($chunks),
            'source_count' => $hitCount,
            'deployment' => null,
            'citation_confidence' => $confidence,
        ],
        'disclaimer' => dbnToolsDisclaimer($language),
    ];
}
|
||||
|
||||
/**
 * Answers a question from retrieved corpus excerpts.
 *
 * Runs search() for up to 7 sources. With no sources, a fixed "not enough
 * support" answer is returned and the model is not called. Otherwise the
 * excerpts and the question are sent to the chat deployment and its JSON
 * reply is mapped onto the result; a reply that cannot be decoded is kept as
 * a plain-text answer.
 *
 * @param string $question User question (also used as the search query).
 * @param string $language 'no' selects Norwegian wording; anything else English.
 * @return array Result with 'answer', 'evidence_trail', 'citation_notes', 'trace' and related keys.
 */
public function ask(string $question, string $language = 'en'): array
{
    $search = $this->search($question, $language, 7);
    $sources = $search['hits'];
    $steps = $search['trace'];

    if (!$sources) {
        $steps[] = $this->trace('Synthesis', 'Skipped answer synthesis because no evidence was found.', 'warning');

        if ($language === 'no') {
            $noSupportAnswer = 'Jeg fant ikke nok kildestøtte i familie-rettskorpuset til å svare sikkert.';
        } else {
            $noSupportAnswer = 'I did not find enough source support in the family-law corpus to answer safely.';
        }

        return [
            'tool' => 'ask',
            'language' => $language,
            'answer' => $noSupportAnswer,
            'what_we_found' => $search['what_we_found'],
            'evidence_trail' => [],
            'what_remains_uncertain' => $search['what_remains_uncertain'],
            'next_practical_step' => $search['next_practical_step'],
            'trace' => $steps,
            'trace_metadata' => [
                'chunk_count' => 0,
                'source_count' => 0,
                'deployment' => null,
                'citation_confidence' => 'low',
            ],
            'disclaimer' => dbnToolsDisclaimer($language),
        ];
    }

    $this->azure->requireChat();

    $evidenceText = $this->buildEvidenceContext($sources);
    $answerLanguage = 'English';
    if ($language === 'no') {
        $answerLanguage = 'Norwegian';
    }

    // The closing marker's indentation is stripped from every heredoc line,
    // so the prompt text itself carries no leading whitespace.
    $prompt = <<<PROMPT
    Question:
    {$question}

    Evidence excerpts:
    {$evidenceText}

    Return JSON only with these keys:
    {
    "answer": "short direct answer in {$answerLanguage}",
    "what_we_found": "plain-language summary of the supported finding",
    "evidence_trail": [{"title":"source title","why_it_matters":"one sentence","citation":"visible source title or section"}],
    "what_remains_uncertain": ["specific gaps or caveats"],
    "next_practical_step": "one concrete next action"
    }
    PROMPT;

    $messages = [
        ['role' => 'system', 'content' => $this->legalJsonSystemPrompt($language)],
        ['role' => 'user', 'content' => $prompt],
    ];
    $reply = $this->azure->chatText($messages, [
        'json' => true,
        'temperature' => 0.15,
        'max_tokens' => 1300,
    ]);

    $parsed = $this->azure->decodeJsonObject($reply);
    if ($parsed === null || $parsed === []) {
        // Undecodable (or empty) JSON: surface the raw reply and flag the gap.
        $parsed = [
            'answer' => $reply,
            'what_we_found' => 'Azure returned a plain-text answer based on the retrieved excerpts.',
            'evidence_trail' => [],
            'what_remains_uncertain' => ['The response format could not be validated as structured JSON.'],
            'next_practical_step' => 'Review the source excerpts manually before relying on the answer.',
        ];
    }

    $steps[] = $this->trace('Synthesis', 'Azure OpenAI generated an answer using only the retrieved source excerpts.', 'complete');
    $steps[] = $this->trace('Uncertainty / missing evidence', $this->uncertaintySummary($parsed['what_remains_uncertain'] ?? []), 'complete');
    $steps[] = $this->trace('Next practical step', (string)($parsed['next_practical_step'] ?? 'Review the evidence trail.'), 'complete');

    $sourceCount = count($sources);

    return [
        'tool' => 'ask',
        'language' => $language,
        'answer' => (string)($parsed['answer'] ?? ''),
        'what_we_found' => (string)($parsed['what_we_found'] ?? ''),
        'evidence_trail' => $sources,
        'citation_notes' => $this->normalizeEvidenceTrail($parsed['evidence_trail'] ?? [], $sources),
        'sources' => $sources,
        'what_remains_uncertain' => $parsed['what_remains_uncertain'] ?? [],
        'next_practical_step' => (string)($parsed['next_practical_step'] ?? ''),
        'trace' => $steps,
        'trace_metadata' => [
            'chunk_count' => $sourceCount,
            'source_count' => $sourceCount,
            'deployment' => $this->azure->chatDeployment(),
            'citation_confidence' => $search['trace_metadata']['citation_confidence'] ?? 'medium',
        ],
        'disclaimer' => dbnToolsDisclaimer($language),
    ];
}
|
||||
|
||||
/**
 * Summarises pasted text with the chat deployment.
 *
 * The text is length-checked by requirePasteText() and sent in a single
 * prompt; the structured JSON reply is mapped onto the result.
 *
 * @param string $text     Pasted case-preparation text.
 * @param string $language 'no' requests Norwegian output; anything else English.
 * @return array Result with summary, key facts, dates, parties, detected references and trace.
 */
public function summarize(string $text, string $language = 'en'): array
{
    $text = $this->requirePasteText($text);
    $this->azure->requireChat();

    $outputLanguage = 'English';
    if ($language === 'no') {
        $outputLanguage = 'Norwegian';
    }

    // The closing marker's indentation is stripped from every heredoc line.
    $prompt = <<<PROMPT
    Summarize this pasted case-preparation text in {$outputLanguage}. Do not invent missing facts.

    Pasted text:
    {$text}

    Return JSON only:
    {
    "what_we_found": "plain-language summary",
    "key_facts": ["fact"],
    "dates": ["date or unknown"],
    "parties": ["party or role"],
    "legal_references_detected": ["reference"],
    "what_remains_uncertain": ["uncertainty"],
    "next_practical_step": "one concrete next action"
    }
    PROMPT;

    $parsed = $this->runJsonTool($prompt, $language, 1300);

    $steps = [];
    $steps[] = $this->trace('Query interpretation', 'Summarize pasted text without saving the text or output.', 'complete');
    $steps[] = $this->trace('Search tools used', 'No external corpus search; source is the user-pasted text.', 'complete');
    $steps[] = $this->trace('Evidence found', 'Evidence trail is limited to the pasted text supplied in this request.', 'complete');
    $steps[] = $this->trace('Citation confidence', 'Medium confidence for factual extraction; no external legal source verification was performed.', 'warning');
    $steps[] = $this->trace('Uncertainty / missing evidence', $this->uncertaintySummary($parsed['what_remains_uncertain'] ?? []), 'complete');
    $steps[] = $this->trace('Next practical step', (string)($parsed['next_practical_step'] ?? 'Review the summary against the original text.'), 'complete');

    return [
        'tool' => 'summarize',
        'language' => $language,
        'what_we_found' => (string)($parsed['what_we_found'] ?? ''),
        'key_facts' => $parsed['key_facts'] ?? [],
        'dates' => $parsed['dates'] ?? [],
        'parties' => $parsed['parties'] ?? [],
        'legal_references_detected' => $parsed['legal_references_detected'] ?? [],
        'evidence_trail' => [
            ['title' => 'Pasted text', 'excerpt' => 'Processed in-memory only; not stored.'],
        ],
        'what_remains_uncertain' => $parsed['what_remains_uncertain'] ?? [],
        'next_practical_step' => (string)($parsed['next_practical_step'] ?? ''),
        'trace' => $steps,
        'trace_metadata' => [
            'chunk_count' => 1,
            'source_count' => 1,
            'deployment' => $this->azure->chatDeployment(),
        ],
        'disclaimer' => dbnToolsDisclaimer($language),
    ];
}
|
||||
|
||||
/**
 * Extracts a chronological list of events from pasted text via the chat deployment.
 *
 * @param string $text     Pasted text, length-checked by requirePasteText().
 * @param string $language 'no' requests Norwegian output; anything else English.
 * @return array Result with 'events' (empty list when the reply has no array of events),
 *               'evidence_trail', 'trace' and related keys.
 */
public function timeline(string $text, string $language = 'en'): array
{
    $text = $this->requirePasteText($text);
    $this->azure->requireChat();

    $outputLanguage = 'English';
    if ($language === 'no') {
        $outputLanguage = 'Norwegian';
    }

    // The closing marker's indentation is stripped from every heredoc line.
    $prompt = <<<PROMPT
    Build a chronological timeline from this pasted text in {$outputLanguage}. Keep uncertain dates explicit.

    Pasted text:
    {$text}

    Return JSON only:
    {
    "what_we_found": "short overview",
    "events": [{"date":"YYYY-MM-DD, month/year, or unknown","actor":"actor or unknown","event":"event","source_excerpt":"short excerpt","confidence":"high|medium|low"}],
    "evidence_trail": [{"title":"Pasted text","excerpt":"short relevant excerpt"}],
    "what_remains_uncertain": ["uncertainty"],
    "next_practical_step": "one concrete next action"
    }
    PROMPT;

    $parsed = $this->runJsonTool($prompt, $language, 1600);

    // Anything other than an array of events is treated as "no events".
    $events = $parsed['events'] ?? null;
    if (!is_array($events)) {
        $events = [];
    }
    $eventCount = count($events);

    $steps = [];
    $steps[] = $this->trace('Query interpretation', 'Extract dated events from pasted text without saving the text or output.', 'complete');
    $steps[] = $this->trace('Search tools used', 'No external corpus search; source is the user-pasted text.', 'complete');
    $steps[] = $this->trace('Evidence found', $eventCount . ' event(s) identified.', $eventCount === 0 ? 'warning' : 'complete');
    $steps[] = $this->trace('Citation confidence', 'Confidence is per event and based only on the pasted text.', 'complete');
    $steps[] = $this->trace('Uncertainty / missing evidence', $this->uncertaintySummary($parsed['what_remains_uncertain'] ?? []), 'complete');
    $steps[] = $this->trace('Next practical step', (string)($parsed['next_practical_step'] ?? 'Verify dates against original documents.'), 'complete');

    $defaultTrail = [
        ['title' => 'Pasted text', 'excerpt' => 'Processed in-memory only; not stored.'],
    ];

    return [
        'tool' => 'timeline',
        'language' => $language,
        'what_we_found' => (string)($parsed['what_we_found'] ?? ''),
        'events' => $events,
        'evidence_trail' => $parsed['evidence_trail'] ?? $defaultTrail,
        'what_remains_uncertain' => $parsed['what_remains_uncertain'] ?? [],
        'next_practical_step' => (string)($parsed['next_practical_step'] ?? ''),
        'trace' => $steps,
        'trace_metadata' => [
            'chunk_count' => $eventCount,
            'source_count' => 1,
            'deployment' => $this->azure->chatDeployment(),
        ],
        'disclaimer' => dbnToolsDisclaimer($language),
    ];
}
|
||||
|
||||
/**
 * Redacts sensitive identifiers from pasted text using pattern matching only
 * (no model call is made in this method).
 *
 * @param string $text Pasted text, length-checked by requirePasteText().
 * @param string $mode 'strict' enables the extra strict-mode pattern; any other value means 'standard'.
 * @return array Result with 'redacted_text', per-category 'entity_counts', the list of
 *               categories that matched at least once, and the trace.
 */
public function redact(string $text, string $mode = 'standard'): array
{
    $text = $this->requirePasteText($text);
    if ($mode !== 'strict') {
        $mode = 'standard';
    }

    [$redactedText, $entityCounts] = $this->deterministicRedaction($text, $mode);

    // Categories with at least one replacement, in the order the counters are defined.
    $detected = [];
    foreach ($entityCounts as $category => $count) {
        if ($count > 0) {
            $detected[] = $category;
        }
    }
    $anyDetected = count($detected) > 0;

    $evidenceNote = $anyDetected
        ? 'Detected categories: ' . implode(', ', $detected) . '.'
        : 'No deterministic sensitive categories were detected.';

    $steps = [];
    $steps[] = $this->trace('Query interpretation', 'Detect and redact sensitive identifiers from pasted text.', 'complete');
    $steps[] = $this->trace('Search tools used', 'Deterministic Norwegian privacy patterns first; no text was stored.', 'complete');
    $steps[] = $this->trace('Evidence found', $evidenceNote, $anyDetected ? 'complete' : 'warning');
    $steps[] = $this->trace('Citation confidence', 'High for emails and fødselsnummer-like values; medium for addresses and names.', 'complete');
    $steps[] = $this->trace('Uncertainty / missing evidence', 'Contextual names may need human review, especially in standard mode.', 'warning');
    $steps[] = $this->trace('Next practical step', 'Review the redacted output before sharing it outside the case team.', 'complete');

    return [
        'tool' => 'redact',
        'mode' => $mode,
        'what_we_found' => 'Redacted deterministic privacy patterns from the pasted text.',
        'redacted_text' => $redactedText,
        'detected_entity_categories' => $detected,
        'entity_counts' => $entityCounts,
        'evidence_trail' => [
            ['title' => 'Pasted text', 'excerpt' => 'Processed in-memory only; not stored.'],
        ],
        'what_remains_uncertain' => ['Human review is still needed for names that depend on case context.'],
        'next_practical_step' => 'Review the output and rerun in strict mode if the text will be shared broadly.',
        'trace' => $steps,
        'trace_metadata' => [
            'chunk_count' => 1,
            'source_count' => 1,
            'deployment' => null,
        ],
        'disclaimer' => 'Privacy support tool. Review before disclosure.',
    ];
}
|
||||
|
||||
/**
 * Loads the 'family-legal' package and checks that the client may use it.
 *
 * Calls dbnToolsAbort() with HTTP 503 when the package is missing or inactive
 * ('package_unavailable') or when the client has no active subscription to it
 * ('subscription_missing').
 *
 * @param int $clientId Client whose subscription is checked.
 * @return array The package row as returned by dbnToolsFetchPackage().
 */
private function requireFamilyPackage(int $clientId): array
{
    $package = dbnToolsFetchPackage('family-legal');

    $isUsable = $package && !empty($package['is_active']);
    if (!$isUsable) {
        dbnToolsAbort('The family-legal corpus package is not active.', 503, 'package_unavailable');
    }

    $packageId = (int)$package['id'];
    if (!dbnToolsHasActiveSubscription($clientId, $packageId)) {
        dbnToolsAbort('Dave Jr Legal does not have an active family-legal subscription.', 503, 'subscription_missing');
    }

    return $package;
}
|
||||
|
||||
/**
 * Sends a prompt to the chat deployment in JSON mode and decodes the reply.
 *
 * @param string $prompt    User-role prompt.
 * @param string $language  Passed to legalJsonSystemPrompt() to pick the reply language.
 * @param int    $maxTokens Completion token limit for the request.
 * @return array Decoded JSON reply. An undecodable or empty reply is reported
 *               through dbnToolsAbort() (HTTP 502, 'azure_invalid_json').
 */
private function runJsonTool(string $prompt, string $language, int $maxTokens): array
{
    $messages = [
        ['role' => 'system', 'content' => $this->legalJsonSystemPrompt($language)],
        ['role' => 'user', 'content' => $prompt],
    ];
    $requestOptions = [
        'json' => true,
        'temperature' => 0.1,
        'max_tokens' => $maxTokens,
    ];

    $reply = $this->azure->chatText($messages, $requestOptions);
    $parsed = $this->azure->decodeJsonObject($reply);

    if ($parsed === null || $parsed === []) {
        dbnToolsAbort('Azure OpenAI did not return valid structured JSON.', 502, 'azure_invalid_json');
    }

    return $parsed;
}
|
||||
|
||||
/**
 * Builds the system prompt shared by all model-backed tools.
 *
 * @param string $language 'no' makes the prompt ask for Norwegian; anything else for English.
 * @return string System prompt containing the guardrail list and the JSON-only instruction.
 */
private function legalJsonSystemPrompt(string $language): string
{
    $replyLanguage = 'English';
    if ($language === 'no') {
        $replyLanguage = 'Norwegian';
    }

    // The closing marker's indentation is stripped from every heredoc line.
    return <<<PROMPT
    You are Do Better Norge Legal Tools in a source-grounded legal preparation workflow.
    Use the DBN legal guardrails:
    - Answer only from provided source excerpts or pasted text.
    - Treat your role as legal information and issue-spotting, not final legal advice.
    - Never invent statutes, paragraph numbers, case names, citations, parties, dates, or sources.
    - If evidence is insufficient, say so plainly.
    - Respond in {$replyLanguage}.
    - Return valid JSON only. No markdown fences.
    PROMPT;
}
|
||||
|
||||
/**
 * Renders source hits as numbered plain-text blocks for inclusion in a prompt.
 *
 * Each hit yields a "[n] Title:" line, an optional "Section:" line, a
 * "Corpus/package:" line and an "Excerpt:" line; all lines are joined with "\n".
 *
 * @param array $hits Source hits; reads 'title', 'section', 'package_or_corpus' and 'excerpt'.
 */
private function buildEvidenceContext(array $hits): string
{
    $rendered = [];

    foreach ($hits as $position => $source) {
        $number = $position + 1;

        $block = ["[{$number}] Title: " . ($source['title'] ?? 'Untitled')];
        if (!empty($source['section'])) {
            $block[] = "Section: " . $source['section'];
        }
        $block[] = "Corpus/package: " . ($source['package_or_corpus'] ?? 'unknown');
        $block[] = "Excerpt: " . ($source['excerpt'] ?? '');

        array_push($rendered, ...$block);
    }

    return implode("\n", $rendered);
}
|
||||
|
||||
/**
 * Normalises the model-provided evidence trail.
 *
 * A non-empty array is kept, minus any entries that are not arrays, and
 * re-indexed. Anything else is replaced by notes derived from the first four
 * source hits (title, title as citation, shortened excerpt).
 *
 * @param mixed $trail Value of the reply's 'evidence_trail' key.
 * @param array $hits  Source hits used to build the replacement notes.
 */
private function normalizeEvidenceTrail(mixed $trail, array $hits): array
{
    $hasModelTrail = is_array($trail) && $trail;
    if ($hasModelTrail) {
        return array_values(array_filter($trail, 'is_array'));
    }

    $toNote = fn(array $hit): array => [
        'title' => $hit['title'],
        'citation' => $hit['title'],
        'why_it_matters' => dbnToolsExcerpt($hit['excerpt'], 180),
    ];
    $firstHits = array_slice($hits, 0, 4);

    return array_map($toNote, $firstHits);
}
|
||||
|
||||
/**
 * Maps a retrieval chunk onto the source-hit shape returned to clients.
 *
 * @param array $chunk Row from the pipeline or from the SQL fallback.
 * @return array Hit with title, shortened excerpt (via dbnToolsExcerpt, limit 620),
 *               corpus label, score rounded to 4 decimals (null when the chunk has
 *               no 'similarity'), ids and optional section/authority/jurisdiction.
 */
private function sourceFromChunk(array $chunk): array
{
    $score = null;
    if (isset($chunk['similarity'])) {
        $score = round((float)$chunk['similarity'], 4);
    }

    $documentId = isset($chunk['document_id']) ? (int)$chunk['document_id'] : null;
    $chunkId = isset($chunk['id']) ? (int)$chunk['id'] : null;

    return [
        'title' => (string)($chunk['document_title'] ?? $chunk['title'] ?? 'Untitled source'),
        'excerpt' => dbnToolsExcerpt((string)($chunk['content'] ?? ''), 620),
        'package_or_corpus' => (string)($chunk['source_name'] ?? $chunk['source_type'] ?? 'Dave Jr Legal'),
        'score' => $score,
        'document_id' => $documentId,
        'chunk_id' => $chunkId,
        'section' => $chunk['section_title'] ?? null,
        'authority_type' => $chunk['authority_type'] ?? null,
        'jurisdiction' => $chunk['jurisdiction'] ?? null,
    ];
}
|
||||
|
||||
/**
 * Rates how well a set of hits supports citation.
 *
 * 'low' for no hits; 'high' for at least three hits whose best numeric score
 * is 0.35 or more; 'medium' otherwise.
 *
 * @param array $hits Source hits; reads the optional 'score' of each.
 */
private function citationConfidence(array $hits): string
{
    if ($hits === []) {
        return 'low';
    }

    $rawScores = array_map(fn(array $hit) => $hit['score'] ?? null, $hits);
    $numericScores = array_values(array_filter($rawScores, 'is_numeric'));
    // Hits without a usable score count towards the total but not the best score.
    $topScore = $numericScores ? max($numericScores) : 0;

    $wellSupported = count($hits) >= 3 && $topScore >= 0.35;

    return $wellSupported ? 'high' : 'medium';
}
|
||||
|
||||
/**
 * Direct SQL keyword search used when the retrieval pipeline is unavailable or empty.
 *
 * Private-corpus matches come first; the shared package only fills the slots
 * that remain. A failure in either search is logged and skipped so the other
 * can still contribute.
 *
 * @param int    $clientId Client whose private corpus is searched.
 * @param array  $package  Shared package row passed to fallbackSharedSearch().
 * @param string $query    Raw search text.
 * @param int    $limit    Maximum number of rows returned.
 * @return array Chunk rows, at most $limit.
 */
private function fallbackKeywordSearch(int $clientId, array $package, string $query, int $limit): array
{
    $results = [];

    try {
        $results = $this->fallbackPrivateSearch($clientId, $query, $limit);
    } catch (Throwable $e) {
        error_log('DBN tools private fallback failed: ' . $e->getMessage());
    }

    // Skip the shared query when the private corpus already filled the limit;
    // any shared rows would be discarded by the final slice anyway.
    $remaining = $limit - count($results);
    if ($remaining > 0) {
        try {
            $results = array_merge($results, $this->fallbackSharedSearch($package, $query, $remaining));
        } catch (Throwable $e) {
            error_log('DBN tools shared fallback failed: ' . $e->getMessage());
        }
    }

    return array_slice($results, 0, $limit);
}
|
||||
|
||||
/**
 * LIKE-based keyword search over the client's own ready documents.
 *
 * A chunk matches when any search term occurs in its content or in the title
 * of its document. Rows get a fixed similarity of 0.25 and are labelled as
 * private-corpus sources.
 *
 * @param int    $clientId Owner of the chunks to search.
 * @param string $query    Raw search text; tokenised by searchTerms().
 * @param int    $limit    Maximum number of rows.
 * @return array Chunk rows, or an empty array when the query has no usable terms.
 */
private function fallbackPrivateSearch(int $clientId, string $query, int $limit): array
{
    // Tokenise first so a query without usable terms never opens a connection.
    $terms = $this->searchTerms($query);
    if (!$terms) {
        return [];
    }

    $db = dbnToolsDb();

    $clauses = [];
    $params = [':client_id' => $clientId];
    foreach (array_values($terms) as $i => $term) {
        // A named placeholder may appear only once per statement unless PDO
        // emulates prepares, so content and title each get their own.
        $contentKey = ':content_term' . $i;
        $titleKey = ':title_term' . $i;
        $clauses[] = "(cc.content LIKE {$contentKey} OR cd.title LIKE {$titleKey})";

        $pattern = '%' . $term . '%';
        $params[$contentKey] = $pattern;
        $params[$titleKey] = $pattern;
    }

    // Single-quoted 'ready' is a string literal in every SQL mode; double
    // quotes denote an identifier under ANSI quoting.
    $sql = 'SELECT cc.id, cc.document_id, cc.content, cd.title AS document_title, cd.category
        FROM client_chunks cc
        JOIN client_documents cd ON cc.document_id = cd.id
        WHERE cc.client_id = :client_id AND cd.status = \'ready\' AND (' . implode(' OR ', $clauses) . ')
        LIMIT ' . (int)$limit;

    $stmt = $db->prepare($sql);
    $stmt->execute($params);
    $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);

    // array_map instead of a by-reference foreach: no dangling reference is left behind.
    return array_map(
        static fn(array $row): array => array_merge($row, [
            'similarity' => 0.25,
            'source_name' => 'Dave Jr Legal private corpus',
            'source_type' => 'private',
        ]),
        $rows
    );
}
|
||||
|
||||
/**
 * LIKE-based keyword search over the shared RAG corpus, restricted by the package.
 *
 * Only ready documents are searched. The package's corpus id and its
 * JSON-encoded category and language filters narrow the search when present.
 * Rows get a fixed similarity of 0.2 and are labelled with the package name.
 *
 * @param array  $package Package row; reads 'corpus_id', 'category_filter',
 *                        'language_filter' and 'name'.
 * @param string $query   Raw search text; tokenised by searchTerms().
 * @param int    $limit   Maximum number of rows.
 * @return array Chunk rows, or an empty array when the query has no usable terms.
 */
private function fallbackSharedSearch(array $package, string $query, int $limit): array
{
    // Tokenise first so a query without usable terms never opens a connection.
    $terms = $this->searchTerms($query);
    if (!$terms) {
        return [];
    }

    $ragDb = dbnToolsRagDb();

    // Single-quoted 'ready' is a string literal in every SQL mode.
    $where = ["d.status = 'ready'"];
    $params = [];

    if (!empty($package['corpus_id'])) {
        $where[] = 'd.corpus_id = ?';
        $params[] = (int)$package['corpus_id'];
    }

    // Decodes a JSON filter column into a plain list. A filter that decodes to
    // a scalar is ignored, and string keys are dropped so the values bind
    // cleanly to positional placeholders.
    $decodeFilter = static function (mixed $raw): array {
        $decoded = json_decode((string)($raw ?? '[]'), true);
        return is_array($decoded) ? array_values($decoded) : [];
    };

    $filters = [
        'd.category' => $decodeFilter($package['category_filter'] ?? null),
        'd.language' => $decodeFilter($package['language_filter'] ?? null),
    ];
    foreach ($filters as $column => $allowed) {
        if ($allowed) {
            $where[] = $column . ' IN (' . implode(',', array_fill(0, count($allowed), '?')) . ')';
            $params = array_merge($params, $allowed);
        }
    }

    $termClauses = [];
    foreach ($terms as $term) {
        $termClauses[] = '(c.content LIKE ? OR d.title LIKE ?)';
        $pattern = '%' . $term . '%';
        $params[] = $pattern;
        $params[] = $pattern;
    }
    $where[] = '(' . implode(' OR ', $termClauses) . ')';

    $sql = 'SELECT c.id, c.document_id, c.content, c.section_title, d.title AS document_title,
        d.category, d.language
        FROM chunks c
        JOIN documents d ON c.document_id = d.id
        WHERE ' . implode(' AND ', $where) . '
        LIMIT ' . (int)$limit;

    $stmt = $ragDb->prepare($sql);
    $stmt->execute($params);
    $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);

    $sourceName = (string)($package['name'] ?? 'family-legal');

    // array_map instead of a by-reference foreach: no dangling reference is left behind.
    return array_map(
        static fn(array $row): array => array_merge($row, [
            'similarity' => 0.2,
            'source_name' => $sourceName,
            'source_type' => 'package',
        ]),
        $rows
    );
}
|
||||
|
||||
/**
 * Splits a query into lower-cased keyword terms for LIKE matching.
 *
 * The query is split on anything that is not a letter or digit; tokens
 * shorter than three characters and a small English/Norwegian stop-word list
 * are dropped, duplicates removed, and at most six terms kept.
 *
 * @return list<string>
 */
private function searchTerms(string $query): array
{
    $stopWords = ['the', 'and', 'for', 'with', 'that', 'this', 'hva', 'har', 'kan', 'jeg', 'som', 'det', 'med', 'til', 'og'];

    $lowered = mb_strtolower($query, 'UTF-8');
    $tokens = preg_split('/[^\p{L}\p{N}]+/u', $lowered);
    if (!$tokens) {
        // preg_split() returns false on failure; treat that as "no tokens".
        $tokens = [];
    }

    $isKeyword = static function (string $token) use ($stopWords): bool {
        return mb_strlen($token, 'UTF-8') >= 3 && !in_array($token, $stopWords, true);
    };
    $keywords = array_filter($tokens, $isKeyword);

    return array_slice(array_values(array_unique($keywords)), 0, 6);
}
|
||||
|
||||
/**
 * Validates pasted text and returns it trimmed.
 *
 * Calls dbnToolsAbort() with HTTP 422 when the trimmed text has fewer than 20
 * characters ('text_too_short') or more than MAX_PASTE_CHARS ('text_too_long').
 * Length is counted in UTF-8 characters, not bytes.
 */
private function requirePasteText(string $text): string
{
    $trimmed = trim($text);
    $length = mb_strlen($trimmed, 'UTF-8');

    if ($length < 20) {
        dbnToolsAbort('Paste at least 20 characters of text.', 422, 'text_too_short');
    }
    if ($length > self::MAX_PASTE_CHARS) {
        dbnToolsAbort('Pasted text is too long for the MVP limit.', 422, 'text_too_long');
    }

    return $trimmed;
}
|
||||
|
||||
/**
 * Masks common personal identifiers in text using fixed regular expressions.
 *
 * Passes run in this order: e-mail addresses, 11-digit national identity
 * numbers, 8-digit phone numbers, street addresses, labelled names
 * ("Barn: ...", "Mother: ..."), and names following a child keyword.
 * In 'strict' mode any two consecutive capitalised words are masked as well.
 *
 * @param string $text Text to redact.
 * @param string $mode 'strict' enables the extra capitalised-word-pair pass;
 *                     any other value skips it.
 *
 * @return array{0: string, 1: array<string, int>} The redacted text and the
 *         number of replacements made per category.
 */
private function deterministicRedaction(string $text, string $mode): array
{
    $counts = [
        'email' => 0,
        'phone' => 0,
        'fødselsnummer' => 0,
        'address' => 0,
        'person_or_child_name' => 0,
    ];

    // Replaces every match of $pattern with $token and tallies it under
    // $category. If PCRE fails (null result) the text is left unchanged.
    $replace = function (string $pattern, string $category, string $token) use (&$text, &$counts): void {
        $text = preg_replace_callback($pattern, function () use (&$counts, $category, $token): string {
            $counts[$category]++;
            return $token;
        }, $text) ?? $text;
    };

    $replace('/\b[A-Z0-9._%+\-]+@[A-Z0-9.\-]+\.[A-Z]{2,}\b/i', 'email', '[EMAIL]');
    // Identity numbers go before phone numbers so an 11-digit run is not
    // partially consumed by the 8-digit phone pattern.
    $replace('/(?<!\d)(?:\d{6}[\s\-]?\d{5}|\d{11})(?!\d)/u', 'fødselsnummer', '[FNR]');
    // NOTE(review): this also matches dotted dates such as "12.03.2021"
    // (eight digits with separators) — confirm whether that is acceptable.
    $replace('/(?<!\d)(?:\+47[\s.\-]?)?(?:\d[\s.\-]?){8}(?!\d)/u', 'phone', '[PHONE]');
    $replace('/\b[A-ZÆØÅ][\p{L}æøåÆØÅ\.\- ]{2,40}\s+(?:gate|gata|vei|veien|plass|street|road|avenue|ave)\s+\d+[A-Z]?\b/iu', 'address', '[ADDRESS]');

    // Labelled names: keep the label, mask everything up to the next
    // line break or , . ; character.
    $text = preg_replace_callback(
        '/\b(Barn|Child|Navn|Name|Mor|Far|Mother|Father|Sønn|Datter)\s*:\s*([^\r\n,.;]+)/iu',
        function (array $m) use (&$counts): string {
            $counts['person_or_child_name']++;
            return $m[1] . ': [PERSON]';
        },
        $text
    ) ?? $text;

    // Names following a child keyword. Case-insensitivity is scoped to the
    // keywords with (?i:...): a global /i flag would let [A-ZÆØÅ] match
    // lowercase letters, so ordinary phrases like "child has" were replaced.
    // A name in any case is still accepted after an explicit naming verb
    // (heter/named/called); otherwise the word must start with a capital.
    $replace(
        '/\b(?i:barnet|child|sønn|son|datter|daughter)\s+(?:(?i:heter|named|called)\s+\p{L}[\p{L}\-]{2,}|[A-ZÆØÅ][\p{L}\-]{2,})\b/u',
        'person_or_child_name',
        '[CHILD_IDENTIFIER]'
    );

    if ($mode === 'strict') {
        $replace('/\b[A-ZÆØÅ][\p{L}æøåÆØÅ\-]{2,}\s+[A-ZÆØÅ][\p{L}æøåÆØÅ\-]{2,}\b/u', 'person_or_child_name', '[PERSON]');
    }

    return [$text, $counts];
}
|
||||
|
||||
/**
 * Produces a short, single-string summary of a tool's uncertainty note.
 *
 * Arrays are joined with spaces after casting each element to string; any
 * other value is cast to string. A blank result is replaced by a fixed
 * placeholder sentence, otherwise the text is shortened with
 * dbnToolsExcerpt() using a limit of 220.
 */
private function uncertaintySummary(mixed $uncertainty): string
{
    $flattened = is_array($uncertainty)
        ? implode(' ', array_map('strval', $uncertainty))
        : $uncertainty;

    $summary = trim((string)$flattened);
    if ($summary === '') {
        return 'No additional uncertainty was supplied by the tool.';
    }

    return dbnToolsExcerpt($summary, 220);
}
|
||||
|
||||
/**
 * Builds one trace entry.
 *
 * @return array{label: string, detail: string, status: string}
 */
private function trace(string $label, string $detail, string $status = 'complete'): array
{
    // compact() keeps the argument order, so keys are label, detail, status.
    return compact('label', 'detail', 'status');
}
|
||||
}
|
||||
@@ -0,0 +1,408 @@
|
||||
<?php
|
||||
declare(strict_types=1);
|
||||
|
||||
define('DBN_TOOLS_ROOT', dirname(__DIR__));
|
||||
define('DBN_TOOLS_VERSION', '0.1.0');
|
||||
|
||||
/**
 * Exception carrying the HTTP status, a machine-readable error code and
 * extra response fields for a failed tool request.
 */
final class DbnToolsHttpException extends RuntimeException
{
    /**
     * @param string $message   Human-readable error message.
     * @param int    $status    HTTP status code to respond with.
     * @param string $errorCode Machine-readable error code.
     * @param array  $extra     Additional fields for the error response.
     */
    public function __construct(
        string $message,
        public int $status = 400,
        public string $errorCode = 'bad_request',
        public array $extra = []
    ) {
        parent::__construct($message);
    }
}
|
||||
|
||||
/**
 * Loads KEY=VALUE pairs from a dotenv-style file into the environment.
 *
 * Missing or unreadable files are ignored. Blank lines, lines starting with
 * '#', lines without '=' and lines with an empty key are skipped. A value
 * wrapped in matching single or double quotes has the quotes removed.
 * Variables that already exist in the process environment are not
 * overwritten; new ones are set through putenv() and $_ENV.
 */
function dbnToolsLoadEnv(string $path): void
{
    $usable = is_file($path) && is_readable($path);
    $entries = $usable ? file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) : false;
    if ($entries === false) {
        return;
    }

    foreach ($entries as $entry) {
        $entry = trim($entry);
        if ($entry === '' || $entry[0] === '#') {
            continue;
        }

        // Split on the first '=' only, so values may contain '='.
        $separator = strpos($entry, '=');
        if ($separator === false) {
            continue;
        }

        $name = trim(substr($entry, 0, $separator));
        if ($name === '') {
            continue;
        }

        $raw = trim(substr($entry, $separator + 1));
        $quote = $raw[0] ?? '';
        if (($quote === '"' || $quote === "'") && str_ends_with($raw, $quote)) {
            $raw = substr($raw, 1, -1);
        }

        // The real environment wins over the file.
        if (getenv($name) !== false) {
            continue;
        }

        putenv($name . '=' . $raw);
        $_ENV[$name] = $raw;
    }
}
|
||||
|
||||
dbnToolsLoadEnv(DBN_TOOLS_ROOT . '/.env');
|
||||
|
||||
/**
 * Reads a configuration value from the environment.
 *
 * If the variable "<KEY>_FILE" is set and non-empty, the value is read from
 * that file with trailing CR/LF characters removed. Otherwise the variable
 * itself is used, and an unset or empty variable yields $default.
 *
 * @throws RuntimeException When the "<KEY>_FILE" path cannot be read.
 */
function dbnToolsEnv(string $key, ?string $default = null): ?string
{
    $secretVar = $key . '_FILE';
    $secretPath = getenv($secretVar);

    if (is_string($secretPath) && $secretPath !== '') {
        $contents = @file_get_contents($secretPath);
        if ($contents === false) {
            throw new RuntimeException("Unable to read secret file for {$secretVar}");
        }

        return rtrim($contents, "\r\n");
    }

    $direct = getenv($key);

    return ($direct === false || $direct === '') ? $default : $direct;
}
|
||||
|
||||
/**
 * Reports whether the current request is considered HTTPS.
 *
 * True when $_SERVER['HTTPS'] is non-empty and not "off", or when the
 * X-Forwarded-Proto request header equals "https" (case-insensitive).
 */
function dbnToolsIsHttps(): bool
{
    $https = $_SERVER['HTTPS'] ?? null;
    if (!empty($https) && strtolower((string)$https) !== 'off') {
        return true;
    }

    $forwardedProto = $_SERVER['HTTP_X_FORWARDED_PROTO'] ?? null;

    return $forwardedProto !== null && strtolower((string)$forwardedProto) === 'https';
}
|
||||
|
||||
/**
 * Starts the tools session unless one is already active.
 *
 * Uses the session name "dbn_tools_session" with a session-lifetime,
 * HttpOnly, SameSite=Lax cookie on path "/", marked Secure when
 * dbnToolsIsHttps() is true. After starting, a random anonymous id is
 * stored in the session if none is present.
 */
function dbnToolsStartSession(): void
{
    if (session_status() !== PHP_SESSION_ACTIVE) {
        $cookieParams = [
            'lifetime' => 0,
            'path' => '/',
            'secure' => dbnToolsIsHttps(),
            'httponly' => true,
            'samesite' => 'Lax',
        ];

        session_name('dbn_tools_session');
        session_set_cookie_params($cookieParams);
        session_start();

        if (empty($_SESSION['dbn_tools_anon_id'])) {
            $_SESSION['dbn_tools_anon_id'] = bin2hex(random_bytes(16));
        }
    }
}
|
||||
|
||||
dbnToolsStartSession();
|
||||
|
||||
/**
 * Reports whether the session carries a truthy "dbn_tools_authenticated" flag.
 */
function dbnToolsIsAuthenticated(): bool
{
    $flag = $_SESSION['dbn_tools_authenticated'] ?? false;

    return (bool)$flag;
}
|
||||
|
||||
/**
 * Returns the value of DBN_TOOLS_AUTH_EMAIL as resolved by dbnToolsEnv(),
 * or null when it is not configured.
 */
function dbnToolsAuthEmail(): ?string
{
    $email = dbnToolsEnv('DBN_TOOLS_AUTH_EMAIL');

    return $email;
}
|
||||
|
||||
/**
 * Returns the value of DBN_TOOLS_AUTH_PASSWORD_HASH as resolved by
 * dbnToolsEnv(), or null when it is not configured.
 */
function dbnToolsAuthPasswordHash(): ?string
{
    $hash = dbnToolsEnv('DBN_TOOLS_AUTH_PASSWORD_HASH');

    return $hash;
}
|
||||
|
||||
/**
 * Returns an 18-character identifier derived from the session's anonymous id.
 *
 * The raw id stored under "dbn_tools_anon_id" is created on demand and never
 * returned directly; callers receive the first 18 hex characters of its
 * SHA-256 digest.
 */
function dbnToolsAnonymousSessionId(): string
{
    $rawId = (string)($_SESSION['dbn_tools_anon_id'] ?? '');

    if ($rawId === '') {
        $rawId = bin2hex(random_bytes(16));
        $_SESSION['dbn_tools_anon_id'] = $rawId;
    }

    $digest = hash('sha256', $rawId);

    return substr($digest, 0, 18);
}
|
||||
|
||||
/**
 * Sends a JSON response with the given status code and terminates the script.
 *
 * Invalid UTF-8 sequences inside the payload are replaced with U+FFFD rather
 * than making the encoding fail. If encoding still fails, the failure is
 * logged and a fixed 500 "internal_error" body is sent instead of an empty
 * response.
 *
 * @param array $payload Data to encode as the response body.
 * @param int   $status  HTTP status code.
 */
function dbnToolsRespond(array $payload, int $status = 200): void
{
    $flags = JSON_UNESCAPED_UNICODE
        | JSON_UNESCAPED_SLASHES
        | JSON_PRETTY_PRINT
        | JSON_INVALID_UTF8_SUBSTITUTE;

    $body = json_encode($payload, $flags);
    if ($body === false) {
        // json_encode() returns false on failure; echoing that would send an
        // empty body under the original status code.
        error_log('DBN tools response encoding failed: ' . json_last_error_msg());
        $status = 500;
        $body = '{"ok":false,"error":{"code":"internal_error","message":"The tool could not complete this request."}}';
    }

    http_response_code($status);
    header('Content-Type: application/json; charset=utf-8');
    header('Cache-Control: no-store');
    echo $body;
    exit;
}
|
||||
|
||||
/**
 * Sends a JSON error response through dbnToolsRespond().
 *
 * The body is {"ok": false, "error": {"code", "message"}} merged with
 * $extra; keys in $extra override the defaults on collision.
 */
function dbnToolsError(string $message, int $status = 400, string $code = 'bad_request', array $extra = []): void
{
    $body = [
        'ok' => false,
        'error' => [
            'code' => $code,
            'message' => $message,
        ],
    ];

    dbnToolsRespond(array_merge($body, $extra), $status);
}
|
||||
|
||||
/**
 * Aborts the current operation by throwing a DbnToolsHttpException built
 * from the given arguments.
 *
 * @throws DbnToolsHttpException Always.
 */
function dbnToolsAbort(string $message, int $status = 400, string $code = 'bad_request', array $extra = []): void
{
    $exception = new DbnToolsHttpException($message, $status, $code, $extra);

    throw $exception;
}
|
||||
|
||||
/**
 * Responds with 405 unless the request method matches $method.
 *
 * The comparison is case-insensitive; a missing REQUEST_METHOD counts as GET.
 */
function dbnToolsRequireMethod(string $method): void
{
    $actual = strtoupper($_SERVER['REQUEST_METHOD'] ?? 'GET');
    $expected = strtoupper($method);

    if ($actual === $expected) {
        return;
    }

    dbnToolsError('Method not allowed.', 405, 'method_not_allowed');
}
|
||||
|
||||
/**
 * Responds with 401 "session_required" unless dbnToolsIsAuthenticated()
 * reports an authenticated session.
 */
function dbnToolsRequireAuth(): void
{
    if (dbnToolsIsAuthenticated()) {
        return;
    }

    dbnToolsError('Passcode session required.', 401, 'session_required');
}
|
||||
|
||||
/**
 * Reads the request body and decodes it as a JSON object or array.
 *
 * Sends an error response (through dbnToolsError) when the body cannot be
 * read (400), exceeds $maxBytes bytes (413), or does not decode to an
 * array (400).
 *
 * @param int $maxBytes Maximum accepted body size in bytes.
 *
 * @return array The decoded body as an associative array.
 */
function dbnToolsJsonInput(int $maxBytes = 50000): array
{
    $body = file_get_contents('php://input');
    if ($body === false) {
        dbnToolsError('Unable to read request body.', 400, 'body_unreadable');
    }

    // Byte length on purpose: the limit is on request size, not characters.
    $size = strlen($body);
    if ($size > $maxBytes) {
        dbnToolsError('Request body is too large for this tool.', 413, 'body_too_large');
    }

    $decoded = json_decode($body, true);
    if (!is_array($decoded)) {
        dbnToolsError('Request body must be valid JSON.', 400, 'invalid_json');
    }

    return $decoded;
}
|
||||
|
||||
/**
 * Normalises a language value to "no" or "en".
 *
 * The value is trimmed and lower-cased; anything other than "no" or "en"
 * yields "en". Values that cannot be represented as text (arrays, plain
 * objects, null) also yield "en" instead of triggering a string-conversion
 * warning or Error.
 */
function dbnToolsNormalizeLanguage(mixed $value): string
{
    if (!is_scalar($value) && !($value instanceof Stringable)) {
        return 'en';
    }

    $language = strtolower(trim((string)$value));

    return in_array($language, ['no', 'en'], true) ? $language : 'en';
}
|
||||
|
||||
/**
 * Reads a trimmed string field from decoded request input.
 *
 * Aborts with HTTP 422 (through dbnToolsAbort) when the field holds a
 * non-scalar value, when a required field is missing or blank, or when the
 * value is longer than $maxChars characters.
 *
 * @param array  $input    Decoded request input.
 * @param string $key      Field name to read.
 * @param int    $maxChars Maximum length in characters (UTF-8 aware).
 * @param bool   $required Whether a blank or missing value is an error.
 *
 * @return string The trimmed value; '' for an absent optional field.
 */
function dbnToolsString(array $input, string $key, int $maxChars, bool $required = true): string
{
    $raw = $input[$key] ?? '';

    // Casting an array to string yields the literal text "Array" (plus a
    // warning), which would then be accepted as the field's content.
    if (!is_scalar($raw)) {
        dbnToolsAbort("Field {$key} must be a text value.", 422, 'invalid_field');
    }

    $value = trim((string)$raw);
    if ($required && $value === '') {
        dbnToolsAbort("Missing required field: {$key}.", 422, 'missing_field');
    }
    if (mb_strlen($value, 'UTF-8') > $maxChars) {
        dbnToolsAbort("Field {$key} is too long.", 422, 'field_too_long');
    }

    return $value;
}
|
||||
|
||||
/**
 * Returns the directory used for support files, creating it if needed.
 *
 * Uses DBN_TOOLS_SUPPORT_DIR when set to a non-blank value, otherwise a
 * "dbn-tools" directory under the system temp directory. Creation is
 * best-effort: a failed mkdir() is ignored and the path is returned anyway.
 */
function dbnToolsSupportDir(): string
{
    $configured = dbnToolsEnv('DBN_TOOLS_SUPPORT_DIR');

    $dir = ($configured === null || trim($configured) === '')
        ? rtrim(sys_get_temp_dir(), "\\/") . DIRECTORY_SEPARATOR . 'dbn-tools'
        : $configured;

    if (!is_dir($dir)) {
        @mkdir($dir, 0770, true);
    }

    return $dir;
}
|
||||
|
||||
/**
 * Returns the path of the metadata log file.
 *
 * DBN_TOOLS_METADATA_LOG takes precedence when it resolves to a truthy
 * value; otherwise the path is "metadata.jsonl" inside dbnToolsSupportDir().
 */
function dbnToolsMetadataLogPath(): string
{
    $override = dbnToolsEnv('DBN_TOOLS_METADATA_LOG');
    if ($override) {
        return $override;
    }

    return dbnToolsSupportDir() . DIRECTORY_SEPARATOR . 'metadata.jsonl';
}
|
||||
|
||||
/**
 * Appends one JSON line of request metadata to the metadata log.
 *
 * Only a fixed set of fields is copied from $entry (tool, latency, language,
 * outcome, error code, chunk/source counts, deployment), together with a UTC
 * timestamp and the hashed session id from dbnToolsAnonymousSessionId().
 * Writing is best-effort: failures of file_put_contents() are suppressed.
 *
 * @param array $entry Metadata supplied by the caller; unknown keys are ignored.
 */
function dbnToolsLogMetadata(array $entry): void
{
    $logFile = dbnToolsMetadataLogPath();

    $record = [];
    $record['timestamp'] = gmdate('c');
    $record['session'] = dbnToolsAnonymousSessionId();
    $record['tool'] = (string)($entry['tool'] ?? 'unknown');
    $record['latency_ms'] = (int)($entry['latency_ms'] ?? 0);
    $record['language'] = (string)($entry['language'] ?? '');
    $record['ok'] = (bool)($entry['ok'] ?? false);
    $record['error_code'] = $entry['error_code'] ?? null;
    $record['chunk_count'] = (int)($entry['chunk_count'] ?? 0);
    $record['source_count'] = (int)($entry['source_count'] ?? 0);
    // Fall back to the configured chat deployment when none was reported.
    $record['deployment'] = $entry['deployment'] ?? dbnToolsEnv('DBN_AZURE_OPENAI_CHAT_DEPLOYMENT');

    $line = json_encode($record, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES) . PHP_EOL;

    @file_put_contents($logFile, $line, FILE_APPEND | LOCK_EX);
}
|
||||
|
||||
/**
 * Runs a tool handler, logs metadata about the outcome and sends the response.
 *
 * On success the handler's payload gets "ok" (defaulting to true) and
 * "latency_ms" added before it is logged and sent. A DbnToolsHttpException
 * is logged with its error code and turned into the matching error response.
 * Any other Throwable is logged as "internal_error", written to the PHP
 * error log, and answered with a generic 500.
 *
 * @param string   $tool     Tool name recorded in the metadata log.
 * @param string   $language Language recorded in the metadata log.
 * @param callable $handler  Callback producing the response payload array.
 */
function dbnToolsWithTelemetry(string $tool, string $language, callable $handler): void
{
    $startedAt = microtime(true);
    $elapsedMs = static fn (): int => (int)round((microtime(true) - $startedAt) * 1000);
    $common = ['tool' => $tool, 'language' => $language];

    try {
        $payload = $handler();
        $latency = $elapsedMs();
        $payload['ok'] ??= true;
        $payload['latency_ms'] = $latency;

        dbnToolsLogMetadata($common + [
            'ok' => true,
            'latency_ms' => $latency,
            'chunk_count' => (int)($payload['trace_metadata']['chunk_count'] ?? 0),
            'source_count' => (int)($payload['trace_metadata']['source_count'] ?? 0),
            'deployment' => $payload['trace_metadata']['deployment'] ?? null,
        ]);

        dbnToolsRespond($payload);
    } catch (DbnToolsHttpException $httpError) {
        dbnToolsLogMetadata($common + [
            'ok' => false,
            'latency_ms' => $elapsedMs(),
            'error_code' => $httpError->errorCode,
        ]);

        dbnToolsError($httpError->getMessage(), $httpError->status, $httpError->errorCode, $httpError->extra);
    } catch (Throwable $failure) {
        dbnToolsLogMetadata($common + [
            'ok' => false,
            'latency_ms' => $elapsedMs(),
            'error_code' => 'internal_error',
        ]);

        // The real message goes to the server log only; the client gets a
        // generic message.
        error_log('DBN tools error: ' . $failure->getMessage());
        dbnToolsError('The tool could not complete this request.', 500, 'internal_error');
    }
}
|
||||
|
||||
/**
 * Returns the root directory of the AI portal installation.
 *
 * Uses DBN_AI_PORTAL_ROOT (trailing slashes removed) when it is set to a
 * non-blank value, otherwise an "ai-portal" directory next to DBN_TOOLS_ROOT.
 */
function dbnToolsAiPortalRoot(): string
{
    $configured = dbnToolsEnv('DBN_AI_PORTAL_ROOT');

    if ($configured === null || trim($configured) === '') {
        return dirname(DBN_TOOLS_ROOT) . DIRECTORY_SEPARATOR . 'ai-portal';
    }

    return rtrim($configured, "\\/");
}
|
||||
|
||||
/**
 * Loads the CaveauAI platform include files once per request.
 *
 * Requires admin/includes/db.php and platform/includes/client_rag.php from
 * the portal root, and lib/ai/DbnLegalAgent.php when that file exists.
 * Aborts with 503 "caveau_unavailable" (through dbnToolsAbort) when either
 * of the two mandatory files is missing.
 */
function dbnToolsBootCaveau(): void
{
    static $booted = false;
    if ($booted) {
        return;
    }

    $root = dbnToolsAiPortalRoot();
    $locate = static fn (string ...$segments): string => implode(DIRECTORY_SEPARATOR, [$root, ...$segments]);

    $dbFile = $locate('admin', 'includes', 'db.php');
    $ragFile = $locate('platform', 'includes', 'client_rag.php');
    $agentFile = $locate('lib', 'ai', 'DbnLegalAgent.php');

    if (!(is_file($dbFile) && is_file($ragFile))) {
        dbnToolsAbort('CaveauAI platform files are not available. Check DBN_AI_PORTAL_ROOT.', 503, 'caveau_unavailable');
    }

    require_once $dbFile;
    require_once $ragFile;

    // The agent file is optional.
    if (is_file($agentFile)) {
        require_once $agentFile;
    }

    $booted = true;
}
|
||||
|
||||
/**
 * Returns the CaveauAI database connection from getDb().
 *
 * The platform files are loaded first through dbnToolsBootCaveau(). Any
 * failure inside getDb() is written to the PHP error log and reported to
 * the caller as a 503 "db_unavailable" exception, so the underlying cause
 * stays diagnosable without being exposed in the response.
 *
 * @throws DbnToolsHttpException When the connection cannot be obtained.
 */
function dbnToolsDb(): PDO
{
    dbnToolsBootCaveau();

    try {
        return getDb();
    } catch (Throwable $e) {
        // DbnToolsHttpException cannot carry a previous exception, so log
        // the cause here before replacing it.
        error_log('DBN tools database error: ' . get_class($e) . ': ' . $e->getMessage());

        throw new DbnToolsHttpException('CaveauAI database is not reachable.', 503, 'db_unavailable');
    }
}
|
||||
|
||||
/**
 * Returns the CaveauAI corpus database connection from getRagDb().
 *
 * The platform files are loaded first through dbnToolsBootCaveau(). Any
 * failure inside getRagDb() is written to the PHP error log and reported to
 * the caller as a 503 "rag_db_unavailable" exception, so the underlying
 * cause stays diagnosable without being exposed in the response.
 *
 * @throws DbnToolsHttpException When the connection cannot be obtained.
 */
function dbnToolsRagDb(): PDO
{
    dbnToolsBootCaveau();

    try {
        return getRagDb();
    } catch (Throwable $e) {
        // DbnToolsHttpException cannot carry a previous exception, so log
        // the cause here before replacing it.
        error_log('DBN tools corpus database error: ' . get_class($e) . ': ' . $e->getMessage());

        throw new DbnToolsHttpException('CaveauAI corpus database is not reachable.', 503, 'rag_db_unavailable');
    }
}
|
||||
|
||||
/**
 * Returns the client slug from DBN_CAVEAU_CLIENT_SLUG, falling back to
 * "dave-jr-legal" when the variable resolves to an empty value.
 */
function dbnToolsClientSlug(): string
{
    $slug = dbnToolsEnv('DBN_CAVEAU_CLIENT_SLUG');

    return empty($slug) ? 'dave-jr-legal' : $slug;
}
|
||||
|
||||
/**
 * Fetches the row from "clients" whose slug equals dbnToolsClientSlug().
 *
 * @param PDO|null $db Connection to use; dbnToolsDb() is used when null.
 *
 * @return array|null The row as an associative array, or null when no row matches.
 */
function dbnToolsFetchClient(?PDO $db = null): ?array
{
    $db ??= dbnToolsDb();

    $query = $db->prepare('SELECT * FROM clients WHERE slug = ? LIMIT 1');
    $query->execute([dbnToolsClientSlug()]);
    $client = $query->fetch(PDO::FETCH_ASSOC);

    if (!$client) {
        return null;
    }

    return $client;
}
|
||||
|
||||
/**
 * Returns the configured client row, or aborts with 503 "client_unavailable"
 * (through dbnToolsAbort) when the row is missing or its "is_active" value
 * is empty.
 */
function dbnToolsRequireClient(): array
{
    $client = dbnToolsFetchClient();
    $usable = $client && !empty($client['is_active']);

    if (!$usable) {
        dbnToolsAbort('Dave Jr Legal client tenant is not active or was not found.', 503, 'client_unavailable');
    }

    return $client;
}
|
||||
|
||||
/**
 * Fetches the row from "corpus_packages" with the given slug.
 *
 * @param string   $slug Package slug to look up.
 * @param PDO|null $db   Connection to use; dbnToolsDb() is used when null.
 *
 * @return array|null The row as an associative array, or null when no row matches.
 */
function dbnToolsFetchPackage(string $slug = 'family-legal', ?PDO $db = null): ?array
{
    $db ??= dbnToolsDb();

    $query = $db->prepare('SELECT * FROM corpus_packages WHERE slug = ? LIMIT 1');
    $query->execute([$slug]);
    $package = $query->fetch(PDO::FETCH_ASSOC);

    if (!$package) {
        return null;
    }

    return $package;
}
|
||||
|
||||
/**
 * Reports whether the client has at least one active subscription row for
 * the package in "client_corpus_subscriptions".
 *
 * @param int      $clientId  Client id to match.
 * @param int      $packageId Package id to match.
 * @param PDO|null $db        Connection to use; dbnToolsDb() is used when null.
 */
function dbnToolsHasActiveSubscription(int $clientId, int $packageId, ?PDO $db = null): bool
{
    $db ??= dbnToolsDb();

    $query = $db->prepare(
        'SELECT COUNT(*) FROM client_corpus_subscriptions
         WHERE client_id = ? AND package_id = ? AND is_active = 1'
    );
    $query->execute([$clientId, $packageId]);

    $matches = (int)$query->fetchColumn();

    return $matches > 0;
}
|
||||
|
||||
/**
 * Returns the legal disclaimer text: Norwegian for "no", English for any
 * other language value.
 */
function dbnToolsDisclaimer(string $language): string
{
    return match ($language) {
        'no' => 'Juridisk informasjon og forberedelsesstøtte, ikke endelig juridisk rådgivning.',
        default => 'Legal information and preparation support, not final legal advice.',
    };
}
|
||||
|
||||
/**
 * Produces a plain-text excerpt of at most $limit characters.
 *
 * Tags are stripped, every run of whitespace is collapsed to one space and
 * the result is trimmed. Text longer than $limit is cut to $limit - 1
 * characters (trailing whitespace removed) and an ellipsis is appended.
 *
 * Text containing invalid UTF-8 is scrubbed and processed rather than being
 * silently turned into an empty string. A $limit below 1 yields ''.
 *
 * @param string $text  Source text; may contain markup.
 * @param int    $limit Maximum excerpt length in characters.
 */
function dbnToolsExcerpt(string $text, int $limit = 520): string
{
    if ($limit < 1) {
        // A negative length passed to mb_substr() would return nearly the
        // whole text instead of a short excerpt.
        return '';
    }

    $plain = strip_tags($text);

    $collapsed = preg_replace('/\s+/u', ' ', $plain);
    if ($collapsed === null) {
        // The /u modifier makes preg_replace() fail on malformed UTF-8;
        // replace the bad sequences and try again.
        $collapsed = preg_replace('/\s+/u', ' ', mb_scrub($plain, 'UTF-8')) ?? '';
    }

    $collapsed = trim($collapsed);
    if (mb_strlen($collapsed, 'UTF-8') <= $limit) {
        return $collapsed;
    }

    return rtrim(mb_substr($collapsed, 0, $limit - 1, 'UTF-8')) . '…';
}
|
||||
Reference in New Issue
Block a user