azure = $azure ?: new DbnAzureOpenAiGateway(); }

    /**
     * Keyword search over the client's private corpus plus the family-legal package.
     *
     * Retrieval goes through ClientRagPipeline::searchAll() in keyword mode. If the
     * pipeline throws, or returns nothing, a direct SQL LIKE fallback is used.
     *
     * @param string $query    Search text; trimmed, must be at least 3 characters.
     * @param string $language Language code echoed back and passed to dbnToolsDisclaimer().
     * @param int    $limit    Maximum number of hits; clamped to the range 1..10.
     *
     * @return array<string, mixed> Payload with hits, evidence trail, trace and trace metadata.
     */
    public function search(string $query, string $language = 'en', int $limit = 6): array
    {
        $query = trim($query);
        if (mb_strlen($query, 'UTF-8') < 3) {
            dbnToolsAbort('Search query must be at least 3 characters.', 422, 'query_too_short');
        }
        $limit = max(1, min(10, $limit));

        $trace = [
            $this->trace('Query interpretation', 'Searching Dave Jr Legal private corpus plus the subscribed family-legal package.', 'complete'),
            // Index 1 is overwritten below once the retrieval outcome is known.
            $this->trace('Search tools used', 'ClientRagPipeline::searchAll with keyword mode, private corpus enabled, shared package filter set to family-legal.', 'running'),
        ];

        $client = dbnToolsRequireClient();
        $clientId = (int)$client['id'];
        $package = $this->requireFamilyPackage($clientId);

        $chunks = [];
        $retrievalNote = 'ClientRagPipeline keyword retrieval';
        // Tracks whether the SQL fallback already ran so it is never executed twice.
        $fallbackTried = false;

        try {
            dbnToolsBootCaveau();
            $gatewayUrl = 'http://10.0.1.10:4000';
            try {
                $config = getConfig();
                $configured = trim((string)($config['ai_gateway']['url'] ?? ''));
                if ($configured !== '') {
                    $gatewayUrl = $configured;
                }
            } catch (Throwable $e) {
                // Retrieval still works in keyword mode without gateway config.
            }
            $rag = new ClientRagPipeline($clientId, $gatewayUrl, 30);
            $chunks = $rag->searchAll($query, $limit, null, [
                'search_private' => true,
                'search_shared' => true,
                'package_ids' => [(int)$package['id']],
                'chunk_limit' => $limit,
                'search_method' => 'keyword',
                'min_private' => 0,
                'include_beta_website' => true,
            ]);
        } catch (Throwable $e) {
            // Log only the exception class: the message may echo the user's query,
            // and the trace below promises the query is not stored.
            error_log('DBN tools pipeline retrieval failed: ' . get_class($e));
            $retrievalNote = 'SQL keyword fallback after ClientRagPipeline error';
            $trace[] = $this->trace('Search fallback', 'Pipeline retrieval failed; using direct SQL keyword fallback without storing the query.', 'warning');
            $chunks = $this->fallbackKeywordSearch($clientId, $package, $query, $limit);
            $fallbackTried = true;
        }

        // Pipeline succeeded but found nothing: try the SQL fallback once.
        if (!$chunks && !$fallbackTried) {
            $fallback = $this->fallbackKeywordSearch($clientId, $package, $query, $limit);
            if ($fallback) {
                $chunks = $fallback;
                $retrievalNote = 'SQL keyword fallback';
            }
        }

        $hits = array_map(
            fn(array $chunk): array => $this->sourceFromChunk($chunk),
            array_slice($chunks, 0, $limit)
        );
        $hasHits = count($hits) > 0;
        $confidence = $this->citationConfidence($hits);

        $trace[1] = $this->trace('Search tools used', $retrievalNote . '; returned ' . count($hits) . ' source hit(s).', 'complete');
        $trace[] = $this->trace(
            'Evidence found',
            $hasHits ? 'Retrieved source excerpts for review.' : 'No matching source excerpts were found.',
            $hasHits ? 'complete' : 'warning'
        );
        $trace[] = $this->trace(
            'Citation confidence',
            ucfirst($confidence) . ' confidence based on source count and retrieval scores.',
            $confidence === 'low' ? 'warning' : 'complete'
        );

        return [
            'tool' => 'search',
            'language' => $language,
            'what_we_found' => $hasHits ? 'Found source excerpts from the legal corpus.' : 'No matching source excerpts were found.',
            'hits' => $hits,
            'evidence_trail' => $hits,
            'what_remains_uncertain' => $hasHits
                ? 'Search results still need human review for legal relevance and currentness.'
                : 'The corpus may not contain enough evidence for this query.',
            'next_practical_step' => $hasHits
                ? 'Open the strongest sources and confirm the cited sections before relying on them.'
                : 'Try a narrower query with statutory terms, party names, or dates.',
            'trace' => $trace,
            'trace_metadata' => [
                'chunk_count' => count($chunks),
                'source_count' => count($hits),
                'deployment' => null,
                'citation_confidence' => $confidence,
            ],
            'disclaimer' => dbnToolsDisclaimer($language),
        ];
    }

    /**
     * Answers a question from excerpts returned by search() (limit 7), synthesised via Azure chat.
     *
     * Returns a fixed "not enough support" answer without calling Azure when search()
     * yields no hits. If Azure's reply cannot be decoded as a JSON object, the raw
     * text is returned as the answer with a note that the format was not validated.
     *
     * NOTE(review): the heredoc prompt body and the `$system` assignment are not
     * visible in this view; that statement is left exactly as seen.
     *
     * @param string $question User question, passed to search() as the query.
     * @param string $language 'no' selects Norwegian wording; anything else English.
     *
     * @return array<string, mixed>
     */
    public function ask(string $question, string $language = 'en'): array
    {
        $search = $this->search($question, $language, 7);
        $hits = $search['hits'];
        $trace = $search['trace'];
        if (!$hits) {
            $trace[] = $this->trace('Synthesis', 'Skipped answer synthesis because no evidence was found.', 'warning');
            return [
                'tool' => 'ask',
                'language' => $language,
                'answer' => $language === 'no' ? 'Jeg fant ikke nok kildestøtte i familie-rettskorpuset til å svare sikkert.' : 'I did not find enough source support in the family-law corpus to answer safely.',
                'what_we_found' => $search['what_we_found'],
                'evidence_trail' => [],
                'what_remains_uncertain' => $search['what_remains_uncertain'],
                'next_practical_step' => $search['next_practical_step'],
                'trace' => $trace,
                'trace_metadata' => [
                    'chunk_count' => 0,
                    'source_count' => 0,
                    'deployment' => null,
                    'citation_confidence' => 'low',
                ],
                'disclaimer' => dbnToolsDisclaimer($language),
            ];
        }
        $this->azure->requireChat();
        $context = $this->buildEvidenceContext($hits);
        $locale = $language === 'no' ? 'Norwegian' : 'English';
        $prompt = <<legalJsonSystemPrompt($language);
        $raw = $this->azure->chatText([
            ['role' => 'system', 'content' => $system],
            ['role' => 'user', 'content' => $prompt],
        ], [
            'json' => true,
            'temperature' => 0.15,
            'max_tokens' => 1300,
        ]);
        $json = $this->azure->decodeJsonObject($raw);
        if (!$json) {
            // Keep the plain-text reply rather than failing the request.
            $json = [
                'answer' => $raw,
                'what_we_found' => 'Azure returned a plain-text answer based on the retrieved excerpts.',
                'evidence_trail' => [],
                'what_remains_uncertain' => ['The response format could not be validated as structured JSON.'],
                'next_practical_step' => 'Review the source excerpts manually before relying on the answer.',
            ];
        }
        $trace[] = $this->trace('Synthesis', 'Azure OpenAI generated an answer using only the retrieved source excerpts.', 'complete');
        $trace[] = $this->trace('Uncertainty / missing evidence', $this->uncertaintySummary($json['what_remains_uncertain'] ?? []), 'complete');
        $trace[] = $this->trace('Next practical step', (string)($json['next_practical_step'] ?? 'Review the evidence trail.'), 'complete');
        return [
            'tool' => 'ask',
            'language' => $language,
            'answer' => (string)($json['answer'] ?? ''),
            'what_we_found' => (string)($json['what_we_found'] ?? ''),
            'evidence_trail' => $hits,
            'citation_notes' => $this->normalizeEvidenceTrail($json['evidence_trail'] ?? [], $hits),
            'sources' => $hits,
            'what_remains_uncertain' => $json['what_remains_uncertain'] ?? [],
            'next_practical_step' => (string)($json['next_practical_step'] ?? ''),
            'trace' => $trace,
            'trace_metadata' => [
                'chunk_count' => count($hits),
                'source_count' => count($hits),
                'deployment' => $this->azure->chatDeployment(),
                'citation_confidence' => $search['trace_metadata']['citation_confidence'] ?? 'medium',
            ],
            'disclaimer' => dbnToolsDisclaimer($language),
        ];
    }

    /**
     * Summarises pasted text via runJsonTool(); no corpus search is performed.
     *
     * NOTE(review): the heredoc prompt body and the `$json` assignment target are
     * not visible in this view; that statement is left exactly as seen.
     *
     * @param string $text     Pasted text, validated by requirePasteText().
     * @param string $language 'no' selects Norwegian; anything else English.
     *
     * @return array<string, mixed>
     */
    public function summarize(string $text, string $language = 'en'): array
    {
        $text = $this->requirePasteText($text);
        $this->azure->requireChat();
        $locale = $language === 'no' ? 'Norwegian' : 'English';
        $prompt = <<runJsonTool($prompt, $language, 1300);
        $trace = [
            $this->trace('Query interpretation', 'Summarize pasted text without saving the text or output.', 'complete'),
            $this->trace('Search tools used', 'No external corpus search; source is the user-pasted text.', 'complete'),
            $this->trace('Evidence found', 'Evidence trail is limited to the pasted text supplied in this request.', 'complete'),
            $this->trace('Citation confidence', 'Medium confidence for factual extraction; no external legal source verification was performed.', 'warning'),
            $this->trace('Uncertainty / missing evidence', $this->uncertaintySummary($json['what_remains_uncertain'] ?? []), 'complete'),
            $this->trace('Next practical step', (string)($json['next_practical_step'] ?? 'Review the summary against the original text.'), 'complete'),
        ];
        return [
            'tool' => 'summarize',
            'language' => $language,
            'what_we_found' => (string)($json['what_we_found'] ?? ''),
            'key_facts' => $json['key_facts'] ?? [],
            'dates' => $json['dates'] ?? [],
            'parties' => $json['parties'] ?? [],
            'legal_references_detected' => $json['legal_references_detected'] ?? [],
            'evidence_trail' => [['title' => 'Pasted text', 'excerpt' => 'Processed in-memory only; not stored.']],
            'what_remains_uncertain' => $json['what_remains_uncertain'] ?? [],
            'next_practical_step' => (string)($json['next_practical_step'] ?? ''),
            'trace' => $trace,
            'trace_metadata' => [
                'chunk_count' => 1,
                'source_count' => 1,
                'deployment' => $this->azure->chatDeployment(),
            ],
            'disclaimer' => dbnToolsDisclaimer($language),
        ];
    }

    /**
     * Extracts a list of events from pasted text via runJsonTool().
     *
     * A non-array `events` value in the model reply is treated as no events.
     *
     * NOTE(review): the heredoc prompt body and the `$json` assignment target are
     * not visible in this view; that statement is left exactly as seen.
     *
     * @param string $text     Pasted text, validated by requirePasteText().
     * @param string $language 'no' selects Norwegian; anything else English.
     *
     * @return array<string, mixed>
     */
    public function timeline(string $text, string $language = 'en'): array
    {
        $text = $this->requirePasteText($text);
        $this->azure->requireChat();
        $locale = $language === 'no' ? 'Norwegian' : 'English';
        $prompt = <<runJsonTool($prompt, $language, 1600);
        $events = is_array($json['events'] ?? null) ? $json['events'] : [];
        $trace = [
            $this->trace('Query interpretation', 'Extract dated events from pasted text without saving the text or output.', 'complete'),
            $this->trace('Search tools used', 'No external corpus search; source is the user-pasted text.', 'complete'),
            $this->trace('Evidence found', count($events) . ' event(s) identified.', count($events) ? 'complete' : 'warning'),
            $this->trace('Citation confidence', 'Confidence is per event and based only on the pasted text.', 'complete'),
            $this->trace('Uncertainty / missing evidence', $this->uncertaintySummary($json['what_remains_uncertain'] ?? []), 'complete'),
            $this->trace('Next practical step', (string)($json['next_practical_step'] ?? 'Verify dates against original documents.'), 'complete'),
        ];
        return [
            'tool' => 'timeline',
            'language' => $language,
            'what_we_found' => (string)($json['what_we_found'] ?? ''),
            'events' => $events,
            'evidence_trail' => $json['evidence_trail'] ?? [['title' => 'Pasted text', 'excerpt' => 'Processed in-memory only; not stored.']],
            'what_remains_uncertain' => $json['what_remains_uncertain'] ?? [],
            'next_practical_step' => (string)($json['next_practical_step'] ?? ''),
            'trace' => $trace,
            'trace_metadata' => [
                'chunk_count' => count($events),
                'source_count' => 1,
                'deployment' => $this->azure->chatDeployment(),
            ],
            'disclaimer' => dbnToolsDisclaimer($language),
        ];
    }

    /**
     * Redacts sensitive identifiers from pasted text using deterministicRedaction().
     *
     * No Azure call is made here; `deployment` in the metadata is always null.
     *
     * @param string $text Pasted text, validated by requirePasteText().
     * @param string $mode 'strict' or 'standard'; any other value is treated as 'standard'.
     *
     * @return array<string, mixed> Includes redacted_text, per-category counts and trace.
     */
    public function redact(string $text, string $mode = 'standard'): array
    {
        $text = $this->requirePasteText($text);
        $mode = $mode === 'strict' ? 'strict' : 'standard';
        [$redacted, $entities] = $this->deterministicRedaction($text, $mode);
        // Only categories with at least one replacement are reported as detected.
        $categories = array_keys(array_filter($entities, fn(int $count): bool => $count > 0));
        $trace = [
            $this->trace('Query interpretation', 'Detect and redact sensitive identifiers from pasted text.', 'complete'),
            $this->trace('Search tools used', 'Deterministic Norwegian privacy patterns first; no text was stored.', 'complete'),
            $this->trace(
                'Evidence found',
                count($categories) ? 'Detected categories: ' . implode(', ', $categories) . '.' : 'No deterministic sensitive categories were detected.',
                count($categories) ? 'complete' : 'warning'
            ),
            $this->trace('Citation confidence', 'High for emails and fødselsnummer-like values; medium for addresses and names.', 'complete'),
            $this->trace('Uncertainty / missing evidence', 'Contextual names may need human review, especially in standard mode.', 'warning'),
            $this->trace('Next practical step', 'Review the redacted output before sharing it outside the case team.', 'complete'),
        ];
        return [
            'tool' => 'redact',
            'mode' => $mode,
            'what_we_found' => 'Redacted deterministic privacy patterns from the pasted text.',
            'redacted_text' => $redacted,
            'detected_entity_categories' => $categories,
            'entity_counts' => $entities,
            'evidence_trail' => [['title' => 'Pasted text', 'excerpt' => 'Processed in-memory only; not stored.']],
            'what_remains_uncertain' => ['Human review is still needed for names that depend on case context.'],
            'next_practical_step' => 'Review the output and rerun in strict mode if the text will be shared broadly.',
            'trace' => $trace,
            'trace_metadata' => [
                'chunk_count' => 1,
                'source_count' => 1,
                'deployment' => null,
            ],
            // The literal as seen contains a line break after "tool. "; "\n" keeps the same bytes.
            'disclaimer' => "Privacy support tool. \nReview before disclosure.",
        ];
    }

    /**
     * Loads the 'family-legal' package and checks the client is subscribed to it.
     *
     * Calls dbnToolsAbort() with status 503 when the package is missing or inactive,
     * or when the client has no active subscription.
     *
     * @param int $clientId Client whose subscription is checked.
     *
     * @return array<string, mixed> The package row from dbnToolsFetchPackage().
     */
    private function requireFamilyPackage(int $clientId): array
    {
        $package = dbnToolsFetchPackage('family-legal');
        if (!$package || empty($package['is_active'])) {
            dbnToolsAbort('The family-legal corpus package is not active.', 503, 'package_unavailable');
        }
        if (!dbnToolsHasActiveSubscription($clientId, (int)$package['id'])) {
            dbnToolsAbort('Dave Jr Legal does not have an active family-legal subscription.', 503, 'subscription_missing');
        }
        return $package;
    }

    /**
     * Sends a prompt to Azure chat in JSON mode and returns the decoded object.
     *
     * Calls dbnToolsAbort() with status 502 when the reply does not decode to a
     * non-empty JSON object.
     *
     * @param string $prompt    User-role message content.
     * @param string $language  Passed to legalJsonSystemPrompt() for the system message.
     * @param int    $maxTokens Value for the 'max_tokens' option.
     *
     * @return array<string, mixed>
     */
    private function runJsonTool(string $prompt, string $language, int $maxTokens): array
    {
        $raw = $this->azure->chatText([
            ['role' => 'system', 'content' => $this->legalJsonSystemPrompt($language)],
            ['role' => 'user', 'content' => $prompt],
        ], [
            'json' => true,
            'temperature' => 0.1,
            'max_tokens' => $maxTokens,
        ]);
        $json = $this->azure->decodeJsonObject($raw);
        if (!$json) {
            dbnToolsAbort('Azure OpenAI did not return valid structured JSON.', 502, 'azure_invalid_json');
        }
        return $json;
    }

    /*
     * NOTE(review): in this view the heredoc returned by legalJsonSystemPrompt() and the
     * opening of the following method (called as buildEvidenceContext() from ask()) are
     * missing, so the two definitions appear fused. The text is left exactly as seen.
     * The visible loop builds one numbered "[n] Title / Section / Corpus / Excerpt"
     * group of lines per hit and joins them with newlines.
     */
    private function legalJsonSystemPrompt(string $language): string
    {
        $locale = $language === 'no' ? 'Norwegian' : 'English';
        return << $hit) {
            $n = $idx + 1;
            $lines[] = "[{$n}] Title: " . ($hit['title'] ?? 'Untitled');
            if (!empty($hit['section'])) {
                $lines[] = "Section: " . $hit['section'];
            }
            $lines[] = "Corpus/package: " . ($hit['package_or_corpus'] ?? 'unknown');
            $lines[] = "Excerpt: " . ($hit['excerpt'] ?? '');
        }
        return implode("\n", $lines);
    }

    /**
     * Produces the citation notes returned by ask().
     *
     * Keeps only the array entries of the model-supplied trail. If nothing usable
     * remains (not an array, empty, or no array entries), notes are derived from
     * the first four hits instead, so the caller never gets an empty list while
     * hits exist.
     *
     * @param mixed                            $trail Model-supplied `evidence_trail` value.
     * @param array<int, array<string, mixed>> $hits  Hits as built by sourceFromChunk().
     *
     * @return array<int, array<string, mixed>>
     */
    private function normalizeEvidenceTrail(mixed $trail, array $hits): array
    {
        $notes = is_array($trail) ? array_values(array_filter($trail, 'is_array')) : [];
        if ($notes) {
            return $notes;
        }
        return array_map(fn(array $hit): array => [
            'title' => $hit['title'],
            'citation' => $hit['title'],
            'why_it_matters' => dbnToolsExcerpt($hit['excerpt'], 180),
        ], array_slice($hits, 0, 4));
    }

    /**
     * Maps a retrieval chunk row to the source-hit shape used in responses.
     *
     * @param array<string, mixed> $chunk Row from the pipeline or the SQL fallback.
     *
     * @return array<string, mixed> Title, excerpt (via dbnToolsExcerpt, 620), origin, score and ids.
     */
    private function sourceFromChunk(array $chunk): array
    {
        $title = (string)($chunk['document_title'] ?? $chunk['title'] ?? 'Untitled source');
        $score = isset($chunk['similarity']) ? round((float)$chunk['similarity'], 4) : null;
        return [
            'title' => $title,
            'excerpt' => dbnToolsExcerpt((string)($chunk['content'] ?? ''), 620),
            'package_or_corpus' => (string)($chunk['source_name'] ?? $chunk['source_type'] ?? 'Dave Jr Legal'),
            'score' => $score,
            'document_id' => isset($chunk['document_id']) ? (int)$chunk['document_id'] : null,
            'chunk_id' => isset($chunk['id']) ? (int)$chunk['id'] : null,
            'section' => $chunk['section_title'] ?? null,
            'authority_type' => $chunk['authority_type'] ?? null,
            'jurisdiction' => $chunk['jurisdiction'] ?? null,
        ];
    }

    /**
     * Rates citation confidence from hit count and best score.
     *
     * 'low' for no hits; 'high' for at least 3 hits with a best numeric score of
     * 0.35 or more; otherwise 'medium'.
     *
     * @param array<int, array<string, mixed>> $hits Hits as built by sourceFromChunk().
     */
    private function citationConfidence(array $hits): string
    {
        if (!$hits) {
            return 'low';
        }
        $scores = array_values(array_filter(array_map(fn(array $h) => $h['score'] ?? null, $hits), 'is_numeric'));
        $best = $scores ? max($scores) : 0;
        // $hits is non-empty here, so anything short of 'high' is 'medium'.
        return (count($hits) >= 3 && $best >= 0.35) ? 'high' : 'medium';
    }

    /**
     * SQL LIKE fallback: private corpus first, then the shared package for the remainder.
     *
     * Each source is best-effort: a failure is logged and the other source is still used.
     *
     * @param int                  $clientId Client whose private chunks are searched.
     * @param array<string, mixed> $package  Package row used to filter the shared search.
     * @param string               $query    Raw query; tokenised by searchTerms().
     * @param int                  $limit    Maximum number of rows returned.
     *
     * @return array<int, array<string, mixed>>
     */
    private function fallbackKeywordSearch(int $clientId, array $package, string $query, int $limit): array
    {
        $results = [];
        try {
            $results = array_merge($results, $this->fallbackPrivateSearch($clientId, $query, $limit));
        } catch (Throwable $e) {
            error_log('DBN tools private fallback failed: ' . $e->getMessage());
        }

        // Skip the shared query when private hits already fill the limit; any row
        // it returned would be sliced away below.
        $remaining = $limit - count($results);
        if ($remaining > 0) {
            try {
                $results = array_merge($results, $this->fallbackSharedSearch($package, $query, $remaining));
            } catch (Throwable $e) {
                error_log('DBN tools shared fallback failed: ' . $e->getMessage());
            }
        }
        return array_slice($results, 0, $limit);
    }

    /**
     * LIKE search over the client's own chunks and document titles (status "ready").
     *
     * Rows are tagged with a fixed similarity of 0.25 and a private source label.
     *
     * @return array<int, array<string, mixed>> Empty when the query yields no search terms.
     */
    private function fallbackPrivateSearch(int $clientId, string $query, int $limit): array
    {
        $db = dbnToolsDb();
        $terms = $this->searchTerms($query);
        if (!$terms) {
            return [];
        }
        $clauses = [];
        $params = [':client_id' => $clientId];
        foreach ($terms as $i => $term) {
            // One placeholder per occurrence: PDO rejects a repeated named marker
            // when emulated prepares are disabled.
            $contentKey = ':term_c' . $i;
            $titleKey = ':term_t' . $i;
            $clauses[] = "(cc.content LIKE {$contentKey} OR cd.title LIKE {$titleKey})";
            $like = '%' . $term . '%';
            $params[$contentKey] = $like;
            $params[$titleKey] = $like;
        }
        $sql = 'SELECT cc.id, cc.document_id, cc.content, cd.title AS document_title, cd.category FROM client_chunks cc JOIN client_documents cd ON cc.document_id = cd.id WHERE cc.client_id = :client_id AND cd.status = "ready" AND (' . implode(' OR ', $clauses) . ') LIMIT ' . (int)$limit;
        $stmt = $db->prepare($sql);
        $stmt->execute($params);
        $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);
        foreach ($rows as &$row) {
            $row['similarity'] = 0.25;
            $row['source_name'] = 'Dave Jr Legal private corpus';
            $row['source_type'] = 'private';
        }
        unset($row); // Break the reference left by the by-reference loop.
        return $rows;
    }

    /**
     * LIKE search over shared chunks and document titles (status "ready").
     *
     * Applies the package's corpus_id, category_filter and language_filter when set.
     * Rows are tagged with a fixed similarity of 0.2 and the package name.
     *
     * @return array<int, array<string, mixed>> Empty when the query yields no search terms.
     */
    private function fallbackSharedSearch(array $package, string $query, int $limit): array
    {
        $ragDb = dbnToolsRagDb();
        $terms = $this->searchTerms($query);
        if (!$terms) {
            return [];
        }
        $where = ['d.status = "ready"'];
        $params = [];
        if (!empty($package['corpus_id'])) {
            $where[] = 'd.corpus_id = ?';
            $params[] = (int)$package['corpus_id'];
        }
        $cats = json_decode((string)($package['category_filter'] ?? '[]'), true) ?: [];
        if ($cats) {
            $where[] = 'd.category IN (' . implode(',', array_fill(0, count($cats), '?')) . ')';
            $params = array_merge($params, $cats);
        }
        $langs = json_decode((string)($package['language_filter'] ?? '[]'), true) ?: [];
        if ($langs) {
            $where[] = 'd.language IN (' . implode(',', array_fill(0, count($langs), '?')) . ')';
            $params = array_merge($params, $langs);
        }
        $termClauses = [];
        foreach ($terms as $term) {
            $termClauses[] = '(c.content LIKE ? OR d.title LIKE ?)';
            $params[] = '%' . $term . '%';
            $params[] = '%' . $term . '%';
        }
        $where[] = '(' . implode(' OR ', $termClauses) . ')';
        $sql = 'SELECT c.id, c.document_id, c.content, c.section_title, d.title AS document_title, d.category, d.language FROM chunks c JOIN documents d ON c.document_id = d.id WHERE ' . implode(' AND ', $where) . ' LIMIT ' . (int)$limit;
        $stmt = $ragDb->prepare($sql);
        $stmt->execute($params);
        $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);
        foreach ($rows as &$row) {
            $row['similarity'] = 0.2;
            $row['source_name'] = (string)($package['name'] ?? 'family-legal');
            $row['source_type'] = 'package';
        }
        unset($row); // Break the reference left by the by-reference loop.
        return $rows;
    }

    /**
     * Tokenises a query into at most six distinct lowercase search terms.
     *
     * Splits on anything that is not a Unicode letter or digit, then drops tokens
     * shorter than 3 characters and a small English/Norwegian stop-word list.
     *
     * @return array<int, string>
     */
    private function searchTerms(string $query): array
    {
        $parts = preg_split('/[^\p{L}\p{N}]+/u', mb_strtolower($query, 'UTF-8')) ?: [];
        $stop = ['the', 'and', 'for', 'with', 'that', 'this', 'hva', 'har', 'kan', 'jeg', 'som', 'det', 'med', 'til', 'og'];
        $terms = [];
        foreach ($parts as $part) {
            if (mb_strlen($part, 'UTF-8') < 3 || in_array($part, $stop, true)) {
                continue;
            }
            $terms[] = $part;
        }
        return array_slice(array_values(array_unique($terms)), 0, 6);
    }

    /**
     * Trims pasted text and enforces the length bounds.
     *
     * Calls dbnToolsAbort() with status 422 when the text is shorter than 20
     * characters or longer than self::MAX_PASTE_CHARS.
     *
     * @return string The trimmed text.
     */
    private function requirePasteText(string $text): string
    {
        $text = trim($text);
        if (mb_strlen($text, 'UTF-8') < 20) {
            dbnToolsAbort('Paste at least 20 characters of text.', 422, 'text_too_short');
        }
        if (mb_strlen($text, 'UTF-8') > self::MAX_PASTE_CHARS) {
            dbnToolsAbort('Pasted text is too long for the MVP limit.', 422, 'text_too_long');
        }
        return $text;
    }

    /*
     * Pattern-based redaction: each $replace() call substitutes a token for every
     * match of a pattern and counts the matches under a category.
     *
     * NOTE(review): everything after the email pattern, including the start of the
     * following method (called as trace() elsewhere), is missing from this view;
     * the final line is left exactly as seen. How $mode is used is not visible here.
     */
    private function deterministicRedaction(string $text, string $mode): array
    {
        $counts = [
            'email' => 0,
            'phone' => 0,
            'fødselsnummer' => 0,
            'address' => 0,
            'person_or_child_name' => 0,
        ];
        // Mutates $text and $counts in place via the captured references.
        $replace = function (string $pattern, string $category, string $token) use (&$text, &$counts): void {
            $text = preg_replace_callback($pattern, function () use (&$counts, $category, $token): string {
                $counts[$category]++;
                return $token;
            }, $text) ?? $text; // preg failure returns null: keep the text unchanged.
        };
        $replace('/\b[A-Z0-9._%+\-]+@[A-Z0-9.\-]+\.[A-Z]{2,}\b/i', 'email', '[EMAIL]');
        $replace('/(? $label, 'detail' => $detail, 'status' => $status, ]; } }