feat(tools): persona-driven multi-domain corpus + model routing

Generalize the family-locked legal tools into caveauAI persona profiles
(client 57 chat profiles, resolved in-process via the chat_profiles bridge).
Each tool accepts an optional `profile` slug that scopes the corpus package(s),
search method, system prompt and synthesis model; omitting it falls back to the
family-legal package so existing behaviour is unchanged.

- Add dbnToolsResolvePersona / dbnToolsListPersonas / dbnToolsBootChatProfiles to
  bootstrap.php; add new api/personas.php + dbn.list_personas MCP tool.
- LegalTools search/ask/corpusContextForSummarize and the BvjAnalyzer /
  LegalAnalysis / translate paths take the persona's packages + prompt + model.
- Persona <select> on ask/search/summarize (populated from api/personas.php).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-01 20:49:58 +02:00
parent 5a0ef89dca
commit 662fbf7d6d
16 changed files with 404 additions and 58 deletions
+69 -30
View File
@@ -23,7 +23,8 @@ final class DbnLegalToolsService
int $limit = 6,
string $temporalMode = 'disabled',
?string $asOfDate = null,
string $scope = 'both'
string $scope = 'both',
?string $persona = null
): array {
$query = trim($query);
if (mb_strlen($query, 'UTF-8') < 3) {
@@ -44,7 +45,11 @@ final class DbnLegalToolsService
];
$client = dbnToolsRequireClient();
$package = $this->requireFamilyPackage((int)$client['id']);
$personaResolved = dbnToolsResolvePersona((int)$client['id'], $persona);
$package = $personaResolved['package'] ?? $this->requireFamilyPackage((int)$client['id']);
$packageIds = $personaResolved['package_ids'] ?: [(int)$package['id']];
$personaRagOpts = is_array($personaResolved['rag_opts'] ?? null) ? $personaResolved['rag_opts'] : [];
$searchMethod = (string)($personaResolved['search_method'] ?? 'keyword') ?: 'keyword';
// Personal corpus client_id from session (may be 0 if user has no linked workspace)
$personalClientId = (int)($_SESSION['dbn_tools_client_id'] ?? 0);
@@ -68,50 +73,50 @@ final class DbnLegalToolsService
// Search only the user's personal corpus
if ($personalClientId > 0) {
$rag = new ClientRagPipeline($personalClientId, $gatewayUrl, 30);
$chunks = $rag->searchAll($query, $limit, null, [
$chunks = $rag->searchAll($query, $limit, null, array_merge($personaRagOpts, [
'search_private' => true,
'search_shared' => false,
'chunk_limit' => $limit,
'search_method' => 'keyword',
'search_method' => $searchMethod,
'min_private' => 0,
]);
]));
}
} elseif ($scope === 'shared') {
// Search only the shared legal library
// Search only the shared legal library (persona-scoped packages)
$rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30);
$chunks = $rag->searchAll($query, $limit, null, [
$chunks = $rag->searchAll($query, $limit, null, array_merge($personaRagOpts, [
'search_private' => true,
'search_shared' => true,
'package_ids' => [(int)$package['id']],
'package_ids' => $packageIds,
'chunk_limit' => $limit,
'search_method' => 'keyword',
'search_method' => $searchMethod,
'min_private' => 0,
'include_beta_website' => true,
]);
]));
} else {
// 'both': shared library + personal corpus merged and re-ranked by score
$rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30);
$sharedChunks = $rag->searchAll($query, $limit, null, [
$sharedChunks = $rag->searchAll($query, $limit, null, array_merge($personaRagOpts, [
'search_private' => true,
'search_shared' => true,
'package_ids' => [(int)$package['id']],
'package_ids' => $packageIds,
'chunk_limit' => $limit,
'search_method' => 'keyword',
'search_method' => $searchMethod,
'min_private' => 0,
'include_beta_website' => true,
]);
]));
$privateChunks = [];
if ($personalClientId > 0) {
try {
$ragPrivate = new ClientRagPipeline($personalClientId, $gatewayUrl, 30);
$privateChunks = $ragPrivate->searchAll($query, $limit, null, [
$privateChunks = $ragPrivate->searchAll($query, $limit, null, array_merge($personaRagOpts, [
'search_private' => true,
'search_shared' => false,
'chunk_limit' => $limit,
'search_method' => 'keyword',
'search_method' => $searchMethod,
'min_private' => 0,
]);
]));
} catch (Throwable $e) {
error_log('[search] personal corpus query failed for client ' . $personalClientId . ': ' . $e->getMessage());
}
@@ -183,15 +188,19 @@ final class DbnLegalToolsService
'source_count' => count($hits),
'deployment' => null,
'citation_confidence' => $confidence,
'persona' => $personaResolved['slug'] ?? null,
'persona_source' => $personaResolved['source'] ?? null,
],
'disclaimer' => dbnToolsDisclaimer($language),
];
}
public function ask(string $question, string $language = 'en', string $engine = 'azure_mini'): array
public function ask(string $question, string $language = 'en', string $engine = 'azure_mini', ?string $persona = null): array
{
$engine = in_array($engine, ['azure_mini', 'azure_full'], true) ? $engine : 'azure_mini';
$search = $this->search($question, $language, 7);
$client = dbnToolsRequireClient();
$personaResolved = dbnToolsResolvePersona((int)$client['id'], $persona);
$search = $this->search($question, $language, 7, 'disabled', null, 'both', $personaResolved['slug']);
$hits = $search['hits'];
$trace = $search['trace'];
@@ -221,7 +230,8 @@ final class DbnLegalToolsService
];
}
$this->azure->requireChat();
[$gateway, $personaModel] = $this->personaGateway($personaResolved, $engine);
$gateway->requireChat();
$context = $this->buildEvidenceContext($hits);
$locale = dbnToolsLanguageName($language);
@@ -242,9 +252,14 @@ Return JSON only with these keys:
}
PROMPT;
// Persona voice/domain prepended to the JSON-enforcing scaffold (keeps the
// structured-output contract while applying the persona's legal framing).
$system = $this->legalJsonSystemPrompt($language);
$askDeployment = ($engine === 'azure_full') ? 'gpt-4o' : 'gpt-4o-mini';
$raw = $this->azure->withDeployment($askDeployment)->chatText([
if (!empty($personaResolved['system_prompt'])) {
$system = $personaResolved['system_prompt'] . "\n\n" . $system;
}
$askDeployment = $personaModel;
$raw = $gateway->withDeployment($askDeployment)->chatText([
['role' => 'system', 'content' => $system],
['role' => 'user', 'content' => $prompt],
], [
@@ -253,7 +268,7 @@ PROMPT;
'max_tokens' => 1300,
]);
$json = $this->azure->decodeJsonObject($raw);
$json = $gateway->decodeJsonObject($raw);
if (!$json) {
$json = [
'answer' => $raw,
@@ -1156,6 +1171,26 @@ PROMPT;
return $package;
}
/**
 * Choose the chat gateway and model/deployment name used to synthesize an
 * answer for a resolved persona.
 *
 * Resolution order:
 *  1. The persona carries a non-blank `model` value: build a DbnBedrockGateway
 *     configured with that name as `chat_model_name` and return it together
 *     with the model name.
 *     NOTE(review): the original note says this path routes via LiteLLM so any
 *     model registered on the gateway is reachable — not verifiable from this
 *     method alone; confirm against DbnBedrockGateway.
 *  2. No model is pinned, or constructing the gateway throws: use the
 *     service's Azure gateway with `gpt-4o` when $engine is `azure_full`,
 *     otherwise `gpt-4o-mini`. A construction failure is written to the error
 *     log and is not rethrown.
 *
 * @param array  $persona Resolved persona data; only the optional `model` key is read here.
 * @param string $engine  Engine selector; only `azure_full` changes the fallback deployment.
 *
 * @return array{0: DbnAzureOpenAiGateway|DbnBedrockGateway, 1: string} Gateway and model name, in that order.
 */
private function personaGateway(array $persona, string $engine): array
{
    $pinnedModel = trim((string)($persona['model'] ?? ''));

    if ($pinnedModel !== '') {
        try {
            $pinnedGateway = new DbnBedrockGateway(['chat_model_name' => $pinnedModel]);

            return [$pinnedGateway, $pinnedModel];
        } catch (Throwable $initError) {
            // Best effort: log the failure and fall through to the Azure default below.
            error_log('[dbn-persona] gateway init failed for model ' . $pinnedModel . ': ' . $initError->getMessage());
        }
    }

    $azureDeployment = 'gpt-4o-mini';
    if ($engine === 'azure_full') {
        $azureDeployment = 'gpt-4o';
    }

    return [$this->azure, $azureDeployment];
}
private function runJsonTool(string $prompt, string $language, int $maxTokens): array
{
$raw = $this->azure->chatText([
@@ -1726,11 +1761,15 @@ PROMPT;
* Search the shared legal corpus and return top-N passages as a formatted
* context string. Returns '' on failure so the caller can degrade gracefully.
*/
public function corpusContextForSummarize(string $query, int $limit = 8): string
public function corpusContextForSummarize(string $query, int $limit = 8, ?string $persona = null): string
{
try {
$client = dbnToolsRequireClient();
$package = $this->requireFamilyPackage((int)$client['id']);
$client = dbnToolsRequireClient();
$personaResolved = dbnToolsResolvePersona((int)$client['id'], $persona);
$package = $personaResolved['package'] ?? $this->requireFamilyPackage((int)$client['id']);
$packageIds = $personaResolved['package_ids'] ?: [(int)$package['id']];
$searchMethod = (string)($personaResolved['search_method'] ?? 'keyword') ?: 'keyword';
$personaRagOpts = is_array($personaResolved['rag_opts'] ?? null) ? $personaResolved['rag_opts'] : [];
dbnToolsBootCaveau();
$gatewayUrl = 'http://10.0.1.10:4000';
try {
@@ -1739,15 +1778,15 @@ PROMPT;
if ($u !== '') $gatewayUrl = $u;
} catch (Throwable) {}
$rag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 20);
$chunks = $rag->searchAll($query, $limit, null, [
$chunks = $rag->searchAll($query, $limit, null, array_merge($personaRagOpts, [
'search_private' => true,
'search_shared' => true,
'package_ids' => [(int)$package['id']],
'package_ids' => $packageIds,
'chunk_limit' => $limit,
'search_method' => 'keyword',
'search_method' => $searchMethod,
'min_private' => 0,
'include_beta_website' => true,
]);
]));
$parts = [];
foreach ($chunks as $c) {
$title = (string)($c['title'] ?? ($c['source'] ?? 'Legal source'));