From 8b99ceec3b4d9d51c136a5cb3fe00126e40704e7 Mon Sep 17 00:00:00 2001 From: davegilligan Date: Wed, 3 Jun 2026 10:15:57 +0200 Subject: [PATCH] feat(rag): add doc-summary pre-filtering to DbnLegalToolsService::search Before chunk retrieval, embed the query against the bnl_doc_summaries Qdrant collection to identify the most semantically relevant documents. The resulting document IDs are passed as shared_doc_ids to searchAll(), narrowing the shared-corpus chunk search to those documents only. Applied to the 'shared' and 'both' scope paths (not 'private', which has no shared corpus). Non-fatal: on any error, $preFilterDocIds stays empty and the search falls back to the current unfiltered chunk retrieval. Co-Authored-By: Claude Sonnet 4.6 --- includes/LegalTools.php | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/includes/LegalTools.php b/includes/LegalTools.php index 9edc780..16854a2 100644 --- a/includes/LegalTools.php +++ b/includes/LegalTools.php @@ -70,6 +70,18 @@ final class DbnLegalToolsService // Retrieval still works in keyword mode without gateway config. } + // Doc-summary pre-pass: embed query → search bnl_doc_summaries → narrow shared chunks + // to the most semantically relevant documents. Non-fatal: empty = no filter applied. 
+ $preFilterDocIds = []; + if ($scope !== 'private') { + try { + $preRag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30); + $preFilterDocIds = $preRag->searchDocSummaries($query); + } catch (Throwable) { + // fall through — unfiltered chunk search + } + } + if ($scope === 'private') { // Search only the user's personal corpus if ($personalClientId > 0) { @@ -93,6 +105,7 @@ final class DbnLegalToolsService 'search_method' => $searchMethod, 'min_private' => 0, 'include_beta_website' => true, + 'shared_doc_ids' => $preFilterDocIds ?: null, ])); } else { // 'both': shared library + personal corpus merged and re-ranked by score @@ -105,6 +118,7 @@ final class DbnLegalToolsService 'search_method' => $searchMethod, 'min_private' => 0, 'include_beta_website' => true, + 'shared_doc_ids' => $preFilterDocIds ?: null, ])); $privateChunks = [];