feat(rag): add doc-summary pre-filtering to DbnLegalToolsService::search
Before chunk retrieval, embed the query and search the bnl_doc_summaries Qdrant collection to identify the most semantically relevant documents. The resulting document IDs are passed as shared_doc_ids to searchAll(), narrowing the shared-corpus chunk search to those documents only. Applied to the 'shared' and 'both' scope paths (not 'private', which has no shared corpus). Non-fatal: on any error, preFilterDocIds stays empty and the search falls back to the existing unfiltered chunk retrieval. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -70,6 +70,18 @@ final class DbnLegalToolsService
|
|||||||
// Retrieval still works in keyword mode without gateway config.
|
// Retrieval still works in keyword mode without gateway config.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Doc-summary pre-pass: embed query → search bnl_doc_summaries → narrow shared chunks
|
||||||
|
// to the most semantically relevant documents. Non-fatal: empty = no filter applied.
|
||||||
|
$preFilterDocIds = [];
|
||||||
|
if ($scope !== 'private') {
|
||||||
|
try {
|
||||||
|
$preRag = new ClientRagPipeline((int)$client['id'], $gatewayUrl, 30);
|
||||||
|
$preFilterDocIds = $preRag->searchDocSummaries($query);
|
||||||
|
} catch (Throwable) {
|
||||||
|
// fall through — unfiltered chunk search
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ($scope === 'private') {
|
if ($scope === 'private') {
|
||||||
// Search only the user's personal corpus
|
// Search only the user's personal corpus
|
||||||
if ($personalClientId > 0) {
|
if ($personalClientId > 0) {
|
||||||
@@ -93,6 +105,7 @@ final class DbnLegalToolsService
|
|||||||
'search_method' => $searchMethod,
|
'search_method' => $searchMethod,
|
||||||
'min_private' => 0,
|
'min_private' => 0,
|
||||||
'include_beta_website' => true,
|
'include_beta_website' => true,
|
||||||
|
'shared_doc_ids' => $preFilterDocIds ?: null,
|
||||||
]));
|
]));
|
||||||
} else {
|
} else {
|
||||||
// 'both': shared library + personal corpus merged and re-ranked by score
|
// 'both': shared library + personal corpus merged and re-ranked by score
|
||||||
@@ -105,6 +118,7 @@ final class DbnLegalToolsService
|
|||||||
'search_method' => $searchMethod,
|
'search_method' => $searchMethod,
|
||||||
'min_private' => 0,
|
'min_private' => 0,
|
||||||
'include_beta_website' => true,
|
'include_beta_website' => true,
|
||||||
|
'shared_doc_ids' => $preFilterDocIds ?: null,
|
||||||
]));
|
]));
|
||||||
|
|
||||||
$privateChunks = [];
|
$privateChunks = [];
|
||||||
|
|||||||
Reference in New Issue
Block a user