Exclude dobetternorge.no docs from all corpus search modes
BM25: adds a `d.source_url NOT LIKE ?` filter to the SQL WHERE clause in both the FULLTEXT path and the LIKE fallback path. Hybrid + Vector: post-filters the hits array by source_url after the results are returned. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,8 @@ $language = dbnToolsNormalizeLanguage($input['language'] ?? 'en');
|
|||||||
$limit = max(1, min(20, (int)($input['limit'] ?? 8)));
|
$limit = max(1, min(20, (int)($input['limit'] ?? 8)));
|
||||||
$category = isset($input['category']) && $input['category'] !== '' ? trim((string)$input['category']) : null;
|
$category = isset($input['category']) && $input['category'] !== '' ? trim((string)$input['category']) : null;
|
||||||
|
|
||||||
|
const EXCLUDED_DOMAIN = 'dobetternorge.no';
|
||||||
|
|
||||||
if (mb_strlen($query, 'UTF-8') < 3) {
|
if (mb_strlen($query, 'UTF-8') < 3) {
|
||||||
dbnToolsError('Query must be at least 3 characters.', 422, 'query_too_short');
|
dbnToolsError('Query must be at least 3 characters.', 422, 'query_too_short');
|
||||||
}
|
}
|
||||||
@@ -33,6 +35,7 @@ try {
|
|||||||
'source_url' => $h['source_url'] ?? null,
|
'source_url' => $h['source_url'] ?? null,
|
||||||
'language' => null,
|
'language' => null,
|
||||||
], $result['hits'] ?? []);
|
], $result['hits'] ?? []);
|
||||||
|
$hits = array_values(array_filter($hits, fn($h) => !str_contains($h['source_url'] ?? '', EXCLUDED_DOMAIN)));
|
||||||
dbnToolsRespond(['ok' => true, 'hits' => $hits, 'mode' => 'hybrid', 'query' => $query]);
|
dbnToolsRespond(['ok' => true, 'hits' => $hits, 'mode' => 'hybrid', 'query' => $query]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -41,6 +44,7 @@ try {
|
|||||||
// ── BM25: FULLTEXT with LIKE fallback ───────────────────────────────────
|
// ── BM25: FULLTEXT with LIKE fallback ───────────────────────────────────
|
||||||
if ($mode === 'bm25') {
|
if ($mode === 'bm25') {
|
||||||
$catClause = $category !== null ? ' AND d.category = ?' : '';
|
$catClause = $category !== null ? ' AND d.category = ?' : '';
|
||||||
|
$excludeLike = '%' . EXCLUDED_DOMAIN . '%';
|
||||||
|
|
||||||
// Try FULLTEXT index first
|
// Try FULLTEXT index first
|
||||||
try {
|
try {
|
||||||
@@ -52,10 +56,11 @@ try {
|
|||||||
JOIN documents d ON c.document_id = d.id
|
JOIN documents d ON c.document_id = d.id
|
||||||
WHERE d.corpus_id = ? AND d.status = 'ready'
|
WHERE d.corpus_id = ? AND d.status = 'ready'
|
||||||
AND MATCH(c.content) AGAINST (? IN BOOLEAN MODE) > 0
|
AND MATCH(c.content) AGAINST (? IN BOOLEAN MODE) > 0
|
||||||
|
AND d.source_url NOT LIKE ?
|
||||||
$catClause
|
$catClause
|
||||||
ORDER BY score DESC
|
ORDER BY score DESC
|
||||||
LIMIT ?";
|
LIMIT ?";
|
||||||
$params = [$query, 1, $query];
|
$params = [$query, 1, $query, $excludeLike];
|
||||||
if ($category !== null) $params[] = $category;
|
if ($category !== null) $params[] = $category;
|
||||||
$params[] = $limit;
|
$params[] = $limit;
|
||||||
$stmt = $ragDb->prepare($sql);
|
$stmt = $ragDb->prepare($sql);
|
||||||
@@ -72,10 +77,11 @@ try {
|
|||||||
JOIN documents d ON c.document_id = d.id
|
JOIN documents d ON c.document_id = d.id
|
||||||
WHERE d.corpus_id = ? AND d.status = 'ready'
|
WHERE d.corpus_id = ? AND d.status = 'ready'
|
||||||
AND (c.content LIKE ? OR d.title LIKE ?)
|
AND (c.content LIKE ? OR d.title LIKE ?)
|
||||||
|
AND d.source_url NOT LIKE ?
|
||||||
$catClause
|
$catClause
|
||||||
ORDER BY (d.title LIKE ?) DESC
|
ORDER BY (d.title LIKE ?) DESC
|
||||||
LIMIT ?";
|
LIMIT ?";
|
||||||
$params = [1, $like, $like];
|
$params = [1, $like, $like, $excludeLike];
|
||||||
if ($category !== null) $params[] = $category;
|
if ($category !== null) $params[] = $category;
|
||||||
$params[] = $like;
|
$params[] = $like;
|
||||||
$params[] = $limit;
|
$params[] = $limit;
|
||||||
@@ -152,6 +158,7 @@ try {
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$hits = array_values(array_filter($hits, fn($h) => !str_contains($h['source_url'] ?? '', EXCLUDED_DOMAIN)));
|
||||||
dbnToolsRespond(['ok' => true, 'hits' => $hits, 'mode' => 'vector', 'query' => $query]);
|
dbnToolsRespond(['ok' => true, 'hits' => $hits, 'mode' => 'vector', 'query' => $query]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user