search($query, $language, $limit, 'disabled', null);

        // Normalise the hybrid hits into the response shape shared by every
        // search mode. `language` is always reported as null in this branch.
        $hits = array_map(fn($h) => [
            'title'       => $h['title'] ?? '',
            'category'    => $h['category'] ?? '',
            'section'     => $h['section'] ?? null,
            'excerpt'     => $h['excerpt'] ?? ($h['chunk_text'] ?? ''),
            'score'       => $h['score'] ?? null,
            'document_id' => $h['document_id'] ?? null,
            'chunk_id'    => $h['chunk_id'] ?? null,
            'source_url'  => $h['source_url'] ?? null,
            'language'    => null,
        ], $result['hits'] ?? []);

        // Drop hits whose source URL contains the excluded domain; array_values()
        // re-indexes so the JSON output stays a list.
        $hits = array_values(array_filter(
            $hits,
            fn($h) => !str_contains($h['source_url'] ?? '', EXCLUDED_DOMAIN)
        ));

        dbnToolsRespond(['ok' => true, 'hits' => $hits, 'mode' => 'hybrid', 'query' => $query]);
    }

    $ragDb = dbnToolsRagDb();

    // ── BM25: FULLTEXT with LIKE fallback ───────────────────────────────────
    if ($mode === 'bm25') {
        $catClause   = $category !== null ? ' AND d.category = ?' : '';
        $excludeLike = '%' . EXCLUDED_DOMAIN . '%';

        // LIMIT is interpolated into the SQL text, so force it to an integer
        // here regardless of what validation happened upstream.
        $sqlLimit = (int)$limit;

        // Try FULLTEXT index first
        try {
            $sql = "SELECT d.id AS document_id, d.title, d.category, d.source_url, "
                . "c.id AS chunk_id, c.content AS excerpt, c.section_title AS section, d.language, "
                . "MATCH(c.content) AGAINST (? IN BOOLEAN MODE) AS score "
                . "FROM chunks c JOIN documents d ON c.document_id = d.id "
                . "WHERE d.corpus_id = ? AND d.status = 'ready' "
                . "AND MATCH(c.content) AGAINST (? IN BOOLEAN MODE) > 0 "
                . "AND d.source_url NOT LIKE ? $catClause "
                . "ORDER BY score DESC LIMIT $sqlLimit";

            // Placeholder order: score MATCH, corpus id, filter MATCH, excluded
            // domain, then the optional category.
            $params = [$query, 1, $query, $excludeLike];
            if ($category !== null) {
                $params[] = $category;
            }

            $stmt = $ragDb->prepare($sql);
            $stmt->execute($params);
            $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);
        } catch (Throwable $e) {
            // FULLTEXT index absent — use LIKE.
            // Escape the LIKE wildcards AND the escape character itself; strtr()
            // works in a single pass, so inserted backslashes are never
            // re-escaped. Without the backslash rule a query such as `50\`
            // would turn the trailing wildcard into a literal `%`.
            $like = '%' . strtr($query, ['\\' => '\\\\', '%' => '\\%', '_' => '\\_']) . '%';

            $sql = "SELECT d.id AS document_id, d.title, d.category, d.source_url, "
                . "c.id AS chunk_id, c.content AS excerpt, c.section_title AS section, d.language, "
                . "0.25 AS score "
                . "FROM chunks c JOIN documents d ON c.document_id = d.id "
                . "WHERE d.corpus_id = ? AND d.status = 'ready' "
                . "AND (c.content LIKE ? OR d.title LIKE ?) "
                . "AND d.source_url NOT LIKE ? \n"
                . "$catClause ORDER BY (d.title LIKE ?) DESC LIMIT $sqlLimit";

            // Placeholder order: corpus id, content LIKE, title LIKE, excluded
            // domain, optional category, then the title LIKE used for ordering
            // (title matches sort first).
            $params = [1, $like, $like, $excludeLike];
            if ($category !== null) {
                $params[] = $category;
            }
            $params[] = $like;

            $stmt = $ragDb->prepare($sql);
            $stmt->execute($params);
            $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);
        }

        // Shape rows for the response; excerpts are capped at 600 characters
        // and scores rounded to 4 decimals.
        $hits = array_map(fn($r) => [
            'title'       => $r['title'] ?? '',
            'category'    => $r['category'] ?? '',
            'section'     => $r['section'] ?? null,
            'excerpt'     => mb_substr((string)($r['excerpt'] ?? ''), 0, 600, 'UTF-8'),
            'score'       => isset($r['score']) ? round((float)$r['score'], 4) : null,
            'document_id' => (int)$r['document_id'],
            'chunk_id'    => isset($r['chunk_id']) ? (int)$r['chunk_id'] : null,
            'source_url'  => $r['source_url'] ?? null,
            'language'    => $r['language'] ?? null,
        ], $rows);

        dbnToolsRespond(['ok' => true, 'hits' => $hits, 'mode' => 'bm25', 'query' => $query]);
    }

    // ── VECTOR: embed → Qdrant ─────────────────────────────────────────────
    if ($mode === 'vector') {
        $embeddings = dbnToolsLiteLLMEmbedBatch([$query]);
        if (empty($embeddings) || !is_array($embeddings[0])) {
            dbnToolsError('Embedding failed — vector search unavailable.', 502, 'embed_error');
        }

        // Restrict to corpus 1 and, when requested, to a single category.
        $filter = ['must' => [['key' => 'corpus_id', 'match' => ['value' => 1]]]];
        if ($category !== null) {
            $filter['must'][] = ['key' => 'category', 'match' => ['value' => $category]];
        }

        // JSON_THROW_ON_ERROR: an unencodable payload raises instead of
        // silently posting `false`; the outer catch reports it as a 500.
        $qdrantPayload = json_encode([
            'vector'       => $embeddings[0],
            'limit'        => $limit,
            'with_payload' => true,
            'filter'       => $filter,
        ], JSON_THROW_ON_ERROR);

        $ch = curl_init('http://10.0.2.10:6333/collections/bnl_chunks/points/search');
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_POST           => true,
            CURLOPT_POSTFIELDS     => $qdrantPayload,
            CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
            CURLOPT_TIMEOUT        => 15,
        ]);
        $resp       = curl_exec($ch);
        $curlErr    = curl_error($ch);
        $httpStatus = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        if ($resp === false) {
            dbnToolsError('Qdrant unreachable: ' . $curlErr, 502, 'qdrant_error');
        }

        // A non-2xx status or an undecodable body is an upstream failure and
        // must not be reported as a successful search with zero hits.
        $qdrantResult = json_decode($resp, true);
        if ($httpStatus < 200 || $httpStatus >= 300 || !is_array($qdrantResult)) {
            dbnToolsError('Qdrant search failed (HTTP ' . $httpStatus . ').', 502, 'qdrant_error');
        }

        $points = $qdrantResult['result'] ?? [];
        if (!is_array($points)) {
            $points = [];
        }

        $hits = [];
        foreach ($points as $pt) {
            $p = $pt['payload'] ?? [];
            $hits[] = [
                'title'       => $p['title'] ?? $p['document_title'] ?? '',
                'category'    => $p['category'] ?? '',
                'section'     => $p['section_title'] ?? null,
                'excerpt'     => mb_substr((string)($p['content'] ?? ''), 0, 600, 'UTF-8'),
                'score'       => round((float)($pt['score'] ?? 0), 4),
                'document_id' => isset($p['document_id']) ? (int)$p['document_id'] : null,
                'chunk_id'    => $pt['id'] ?? null,
                'source_url'  => $p['source_url'] ?? null,
                'language'    => $p['language'] ?? null,
            ];
        }

        // The excluded domain is filtered after the Qdrant query, so fewer than
        // $limit hits may be returned.
        $hits = array_values(array_filter(
            $hits,
            fn($h) => !str_contains($h['source_url'] ?? '', EXCLUDED_DOMAIN)
        ));

        dbnToolsRespond(['ok' => true, 'hits' => $hits, 'mode' => 'vector', 'query' => $query]);
    }

    // Reached only when no mode branch above responded.
    // NOTE(review): this relies on dbnToolsRespond()/dbnToolsError() not
    // returning — confirm against their definitions.
    dbnToolsError('Unknown search mode.', 422, 'invalid_mode');
} catch (DbnToolsHttpException $e) {
    // Let HTTP-level exceptions pass through untouched so they are not
    // re-wrapped as a generic 500 by the handler below.
    throw $e;
} catch (Throwable $e) {
    dbnToolsError('Corpus search failed: ' . $e->getMessage(), 500, 'search_error');
}