getMessage(), $e->status, $e->errorCode);
}
// NOTE(review): the beginning of this script is not visible here; the two lines above are the
// tail of a statement that starts earlier and are kept token-for-token.

$clientId = (int)$tenant['client_id'];
$corpusId = (int)$tenant['corpus_id'];
$userId = (int)($tenant['client_user_id'] ?? 0);
$tenantRole = (string)($tenant['role'] ?? 'editor');

dbnToolsBootCaveau();
$db = getDb();

// A multipart body carries a file upload; every other content type is read as a JSON payload.
$contentType = (string)($_SERVER['CONTENT_TYPE'] ?? '');
$isMultipart = stripos($contentType, 'multipart/form-data') === 0;

try {
    if ($isMultipart) {
        $result = handleFileUpload($db, $clientId, $corpusId, $userId, $tenantRole);
    } else {
        // presumably the argument is the maximum accepted body size in bytes; verify against dbnToolsJsonInput
        $input = dbnToolsJsonInput(2_500_000);
        $kind = (string)($input['kind'] ?? 'text');
        $result = match ($kind) {
            'text' => handleTextPaste($db, $clientId, $corpusId, $userId, $tenantRole, $input),
            'url' => handleUrlImport($db, $clientId, $corpusId, $userId, $tenantRole, $input),
            default => dbnToolsError('Unknown kind: ' . $kind, 400, 'unknown_kind'),
        };
    }
} catch (DbnToolsHttpException $e) {
    dbnToolsError($e->getMessage(), $e->status, $e->errorCode, $e->extra);
} catch (Throwable $e) {
    dbnToolsError('Upload failed: ' . $e->getMessage(), 500, 'upload_failed');
}

dbnToolsRespond($result, 201);

/**
 * Handles a multipart file upload.
 *
 * Requires $_FILES['file'], checks write access to the optional target folder, extracts the
 * text with dbnToolsExtractUploadedFile(), and for PDFs that yield fewer than 200 characters
 * tries OCR and keeps the OCR text when it is longer. The assembled document is passed to
 * handleCollisionAndIngest().
 *
 * @return array Result array produced by handleCollisionAndIngest().
 */
function handleFileUpload(PDO $db, int $clientId, int $corpusId, int $userId, string $tenantRole): array
{
    if (empty($_FILES['file'])) {
        dbnToolsError('No file uploaded.', 400, 'missing_file');
    }

    $folderId = resolveFolderId($_POST['folder_id'] ?? null);
    $versionAction = trim((string)($_POST['version_action'] ?? ''));
    if ($folderId !== null && !dbnDmsUserCanAccessFolder($folderId, 'write', $clientId, $userId, $tenantRole)) {
        dbnToolsError('You do not have permission to upload here.', 403, 'forbidden_dest');
    }

    $tmpPath = (string)($_FILES['file']['tmp_name'] ?? '');
    $extract = dbnToolsExtractUploadedFile($_FILES['file']);
    $text = (string)$extract['text'];
    $filename = (string)$extract['filename'];
    $ext = strtolower(pathinfo($filename, PATHINFO_EXTENSION));

    $sourceType = match ($ext) {
        'pdf' => 'pdf',
        'docx' => 'docx',
        'xlsx' => 'xlsx',
        'pptx' => 'pptx',
        'html', 'htm' => 'html',
        'csv' => 'csv',
        'md' => 'markdown',
        default => 'text',
    };

    // A PDF with almost no extractable text is treated as a possible scan: try OCR and keep
    // its output only when it is longer than what the extractor returned.
    $importMethod = 'dbn_upload';
    if (mb_strlen($text, 'UTF-8') < 200 && $ext === 'pdf') {
        $ocrText = tryOcrPdf($tmpPath);
        if ($ocrText !== null && mb_strlen($ocrText, 'UTF-8') > mb_strlen($text, 'UTF-8')) {
            $text = $ocrText;
            $importMethod = 'ocr_scan';
        }
    }

    $title = trim((string)($_POST['title'] ?? '')) ?: pathinfo($filename, PATHINFO_FILENAME);
    $category = sanitizeCategory((string)($_POST['category'] ?? 'uncategorized'));
    $tags = sanitizeTagsCsv((string)($_POST['tags'] ?? ''));
    $author = trim((string)($_POST['author'] ?? '')) ?: null;
    $language = trim((string)($_POST['language'] ?? 'no')) ?: 'no';

    $doc = [
        'title' => $title,
        'source_type' => $sourceType,
        'content' => $text,
        'category' => $category,
        'tags' => $tags,
        'author' => $author,
        'language' => $language,
        'import_method' => $importMethod,
        'original_filename' => $filename,
        'file_size_bytes' => (int)($_FILES['file']['size'] ?? 0),
        'source_tool' => 'dashboard-upload',
        'folder_id' => $folderId,
        // Underscore-prefixed keys are not stored as columns; they let the persist step copy
        // the original upload to storage.
        '_tmp_path' => $tmpPath,
        '_ext' => $ext,
    ];

    return handleCollisionAndIngest($db, $clientId, $corpusId, $userId, $tenantRole, $doc, $versionAction);
}

/**
 * Handles pasted text sent as JSON (kind "text").
 *
 * Requires a non-empty title and content of 30 to 2,000,000 characters, checks write access
 * to the optional target folder, then passes the document to handleCollisionAndIngest().
 *
 * @param array $input Decoded JSON request body.
 * @return array Result array produced by handleCollisionAndIngest().
 */
function handleTextPaste(PDO $db, int $clientId, int $corpusId, int $userId, string $tenantRole, array $input): array
{
    $title = trim((string)($input['title'] ?? ''));
    $content = trim((string)($input['content'] ?? ''));

    if ($title === '') {
        dbnToolsError('title is required.', 400, 'missing_title');
    }
    if (mb_strlen($content, 'UTF-8') < 30) {
        dbnToolsError('content too short (min 30 chars).', 400, 'content_too_short');
    }
    if (mb_strlen($content, 'UTF-8') > 2_000_000) {
        dbnToolsError('content exceeds 2 MB.', 400, 'content_too_large');
    }

    $folderId = resolveFolderId($input['folder_id'] ?? null);
    $versionAction = trim((string)($input['version_action'] ?? ''));
    if ($folderId !== null && !dbnDmsUserCanAccessFolder($folderId, 'write', $clientId, $userId, $tenantRole)) {
        dbnToolsError('You do not have permission to upload here.', 403, 'forbidden_dest');
    }

    $doc = [
        'title' => $title,
        'source_type' => 'text',
        'content' => $content,
        'category' => sanitizeCategory((string)($input['category'] ?? 'uncategorized')),
        'tags' => sanitizeTagsCsv((string)($input['tags'] ?? '')),
        'author' => trim((string)($input['author'] ?? '')) ?: null,
        'language' => trim((string)($input['language'] ?? 'no')) ?: 'no',
        'import_method' => 'manual',
        'source_tool' => 'dashboard-paste',
        'folder_id' => $folderId,
    ];

    return handleCollisionAndIngest($db, $clientId, $corpusId, $userId, $tenantRole, $doc, $versionAction);
}

/**
 * Registers a URL for later ingest (kind "url").
 *
 * Validates the URL (http/https only), checks write access to the optional target folder and
 * inserts a 'pending' client_documents row with empty content. Nothing is fetched here.
 *
 * @param array $input Decoded JSON request body.
 * @return array{ok: bool, document_id: int, status: string, chunks: int, note: string}
 */
function handleUrlImport(PDO $db, int $clientId, int $corpusId, int $userId, string $tenantRole, array $input): array
{
    $url = trim((string)($input['url'] ?? ''));
    $title = trim((string)($input['title'] ?? ''));

    // FILTER_VALIDATE_URL always requires a scheme. The former FILTER_FLAG_SCHEME_REQUIRED
    // constant no longer exists in PHP 8, so referencing it raised an Error on every request.
    if ($url === '' || filter_var($url, FILTER_VALIDATE_URL) === false) {
        dbnToolsError('Valid URL is required.', 400, 'invalid_url');
    }
    $scheme = strtolower((string)parse_url($url, PHP_URL_SCHEME));
    if (!in_array($scheme, ['http', 'https'], true)) {
        dbnToolsError('URL must use http or https.', 400, 'invalid_scheme');
    }
    if ($title === '') {
        $title = $url;
    }

    $folderId = resolveFolderId($input['folder_id'] ?? null);
    if ($folderId !== null && !dbnDmsUserCanAccessFolder($folderId, 'write', $clientId, $userId, $tenantRole)) {
        dbnToolsError('You do not have permission to upload here.', 403, 'forbidden_dest');
    }

    $stmt = $db->prepare(
        " INSERT INTO client_documents (client_id, corpus_id, folder_id, title, source_type, source_url,"
        . " content, category, tags, language, import_method, source_tool, status)"
        . " VALUES (?, ?, ?, ?, 'url', ?, '', ?, ?, ?, 'url', 'dashboard-url', 'pending') "
    );
    $stmt->execute([
        $clientId,
        $corpusId,
        $folderId,
        $title,
        $url,
        sanitizeCategory((string)($input['category'] ?? 'uncategorized')),
        sanitizeTagsCsv((string)($input['tags'] ?? '')),
        trim((string)($input['language'] ?? 'no')) ?: 'no',
    ]);
    $docId = (int)$db->lastInsertId();

    dbnDmsLogAudit($clientId, $userId ?: null, 'upload', ['mode' => 'url', 'url' => $url], $docId, $folderId);

    return [
        'ok' => true,
        'document_id' => $docId,
        'status' => 'pending',
        'chunks' => 0,
        'note' => 'URL queued for background ingest.',
    ];
}

/**
 * Title collision detection inside the same folder; dispatches to insert/replace per action.
 *
 * - 'force_separate': no lookup; the title gets a numeric suffix via uniqueTitle() and a new
 *   row is inserted.
 * - 'replace' / 'new': when a non-deleted document with the same title (case-insensitive)
 *   exists in the folder, it is updated through replaceAsVersion().
 * - anything else (including ''): an existing title yields a 409 'title_collision' error.
 *   Previously an unrecognised action fell through and silently created a duplicate.
 * When no collision exists the document is inserted as-is.
 *
 * @param array $doc Document fields assembled by the upload/paste handlers.
 * @return array Result array from replaceAsVersion() or persistAndIngest().
 */
function handleCollisionAndIngest(PDO $db, int $clientId, int $corpusId, int $userId, string $tenantRole, array $doc, string $versionAction): array
{
    if ($versionAction === 'force_separate') {
        $doc['title'] = uniqueTitle($db, $clientId, $doc['folder_id'], $doc['title']);

        return persistAndIngest($db, $clientId, $corpusId, $userId, $doc);
    }

    // `<=>` is the NULL-safe comparison, so documents without a folder match a null folder id.
    $check = $db->prepare(
        "SELECT id FROM client_documents WHERE client_id = ? AND (folder_id <=> ?) AND LOWER(title) = LOWER(?) \n"
        . "AND deleted_at IS NULL ORDER BY id DESC LIMIT 1"
    );
    $check->execute([$clientId, $doc['folder_id'], $doc['title']]);
    $existingId = (int)$check->fetchColumn();

    if ($existingId > 0) {
        if (in_array($versionAction, ['replace', 'new'], true)) {
            return replaceAsVersion($db, $clientId, $userId, $existingId, $doc, $versionAction);
        }

        dbnToolsError(
            'A document with this title already exists in the target folder.',
            409,
            'title_collision',
            ['collision' => true, 'existing_id' => $existingId, 'options' => ['replace', 'new', 'force_separate']]
        );
    }

    return persistAndIngest($db, $clientId, $corpusId, $userId, $doc);
}

/**
 * Overwrites an existing document with new content and re-indexes it.
 *
 * Steps: snapshot the current state with dbnDmsSnapshotVersion(), update the row (status
 * 'pending', storage_path cleared, version bumped), store the uploaded file when one is
 * present, delete the document's chunks, and run ClientRagPipeline::ingestDocument().
 *
 * @param array $doc Document fields; '_tmp_path' and '_ext' are only present for file uploads.
 * @return array On success: ok, document_id, version_number, chunks, status 'ready',
 *               collision_resolved. On indexing failure: ok=false, status 'error' and an
 *               'error' entry with code 'index_failed'.
 */
function replaceAsVersion(PDO $db, int $clientId, int $userId, int $existingId, array $doc, string $versionAction): array
{
    // Snapshot current → versions
    $newVer = dbnDmsSnapshotVersion($existingId, $clientId, $userId, "Replaced via {$versionAction}");

    // Prepared and tenant-scoped instead of interpolating the id into the SQL text.
    $verStmt = $db->prepare('SELECT current_version FROM client_documents WHERE id = ? AND client_id = ?');
    $verStmt->execute([$existingId, $clientId]);
    $current = (int)$verStmt->fetchColumn();
    $nextVer = max($current + 1, $newVer + 1);

    // Update with new content
    $stmt = $db->prepare(
        "UPDATE client_documents SET title=?, source_type=?, content=?, category=?, tags=?, author=?, language=?,"
        . " import_method=?, source_tool=?, original_filename=?, file_size_bytes=?, word_count=?, current_version=?,"
        . " status='pending', error_message=NULL, updated_at=NOW(), storage_path = NULL WHERE id=? AND client_id=?"
    );
    $stmt->execute([
        $doc['title'],
        $doc['source_type'],
        $doc['content'],
        $doc['category'],
        $doc['tags'],
        $doc['author'] ?? null,
        $doc['language'],
        $doc['import_method'],
        $doc['source_tool'],
        $doc['original_filename'] ?? null,
        (int)($doc['file_size_bytes'] ?? 0),
        dbnUploadWordCount((string)$doc['content']),
        $nextVer,
        $existingId,
        $clientId,
    ]);

    // Persist file to disk if we have a tmp upload
    if (!empty($doc['_tmp_path']) && !empty($doc['_ext'])) {
        $storagePath = dbnDmsPersistFile($doc['_tmp_path'], $clientId, $existingId, $doc['_ext'], $nextVer);
        if ($storagePath) {
            $db->prepare('UPDATE client_documents SET storage_path = ? WHERE id = ?')
                ->execute([$storagePath, $existingId]);
        }
    }

    // Wipe chunks & re-ingest. A failed wipe stays non-fatal, but is now logged instead of
    // disappearing silently.
    try {
        $db->prepare('DELETE FROM client_chunks WHERE client_id = ? AND document_id = ?')
            ->execute([$clientId, $existingId]);
    } catch (Throwable $e) {
        error_log('replaceAsVersion: chunk cleanup failed for document ' . $existingId . ': ' . $e->getMessage());
    }

    try {
        $rag = new ClientRagPipeline($clientId);
        $chunks = (int)$rag->ingestDocument($existingId);
        dbnDmsLogAudit($clientId, $userId ?: null, 'version', ['version' => $nextVer], $existingId, $doc['folder_id']);

        return [
            'ok' => true,
            'document_id' => $existingId,
            'version_number' => $nextVer,
            'chunks' => $chunks,
            'status' => 'ready',
            'collision_resolved' => $versionAction,
        ];
    } catch (Throwable $e) {
        // mb_strcut keeps the 1000-byte cap but never cuts through a multibyte character.
        $db->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
            ->execute([mb_strcut($e->getMessage(), 0, 1000, 'UTF-8'), $existingId]);

        return [
            'ok' => false,
            'document_id' => $existingId,
            'version_number' => $nextVer,
            'status' => 'error',
            'error' => ['code' => 'index_failed', 'message' => $e->getMessage()],
        ];
    }
}

/**
 * Inserts a new document row (status 'pending', version 1), stores the uploaded file when one
 * is present, and indexes the document with ClientRagPipeline::ingestDocument().
 *
 * @param array $doc Document fields; '_tmp_path' and '_ext' are only present for file uploads.
 * @return array On success: ok, document_id, chunks, status 'ready', word_count. On indexing
 *               failure: ok=false, status 'error' and an 'error' entry with code 'index_failed'.
 */
function persistAndIngest(PDO $db, int $clientId, int $corpusId, int $userId, array $doc): array
{
    $wordCount = dbnUploadWordCount((string)$doc['content']);

    $stmt = $db->prepare(
        " INSERT INTO client_documents (client_id, corpus_id, folder_id, title, source_type, original_filename,"
        . " file_size_bytes, content, category, tags, author, language, import_method, source_tool, word_count,"
        . " status, current_version) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', 1) "
    );
    $stmt->execute([
        $clientId,
        $corpusId,
        $doc['folder_id'] ?? null,
        $doc['title'],
        $doc['source_type'],
        $doc['original_filename'] ?? null,
        $doc['file_size_bytes'] ?? 0,
        $doc['content'],
        $doc['category'],
        $doc['tags'],
        $doc['author'] ?? null,
        $doc['language'],
        $doc['import_method'],
        $doc['source_tool'],
        $wordCount,
    ]);
    $docId = (int)$db->lastInsertId();

    // Persist original file bytes if available (file upload path only).
    if (!empty($doc['_tmp_path']) && !empty($doc['_ext'])) {
        $storagePath = dbnDmsPersistFile($doc['_tmp_path'], $clientId, $docId, $doc['_ext']);
        if ($storagePath) {
            $db->prepare('UPDATE client_documents SET storage_path = ? WHERE id = ?')
                ->execute([$storagePath, $docId]);
        }
    }

    try {
        $rag = new ClientRagPipeline($clientId);
        $chunks = $rag->ingestDocument($docId);
        dbnDmsLogAudit(
            $clientId,
            $userId ?: null,
            'upload',
            ['source_type' => $doc['source_type'], 'word_count' => $wordCount],
            $docId,
            $doc['folder_id'] ?? null
        );

        return [
            'ok' => true,
            'document_id' => $docId,
            'chunks' => (int)$chunks,
            'status' => 'ready',
            'word_count' => $wordCount,
        ];
    } catch (Throwable $e) {
        // mb_strcut keeps the 1000-byte cap but never cuts through a multibyte character.
        $db->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
            ->execute([mb_strcut($e->getMessage(), 0, 1000, 'UTF-8'), $docId]);

        return [
            'ok' => false,
            'document_id' => $docId,
            'status' => 'error',
            'error' => ['code' => 'index_failed', 'message' => 'Saved, but indexing failed: ' . $e->getMessage()],
        ];
    }
}

/**
 * Normalises a raw folder id from the request.
 *
 * Returns null for null, '', 'unassigned', '0', any value that does not cast to a positive
 * integer, and any value that is not an int, float or string (arrays and booleans used to be
 * cast to folder 1).
 */
function resolveFolderId(mixed $raw): ?int
{
    if ($raw === null || $raw === '' || $raw === 'unassigned' || $raw === '0') {
        return null;
    }
    if (!is_int($raw) && !is_float($raw) && !is_string($raw)) {
        return null;
    }

    $n = (int)$raw;

    return $n > 0 ? $n : null;
}

/**
 * Finds a free title of the form "<title> (n)" within the client's folder.
 *
 * Tries n = 2..99 against non-deleted documents (case-insensitive title match); if all are
 * taken, appends six random hex characters instead. The unsuffixed title itself is never
 * returned.
 */
function uniqueTitle(PDO $db, int $clientId, ?int $folderId, string $title): string
{
    $check = $db->prepare(
        "SELECT COUNT(*) FROM client_documents WHERE client_id = ? AND (folder_id <=> ?)"
        . " AND LOWER(title) = LOWER(?) AND deleted_at IS NULL"
    );

    for ($n = 2; $n < 100; $n++) {
        $candidate = $title . ' (' . $n . ')';
        $check->execute([$clientId, $folderId, $candidate]);
        if ((int)$check->fetchColumn() === 0) {
            return $candidate;
        }
    }

    return $title . ' (' . substr(bin2hex(random_bytes(3)), 0, 6) . ')';
}

/**
 * Reduces a category to lowercase [a-z0-9-_], at most 50 characters; falls back to
 * 'uncategorized' when nothing usable remains.
 */
function sanitizeCategory(string $cat): string
{
    $cat = strtolower(trim($cat));
    $cat = preg_replace('/[^a-z0-9\-_]/', '', $cat) ?: 'uncategorized';

    return substr($cat, 0, 50);
}

/**
 * Normalises a comma-separated tag list: trims each tag, drops empty ones, caps every tag at
 * 32 bytes and the list at 20 tags.
 *
 * The cap uses mb_strcut so a multibyte character is never split, and only truly empty tags
 * are dropped (a tag of "0" is kept).
 */
function sanitizeTagsCsv(string $raw): string
{
    $tags = [];
    foreach (explode(',', $raw) as $piece) {
        $tag = trim($piece);
        if ($tag === '') {
            continue;
        }
        $tags[] = mb_strcut($tag, 0, 32, 'UTF-8');
        if (count($tags) === 20) {
            break;
        }
    }

    return implode(',', $tags);
}

/**
 * Counts words in UTF-8 text.
 *
 * A word starts with a letter and may continue with letters, combining marks, apostrophes and
 * hyphens, which mirrors str_word_count() but does not break words at non-ASCII letters such
 * as æ, ø or å. Falls back to str_word_count() when the text is not valid UTF-8.
 */
function dbnUploadWordCount(string $text): int
{
    $count = preg_match_all('/\p{L}[\p{L}\p{M}\'\-]*/u', $text);

    return $count === false ? str_word_count($text) : $count;
}

/**
 * Best-effort OCR of a PDF using the pdftoppm and tesseract command-line tools.
 *
 * Returns null when the file is unreadable, shell_exec is unavailable, tesseract is not on
 * the PATH, or the pipeline produces no text.
 *
 * NOTE(review): all rendered pages are piped to tesseract as a single stdin stream; confirm
 * that more than the first page is recognised for multi-page PDFs.
 */
function tryOcrPdf(string $tmpPath): ?string
{
    if ($tmpPath === '' || !is_readable($tmpPath)) {
        return null;
    }
    if (!function_exists('shell_exec')) {
        return null;
    }

    $check = @shell_exec('command -v tesseract 2>/dev/null');
    if (!$check) {
        return null;
    }

    // Render at 200 dpi as PNG on stdout and OCR with the Norwegian + English language packs.
    $out = trim((string)@shell_exec(
        'pdftoppm -r 200 ' . escapeshellarg($tmpPath) . ' - -png 2>/dev/null | '
        . 'tesseract -l nor+eng stdin stdout 2>/dev/null'
    ));

    return $out !== '' ? $out : null;
}