Full DMS: folders + ACLs, versioning, trash, bulk ops, preview, smart folders
Rebuild the dashboard as a Drive-style document management system on top of the existing CaveauAI hybrid RAG pipeline. Backend: - 5 migrations (versions, trash soft-delete, saved searches, categories, audit) - DMS helpers (folder ACL walker, disk storage, audit, version snapshot, XLSX/PPTX/HTML/CSV/MD extractors) - New APIs: folders, document-versions, trash, bulk, preview, saved-searches, categories, diagnostics - Extended APIs: documents (folder_id, soft-delete, ACL filter, sort), upload (9 file types, version-collision detection with replace/new/keep-both, disk persistence), chat-stream (folder scoping + graph related-documents) - 30-day trash purge cron with Qdrant + disk + graph cleanup Frontend: - Drive-style two-pane browser with folder tree, drag-drop, bulk-action bar, right-click context menu, multi-select - New pages: folders (tree + per-folder ACL editor), trash (restore/purge) - Extended pages: upload (folder picker, version-collision modal, 9 file type chips), document (Preview/Versions/Permissions tabs with PDF.js + mammoth.js + audio), index (DMS KPIs + activity feed), settings (live diagnostics ping MariaDB/Qdrant/LiteLLM/FalkorDB/disk), chat (folder scope chips + related-authorities chips) - New CSS (dms.css) + JS bundle (dms.js) exposing window.DBN_DMS - Sidebar nav adds Folders + Trash items All routes return HTTP 200 in local smoke test; all 32 files lint clean. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+218
-34
@@ -3,16 +3,19 @@
|
||||
* POST /api/dashboard/upload.php
|
||||
*
|
||||
* Three input modes:
|
||||
* - multipart/form-data with `file` field (PDF/DOCX/TXT, <= 8 MB)
|
||||
* - JSON body { "kind":"text", "title":..., "content":..., "category"?, "tags"?, "author"?, "language"? }
|
||||
* - JSON body { "kind":"url", "title":..., "url":... } (fetched via ClientUniversalScraper; queued)
|
||||
* - multipart/form-data with `file` field
|
||||
* Allowed: pdf, docx, txt, md, html, htm, csv, xlsx, pptx, json (≤8 MB)
|
||||
* Optional fields: title, category, tags, author, language, folder_id,
|
||||
* version_action (replace|new|force_separate)
|
||||
*
|
||||
* For file + text: writes pending row, runs ClientRagPipeline::ingestDocument() synchronously,
|
||||
* returns { ok, document_id, chunks, status }
|
||||
* For url: writes pending row, returns immediately with status:'pending' — a separate cron job
|
||||
* (run_client_one.php on the ai-portal) does the ingest.
|
||||
* - JSON body { "kind":"text", "title":..., "content":..., "category"?, "tags"?,
|
||||
* "author"?, "language"?, "folder_id"?, "version_action"? }
|
||||
*
|
||||
* If text extraction from a PDF yields fewer than 200 characters, attempts OCR via the `tesseract` shell util.
|
||||
* - JSON body { "kind":"url", "title":..., "url":..., "folder_id"? }
|
||||
*
|
||||
* On title collision in the same folder, returns HTTP 409 with
|
||||
* { ok: false, collision: true, existing_id, message }
|
||||
* unless `version_action` is provided.
|
||||
*/
|
||||
|
||||
declare(strict_types=1);
|
||||
@@ -27,8 +30,11 @@ try {
|
||||
} catch (DbnToolsHttpException $e) {
|
||||
dbnToolsError($e->getMessage(), $e->status, $e->errorCode);
|
||||
}
|
||||
$clientId = (int)$tenant['client_id'];
|
||||
$corpusId = (int)$tenant['corpus_id'];
|
||||
|
||||
$clientId = (int)$tenant['client_id'];
|
||||
$corpusId = (int)$tenant['corpus_id'];
|
||||
$userId = (int)($tenant['client_user_id'] ?? 0);
|
||||
$tenantRole = (string)($tenant['role'] ?? 'editor');
|
||||
|
||||
dbnToolsBootCaveau();
|
||||
$db = getDb();
|
||||
@@ -38,13 +44,13 @@ $isMultipart = stripos($contentType, 'multipart/form-data') === 0;
|
||||
|
||||
try {
|
||||
if ($isMultipart) {
|
||||
$result = handleFileUpload($db, $clientId, $corpusId);
|
||||
$result = handleFileUpload($db, $clientId, $corpusId, $userId, $tenantRole);
|
||||
} else {
|
||||
$input = dbnToolsJsonInput(2_500_000);
|
||||
$kind = (string)($input['kind'] ?? 'text');
|
||||
$result = match ($kind) {
|
||||
'text' => handleTextPaste($db, $clientId, $corpusId, $input),
|
||||
'url' => handleUrlImport($db, $clientId, $corpusId, $input),
|
||||
'text' => handleTextPaste($db, $clientId, $corpusId, $userId, $tenantRole, $input),
|
||||
'url' => handleUrlImport($db, $clientId, $corpusId, $userId, $tenantRole, $input),
|
||||
default => dbnToolsError('Unknown kind: ' . $kind, 400, 'unknown_kind'),
|
||||
};
|
||||
}
|
||||
@@ -57,12 +63,19 @@ try {
|
||||
dbnToolsRespond($result, 201);
|
||||
|
||||
|
||||
function handleFileUpload(PDO $db, int $clientId, int $corpusId): array
|
||||
function handleFileUpload(PDO $db, int $clientId, int $corpusId, int $userId, string $tenantRole): array
|
||||
{
|
||||
if (empty($_FILES['file'])) {
|
||||
dbnToolsError('No file uploaded.', 400, 'missing_file');
|
||||
}
|
||||
|
||||
$folderId = resolveFolderId($_POST['folder_id'] ?? null);
|
||||
$versionAction = trim((string)($_POST['version_action'] ?? ''));
|
||||
if ($folderId !== null && !dbnDmsUserCanAccessFolder($folderId, 'write', $clientId, $userId, $tenantRole)) {
|
||||
dbnToolsError('You do not have permission to upload here.', 403, 'forbidden_dest');
|
||||
}
|
||||
|
||||
$tmpPath = (string)($_FILES['file']['tmp_name'] ?? '');
|
||||
$extract = dbnToolsExtractUploadedFile($_FILES['file']);
|
||||
$text = (string)$extract['text'];
|
||||
$filename = (string)$extract['filename'];
|
||||
@@ -71,17 +84,22 @@ function handleFileUpload(PDO $db, int $clientId, int $corpusId): array
|
||||
$sourceType = match ($ext) {
|
||||
'pdf' => 'pdf',
|
||||
'docx' => 'docx',
|
||||
'xlsx' => 'xlsx',
|
||||
'pptx' => 'pptx',
|
||||
'html', 'htm' => 'html',
|
||||
'csv' => 'csv',
|
||||
'md' => 'markdown',
|
||||
default => 'text',
|
||||
};
|
||||
|
||||
$importMethod = 'dbn_upload';
|
||||
if (mb_strlen($text, 'UTF-8') < 200 && $ext === 'pdf') {
|
||||
$ocrText = tryOcrPdf((string)($_FILES['file']['tmp_name'] ?? ''));
|
||||
$ocrText = tryOcrPdf($tmpPath);
|
||||
if ($ocrText !== null && mb_strlen($ocrText, 'UTF-8') > mb_strlen($text, 'UTF-8')) {
|
||||
$text = $ocrText;
|
||||
$text = $ocrText;
|
||||
$importMethod = 'ocr_scan';
|
||||
}
|
||||
}
|
||||
$importMethod = $importMethod ?? 'dbn_upload';
|
||||
|
||||
$title = trim((string)($_POST['title'] ?? '')) ?: pathinfo($filename, PATHINFO_FILENAME);
|
||||
$category = sanitizeCategory((string)($_POST['category'] ?? 'uncategorized'));
|
||||
@@ -89,7 +107,7 @@ function handleFileUpload(PDO $db, int $clientId, int $corpusId): array
|
||||
$author = trim((string)($_POST['author'] ?? '')) ?: null;
|
||||
$language = trim((string)($_POST['language'] ?? 'no')) ?: 'no';
|
||||
|
||||
return persistAndIngest($db, $clientId, $corpusId, [
|
||||
$doc = [
|
||||
'title' => $title,
|
||||
'source_type' => $sourceType,
|
||||
'content' => $text,
|
||||
@@ -101,18 +119,29 @@ function handleFileUpload(PDO $db, int $clientId, int $corpusId): array
|
||||
'original_filename' => $filename,
|
||||
'file_size_bytes' => (int)($_FILES['file']['size'] ?? 0),
|
||||
'source_tool' => 'dashboard-upload',
|
||||
]);
|
||||
'folder_id' => $folderId,
|
||||
'_tmp_path' => $tmpPath,
|
||||
'_ext' => $ext,
|
||||
];
|
||||
|
||||
return handleCollisionAndIngest($db, $clientId, $corpusId, $userId, $tenantRole, $doc, $versionAction);
|
||||
}
|
||||
|
||||
function handleTextPaste(PDO $db, int $clientId, int $corpusId, array $input): array
|
||||
function handleTextPaste(PDO $db, int $clientId, int $corpusId, int $userId, string $tenantRole, array $input): array
|
||||
{
|
||||
$title = trim((string)($input['title'] ?? ''));
|
||||
$content = trim((string)($input['content'] ?? ''));
|
||||
if ($title === '') dbnToolsError('title is required.', 400, 'missing_title');
|
||||
if (mb_strlen($content, 'UTF-8') < 30) dbnToolsError('content too short (min 30 chars).', 400, 'content_too_short');
|
||||
if ($title === '') dbnToolsError('title is required.', 400, 'missing_title');
|
||||
if (mb_strlen($content, 'UTF-8') < 30) dbnToolsError('content too short (min 30 chars).', 400, 'content_too_short');
|
||||
if (mb_strlen($content, 'UTF-8') > 2_000_000) dbnToolsError('content exceeds 2 MB.', 400, 'content_too_large');
|
||||
|
||||
return persistAndIngest($db, $clientId, $corpusId, [
|
||||
$folderId = resolveFolderId($input['folder_id'] ?? null);
|
||||
$versionAction = trim((string)($input['version_action'] ?? ''));
|
||||
if ($folderId !== null && !dbnDmsUserCanAccessFolder($folderId, 'write', $clientId, $userId, $tenantRole)) {
|
||||
dbnToolsError('You do not have permission to upload here.', 403, 'forbidden_dest');
|
||||
}
|
||||
|
||||
$doc = [
|
||||
'title' => $title,
|
||||
'source_type' => 'text',
|
||||
'content' => $content,
|
||||
@@ -122,10 +151,12 @@ function handleTextPaste(PDO $db, int $clientId, int $corpusId, array $input): a
|
||||
'language' => trim((string)($input['language'] ?? 'no')) ?: 'no',
|
||||
'import_method' => 'manual',
|
||||
'source_tool' => 'dashboard-paste',
|
||||
]);
|
||||
'folder_id' => $folderId,
|
||||
];
|
||||
return handleCollisionAndIngest($db, $clientId, $corpusId, $userId, $tenantRole, $doc, $versionAction);
|
||||
}
|
||||
|
||||
function handleUrlImport(PDO $db, int $clientId, int $corpusId, array $input): array
|
||||
function handleUrlImport(PDO $db, int $clientId, int $corpusId, int $userId, string $tenantRole, array $input): array
|
||||
{
|
||||
$url = trim((string)($input['url'] ?? ''));
|
||||
$title = trim((string)($input['title'] ?? ''));
|
||||
@@ -138,42 +169,155 @@ function handleUrlImport(PDO $db, int $clientId, int $corpusId, array $input): a
|
||||
}
|
||||
if ($title === '') $title = $url;
|
||||
|
||||
$folderId = resolveFolderId($input['folder_id'] ?? null);
|
||||
if ($folderId !== null && !dbnDmsUserCanAccessFolder($folderId, 'write', $clientId, $userId, $tenantRole)) {
|
||||
dbnToolsError('You do not have permission to upload here.', 403, 'forbidden_dest');
|
||||
}
|
||||
|
||||
$stmt = $db->prepare("
|
||||
INSERT INTO client_documents
|
||||
(client_id, corpus_id, title, source_type, source_url, content,
|
||||
(client_id, corpus_id, folder_id, title, source_type, source_url, content,
|
||||
category, tags, language, import_method, source_tool, status)
|
||||
VALUES (?, ?, ?, 'url', ?, '', ?, ?, ?, 'url', 'dashboard-url', 'pending')
|
||||
VALUES (?, ?, ?, ?, 'url', ?, '', ?, ?, ?, 'url', 'dashboard-url', 'pending')
|
||||
");
|
||||
$stmt->execute([
|
||||
$clientId, $corpusId, $title, $url,
|
||||
$clientId, $corpusId, $folderId, $title, $url,
|
||||
sanitizeCategory((string)($input['category'] ?? 'uncategorized')),
|
||||
sanitizeTagsCsv((string)($input['tags'] ?? '')),
|
||||
trim((string)($input['language'] ?? 'no')) ?: 'no',
|
||||
]);
|
||||
$docId = (int)$db->lastInsertId();
|
||||
dbnDmsLogAudit($clientId, $userId ?: null, 'upload', ['mode' => 'url', 'url' => $url], $docId, $folderId);
|
||||
|
||||
return [
|
||||
'ok' => true,
|
||||
'document_id' => (int)$db->lastInsertId(),
|
||||
'document_id' => $docId,
|
||||
'status' => 'pending',
|
||||
'chunks' => 0,
|
||||
'note' => 'URL queued for background ingest.',
|
||||
];
|
||||
}
|
||||
|
||||
function persistAndIngest(PDO $db, int $clientId, int $corpusId, array $doc): array
|
||||
/**
 * Detects a title collision inside the destination folder and dispatches on `version_action`.
 *
 * Behaviour when a live (non-deleted) document with the same case-insensitive title exists
 * in the same folder for this client:
 *   - ''               → responds with HTTP 409 (`title_collision`) listing the valid options
 *   - 'replace', 'new' → hands off to replaceAsVersion() on the existing document
 *   - 'force_separate' → inserts a new document under a de-duplicated title
 * When nothing collides, the document is inserted unchanged regardless of the action.
 *
 * @param array<string, mixed> $doc           Document fields assembled by the upload handlers.
 * @param string               $tenantRole    Accepted for signature parity with the handlers; not used here.
 * @param string               $versionAction One of '', 'replace', 'new', 'force_separate'.
 *
 * @return array<string, mixed> Result payload from persistAndIngest() or replaceAsVersion().
 */
function handleCollisionAndIngest(PDO $db, int $clientId, int $corpusId, int $userId, string $tenantRole, array $doc, string $versionAction): array
{
    // An unrecognised action used to fall through every branch and insert a same-title
    // duplicate, defeating collision detection. Reject it up front instead.
    // NOTE(review): assumes dbnToolsError() terminates the request, as the other call sites imply.
    if (!in_array($versionAction, ['', 'replace', 'new', 'force_separate'], true)) {
        dbnToolsError('Unknown version_action: ' . $versionAction, 400, 'invalid_version_action');
    }

    $folderId = $doc['folder_id'] ?? null;

    // `<=>` is the NULL-safe equality operator, so "no folder" (NULL) matches "no folder".
    $check = $db->prepare(
        "SELECT id FROM client_documents
          WHERE client_id = ?
            AND (folder_id <=> ?)
            AND LOWER(title) = LOWER(?)
            AND deleted_at IS NULL
          ORDER BY id DESC LIMIT 1"
    );
    $check->execute([$clientId, $folderId, $doc['title']]);
    $existingId = (int)$check->fetchColumn();

    if ($existingId > 0) {
        if ($versionAction === '') {
            dbnToolsError(
                'A document with this title already exists in the target folder.',
                409,
                'title_collision',
                ['collision' => true, 'existing_id' => $existingId,
                 'options' => ['replace', 'new', 'force_separate']]
            );
        }

        if ($versionAction === 'replace' || $versionAction === 'new') {
            return replaceAsVersion($db, $clientId, $userId, $existingId, $doc, $versionAction);
        }

        // force_separate: keep both documents. Rename only when there really is a collision,
        // so a first upload with this flag keeps its original title.
        $doc['title'] = uniqueTitle($db, $clientId, $folderId, $doc['title']);
    }

    return persistAndIngest($db, $clientId, $corpusId, $userId, $doc);
}
|
||||
|
||||
/**
 * Overwrites an existing document with new content, keeping the previous state as a version.
 *
 * Sequence: snapshot the current row via dbnDmsSnapshotVersion(), update the row with the new
 * fields (status reset to 'pending', storage_path cleared), persist the uploaded file when one
 * is attached, delete the document's old chunks, then re-run ClientRagPipeline::ingestDocument().
 *
 * @param array<string, mixed> $doc           Document fields assembled by the upload handlers;
 *                                            `_tmp_path` and `_ext` are only set for file uploads.
 * @param string               $versionAction Action that resolved the collision. It is recorded in
 *                                            the snapshot note and echoed back; it does not alter
 *                                            the steps taken here.
 *
 * @return array<string, mixed> `ok` => true with chunk count on success; `ok` => false with
 *                              status 'error' when re-indexing throws.
 */
function replaceAsVersion(PDO $db, int $clientId, int $userId, int $existingId, array $doc, string $versionAction): array
{
    // Snapshot current → versions
    $snapshotVer = (int)dbnDmsSnapshotVersion($existingId, $clientId, $userId, "Replaced via {$versionAction}");

    // Parameterised and tenant-scoped (previously string-interpolated with an unchecked query()).
    $verStmt = $db->prepare('SELECT current_version FROM client_documents WHERE id = ? AND client_id = ?');
    $verStmt->execute([$existingId, $clientId]);
    $current = (int)$verStmt->fetchColumn();

    // Move past both the stored counter and whatever number the snapshot was given.
    $nextVer = max($current, $snapshotVer) + 1;

    // Update with new content
    $stmt = $db->prepare(
        "UPDATE client_documents
            SET title=?, source_type=?, content=?, category=?, tags=?, author=?, language=?,
                import_method=?, source_tool=?, original_filename=?, file_size_bytes=?, word_count=?,
                current_version=?, status='pending', error_message=NULL, updated_at=NOW(),
                storage_path = NULL
          WHERE id=? AND client_id=?"
    );
    $stmt->execute([
        $doc['title'], $doc['source_type'], $doc['content'], $doc['category'], $doc['tags'],
        $doc['author'] ?? null, $doc['language'], $doc['import_method'], $doc['source_tool'],
        $doc['original_filename'] ?? null,
        (int)($doc['file_size_bytes'] ?? 0),
        str_word_count((string)$doc['content']),
        $nextVer,
        $existingId, $clientId,
    ]);

    // Persist file to disk if we have a tmp upload
    if (!empty($doc['_tmp_path']) && !empty($doc['_ext'])) {
        $storagePath = dbnDmsPersistFile($doc['_tmp_path'], $clientId, $existingId, $doc['_ext'], $nextVer);
        if ($storagePath) {
            // Tenant-scoped like every other write on this row.
            $db->prepare('UPDATE client_documents SET storage_path = ? WHERE id = ? AND client_id = ?')
               ->execute([$storagePath, $existingId, $clientId]);
        }
    }

    // Wipe chunks & re-ingest. The delete stays best-effort, but a failure is now logged
    // so stale chunks can be diagnosed instead of disappearing silently.
    try {
        $db->prepare('DELETE FROM client_chunks WHERE client_id = ? AND document_id = ?')
           ->execute([$clientId, $existingId]);
    } catch (Throwable $e) {
        error_log('replaceAsVersion: chunk cleanup failed for document ' . $existingId . ': ' . $e->getMessage());
    }

    try {
        $rag = new ClientRagPipeline($clientId);
        $chunks = (int)$rag->ingestDocument($existingId);
        dbnDmsLogAudit($clientId, $userId ?: null, 'version', ['version' => $nextVer], $existingId, $doc['folder_id'] ?? null);

        return [
            'ok' => true,
            'document_id' => $existingId,
            'version_number' => $nextVer,
            'chunks' => $chunks,
            'status' => 'ready',
            'collision_resolved' => $versionAction,
        ];
    } catch (Throwable $e) {
        // mb_strcut keeps the 1000-byte cap without splitting a multibyte character,
        // which byte-wise substr() could do and thereby store invalid UTF-8.
        $db->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
           ->execute([mb_strcut($e->getMessage(), 0, 1000, 'UTF-8'), $existingId]);

        return [
            'ok' => false,
            'document_id' => $existingId,
            'version_number' => $nextVer,
            'status' => 'error',
            'error' => ['code' => 'index_failed', 'message' => $e->getMessage()],
        ];
    }
}
|
||||
|
||||
function persistAndIngest(PDO $db, int $clientId, int $corpusId, int $userId, array $doc): array
|
||||
{
|
||||
$wordCount = str_word_count((string)$doc['content']);
|
||||
|
||||
$stmt = $db->prepare("
|
||||
INSERT INTO client_documents
|
||||
(client_id, corpus_id, title, source_type, original_filename, file_size_bytes,
|
||||
(client_id, corpus_id, folder_id, title, source_type, original_filename, file_size_bytes,
|
||||
content, category, tags, author, language,
|
||||
import_method, source_tool, word_count, status)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending')
|
||||
import_method, source_tool, word_count, status, current_version)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', 1)
|
||||
");
|
||||
$stmt->execute([
|
||||
$clientId,
|
||||
$corpusId,
|
||||
$doc['folder_id'] ?? null,
|
||||
$doc['title'],
|
||||
$doc['source_type'],
|
||||
$doc['original_filename'] ?? null,
|
||||
@@ -189,9 +333,21 @@ function persistAndIngest(PDO $db, int $clientId, int $corpusId, array $doc): ar
|
||||
]);
|
||||
$docId = (int)$db->lastInsertId();
|
||||
|
||||
// Persist original file bytes if available (file upload path only).
|
||||
if (!empty($doc['_tmp_path']) && !empty($doc['_ext'])) {
|
||||
$storagePath = dbnDmsPersistFile($doc['_tmp_path'], $clientId, $docId, $doc['_ext']);
|
||||
if ($storagePath) {
|
||||
$db->prepare('UPDATE client_documents SET storage_path = ? WHERE id = ?')
|
||||
->execute([$storagePath, $docId]);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
$rag = new ClientRagPipeline($clientId);
|
||||
$chunks = $rag->ingestDocument($docId);
|
||||
dbnDmsLogAudit($clientId, $userId ?: null, 'upload',
|
||||
['source_type' => $doc['source_type'], 'word_count' => $wordCount],
|
||||
$docId, $doc['folder_id'] ?? null);
|
||||
return [
|
||||
'ok' => true,
|
||||
'document_id' => $docId,
|
||||
@@ -211,6 +367,34 @@ function persistAndIngest(PDO $db, int $clientId, int $corpusId, array $doc): ar
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Normalises a client-supplied folder reference to a positive folder id, or null for "no folder".
 *
 * Accepted: positive ints, integral positive floats (JSON `5.0`), and strings consisting solely
 * of digits (surrounding whitespace ignored). Everything else resolves to null, including
 * null, '', 'unassigned', '0', negative numbers, booleans, arrays and partially numeric
 * strings such as '12abc' or '1e3'.
 *
 * A plain `(int)` cast is deliberately avoided: it turns any non-empty array into 1
 * (e.g. a `folder_id[]=x` form field), `true` into 1 and '12abc' into 12.
 */
function resolveFolderId(mixed $raw): ?int
{
    if (is_int($raw)) {
        return $raw > 0 ? $raw : null;
    }

    if (is_float($raw)) {
        // Only exact integers that fit in a PHP int are meaningful as ids.
        $isWholeId = $raw >= 1.0 && $raw < (float)PHP_INT_MAX && floor($raw) === $raw;
        return $isWholeId ? (int)$raw : null;
    }

    if (is_string($raw)) {
        $digits = trim($raw);
        // Capped at 18 digits so the cast below cannot overflow a 64-bit int.
        if (preg_match('/\A[0-9]{1,18}\z/', $digits) !== 1) {
            return null;
        }
        $id = (int)$digits;
        return $id > 0 ? $id : null;
    }

    return null;
}
|
||||
|
||||
/**
 * Returns a title that no live document in the given folder already uses.
 *
 * The title is returned unchanged when it is free. Otherwise " (2)" … " (99)" are tried in
 * order, and if all of those are taken a 6-hex-character random suffix is appended (that
 * last fallback is not re-checked against the table).
 *
 * Matching is case-insensitive, limited to this client, ignores soft-deleted rows and uses
 * the NULL-safe `<=>` operator so that a null $folderId matches rows without a folder.
 */
function uniqueTitle(PDO $db, int $clientId, ?int $folderId, string $title): string
{
    $check = $db->prepare(
        "SELECT COUNT(*) FROM client_documents
          WHERE client_id = ? AND (folder_id <=> ?) AND LOWER(title) = LOWER(?) AND deleted_at IS NULL"
    );

    $isTaken = static function (string $candidate) use ($check, $clientId, $folderId): bool {
        $check->execute([$clientId, $folderId, $candidate]);
        return (int)$check->fetchColumn() > 0;
    };

    // Previously the base title was never probed, so a suffix was appended even when
    // nothing collided.
    if (!$isTaken($title)) {
        return $title;
    }

    for ($n = 2; $n < 100; $n++) {
        $candidate = $title . ' (' . $n . ')';
        if (!$isTaken($candidate)) {
            return $candidate;
        }
    }

    // bin2hex() of 3 random bytes is exactly 6 hex characters.
    return $title . ' (' . bin2hex(random_bytes(3)) . ')';
}
|
||||
|
||||
function sanitizeCategory(string $cat): string
|
||||
{
|
||||
$cat = strtolower(trim($cat));
|
||||
|
||||
Reference in New Issue
Block a user