Full DMS: folders + ACLs, versioning, trash, bulk ops, preview, smart folders

Rebuild the dashboard as a Drive-style document management system on top of
the existing CaveauAI hybrid RAG pipeline.

Backend:
- 5 migrations (versions, trash soft-delete, saved searches, categories, audit)
- DMS helpers (folder ACL walker, disk storage, audit, version snapshot,
  XLSX/PPTX/HTML/CSV/MD extractors)
- New APIs: folders, document-versions, trash, bulk, preview, saved-searches,
  categories, diagnostics
- Extended APIs: documents (folder_id, soft-delete, ACL filter, sort),
  upload (9 file types, version-collision detection with replace/new/keep-both,
  disk persistence), chat-stream (folder scoping + graph related-documents)
- 30-day trash purge cron with Qdrant + disk + graph cleanup

Frontend:
- Drive-style two-pane browser with folder tree, drag-drop, bulk-action bar,
  right-click context menu, multi-select
- New pages: folders (tree + per-folder ACL editor), trash (restore/purge)
- Extended pages: upload (folder picker, version-collision modal, 9 file
  type chips), document (Preview/Versions/Permissions tabs with PDF.js +
  mammoth.js + audio), index (DMS KPIs + activity feed), settings (live
  diagnostics ping MariaDB/Qdrant/LiteLLM/FalkorDB/disk), chat (folder
  scope chips + related-authorities chips)
- New CSS (dms.css) + JS bundle (dms.js) exposing window.DBN_DMS
- Sidebar nav adds Folders + Trash items

All routes return HTTP 200 in local smoke test; all 32 files lint clean.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-26 22:24:56 +02:00
parent b84827ecea
commit 2e2b0b45fa
30 changed files with 5438 additions and 335 deletions
+15 -5
View File
@@ -891,7 +891,9 @@ function dbnToolsExcerpt(string $text, int $limit = 520): string
const DBN_TOOLS_EXTRACT_MAX_BYTES = 8 * 1024 * 1024;
const DBN_TOOLS_EXTRACT_TEXT_LIMIT = 128000;
const DBN_TOOLS_TIMELINE_EXTRACT_TEXT_LIMIT = 600000;
const DBN_TOOLS_EXTRACT_ALLOWED_EXTS = ['txt', 'pdf', 'docx'];
const DBN_TOOLS_EXTRACT_ALLOWED_EXTS = ['txt', 'pdf', 'docx', 'xlsx', 'pptx', 'html', 'htm', 'csv', 'md', 'json'];
const DBN_TOOLS_EXTRACT_AUDIO_EXTS = ['mp3', 'wav', 'm4a', 'ogg', 'flac', 'webm'];
const DBN_TOOLS_EXTRACT_IMAGE_EXTS = ['png', 'jpg', 'jpeg', 'webp'];
function dbnToolsExtractUploadedFile(array $file, int $textLimit = DBN_TOOLS_EXTRACT_TEXT_LIMIT): array
{
@@ -922,13 +924,19 @@ function dbnToolsExtractUploadedFile(array $file, int $textLimit = DBN_TOOLS_EXT
$ext = strtolower(pathinfo($originalName, PATHINFO_EXTENSION));
if (!in_array($ext, DBN_TOOLS_EXTRACT_ALLOWED_EXTS, true)) {
dbnToolsAbort('Unsupported file type. Upload a .pdf, .docx, or .txt file.', 422, 'unsupported_type');
$allowed = strtoupper(implode(', .', DBN_TOOLS_EXTRACT_ALLOWED_EXTS));
dbnToolsAbort("Unsupported file type. Allowed: .{$allowed}.", 422, 'unsupported_type');
}
$text = match ($ext) {
'txt' => dbnToolsExtractTxt($tmpPath),
'pdf' => dbnToolsExtractPdf($tmpPath),
'docx' => dbnToolsExtractDocx($tmpPath),
'txt', 'md', 'json' => dbnToolsExtractTxt($tmpPath),
'pdf' => dbnToolsExtractPdf($tmpPath),
'docx' => dbnToolsExtractDocx($tmpPath),
'html', 'htm' => dbnDmsExtractHtml($tmpPath),
'csv' => dbnDmsExtractCsv($tmpPath),
'xlsx' => dbnDmsExtractXlsx($tmpPath),
'pptx' => dbnDmsExtractPptx($tmpPath),
default => dbnToolsExtractTxt($tmpPath),
};
$text = trim($text);
@@ -1370,3 +1378,5 @@ function dbnToolsInjectDocContent(array $input, string $text): string
}
return $docText . ($text !== '' ? "\n\n---\n\n" . $text : '');
}
require_once __DIR__ . '/dms_helpers.php';
+471
View File
@@ -0,0 +1,471 @@
<?php
declare(strict_types=1);
/**
* DMS helpers: folder ACLs, storage paths, audit logging, version bookkeeping.
* Loaded from bootstrap.php so all dashboard pages + APIs get them implicitly.
*/
// Maximum folder nesting depth. NOTE(review): enforcement is not in this file —
// presumably callers compare dbnDmsFolderDepth() against this value; confirm.
const DBN_DMS_MAX_FOLDER_DEPTH = 2; // Matches ai-portal app-layer cap. Raise here to unlock deeper nesting.
// Upper bound on rows kept per document in client_document_versions;
// dbnDmsSnapshotVersion() deletes the lowest version numbers beyond it.
const DBN_DMS_MAX_VERSIONS_PER_DOC = 20; // Oldest auto-pruned beyond this.
// Days a trashed item is retained. NOTE(review): not referenced in this file —
// presumably consumed by the trash purge job; confirm.
const DBN_DMS_TRASH_RETENTION_DAYS = 30;
// Category rows inserted for a tenant by dbnDmsSeedDefaultCategoriesIfEmpty().
// Each entry supplies slug, label, color (hex), icon and sort_order; the seeder
// stores them with is_system = 1.
const DBN_DMS_DEFAULT_CATEGORIES = [
['slug' => 'uncategorized', 'label' => 'Uncategorized', 'color' => '#94a3b8', 'icon' => 'folder', 'sort_order' => 0],
['slug' => 'legal', 'label' => 'Legal', 'color' => '#1d4ed8', 'icon' => 'scale', 'sort_order' => 10],
['slug' => 'financial', 'label' => 'Financial', 'color' => '#047857', 'icon' => 'chart', 'sort_order' => 20],
['slug' => 'internal', 'label' => 'Internal', 'color' => '#7c3aed', 'icon' => 'building', 'sort_order' => 30],
['slug' => 'hr', 'label' => 'HR', 'color' => '#db2777', 'icon' => 'people', 'sort_order' => 40],
['slug' => 'marketing', 'label' => 'Marketing', 'color' => '#b88a2c', 'icon' => 'megaphone', 'sort_order' => 50],
];
/**
 * Build the on-disk path where a document (or one of its versions) is stored.
 *
 * Root selection: the DBN_TOOLS_UPLOAD_ROOT setting when non-empty, otherwise
 * /home/dobetternorge/uploads if that directory exists, otherwise
 * DBN_TOOLS_ROOT/uploads.
 *
 * Layout:
 *   current file:  {root}/{clientId}/{documentId}.{ext}
 *   version file:  {root}/{clientId}/{documentId}_versions/v{versionNumber}.{ext}
 *
 * Side effect: the client directory (and the versions directory, when a
 * positive version number is given) is created on a best-effort basis; mkdir
 * failures are suppressed and the path is returned regardless.
 *
 * @param string   $ext           Lower-cased and reduced to [a-z0-9]; an empty result (or "0") becomes "bin".
 * @param int|null $versionNumber Null or <= 0 selects the current-file layout.
 */
function dbnDmsStoragePath(int $clientId, int $documentId, string $ext, ?int $versionNumber = null): string
{
    $productionRoot = '/home/dobetternorge/uploads';

    $baseDir = dbnToolsEnv('DBN_TOOLS_UPLOAD_ROOT', '');
    if ($baseDir === null || $baseDir === '') {
        if (is_dir($productionRoot)) {
            $baseDir = $productionRoot;
        } else {
            $baseDir = DBN_TOOLS_ROOT . '/uploads';
        }
    }

    $safeExt = preg_replace('/[^a-z0-9]/', '', strtolower($ext));
    if (!$safeExt) {
        $safeExt = 'bin';
    }

    $tenantDir = rtrim($baseDir, '/') . '/' . $clientId;
    if (!is_dir($tenantDir)) {
        @mkdir($tenantDir, 0750, true);
    }

    $isVersioned = $versionNumber !== null && $versionNumber > 0;
    if (!$isVersioned) {
        return $tenantDir . '/' . $documentId . '.' . $safeExt;
    }

    $historyDir = $tenantDir . '/' . $documentId . '_versions';
    if (!is_dir($historyDir)) {
        @mkdir($historyDir, 0750, true);
    }

    return $historyDir . '/v' . $versionNumber . '.' . $safeExt;
}
/**
 * Copy a file into permanent DMS storage.
 *
 * The destination comes from dbnDmsStoragePath(). The source file is copied,
 * not moved, so $tmpPath is left in place. On success the copy is chmod-ed to
 * 0640 (best effort).
 *
 * @return string|null The destination path, or null when the target directory
 *                     is missing / not writable or the copy itself fails.
 */
function dbnDmsPersistFile(string $tmpPath, int $clientId, int $documentId, string $ext, ?int $versionNumber = null): ?string
{
    $target = dbnDmsStoragePath($clientId, $documentId, $ext, $versionNumber);
    $targetDir = dirname($target);

    $dirUsable = is_dir($targetDir) && is_writable($targetDir);
    if ($dirUsable && @copy($tmpPath, $target)) {
        @chmod($target, 0640);
        return $target;
    }

    return null;
}
/**
 * Walk the folder tree upward from $folderId and return the chain ordered
 * root → leaf.
 *
 * Each element is the fetched client_folders row (id, name, parent_id, color).
 * Only rows belonging to $clientId with deleted_at IS NULL are followed; the
 * walk stops at the first folder that is missing, soft-deleted, or owned by
 * another client, so the result may be a partial chain (or empty).
 *
 * Safety: the walk is capped at 50 levels and stops as soon as a folder id
 * repeats, so a corrupt parent_id cycle yields each folder once instead of a
 * 50-row chain of duplicates.
 *
 * @return array<int, array<string, mixed>> [] when $folderId is null/0 (corpus root).
 */
function dbnDmsFolderChain(?int $folderId, int $clientId): array
{
    if (!$folderId) {
        return [];
    }

    // The query is loop-invariant: prepare it once and re-execute per level.
    $stmt = dbnToolsDb()->prepare('SELECT id, name, parent_id, color FROM client_folders WHERE id = ? AND client_id = ? AND deleted_at IS NULL');

    $chain = [];
    $visited = [];
    $current = $folderId;
    $guard = 0;
    while ($current && !isset($visited[$current]) && $guard++ < 50) {
        $visited[$current] = true;
        $stmt->execute([$current, $clientId]);
        $row = $stmt->fetch();
        if (!$row) {
            break;
        }
        $chain[] = $row;
        $current = $row['parent_id'] ? (int)$row['parent_id'] : 0;
    }

    // Rows were collected leaf → root; callers expect root → leaf.
    return array_reverse($chain);
}
/**
 * Breadcrumb trail for a folder, ordered from the top-most folder down to
 * $folderId itself.
 *
 * @return array<int, array{id:int, name:string, color:mixed}> One entry per
 *         folder in dbnDmsFolderChain(); color is passed through as stored
 *         (null when absent). Empty when at corpus root.
 */
function dbnDmsBreadcrumb(?int $folderId, int $clientId): array
{
    $crumbs = [];
    foreach (dbnDmsFolderChain($folderId, $clientId) as $folder) {
        $crumbs[] = [
            'id' => (int)$folder['id'],
            'name' => (string)$folder['name'],
            'color' => $folder['color'] ?? null,
        ];
    }

    return $crumbs;
}
/**
 * Nesting depth of a folder: the number of folders on its chain, so a folder
 * directly under the corpus root has depth 1 and the root itself (null/0)
 * has depth 0. Intended for checks against DBN_DMS_MAX_FOLDER_DEPTH.
 */
function dbnDmsFolderDepth(?int $folderId, int $clientId): int
{
    return $folderId ? count(dbnDmsFolderChain($folderId, $clientId)) : 0;
}
/**
 * Decide whether a user may act on a folder.
 *
 * Resolution order:
 *   1. Corpus root (null/0 folder): read is always allowed; write and manage
 *      require tenant role editor, admin or owner.
 *   2. Tenant admin/owner: always allowed (the folder is not looked up).
 *   3. The folder chain is loaded; an empty chain (unknown, soft-deleted or
 *      foreign folder) denies access.
 *   4. Walk the chain leaf → root. The first folder that has ANY ACL rows
 *      decides: access is granted if one of its rows grants the requested
 *      permission to this user id, or to a min_role the user's tenant role
 *      satisfies; otherwise access is denied without consulting ancestors.
 *   5. No ACL rows on any folder of the chain → open (allowed).
 *
 * @param string $perm 'read' | 'write' | 'manage'. Any other value is checked
 *                     against the can_read column.
 */
function dbnDmsUserCanAccessFolder(?int $folderId, string $perm, int $clientId, int $userId, string $tenantRole = 'viewer'): bool
{
    // Tenant root is always readable; write and manage both require editor+.
    if (!$folderId) {
        if ($perm === 'manage' || $perm === 'write') {
            return in_array($tenantRole, ['editor', 'admin', 'owner'], true);
        }
        return true;
    }
    if (in_array($tenantRole, ['admin', 'owner'], true)) {
        return true;
    }

    $chain = dbnDmsFolderChain($folderId, $clientId);
    if (!$chain) {
        return false;
    }

    $col = match ($perm) {
        'write' => 'can_write',
        'manage' => 'can_manage',
        default => 'can_read',
    };

    // Loop-invariant query: prepare once, execute per folder.
    $stmt = dbnToolsDb()->prepare(
        "SELECT min_role, user_id, can_read, can_write, can_manage
        FROM client_folder_permissions
        WHERE folder_id = ? AND client_id = ?"
    );

    foreach (array_reverse($chain) as $folder) {
        $stmt->execute([(int)$folder['id'], $clientId]);
        $rows = $stmt->fetchAll();
        if (!$rows) {
            // No ACL on this level — defer to the parent.
            continue;
        }
        foreach ($rows as $row) {
            if ((int)$row[$col] !== 1) {
                continue;
            }
            if ($row['user_id'] !== null && (int)$row['user_id'] === $userId) {
                return true;
            }
            if ($row['min_role'] !== null && dbnDmsRoleAtLeast($tenantRole, (string)$row['min_role'])) {
                return true;
            }
        }
        // ACL at this level but user not granted — block (no inheritance past explicit restriction).
        return false;
    }

    // Reaching here means no folder on the chain carried ACL rows → open per migration 119 convention.
    return true;
}
/**
 * Compare two tenant roles on the ladder viewer < editor < admin < owner.
 *
 * A role name that is not on the ladder is ranked like 'viewer' (lowest), on
 * either side of the comparison.
 *
 * @return bool True when $userRole ranks at or above $minRole.
 */
function dbnDmsRoleAtLeast(string $userRole, string $minRole): bool
{
    $ladder = ['viewer', 'editor', 'admin', 'owner'];

    $level = static function (string $role) use ($ladder): int {
        $position = array_search($role, $ladder, true);
        return $position === false ? 0 : $position;
    };

    return $level($userRole) >= $level($minRole);
}
/**
 * Append a row to client_document_audit.
 *
 * Best-effort by design: any failure is logged via error_log() and swallowed,
 * so auditing can never break the calling request.
 *
 * @param int|null $userId     0/null is stored as NULL.
 * @param string   $action     Truncated to 40 bytes.
 * @param array    $details    Stored as JSON; an empty array is stored as NULL.
 *                             Invalid UTF-8 inside values is replaced with
 *                             U+FFFD rather than dropping the whole payload.
 * @param int|null $documentId 0/null is stored as NULL.
 * @param int|null $folderId   0/null is stored as NULL.
 */
function dbnDmsLogAudit(int $clientId, ?int $userId, string $action, array $details = [], ?int $documentId = null, ?int $folderId = null): void
{
    try {
        $encodedDetails = null;
        if ($details) {
            // Without the substitute/partial flags json_encode() returns false on
            // malformed UTF-8 (e.g. odd filenames) and that false would be bound
            // as the column value.
            $encodedDetails = json_encode(
                $details,
                JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
                | JSON_INVALID_UTF8_SUBSTITUTE | JSON_PARTIAL_OUTPUT_ON_ERROR
            );
            if ($encodedDetails === false) {
                $encodedDetails = null;
            }
        }

        $db = dbnToolsDb();
        $stmt = $db->prepare(
            'INSERT INTO client_document_audit (client_id, user_id, document_id, folder_id, action, details, ip_addr, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, NOW())'
        );
        $stmt->execute([
            $clientId,
            $userId ?: null,
            $documentId ?: null,
            $folderId ?: null,
            substr($action, 0, 40),
            $encodedDetails,
            // Empty string when REMOTE_ADDR is unavailable (e.g. CLI); 45 chars fits an IPv6 literal.
            substr((string)($_SERVER['REMOTE_ADDR'] ?? ''), 0, 45),
        ]);
    } catch (Throwable $e) {
        error_log('[dbn-dms] audit insert failed: ' . $e->getMessage());
    }
}
/**
 * Insert DBN_DMS_DEFAULT_CATEGORIES for a tenant that has no categories yet.
 *
 * Does nothing when the tenant already has at least one client_categories
 * row, so repeated calls are harmless. The count check and the inserts are
 * separate statements with no transaction around them. Every failure is
 * logged and swallowed.
 */
function dbnDmsSeedDefaultCategoriesIfEmpty(int $clientId): void
{
    try {
        $pdo = dbnToolsDb();

        $existing = $pdo->prepare('SELECT COUNT(*) FROM client_categories WHERE client_id = ?');
        $existing->execute([$clientId]);
        $alreadySeeded = (int)$existing->fetchColumn() > 0;

        if (!$alreadySeeded) {
            $insert = $pdo->prepare(
                'INSERT INTO client_categories (client_id, slug, label, color, icon, sort_order, is_system)' . "\n"
                . 'VALUES (?, ?, ?, ?, ?, ?, 1)'
            );
            foreach (DBN_DMS_DEFAULT_CATEGORIES as $category) {
                [
                    'slug' => $slug,
                    'label' => $label,
                    'color' => $color,
                    'icon' => $icon,
                    'sort_order' => $sortOrder,
                ] = $category;
                $insert->execute([$clientId, $slug, $label, $color, $icon, $sortOrder]);
            }
        }
    } catch (Throwable $e) {
        // Likely table doesn't exist yet (migration not applied).
        error_log('[dbn-dms] seed categories failed: ' . $e->getMessage());
    }
}
/**
 * Copy the current state of a document into client_document_versions, meant
 * to be called before the document row is overwritten.
 *
 * The snapshot is stored under the document's current_version value
 * (1 when that column is null/absent). After inserting, the lowest-numbered
 * versions are deleted so at most DBN_DMS_MAX_VERSIONS_PER_DOC rows remain
 * for the document. Only database rows are touched here.
 *
 * Database errors are not caught in this function and propagate to the caller.
 *
 * @return int The version number the snapshot was stored under, or 0 when
 *             the document does not exist for this client.
 */
function dbnDmsSnapshotVersion(int $documentId, int $clientId, ?int $userId, ?string $notes = null): int
{
    $pdo = dbnToolsDb();

    $lookup = $pdo->prepare('SELECT * FROM client_documents WHERE id = ? AND client_id = ?');
    $lookup->execute([$documentId, $clientId]);
    $document = $lookup->fetch();
    if (!$document) {
        return 0;
    }

    $versionNumber = (int)($document['current_version'] ?? 1);

    $insertSql = 'INSERT INTO client_document_versions' . "\n"
        . '(document_id, client_id, version_number, title, content, file_size_bytes,' . "\n"
        . 'original_filename, storage_path, word_count, uploaded_by, notes, created_at)' . "\n"
        . 'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, NOW())';
    $snapshot = [
        $documentId,
        $clientId,
        $versionNumber,
        (string)$document['title'],
        (string)($document['content'] ?? ''),
        (int)($document['file_size_bytes'] ?? 0),
        $document['original_filename'] ?? null,
        $document['storage_path'] ?? null,
        (int)($document['word_count'] ?? 0),
        $userId ?: null,
        $notes,
    ];
    $pdo->prepare($insertSql)->execute($snapshot);

    // Enforce the per-document cap by dropping the oldest version numbers.
    $counter = $pdo->prepare('SELECT COUNT(*) FROM client_document_versions WHERE document_id = ?');
    $counter->execute([$documentId]);
    $excess = (int)$counter->fetchColumn() - DBN_DMS_MAX_VERSIONS_PER_DOC;
    if ($excess > 0) {
        $pruneSql = 'DELETE FROM client_document_versions' . "\n"
            . 'WHERE document_id = ?' . "\n"
            . 'ORDER BY version_number ASC' . "\n"
            . 'LIMIT ' . $excess;
        $pdo->prepare($pruneSql)->execute([$documentId]);
    }

    return $versionNumber;
}
/**
 * Lower-cased file extension of a filename or path, without the dot.
 *
 * Only the final path component is inspected, so a dot inside a directory
 * name (e.g. "uploads.v2/readme") is not mistaken for an extension. Uses the
 * same pathinfo() approach as the upload extractor.
 *
 * @return string The extension, or '' when the name has none.
 */
function dbnDmsExtensionFromFilename(string $filename): string
{
    return strtolower(pathinfo($filename, PATHINFO_EXTENSION));
}
/**
 * Extract plain text from an HTML file.
 *
 * Steps: normalise the bytes to UTF-8 (detecting among UTF-8, ISO-8859-1 and
 * Windows-1252), drop <script> and <style> elements including their content,
 * turn block-level boundaries into line breaks (and table-cell boundaries
 * into tabs) so neighbouring paragraphs/cells do not fuse into one word,
 * strip the remaining tags, decode entities, and collapse runs of three or
 * more line breaks to a blank line.
 *
 * @throws DbnToolsHttpException (500, read_error) when the file cannot be read.
 */
function dbnDmsExtractHtml(string $path): string
{
    $raw = file_get_contents($path);
    if ($raw === false) {
        throw new DbnToolsHttpException('Unable to read HTML file.', 500, 'read_error');
    }
    $raw = mb_convert_encoding($raw, 'UTF-8', 'UTF-8, ISO-8859-1, Windows-1252');
    $raw = preg_replace('#<script\b[^>]*>.*?</script>#is', '', $raw) ?? $raw;
    $raw = preg_replace('#<style\b[^>]*>.*?</style>#is', '', $raw) ?? $raw;

    // strip_tags() removes tags without leaving whitespace, so "<p>a</p><p>b</p>"
    // would become "ab". Insert separators at structural boundaries first.
    $raw = preg_replace('#</(?:td|th)\s*>#i', "\t", $raw) ?? $raw;
    $raw = preg_replace(
        '#<(?:br|hr)\b[^>]*>|</(?:p|div|section|article|header|footer|li|ul|ol|tr|table|h[1-6]|blockquote|pre|dt|dd)\s*>#i',
        "\n",
        $raw
    ) ?? $raw;

    $text = trim(html_entity_decode(strip_tags($raw), ENT_QUOTES | ENT_HTML5, 'UTF-8'));
    return preg_replace("/[\r\n]{3,}/", "\n\n", $text) ?? $text;
}
/**
 * Render a comma-separated file as readable text.
 *
 * Output: the first non-blank row is taken as the header and emitted as
 * "a | b | c". Every following row becomes "- col: value; col: value",
 * pairing each non-empty cell with its header name ("col{i}" when the row is
 * wider than the header). Blank lines and rows with no non-empty cell are
 * skipped. A leading UTF-8 byte-order mark is ignored so it does not end up
 * inside the first column name. After just over 5000 data rows the output
 * ends with "... (truncated)".
 *
 * @throws DbnToolsHttpException (500, read_error) when the file cannot be opened.
 */
function dbnDmsExtractCsv(string $path): string
{
    $fh = @fopen($path, 'rb');
    if (!$fh) {
        throw new DbnToolsHttpException('Unable to read CSV file.', 500, 'read_error');
    }

    $lines = [];
    try {
        // Spreadsheet exports commonly start with a UTF-8 BOM; skip it, otherwise start over.
        if (fread($fh, 3) !== "\xEF\xBB\xBF") {
            rewind($fh);
        }

        $header = null;
        $rowNum = 0;
        while (($row = fgetcsv($fh, 0, ',', '"', '\\')) !== false) {
            // fgetcsv() reports a blank line as a single null field.
            if ($row === [null]) {
                continue;
            }
            $row = array_map(static fn ($c): string => (string)$c, $row);
            if ($header === null) {
                $header = $row;
                $lines[] = implode(' | ', $header);
                continue;
            }
            $pairs = [];
            foreach ($row as $i => $cell) {
                $col = $header[$i] ?? "col{$i}";
                if ($cell !== '') {
                    $pairs[] = $col . ': ' . $cell;
                }
            }
            if (!$pairs) {
                continue;
            }
            $lines[] = '- ' . implode('; ', $pairs);
            if (++$rowNum > 5000) {
                $lines[] = '... (truncated)';
                break;
            }
        }
    } finally {
        fclose($fh);
    }

    return implode("\n", $lines);
}
/**
 * Extract plain text from an XLSX workbook without a spreadsheet library.
 *
 * Reads xl/sharedStrings.xml and every xl/worksheets/sheet{N}.xml entry found
 * in the archive (ascending N, at most 99 sheets, gaps in numbering allowed).
 * Each sheet is rendered as "=== Sheet N ===" followed by one line per row
 * with the row's non-empty cells separated by tabs.
 *
 * Cell handling:
 *   - t="s"          → looked up in the shared-string table. The table is
 *                      indexed per <si> item, with all text runs of an item
 *                      concatenated (phonetic <rPh> runs are ignored).
 *   - t="inlineStr"  → text runs inside the cell, concatenated.
 *   - anything else  → the <v> value, entity-decoded (numbers, booleans,
 *                      cached formula strings).
 *   - cells without content (including self-closing <c/>) are skipped.
 *
 * Parsing is regex-based and assumes un-prefixed element names (<c>, <v>,
 * <t>, <si>, <row>).
 *
 * @throws DbnToolsHttpException (422, xlsx_open_failed) when the archive cannot be opened.
 * @throws DbnToolsHttpException (422, xlsx_empty) when no cell text was found.
 */
function dbnDmsExtractXlsx(string $path): string
{
    $zip = new ZipArchive();
    if ($zip->open($path) !== true) {
        throw new DbnToolsHttpException('Unable to open XLSX file.', 422, 'xlsx_open_failed');
    }

    $decode = static fn (string $xml): string
        => html_entity_decode(strip_tags($xml), ENT_QUOTES | ENT_XML1, 'UTF-8');

    // Concatenate all <t> text runs of a fragment; self-closing <t/> is ignored.
    $runText = static function (string $xml) use ($decode): string {
        $xml = preg_replace('#<rPh\b.*?</rPh>#s', '', $xml) ?? $xml;
        if (!preg_match_all('#<t\b(?![^>]*/>)[^>]*>(.*?)</t>#s', $xml, $runs)) {
            return '';
        }
        return $decode(implode('', $runs[1]));
    };

    // Shared strings are addressed by <si> position, not by <t> position:
    // a rich-text item holds several <t> runs but is still ONE index.
    $shared = [];
    $sharedXml = $zip->getFromName('xl/sharedStrings.xml');
    if ($sharedXml !== false
        && preg_match_all('#<si\b[^>]*?(?:/>|>(.*?)</si>)#s', $sharedXml, $items, PREG_SET_ORDER)
    ) {
        foreach ($items as $item) {
            $shared[] = $runText($item[1] ?? '');
        }
    }

    // Discover sheet parts from the archive index so numbering gaps do not end the scan.
    $sheetNumbers = [];
    for ($i = 0; $i < $zip->numFiles; $i++) {
        $entry = $zip->getNameIndex($i);
        if ($entry !== false && preg_match('#^xl/worksheets/sheet(\d+)\.xml$#', $entry, $nm)) {
            $sheetNumbers[] = (int)$nm[1];
        }
    }
    sort($sheetNumbers);

    $out = [];
    foreach (array_slice($sheetNumbers, 0, 99) as $n) {
        $sheet = $zip->getFromName("xl/worksheets/sheet{$n}.xml");
        if ($sheet === false) {
            continue;
        }
        $rowLines = [];
        // Splitting on the row terminator keeps each regex subject small.
        foreach (explode('</row>', $sheet) as $rowXml) {
            // Group 1 = attribute text, group 2 = inner XML (absent for self-closing cells).
            if (!preg_match_all('#<c\b([^>]*?)(?:/>|>(.*?)</c>)#s', $rowXml, $cellTags, PREG_SET_ORDER)) {
                continue;
            }
            $cells = [];
            foreach ($cellTags as $cell) {
                $inner = $cell[2] ?? '';
                if ($inner === '') {
                    continue;
                }
                // The type attribute may appear anywhere among the cell's attributes.
                $type = preg_match('/\bt=(["\'])(.*?)\1/', $cell[1], $tm) ? $tm[2] : '';
                if ($type === 's') {
                    if (preg_match('#<v\b[^>]*>\s*(\d+)\s*</v>#', $inner, $vm) && isset($shared[(int)$vm[1]])) {
                        $cells[] = $shared[(int)$vm[1]];
                    }
                } elseif ($type === 'inlineStr') {
                    $text = $runText($inner);
                    if ($text !== '') {
                        $cells[] = $text;
                    }
                } elseif (preg_match('#<v\b[^>]*>(.*?)</v>#s', $inner, $vm)) {
                    $cells[] = $decode($vm[1]);
                }
            }
            if ($cells) {
                $rowLines[] = implode("\t", $cells);
            }
        }
        if ($rowLines) {
            $out[] = "=== Sheet {$n} ===\n" . implode("\n", $rowLines);
        }
    }
    $zip->close();

    $text = implode("\n\n", $out);
    if (trim($text) === '') {
        throw new DbnToolsHttpException('No readable content in XLSX.', 422, 'xlsx_empty');
    }
    return $text;
}
/**
 * Extract plain text from the slides of a PPTX presentation.
 *
 * Reads every ppt/slides/slide{N}.xml entry found in the archive (ascending
 * N, at most 499 slides, gaps in numbering allowed). Each slide with text is
 * rendered as "=== Slide N ===" followed by one line per paragraph; the text
 * runs of a paragraph are joined without a separator so a word split across
 * formatting runs stays intact. Speaker notes (ppt/notesSlides) are not read.
 *
 * N is the slide part number from the file name.
 * NOTE(review): presumably the on-screen slide order is defined elsewhere in
 * the package and can differ from N — confirm if ordering matters.
 *
 * @throws DbnToolsHttpException (422, pptx_open_failed) when the archive cannot be opened.
 * @throws DbnToolsHttpException (422, pptx_empty) when no slide text was found.
 */
function dbnDmsExtractPptx(string $path): string
{
    $zip = new ZipArchive();
    if ($zip->open($path) !== true) {
        throw new DbnToolsHttpException('Unable to open PPTX file.', 422, 'pptx_open_failed');
    }

    // Discover slide parts from the archive index so numbering gaps do not end the scan.
    $slideNumbers = [];
    for ($i = 0; $i < $zip->numFiles; $i++) {
        $entry = $zip->getNameIndex($i);
        if ($entry !== false && preg_match('#^ppt/slides/slide(\d+)\.xml$#', $entry, $nm)) {
            $slideNumbers[] = (int)$nm[1];
        }
    }
    sort($slideNumbers);

    $slides = [];
    foreach (array_slice($slideNumbers, 0, 499) as $n) {
        $xml = $zip->getFromName("ppt/slides/slide{$n}.xml");
        if ($xml === false) {
            continue;
        }
        $paragraphs = [];
        foreach (explode('</a:p>', $xml) as $paragraphXml) {
            // \b keeps <a:tab/>, <a:tbl>, <a:tc> … from being mistaken for a text run.
            if (!preg_match_all('#<a:t\b(?![^>]*/>)[^>]*>(.*?)</a:t>#s', $paragraphXml, $runs)) {
                continue;
            }
            $line = html_entity_decode(strip_tags(implode('', $runs[1])), ENT_QUOTES | ENT_XML1, 'UTF-8');
            if (trim($line) !== '') {
                $paragraphs[] = $line;
            }
        }
        if ($paragraphs) {
            $slides[] = "=== Slide {$n} ===\n" . implode("\n", $paragraphs);
        }
    }
    $zip->close();

    if (!$slides) {
        throw new DbnToolsHttpException('No readable content in PPTX.', 422, 'pptx_empty');
    }
    return implode("\n\n", $slides);
}
/**
 * Map a file extension (case-insensitive, without the dot) to the
 * Content-Type used when streaming the file for preview/download.
 *
 * Covers the document, audio and image extensions accepted by the upload
 * extractor; anything else falls back to application/octet-stream.
 *
 * NOTE(review): 'html'/'htm' map to text/html. If user-uploaded HTML is ever
 * streamed inline from the application origin with this type it will execute
 * in that origin — confirm callers force a download or sandbox it.
 */
function dbnDmsContentTypeForExt(string $ext): string
{
    return match (strtolower($ext)) {
        'pdf' => 'application/pdf',
        'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        'txt' => 'text/plain; charset=utf-8',
        'md' => 'text/markdown; charset=utf-8',
        'csv' => 'text/csv; charset=utf-8',
        'html', 'htm' => 'text/html; charset=utf-8',
        'json' => 'application/json',
        'mp3' => 'audio/mpeg',
        'wav' => 'audio/wav',
        'm4a' => 'audio/mp4',
        'ogg' => 'audio/ogg',
        // Accepted as audio uploads (DBN_TOOLS_EXTRACT_AUDIO_EXTS), so they need a playable type too.
        'flac' => 'audio/flac',
        'webm' => 'audio/webm',
        'png' => 'image/png',
        'jpg', 'jpeg' => 'image/jpeg',
        'webp' => 'image/webp',
        default => 'application/octet-stream',
    };
}
+4
View File
@@ -45,8 +45,10 @@ if ($dashAuthUser !== null) {
$dashboardNav = [
'index' => ['url' => '/dashboard/', 'label' => dbnToolsT('dash_nav_overview', $uiLang), 'sub' => 'Overview'],
'documents' => ['url' => '/dashboard/documents.php', 'label' => dbnToolsT('dash_nav_documents', $uiLang), 'sub' => 'Documents'],
'folders' => ['url' => '/dashboard/folders.php', 'label' => dbnToolsT('dash_nav_folders', $uiLang) ?: 'Folders', 'sub' => 'Folder tree & access'],
'upload' => ['url' => '/dashboard/upload.php', 'label' => dbnToolsT('dash_nav_upload', $uiLang), 'sub' => 'Upload'],
'chat' => ['url' => '/dashboard/chat.php', 'label' => dbnToolsT('dash_nav_ask', $uiLang), 'sub' => 'Ask'],
'trash' => ['url' => '/dashboard/trash.php', 'label' => dbnToolsT('dash_nav_trash', $uiLang) ?: 'Trash', 'sub' => 'Restore or purge'],
'settings' => ['url' => '/dashboard/settings.php', 'label' => dbnToolsT('dash_nav_settings', $uiLang), 'sub' => 'Settings'],
];
?>
@@ -59,6 +61,8 @@ $dashboardNav = [
<link rel="stylesheet" href="../assets/css/tools.css">
<link rel="stylesheet" href="../assets/css/dashboard.css">
<link rel="stylesheet" href="../assets/css/dbn-tools-redesign.css">
<link rel="stylesheet" href="../assets/css/dms.css">
<script src="../assets/js/dashboard/dms.js" defer></script>
</head>
<body data-authenticated="true" data-dashboard-page="<?= htmlspecialchars($dashboardPage) ?>">
<script>