2e2b0b45fa
Rebuild the dashboard as a Drive-style document management system on top of the existing CaveauAI hybrid RAG pipeline. Backend: - 5 migrations (versions, trash soft-delete, saved searches, categories, audit) - DMS helpers (folder ACL walker, disk storage, audit, version snapshot, XLSX/PPTX/HTML/CSV/MD extractors) - New APIs: folders, document-versions, trash, bulk, preview, saved-searches, categories, diagnostics - Extended APIs: documents (folder_id, soft-delete, ACL filter, sort), upload (9 file types, version-collision detection with replace/new/keep-both, disk persistence), chat-stream (folder scoping + graph related-documents) - 30-day trash purge cron with Qdrant + disk + graph cleanup Frontend: - Drive-style two-pane browser with folder tree, drag-drop, bulk-action bar, right-click context menu, multi-select - New pages: folders (tree + per-folder ACL editor), trash (restore/purge) - Extended pages: upload (folder picker, version-collision modal, 9 file type chips), document (Preview/Versions/Permissions tabs with PDF.js + mammoth.js + audio), index (DMS KPIs + activity feed), settings (live diagnostics ping MariaDB/Qdrant/LiteLLM/FalkorDB/disk), chat (folder scope chips + related-authorities chips) - New CSS (dms.css) + JS bundle (dms.js) exposing window.DBN_DMS - Sidebar nav adds Folders + Trash items All routes return HTTP 200 in local smoke test; all 32 files lint clean. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
472 lines
16 KiB
PHP
472 lines
16 KiB
PHP
<?php
|
|
declare(strict_types=1);
|
|
|
|
/**
 * DMS helpers: folder ACLs, storage paths, audit logging, version bookkeeping.
 * Loaded from bootstrap.php so all dashboard pages + APIs get them implicitly.
 */
|
|
|
|
// Folder nesting cap. Matches the ai-portal app-layer cap; raise here to unlock deeper nesting.
const DBN_DMS_MAX_FOLDER_DEPTH = 2;

// Version rows kept per document; the oldest are auto-pruned beyond this.
const DBN_DMS_MAX_VERSIONS_PER_DOC = 20;

// Trash retention, in days.
const DBN_DMS_TRASH_RETENTION_DAYS = 30;

// Category dictionary seeded for a tenant whose category table is empty.
const DBN_DMS_DEFAULT_CATEGORIES = [
    [
        'slug' => 'uncategorized',
        'label' => 'Uncategorized',
        'color' => '#94a3b8',
        'icon' => 'folder',
        'sort_order' => 0,
    ],
    [
        'slug' => 'legal',
        'label' => 'Legal',
        'color' => '#1d4ed8',
        'icon' => 'scale',
        'sort_order' => 10,
    ],
    [
        'slug' => 'financial',
        'label' => 'Financial',
        'color' => '#047857',
        'icon' => 'chart',
        'sort_order' => 20,
    ],
    [
        'slug' => 'internal',
        'label' => 'Internal',
        'color' => '#7c3aed',
        'icon' => 'building',
        'sort_order' => 30,
    ],
    [
        'slug' => 'hr',
        'label' => 'HR',
        'color' => '#db2777',
        'icon' => 'people',
        'sort_order' => 40,
    ],
    [
        'slug' => 'marketing',
        'label' => 'Marketing',
        'color' => '#b88a2c',
        'icon' => 'megaphone',
        'sort_order' => 50,
    ],
];
|
|
|
|
/**
 * Build the on-disk path where a document (or one of its versions) is stored.
 *
 * Root selection: the DBN_TOOLS_UPLOAD_ROOT env value when set; otherwise
 * /home/dobetternorge/uploads if that directory exists, else DBN_TOOLS_ROOT/uploads.
 *
 * Layout:
 *   current file : {root}/{client_id}/{document_id}.{ext}
 *   version file : {root}/{client_id}/{document_id}_versions/v{n}.{ext}
 *
 * Side effect: the tenant directory (and the versions directory, when a positive
 * version number is given) is created with mode 0750 if missing; creation
 * failures are suppressed here and left for the caller to detect.
 *
 * @param string   $ext           Extension; lowercased and reduced to [a-z0-9], 'bin' when nothing usable remains.
 * @param int|null $versionNumber Positive number selects the version layout; null/0 selects the current file.
 */
function dbnDmsStoragePath(int $clientId, int $documentId, string $ext, ?int $versionNumber = null): string
{
    $uploadRoot = dbnToolsEnv('DBN_TOOLS_UPLOAD_ROOT', '');
    if ($uploadRoot === null || $uploadRoot === '') {
        if (is_dir('/home/dobetternorge/uploads')) {
            $uploadRoot = '/home/dobetternorge/uploads';
        } else {
            $uploadRoot = DBN_TOOLS_ROOT . '/uploads';
        }
    }

    $safeExt = preg_replace('/[^a-z0-9]/', '', strtolower($ext));
    if (!$safeExt) {
        $safeExt = 'bin';
    }

    $tenantDir = rtrim($uploadRoot, '/') . '/' . $clientId;
    if (!is_dir($tenantDir)) {
        @mkdir($tenantDir, 0750, true);
    }

    $wantsVersionFile = $versionNumber !== null && $versionNumber > 0;
    if (!$wantsVersionFile) {
        return $tenantDir . '/' . $documentId . '.' . $safeExt;
    }

    $historyDir = $tenantDir . '/' . $documentId . '_versions';
    if (!is_dir($historyDir)) {
        @mkdir($historyDir, 0750, true);
    }

    return $historyDir . '/v' . $versionNumber . '.' . $safeExt;
}
|
|
|
|
/**
 * Copy an uploaded temp file into permanent storage.
 *
 * The destination comes from dbnDmsStoragePath(). The copied file is chmod'ed
 * to 0640 (best effort).
 *
 * @return string|null The destination path, or null when the target directory is
 *                     missing / not writable or the copy itself fails.
 */
function dbnDmsPersistFile(string $tmpPath, int $clientId, int $documentId, string $ext, ?int $versionNumber = null): ?string
{
    $target = dbnDmsStoragePath($clientId, $documentId, $ext, $versionNumber);
    $targetDir = dirname($target);

    $dirUsable = is_dir($targetDir) && is_writable($targetDir);
    // Short-circuit keeps the copy from being attempted when the directory is unusable.
    if (!$dirUsable || !@copy($tmpPath, $target)) {
        return null;
    }

    @chmod($target, 0640);

    return $target;
}
|
|
|
|
/**
 * Walk the folder tree upward from $folderId and return the chain ordered root→leaf.
 *
 * Each element is the fetched client_folders row (id, name, parent_id, color).
 * Only non-deleted folders belonging to $clientId are followed; the walk stops
 * at the first folder that cannot be loaded.
 *
 * Safety: the walk is capped at 50 hops and additionally stops as soon as a
 * folder id repeats, so a corrupt parent_id cycle cannot produce duplicate rows.
 *
 * @return array<int, array> [] when $folderId is null/0 (corpus root) or the folder is not found.
 */
function dbnDmsFolderChain(?int $folderId, int $clientId): array
{
    if (!$folderId) {
        return [];
    }

    // Prepared once and re-executed per hop instead of re-preparing inside the loop.
    $stmt = dbnToolsDb()->prepare(
        'SELECT id, name, parent_id, color FROM client_folders WHERE id = ? AND client_id = ? AND deleted_at IS NULL'
    );

    $chain = [];
    $visited = [];
    $current = $folderId;
    $hops = 0;
    while ($current && !isset($visited[$current]) && $hops++ < 50) {
        $visited[$current] = true;

        $stmt->execute([$current, $clientId]);
        $row = $stmt->fetch();
        // Release the result set so the same statement can be executed again on any driver.
        $stmt->closeCursor();
        if (!$row) {
            break;
        }

        $chain[] = $row;
        $current = $row['parent_id'] ? (int)$row['parent_id'] : 0;
    }

    // Collected leaf→root; callers expect root→leaf.
    return array_reverse($chain);
}
|
|
|
|
/**
 * Breadcrumb trail for a folder, ordered from the top-level folder down to $folderId.
 *
 * Built from dbnDmsFolderChain(); each crumb is
 * ['id' => int, 'name' => string, 'color' => mixed|null].
 *
 * @return array<int, array{id:int, name:string, color:mixed}> [] when at corpus root.
 */
function dbnDmsBreadcrumb(?int $folderId, int $clientId): array
{
    $crumbs = [];
    foreach (dbnDmsFolderChain($folderId, $clientId) as $folder) {
        $crumbs[] = [
            'id' => (int)$folder['id'],
            'name' => (string)$folder['name'],
            'color' => $folder['color'] ?? null,
        ];
    }

    return $crumbs;
}
|
|
|
|
/**
 * Depth of a folder, counted as the length of its ancestor chain: a top-level
 * folder is 1, the corpus root (null/0) is 0. Used to enforce DBN_DMS_MAX_FOLDER_DEPTH.
 */
function dbnDmsFolderDepth(?int $folderId, int $clientId): int
{
    return $folderId
        ? count(dbnDmsFolderChain($folderId, $clientId))
        : 0;
}
|
|
|
|
/**
 * Decide whether a user may act on $folderId with $perm = 'read' | 'write' | 'manage'.
 * Any other $perm value is evaluated as 'read'.
 *
 * Resolution order:
 *   1. Corpus root (null/0): read is open to everyone; write and manage need
 *      tenant role editor, admin or owner.
 *   2. Tenant admin/owner: always allowed on any folder.
 *   3. Folder not found for this tenant (empty chain): denied.
 *   4. Walk the chain leaf→root. The first folder that has ANY ACL rows decides:
 *      allowed when a row with the requested flag set matches the user id, or
 *      has a min_role the tenant role satisfies; otherwise denied. Folders
 *      above that level are not consulted.
 *   5. No ACL rows on any folder in the chain: allowed (open by default).
 *
 * NOTE(review): step 5 lets a 'viewer' write/manage an ACL-less sub-folder even
 * though the root requires editor+ for the same action — confirm this is intended.
 */
function dbnDmsUserCanAccessFolder(?int $folderId, string $perm, int $clientId, int $userId, string $tenantRole = 'viewer'): bool
{
    // Corpus root: reads are always allowed; write and manage both require editor or above.
    if (!$folderId) {
        if ($perm === 'manage' || $perm === 'write') {
            return in_array($tenantRole, ['editor', 'admin', 'owner'], true);
        }
        return true;
    }

    if (in_array($tenantRole, ['admin', 'owner'], true)) {
        return true;
    }

    $chain = dbnDmsFolderChain($folderId, $clientId);
    if (!$chain) {
        return false;
    }

    // Column is chosen from a fixed whitelist, never from raw input.
    $col = match ($perm) {
        'write' => 'can_write',
        'manage' => 'can_manage',
        default => 'can_read',
    };

    // Prepared once; re-executed for each folder in the chain.
    $stmt = dbnToolsDb()->prepare(
        'SELECT min_role, user_id, can_read, can_write, can_manage
         FROM client_folder_permissions
         WHERE folder_id = ? AND client_id = ?'
    );

    foreach (array_reverse($chain) as $folder) {
        $stmt->execute([(int)$folder['id'], $clientId]);
        $rows = $stmt->fetchAll();
        if (!$rows) {
            // No ACL at this level: inherit from the parent.
            continue;
        }

        foreach ($rows as $row) {
            if ((int)$row[$col] !== 1) {
                continue; // Row does not grant the requested permission.
            }
            if ($row['user_id'] !== null && (int)$row['user_id'] === $userId) {
                return true;
            }
            if ($row['min_role'] !== null && dbnDmsRoleAtLeast($tenantRole, (string)$row['min_role'])) {
                return true;
            }
        }

        // ACL exists at this level but grants nothing to this user: block,
        // with no inheritance past an explicit restriction.
        return false;
    }

    // No ACL rows anywhere in the chain → open per migration 119 convention.
    return true;
}
|
|
|
|
/**
 * Whether $userRole ranks at or above $minRole in viewer < editor < admin < owner.
 *
 * An unrecognised $userRole is treated as the lowest rank (viewer).
 * An unrecognised $minRole fails closed: nobody satisfies a requirement that
 * cannot be interpreted, so a typo'd or empty min_role can never open a folder
 * to every user.
 */
function dbnDmsRoleAtLeast(string $userRole, string $minRole): bool
{
    $rank = ['viewer' => 0, 'editor' => 1, 'admin' => 2, 'owner' => 3];

    if (!isset($rank[$minRole])) {
        return false;
    }

    return ($rank[$userRole] ?? 0) >= $rank[$minRole];
}
|
|
|
|
/**
 * Append a row to client_document_audit.
 *
 * Best effort by design: any Throwable is caught and written to the error log,
 * because auditing must never break the request.
 *
 * @param int|null $userId     Stored as NULL when null/0.
 * @param string   $action     Truncated to 40 bytes.
 * @param array    $details    JSON-encoded; stored as NULL when empty. Invalid UTF-8
 *                             is substituted and unencodable values are dropped so a
 *                             bad byte in the details cannot lose the audit row.
 * @param int|null $documentId Stored as NULL when null/0.
 * @param int|null $folderId   Stored as NULL when null/0.
 */
function dbnDmsLogAudit(int $clientId, ?int $userId, string $action, array $details = [], ?int $documentId = null, ?int $folderId = null): void
{
    try {
        $detailsJson = null;
        if ($details) {
            $detailsJson = json_encode(
                $details,
                JSON_UNESCAPED_UNICODE
                | JSON_UNESCAPED_SLASHES
                | JSON_INVALID_UTF8_SUBSTITUTE
                | JSON_PARTIAL_OUTPUT_ON_ERROR
            );
            if ($detailsJson === false) {
                // Never bind boolean false to the details column.
                $detailsJson = null;
            }
        }

        $db = dbnToolsDb();
        $stmt = $db->prepare(
            'INSERT INTO client_document_audit (client_id, user_id, document_id, folder_id, action, details, ip_addr, created_at)
             VALUES (?, ?, ?, ?, ?, ?, ?, NOW())'
        );
        $stmt->execute([
            $clientId,
            $userId ?: null,
            $documentId ?: null,
            $folderId ?: null,
            substr($action, 0, 40),
            $detailsJson,
            // 45 characters is enough for a textual IPv6 address; empty when REMOTE_ADDR is unset.
            substr((string)($_SERVER['REMOTE_ADDR'] ?? ''), 0, 45),
        ]);
    } catch (Throwable $e) {
        error_log('[dbn-dms] audit insert failed: ' . $e->getMessage());
    }
}
|
|
|
|
/**
 * Insert DBN_DMS_DEFAULT_CATEGORIES for a tenant that has no category rows yet.
 *
 * Does nothing when the tenant already has at least one row in client_categories.
 * Seeded rows are flagged is_system = 1. Any Throwable (for example the table
 * not existing because the migration has not been applied) is logged and swallowed.
 */
function dbnDmsSeedDefaultCategoriesIfEmpty(int $clientId): void
{
    try {
        $pdo = dbnToolsDb();

        $existing = $pdo->prepare('SELECT COUNT(*) FROM client_categories WHERE client_id = ?');
        $existing->execute([$clientId]);
        $alreadySeeded = (int)$existing->fetchColumn() > 0;
        if ($alreadySeeded) {
            return;
        }

        $insert = $pdo->prepare(
            'INSERT INTO client_categories (client_id, slug, label, color, icon, sort_order, is_system)
             VALUES (?, ?, ?, ?, ?, ?, 1)'
        );
        foreach (DBN_DMS_DEFAULT_CATEGORIES as [
            'slug' => $slug,
            'label' => $label,
            'color' => $color,
            'icon' => $icon,
            'sort_order' => $sortOrder,
        ]) {
            $insert->execute([$clientId, $slug, $label, $color, $icon, $sortOrder]);
        }
    } catch (Throwable $e) {
        // Likely table doesn't exist yet (migration not applied).
        error_log('[dbn-dms] seed categories failed: ' . $e->getMessage());
    }
}
|
|
|
|
/**
 * Copy the current state of a document into client_document_versions, intended
 * to be called before the document row is overwritten.
 *
 * The snapshot is stored under the document's current_version (1 when that
 * column is null/absent). After inserting, version rows beyond
 * DBN_DMS_MAX_VERSIONS_PER_DOC are deleted, lowest version_number first.
 *
 * @return int The version number the snapshot was stored under, or 0 when the
 *             document does not exist for this tenant. Nothing is caught here,
 *             so whatever the database layer throws reaches the caller.
 */
function dbnDmsSnapshotVersion(int $documentId, int $clientId, ?int $userId, ?string $notes = null): int
{
    $pdo = dbnToolsDb();

    $lookup = $pdo->prepare('SELECT * FROM client_documents WHERE id = ? AND client_id = ?');
    $lookup->execute([$documentId, $clientId]);
    $document = $lookup->fetch();
    if (!$document) {
        return 0;
    }

    $versionNumber = (int)($document['current_version'] ?? 1);

    $insert = $pdo->prepare(
        'INSERT INTO client_document_versions
            (document_id, client_id, version_number, title, content, file_size_bytes,
             original_filename, storage_path, word_count, uploaded_by, notes, created_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, NOW())'
    );
    $snapshot = [
        $documentId,
        $clientId,
        $versionNumber,
        (string)$document['title'],
        (string)($document['content'] ?? ''),
        (int)($document['file_size_bytes'] ?? 0),
        $document['original_filename'] ?? null,
        $document['storage_path'] ?? null,
        (int)($document['word_count'] ?? 0),
        $userId ?: null,
        $notes,
    ];
    $insert->execute($snapshot);

    // Enforce the per-document cap by dropping the oldest surplus rows.
    $counter = $pdo->prepare('SELECT COUNT(*) FROM client_document_versions WHERE document_id = ?');
    $counter->execute([$documentId]);
    $surplus = (int)$counter->fetchColumn() - DBN_DMS_MAX_VERSIONS_PER_DOC;
    if ($surplus > 0) {
        // $surplus is an int computed above, so concatenating it into LIMIT is safe.
        $trim = $pdo->prepare(
            'DELETE FROM client_document_versions
             WHERE document_id = ?
             ORDER BY version_number ASC
             LIMIT ' . $surplus
        );
        $trim->execute([$documentId]);
    }

    return $versionNumber;
}
|
|
|
|
/**
 * Lower-cased file extension of a filename (text after the last dot), without the dot.
 *
 * Only the final path segment is inspected, so a dot inside a directory name
 * ("my.dir/readme") is not mistaken for an extension. Both '/' and '\' count as
 * separators because upload filenames may originate from Windows clients.
 *
 * @return string '' when the final segment has no dot or ends with a dot.
 */
function dbnDmsExtensionFromFilename(string $filename): string
{
    // Drop everything up to and including the last path separator.
    $base = preg_replace('#^.*[/\\\\]#s', '', $filename) ?? $filename;

    $dot = strrpos($base, '.');
    if ($dot === false) {
        return '';
    }

    return strtolower(substr($base, $dot + 1));
}
|
|
|
|
/**
 * Extract plain text from an HTML file.
 *
 * Steps: convert to UTF-8, drop <script>/<style> blocks, insert line breaks at
 * block-level tags (and a space at table cells) so adjacent elements do not fuse
 * into one word when tags are stripped, strip tags, decode entities, then trim
 * trailing whitespace on each line and collapse runs of 3+ line breaks to one blank line.
 *
 * @throws DbnToolsHttpException (500, read_error) when the file cannot be read.
 */
function dbnDmsExtractHtml(string $path): string
{
    $raw = file_get_contents($path);
    if ($raw === false) {
        throw new DbnToolsHttpException('Unable to read HTML file.', 500, 'read_error');
    }

    $raw = mb_convert_encoding($raw, 'UTF-8', 'UTF-8, ISO-8859-1, Windows-1252');
    $raw = preg_replace('#<script\b[^>]*>.*?</script>#is', '', $raw) ?? $raw;
    $raw = preg_replace('#<style\b[^>]*>.*?</style>#is', '', $raw) ?? $raw;

    // strip_tags() removes tags without leaving whitespace, so "<p>a</p><p>b</p>"
    // would become "ab". Put a newline in front of block-level open/close tags first.
    $raw = preg_replace(
        '#<(?=/?(?:p|div|br|hr|li|ul|ol|dl|dt|dd|tr|table|h[1-6]|section|article|aside|header|footer|nav|main|blockquote|pre|title)\b)#i',
        "\n<",
        $raw
    ) ?? $raw;
    // Table cells stay on one line but must not run together.
    $raw = preg_replace('#<(?=/?t[dh]\b)#i', ' <', $raw) ?? $raw;

    $text = trim(html_entity_decode(strip_tags($raw), ENT_QUOTES | ENT_HTML5, 'UTF-8'));

    // Whitespace-only lines would otherwise defeat the blank-line collapse below.
    $text = preg_replace('/[ \t]+(?=[\r\n])/', '', $text) ?? $text;

    return preg_replace("/[\r\n]{3,}/", "\n\n", $text) ?? $text;
}
|
|
|
|
/**
 * Render a comma-separated file as readable text.
 *
 * The first non-blank row is the header and is emitted as "a | b | c". Every
 * later row becomes "- header: value; header: value", listing only non-empty
 * cells so each line carries its own column context. Cells beyond the header
 * width are labelled col{index}.
 *
 * Handling of common real-world quirks:
 *   - a leading UTF-8 byte-order mark is skipped so it does not pollute the first header name;
 *   - blank lines and rows whose cells are all empty are ignored.
 *
 * Output is cut off with "... (truncated)" once more than 5000 data rows have been emitted.
 *
 * NOTE(review): the delimiter is fixed to ','; semicolon-separated exports will
 * be read as a single column — confirm whether delimiter detection is wanted.
 *
 * @throws DbnToolsHttpException (500, read_error) when the file cannot be opened.
 */
function dbnDmsExtractCsv(string $path): string
{
    $fh = @fopen($path, 'rb');
    if (!$fh) {
        throw new DbnToolsHttpException('Unable to read CSV file.', 500, 'read_error');
    }

    $lines = [];
    $header = null;
    $rowNum = 0;

    try {
        // Consume a UTF-8 BOM if present; otherwise go back to the start.
        if (fread($fh, 3) !== "\xEF\xBB\xBF") {
            rewind($fh);
        }

        while (($row = fgetcsv($fh, 0, ',', '"', '\\')) !== false) {
            // fgetcsv() reports a blank line as an array holding a single null.
            if ($row === [null]) {
                continue;
            }
            $row = array_map(static fn ($c): string => (string)$c, $row);

            if ($header === null) {
                $header = $row;
                $lines[] = implode(' | ', $header);
                continue;
            }

            $pairs = [];
            foreach ($row as $i => $cell) {
                if ($cell !== '') {
                    $pairs[] = ($header[$i] ?? "col{$i}") . ': ' . $cell;
                }
            }
            if (!$pairs) {
                continue; // Nothing to say about a row of empty cells.
            }

            $lines[] = '- ' . implode('; ', $pairs);
            if (++$rowNum > 5000) {
                $lines[] = '... (truncated)';
                break;
            }
        }
    } finally {
        fclose($fh);
    }

    return implode("\n", $lines);
}
|
|
|
|
/**
 * Extract plain text from an XLSX workbook without a spreadsheet library.
 *
 * Reads xl/sharedStrings.xml and every xl/worksheets/sheet{N}.xml (N in 1..99)
 * present in the archive, in numeric order. For each sheet that yields at least
 * one value the output contains "=== Sheet N ===" followed by the cell values
 * joined with tabs. N is the number in the part file name.
 *
 * Cell handling:
 *   - t="s"         → looked up in the shared-string table (one entry per <si>,
 *                     rich-text runs concatenated, phonetic <rPh> runs ignored);
 *   - t="inlineStr" → concatenated <t> runs of the inline string;
 *   - anything else → the <v> value, entity-decoded;
 *   - self-closing <c .../> cells carry no value and are skipped.
 *
 * @throws DbnToolsHttpException (422, xlsx_open_failed) when the archive cannot be opened,
 *                               (422, xlsx_empty) when no cell values were found.
 */
function dbnDmsExtractXlsx(string $path): string
{
    $zip = new ZipArchive();
    if ($zip->open($path) !== true) {
        throw new DbnToolsHttpException('Unable to open XLSX file.', 422, 'xlsx_open_failed');
    }

    $decode = static fn (string $fragment): string
        => html_entity_decode(strip_tags($fragment), ENT_QUOTES | ENT_XML1, 'UTF-8');

    // Concatenate all <t>…</t> runs in a fragment. The tag pattern accepts only a real
    // <t> element (optionally with attributes) and rejects self-closing <t/>.
    $joinTextRuns = static function (string $xml) use ($decode): string {
        if (!preg_match_all('#<t(?:\s[^>]*)?(?<!/)>(.*?)</t>#s', $xml, $runs)) {
            return '';
        }
        return $decode(implode('', $runs[1]));
    };

    // Shared strings are addressed by the position of their <si> element, so every
    // <si> (even an empty or self-closing one) must occupy exactly one slot.
    $shared = [];
    $sharedXml = $zip->getFromName('xl/sharedStrings.xml');
    if ($sharedXml !== false
        && preg_match_all('#<si\b[^>]*?(?:/>|>(.*?)</si>)#s', $sharedXml, $items, PREG_SET_ORDER)
    ) {
        foreach ($items as $item) {
            $body = preg_replace('#<rPh\b.*?</rPh>#s', '', $item[1] ?? '') ?? '';
            $shared[] = $joinTextRuns($body);
        }
    }

    // Enumerate the sheet parts actually present: numbering may have gaps after a
    // sheet was deleted, so probing 1, 2, 3… until the first miss would drop sheets.
    $sheetNumbers = [];
    for ($idx = 0; $idx < $zip->numFiles; $idx++) {
        $entry = $zip->getNameIndex($idx);
        if ($entry !== false && preg_match('#^xl/worksheets/sheet(\d+)\.xml$#', $entry, $nm)) {
            $number = (int)$nm[1];
            if ($number >= 1 && $number < 100) {
                $sheetNumbers[] = $number;
            }
        }
    }
    sort($sheetNumbers);

    $out = [];
    foreach ($sheetNumbers as $number) {
        $sheet = $zip->getFromName("xl/worksheets/sheet{$number}.xml");
        if ($sheet === false) {
            continue;
        }
        // Group 1 = raw attribute text, group 2 = cell body (absent for self-closing cells).
        if (!preg_match_all('#<c\b([^>]*?)(?:/>|>(.*?)</c>)#s', $sheet, $matches, PREG_SET_ORDER)) {
            continue;
        }

        $cells = [];
        foreach ($matches as $cell) {
            if (!isset($cell[2])) {
                continue;
            }
            $inner = $cell[2];
            // The type attribute can appear anywhere among the cell's attributes.
            $type = preg_match('#(?:^|\s)t=(["\'])(.*?)\1#', $cell[1], $tm) === 1 ? $tm[2] : '';

            if ($type === 's') {
                if (preg_match('#<v>\s*(\d+)\s*</v>#', $inner, $vm)) {
                    $sharedIndex = (int)$vm[1];
                    if (isset($shared[$sharedIndex])) {
                        $cells[] = $shared[$sharedIndex];
                    }
                }
            } elseif ($type === 'inlineStr') {
                $inline = $joinTextRuns($inner);
                if ($inline !== '') {
                    $cells[] = $inline;
                }
            } elseif (preg_match('#<v>(.*?)</v>#s', $inner, $vm)) {
                $cells[] = $decode($vm[1]);
            }
        }

        if ($cells) {
            $out[] = "=== Sheet {$number} ===\n" . implode("\t", $cells);
        }
    }
    $zip->close();

    $text = implode("\n\n", $out);
    if (trim($text) === '') {
        throw new DbnToolsHttpException('No readable content in XLSX.', 422, 'xlsx_empty');
    }

    return $text;
}
|
|
|
|
/**
 * Extract plain text from a PPTX deck (text frames of each slide).
 *
 * Reads every ppt/slides/slide{N}.xml (N in 1..499) present in the archive, in
 * numeric order. Each slide with text is emitted as "=== Slide N ===" followed
 * by one line per non-empty paragraph (<a:p>); the text runs (<a:t>) of a
 * paragraph are concatenated and an explicit <a:br> becomes a line break.
 * N is the number in the part file name. Speaker notes are not read.
 *
 * @throws DbnToolsHttpException (422, pptx_open_failed) when the archive cannot be opened,
 *                               (422, pptx_empty) when no slide contains text.
 */
function dbnDmsExtractPptx(string $path): string
{
    $zip = new ZipArchive();
    if ($zip->open($path) !== true) {
        throw new DbnToolsHttpException('Unable to open PPTX file.', 422, 'pptx_open_failed');
    }

    // Enumerate the slide parts actually present: file numbering keeps gaps when a
    // slide is deleted, so probing 1, 2, 3… until the first miss would drop slides.
    $slideNumbers = [];
    for ($idx = 0; $idx < $zip->numFiles; $idx++) {
        $entry = $zip->getNameIndex($idx);
        if ($entry !== false && preg_match('#^ppt/slides/slide(\d+)\.xml$#', $entry, $nm)) {
            $number = (int)$nm[1];
            if ($number >= 1 && $number < 500) {
                $slideNumbers[] = $number;
            }
        }
    }
    sort($slideNumbers);

    $slides = [];
    foreach ($slideNumbers as $number) {
        $xml = $zip->getFromName("ppt/slides/slide{$number}.xml");
        if ($xml === false) {
            continue;
        }

        $paragraphs = [];
        // Match real <a:p> elements only (not <a:pPr> etc., not self-closing).
        if (preg_match_all('#<a:p(?:\s[^>]*)?(?<!/)>(.*?)</a:p>#s', $xml, $paras)) {
            foreach ($paras[1] as $paragraphXml) {
                $line = '';
                // Either a line break, or a real <a:t> run. The strict tag pattern keeps
                // <a:tbl>, <a:tc>, <a:tab/> and friends from being read as text runs.
                if (preg_match_all(
                    '#<a:br\b[^>]*>|<a:t(?:\s[^>]*)?(?<!/)>(.*?)</a:t>#s',
                    $paragraphXml,
                    $runs,
                    PREG_SET_ORDER
                )) {
                    foreach ($runs as $run) {
                        $line .= isset($run[1])
                            ? html_entity_decode(strip_tags($run[1]), ENT_QUOTES | ENT_XML1, 'UTF-8')
                            : "\n";
                    }
                }
                if (trim($line) !== '') {
                    $paragraphs[] = $line;
                }
            }
        }

        if ($paragraphs) {
            $slides[] = "=== Slide {$number} ===\n" . implode("\n", $paragraphs);
        }
    }
    $zip->close();

    if (!$slides) {
        throw new DbnToolsHttpException('No readable content in PPTX.', 422, 'pptx_empty');
    }

    return implode("\n\n", $slides);
}
|
|
|
|
/**
 * Map a file extension (case-insensitive, without the dot) to the Content-Type
 * used when streaming the file for inline preview or download.
 *
 * Unknown extensions fall back to application/octet-stream.
 *
 * NOTE(review): html/htm map to text/html. If previews are served inline from
 * the application origin, uploaded HTML could execute script there — confirm the
 * preview endpoint sandboxes such responses or forces a download.
 */
function dbnDmsContentTypeForExt(string $ext): string
{
    static $typesByExtension = [
        'pdf' => 'application/pdf',
        'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        'txt' => 'text/plain; charset=utf-8',
        'md' => 'text/markdown; charset=utf-8',
        'csv' => 'text/csv; charset=utf-8',
        'html' => 'text/html; charset=utf-8',
        'htm' => 'text/html; charset=utf-8',
        'json' => 'application/json',
        'mp3' => 'audio/mpeg',
        'wav' => 'audio/wav',
        'm4a' => 'audio/mp4',
        'ogg' => 'audio/ogg',
        'png' => 'image/png',
        'jpg' => 'image/jpeg',
        'jpeg' => 'image/jpeg',
        'webp' => 'image/webp',
    ];

    $key = strtolower($ext);

    return $typesByExtension[$key] ?? 'application/octet-stream';
}
|