feat(dashboard): add corpus dashboard at /dashboard/
Full private corpus dashboard for tools.dobetternorge.no users — each SSO
account gets an auto-provisioned CaveauAI tenant (clients row, corpus) on
first visit. Includes upload (file/paste/URL), RAG chat with SSE streaming
and citation chips, document CRUD, FalkorDB graph relations tab, and
improved save-from-tool flow with tag/preview support.
- dashboard/{index,documents,document,upload,chat,settings}.php
- api/dashboard/{corpus-init,documents,upload,ingest-status,chat-stream,
save-from-tool,graph}.php
- includes/{CorpusProvision,layout_dashboard,layout_dashboard_footer}.php
- assets/css/dashboard.css assets/js/corpus-save.js (routing upgrade)
- includes/{bootstrap,layout}.php extended for dashboard provisioning
Migration 141 (clients.dbn_sso_uid + import_method enum) applied on chloe.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,110 @@
|
||||
<?php
|
||||
/**
|
||||
* POST /api/dashboard/chat-stream.php (SSE)
|
||||
*
|
||||
* Streams a RAG chat answer using the user's private corpus + the dobetter
|
||||
* legal package. Each output token is delivered as an SSE event named "token".
|
||||
* On completion, sources, chunks_used, model, and elapsed_ms are sent as a
|
||||
* "done" event. Errors are sent as a "fail" event.
|
||||
*
|
||||
* Request body (JSON):
|
||||
* {
|
||||
* "question": "Hva sier barnevernloven § 4-12?",
|
||||
* "history": [{role:"user"|"assistant", content:"..."}], // optional, capped at 8
|
||||
* "category": "barnevern" (optional),
|
||||
* "language": "no" | "en" (optional, default no)
|
||||
* }
|
||||
*/
|
||||
|
||||
declare(strict_types=1);

require_once dirname(__DIR__, 2) . '/includes/bootstrap.php';

dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

// Resolve the tenant for the current session; provisioning failures are
// reported through the shared error helper.
try {
    $tenant = dbnToolsEnsureDashboardTenant();
} catch (DbnToolsHttpException $e) {
    dbnToolsError($e->getMessage(), $e->status, $e->errorCode);
}
$clientId = (int)$tenant['client_id'];

$input = dbnToolsJsonInput(80_000);

// Only scalar values are treated as a question: casting an array to string
// would yield the literal "Array" (plus a warning) instead of a 400.
$rawQuestion = $input['question'] ?? '';
$question = is_scalar($rawQuestion) ? trim((string)$rawQuestion) : '';
if ($question === '') {
    dbnToolsError('question is required.', 400, 'missing_question');
}
if (mb_strlen($question, 'UTF-8') > 4000) {
    dbnToolsError('question is too long (max 4000 chars).', 422, 'question_too_long');
}

// Keep the last 8 history entries, then drop any that are not a
// {role: user|assistant, content: string} pair.
$history = is_array($input['history'] ?? null) ? $input['history'] : [];
$history = array_slice($history, -8);
$history = array_values(array_filter(
    $history,
    static fn ($m): bool => is_array($m)
        && in_array($m['role'] ?? '', ['user', 'assistant'], true)
        && is_string($m['content'] ?? null)
));

$rawCategory = $input['category'] ?? '';
$category = (is_scalar($rawCategory) ? trim((string)$rawCategory) : '') ?: null;

// Read the requested language once. The previous form validated the defaulted
// value but then re-read $input['language'], which is an undefined key (null)
// whenever the client omits the field.
$requestedLanguage = $input['language'] ?? 'no';
$language = in_array($requestedLanguage, ['no', 'en'], true) ? $requestedLanguage : 'no';

// SSE setup: disable caching/proxy buffering and PHP-level output buffering so
// each event reaches the client as soon as it is written.
header('Content-Type: text/event-stream');
header('Cache-Control: no-cache, no-transform');
header('X-Accel-Buffering: no');
@ini_set('output_buffering', 'off');
@ini_set('zlib.output_compression', '0');
while (ob_get_level() > 0) {
    // ob_end_flush() returns false when the active buffer cannot be closed;
    // stop instead of looping forever on such a buffer.
    if (!@ob_end_flush()) {
        break;
    }
}
ob_implicit_flush(true);
|
||||
|
||||
/**
 * Writes one Server-Sent Events frame ("event:" line + JSON "data:" line)
 * to the output and flushes it.
 *
 * Invalid UTF-8 inside $data is replaced with U+FFFD rather than making
 * json_encode() fail: a failed encode returns false, which would otherwise be
 * written as an empty "data:" line the client cannot parse.
 * NOTE(review): streamed model chunks are presumably the likely source of such
 * bytes (a multibyte character split across two chunks) — confirm upstream.
 *
 * @param string               $event SSE event name; CR/LF are removed so the
 *                                    name cannot break the frame structure.
 * @param array<string, mixed> $data  Payload encoded as a single-line JSON value.
 */
function sseEmit(string $event, array $data): void
{
    $flags = JSON_UNESCAPED_UNICODE
        | JSON_UNESCAPED_SLASHES
        | JSON_INVALID_UTF8_SUBSTITUTE
        | JSON_PARTIAL_OUTPUT_ON_ERROR;

    $payload = json_encode($data, $flags);
    if ($payload === false) {
        // Last resort: still emit a syntactically valid JSON document.
        $payload = '{}';
    }

    $eventName = str_replace(["\r", "\n"], '', $event);

    echo "event: {$eventName}\n";
    echo 'data: ' . $payload . "\n\n";
    @flush();
}
|
||||
|
||||
dbnToolsBootCaveau();

try {
    $pipeline = new ClientRagPipeline($clientId);

    // Relay every non-empty chunk from the pipeline to the client as a
    // "token" event.
    $relayToken = function (string $chunk): void {
        if ($chunk === '') {
            return;
        }
        sseEmit('token', ['t' => $chunk]);
    };

    $result = $pipeline->askStreaming(
        $question,
        null, // model: let pipeline choose default
        $category,
        [
            'conversation_history' => $history,
            'language'             => $language,
            'user_id'              => (int)($tenant['client_user_id'] ?? 0),
            'user_role'            => 'owner',
        ],
        $relayToken
    );

    // Prefer "fullChunks" when the pipeline result has it, otherwise "chunks";
    // entries that are not arrays are skipped.
    $retrieved = $result['fullChunks'] ?? $result['chunks'] ?? [];
    $sources = [];
    foreach ($retrieved as $hit) {
        if (is_array($hit)) {
            $sources[] = [
                'document_id' => (int)($hit['document_id'] ?? 0),
                'title'       => (string)($hit['title'] ?? ''),
                'section'     => (string)($hit['section_title'] ?? $hit['section'] ?? ''),
                'source_url'  => (string)($hit['source_url'] ?? ''),
                'score'       => isset($hit['score']) ? (float)$hit['score'] : null,
            ];
        }
    }

    // Final event: summary metadata plus the citation list built above.
    $summary = [
        'ok'               => true,
        'chunks_used'      => (int)($result['chunks_used'] ?? count($sources)),
        'model'            => (string)($result['model'] ?? ''),
        'response_time_ms' => (int)($result['response_time_ms'] ?? 0),
        'sources'          => $sources,
    ];
    sseEmit('done', $summary);
} catch (Throwable $e) {
    // Headers are already sent as an event stream, so failures are reported
    // in-band as a "fail" event.
    sseEmit('fail', ['ok' => false, 'message' => $e->getMessage()]);
}

exit;
|
||||
@@ -0,0 +1,38 @@
|
||||
<?php
|
||||
/**
|
||||
* GET /api/dashboard/corpus-init.php
|
||||
*
|
||||
* Idempotent: ensures the current session has a CaveauAI client tenant +
|
||||
* default corpus, lazy-creating both on first hit. Safe to call on every
|
||||
* dashboard page load (results are session-cached).
|
||||
*
|
||||
* Response:
|
||||
* {
|
||||
* "ok": true,
|
||||
* "client_id": 102,
|
||||
* "client_user_id": 257,
|
||||
* "corpus_id": 18,
|
||||
* "created": false
|
||||
* }
|
||||
*/
|
||||
|
||||
declare(strict_types=1);

require_once dirname(__DIR__, 2) . '/includes/bootstrap.php';

dbnToolsRequireMethod('GET');
dbnToolsRequireAuth();

// Look up (or lazily create) the tenant for this session; HTTP-level failures
// are forwarded together with their extra payload.
try {
    $tenant = dbnToolsEnsureDashboardTenant();
} catch (DbnToolsHttpException $e) {
    dbnToolsError($e->getMessage(), $e->status, $e->errorCode, $e->extra);
}

// Build the response: the three identifiers as integers, then the flag telling
// whether the tenant was created by this call.
$payload = ['ok' => true];
foreach (['client_id', 'client_user_id', 'corpus_id'] as $idKey) {
    $payload[$idKey] = (int)$tenant[$idKey];
}
$payload['created'] = (bool)($tenant['created'] ?? false);

dbnToolsRespond($payload);
|
||||
@@ -0,0 +1,249 @@
|
||||
<?php
|
||||
/**
|
||||
* /api/dashboard/documents.php — CRUD for the current user's CaveauAI documents.
|
||||
*
|
||||
* GET ?action=list&offset=0&limit=20&q=&status=&category=
|
||||
* → { ok, total, documents: [...] }
|
||||
* GET ?action=get&id=123
|
||||
* → { ok, document: {...} }
|
||||
* POST ?action=update body: { id, title?, category?, tags?, language?, author? }
|
||||
* → { ok, document: {...} }
|
||||
* POST ?action=delete body: { ids: [1,2,3] }
|
||||
* → { ok, deleted: N }
|
||||
*
|
||||
* All filtered by client_id from the dashboard session — no cross-tenant access possible.
|
||||
*/
|
||||
|
||||
declare(strict_types=1);

require_once dirname(__DIR__, 2) . '/includes/bootstrap.php';

dbnToolsRequireAuth();

try {
    $tenant = dbnToolsEnsureDashboardTenant();
} catch (DbnToolsHttpException $e) {
    dbnToolsError($e->getMessage(), $e->status, $e->errorCode);
}
$clientId = (int)$tenant['client_id'];

// GET requests default to the "list" action; POST requests must name one.
$method = strtoupper((string)($_SERVER['REQUEST_METHOD'] ?? 'GET'));
$action = (string)($_GET['action'] ?? ($method === 'POST' ? '' : 'list'));

$db = dbnToolsDb();

// action => [required HTTP method, handler function]
$routes = [
    'list'   => ['GET', 'respondList'],
    'get'    => ['GET', 'respondGet'],
    'update' => ['POST', 'respondUpdate'],
    'delete' => ['POST', 'respondDelete'],
];

if (isset($routes[$action])) {
    [$requiredMethod, $handler] = $routes[$action];
    dbnToolsRequireMethod($requiredMethod);
    $handler($db, $clientId);
} else {
    dbnToolsError('Unknown action.', 400, 'unknown_action');
}
|
||||
|
||||
/**
 * Responds with one page of the tenant's documents.
 *
 * Reads from the query string: offset (>= 0), limit (1..100, default 20),
 * q (substring match on title or tags), status (one of the allowed values,
 * otherwise ignored) and category (exact match). Every query is scoped to
 * $clientId.
 *
 * @param PDO $db       Database connection.
 * @param int $clientId Tenant whose documents are listed.
 */
function respondList(PDO $db, int $clientId): void
{
    $offset   = max(0, (int)($_GET['offset'] ?? 0));
    $limit    = max(1, min(100, (int)($_GET['limit'] ?? 20)));
    $q        = trim((string)($_GET['q'] ?? ''));
    $status   = trim((string)($_GET['status'] ?? ''));
    $category = trim((string)($_GET['category'] ?? ''));

    $where  = ['client_id = ?'];
    $params = [$clientId];

    if ($q !== '') {
        $where[] = '(title LIKE ? OR tags LIKE ?)';
        // Escape the escape character itself as well as the wildcards.
        // Without escaping "\", a term ending in a backslash would turn the
        // trailing "%" into a literal percent sign.
        // NOTE(review): relies on "\" being the LIKE escape character of the
        // target database — confirm for the server's SQL mode.
        $like = '%' . addcslashes($q, '\\%_') . '%';
        $params[] = $like;
        $params[] = $like;
    }

    $allowedStatus = ['pending', 'processing', 'ready', 'error'];
    if ($status !== '' && in_array($status, $allowedStatus, true)) {
        $where[] = 'status = ?';
        $params[] = $status;
    }

    if ($category !== '') {
        $where[] = 'category = ?';
        $params[] = $category;
    }

    $whereSql = 'WHERE ' . implode(' AND ', $where);

    $countStmt = $db->prepare("SELECT COUNT(*) FROM client_documents {$whereSql}");
    $countStmt->execute($params);
    $total = (int)$countStmt->fetchColumn();

    // $limit and $offset are clamped integers, so interpolating them is safe.
    $sql = "SELECT id, title, source_type, language, category, tags, author,
                   source_tool, import_method, status, word_count, chunk_count,
                   file_size_bytes, source_url, error_message,
                   created_at, updated_at
            FROM client_documents
            {$whereSql}
            ORDER BY id DESC
            LIMIT {$limit} OFFSET {$offset}";
    $stmt = $db->prepare($sql);
    $stmt->execute($params);
    $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);

    dbnToolsRespond([
        'ok'        => true,
        'total'     => $total,
        'offset'    => $offset,
        'limit'     => $limit,
        'documents' => array_map('shapeDoc', $rows),
    ]);
}
|
||||
|
||||
/**
 * Responds with a single document (including its full content) and up to
 * 200 of its chunks.
 *
 * Reads ?id= from the query string. Answers 400 when the id is missing or
 * not positive and 404 when no document with that id belongs to $clientId.
 * Chunk loading is best-effort: any failure yields an empty chunk list.
 *
 * @param PDO $db       Database connection.
 * @param int $clientId Tenant that must own the document.
 */
function respondGet(PDO $db, int $clientId): void
{
    $id = (int)($_GET['id'] ?? 0);
    if ($id <= 0) {
        dbnToolsError('id is required.', 400, 'missing_id');
    }

    $stmt = $db->prepare(
        'SELECT * FROM client_documents WHERE id = ? AND client_id = ? LIMIT 1'
    );
    $stmt->execute([$id, $clientId]);
    $doc = $stmt->fetch(PDO::FETCH_ASSOC);
    if (!$doc) {
        dbnToolsError('Document not found.', 404, 'not_found');
    }

    // prepare() is inside the try so that a prepare-time failure is covered by
    // the same fallback as an execute-time failure.
    try {
        $chunks = $db->prepare(
            'SELECT id, content, section_title
             FROM client_chunks
             WHERE client_id = ? AND document_id = ?
             ORDER BY id ASC
             LIMIT 200'
        );
        $chunks->execute([$clientId, $id]);
        $chunkRows = $chunks->fetchAll(PDO::FETCH_ASSOC);
    } catch (Throwable $e) {
        $chunkRows = [];
    }

    dbnToolsRespond([
        'ok'       => true,
        'document' => shapeDoc($doc) + ['content' => (string)$doc['content']],
        'chunks'   => $chunkRows,
    ]);
}
|
||||
|
||||
/**
 * Updates the editable metadata of one document and responds with the
 * refreshed row.
 *
 * Body (JSON): { id, title?, category?, tags?, language?, author? }.
 * Only keys present in the body are written; a value that is empty after
 * trimming is stored as NULL. Answers 400 for a missing id or when no editable
 * field is supplied, 422 for a non-scalar or over-long value, and 404 when the
 * document does not exist for $clientId.
 *
 * @param PDO $db       Database connection.
 * @param int $clientId Tenant that must own the document.
 */
function respondUpdate(PDO $db, int $clientId): void
{
    $input = dbnToolsJsonInput(20_000);
    $id = (int)($input['id'] ?? 0);
    if ($id <= 0) {
        dbnToolsError('id is required.', 400, 'missing_id');
    }

    // Editable column => maximum length in characters.
    $maxLengths = [
        'title'    => 500,
        'category' => 50,
        'tags'     => 500,
        'language' => 10,
        'author'   => 200,
    ];

    $fields = [];
    $params = [];
    foreach ($maxLengths as $col => $max) {
        if (!array_key_exists($col, $input)) {
            continue;
        }
        $raw = $input[$col];
        // An array or object would be cast to the literal "Array"; reject it.
        if ($raw !== null && !is_scalar($raw)) {
            dbnToolsError("Field {$col} must be a string.", 422, 'invalid_field');
        }
        $val = trim((string)$raw);
        if (mb_strlen($val, 'UTF-8') > $max) {
            dbnToolsError("Field {$col} exceeds {$max} chars.", 422, 'field_too_long');
        }
        // $col comes from the fixed whitelist above, never from user input.
        $fields[] = "{$col} = ?";
        $params[] = $val !== '' ? $val : null;
    }
    if (!$fields) {
        dbnToolsError('No editable fields supplied.', 400, 'no_fields');
    }
    $params[] = $id;
    $params[] = $clientId;

    $stmt = $db->prepare(
        'UPDATE client_documents SET ' . implode(', ', $fields)
        . ', updated_at = NOW() WHERE id = ? AND client_id = ?'
    );
    $stmt->execute($params);

    $stmt = $db->prepare('SELECT * FROM client_documents WHERE id = ? AND client_id = ? LIMIT 1');
    $stmt->execute([$id, $clientId]);
    $doc = $stmt->fetch(PDO::FETCH_ASSOC);

    // The UPDATE matches nothing for an unknown or foreign id. Report that as
    // 404 instead of answering ok:true with an empty document (id 0).
    if (!$doc) {
        dbnToolsError('Document not found.', 404, 'not_found');
    }

    dbnToolsRespond(['ok' => true, 'document' => shapeDoc($doc)]);
}
|
||||
|
||||
/**
 * Deletes up to 200 of the tenant's documents and, best-effort, their chunks.
 *
 * Body (JSON): { ids: [1, 2, 3] }. Entries that are not positive integers
 * (or integer-valued numeric strings/floats) are ignored; duplicates are
 * collapsed. Answers 400 when no usable id remains and 422 when more than 200
 * ids are given. Responds with the number of document rows deleted.
 *
 * @param PDO $db       Database connection.
 * @param int $clientId Tenant whose documents may be deleted.
 */
function respondDelete(PDO $db, int $clientId): void
{
    $input = dbnToolsJsonInput(50_000);
    $rawIds = $input['ids'] ?? [];
    if (!is_array($rawIds) || !$rawIds) {
        dbnToolsError('ids array is required.', 400, 'missing_ids');
    }

    // Validate each entry explicitly. intval() on a non-empty array returns 1,
    // so a nested array in the payload would otherwise target document 1.
    $ids = [];
    foreach ($rawIds as $rawId) {
        if (!is_int($rawId) && !is_string($rawId) && !is_float($rawId)) {
            continue;
        }
        $id = filter_var($rawId, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
        if ($id !== false) {
            $ids[$id] = $id; // keyed by id to de-duplicate
        }
    }
    $ids = array_values($ids);

    if (!$ids) {
        dbnToolsError('No valid ids.', 400, 'invalid_ids');
    }
    if (count($ids) > 200) {
        dbnToolsError('Cannot delete more than 200 documents at once.', 422, 'too_many');
    }

    $placeholders = implode(',', array_fill(0, count($ids), '?'));
    $bindings = array_merge([$clientId], $ids);

    $stmt = $db->prepare(
        "DELETE FROM client_documents
         WHERE client_id = ? AND id IN ({$placeholders})"
    );
    $stmt->execute($bindings);

    try {
        $chunks = $db->prepare(
            "DELETE FROM client_chunks WHERE client_id = ? AND document_id IN ({$placeholders})"
        );
        $chunks->execute($bindings);
    } catch (Throwable $e) {
        // Chunk cleanup is deliberately non-fatal: the documents are already
        // gone and the response still reports them as deleted.
    }

    dbnToolsRespond(['ok' => true, 'deleted' => $stmt->rowCount()]);
}
|
||||
|
||||
/**
 * Projects a client_documents row onto the fixed document shape used in API
 * responses.
 *
 * Missing keys fall back to 0 (integer fields), '' (string fields) or null
 * (pass-through fields). Keys of $row that are not part of the shape are
 * dropped.
 *
 * @param array<string, mixed> $row Database row, possibly partial or empty.
 * @return array<string, mixed> Document in response shape, keys in fixed order.
 */
function shapeDoc(array $row): array
{
    // Output key => how the value is normalised. The order here is the order
    // of keys in the result.
    $shape = [
        'id'              => 'int',
        'title'           => 'string',
        'source_type'     => 'string',
        'language'        => 'string',
        'category'        => 'string',
        'tags'            => 'string',
        'author'          => 'raw',
        'source_url'      => 'raw',
        'source_tool'     => 'raw',
        'import_method'   => 'string',
        'status'          => 'string',
        'word_count'      => 'int',
        'chunk_count'     => 'int',
        'file_size_bytes' => 'int',
        'error_message'   => 'raw',
        'created_at'      => 'string',
        'updated_at'      => 'string',
    ];

    $shaped = [];
    foreach ($shape as $key => $kind) {
        $value = $row[$key] ?? null;
        if ($kind === 'int') {
            $shaped[$key] = (int)$value;       // null becomes 0
        } elseif ($kind === 'string') {
            $shaped[$key] = (string)$value;    // null becomes ''
        } else {
            $shaped[$key] = $value;            // kept as-is, null when absent
        }
    }

    return $shaped;
}
|
||||
@@ -0,0 +1,81 @@
|
||||
<?php
|
||||
/**
|
||||
* GET /api/dashboard/graph.php?action=cites|cited_by|implements|chain&doc_id=N&limit=20&depth=2
|
||||
*
|
||||
* Wraps ai-portal/lib/ai/LegalGraphAgent for the dashboard. Reads the FalkorDB
|
||||
* `bnl_legal` graph on Colin (10.0.2.10:6379). Public graph metadata — no
|
||||
* sensitive content — but we still gate on dashboard auth to avoid being a
|
||||
* generic open proxy.
|
||||
*
|
||||
* Response shape mirrors ai-portal/api/graph-search.php:
|
||||
* { ok, action, doc_id, count, results: [ {rel_type, doc_id, title, ...}, ...] }
|
||||
*/
|
||||
|
||||
declare(strict_types=1);

require_once dirname(__DIR__, 2) . '/includes/bootstrap.php';

dbnToolsRequireMethod('GET');
dbnToolsRequireAuth();

// Don't require dashboard provisioning here — graph is public metadata.

$action = trim((string)($_GET['action'] ?? ''));
$docId  = (int)($_GET['doc_id'] ?? 0);
$limit  = max(1, min(100, (int)($_GET['limit'] ?? 20)));
$depth  = max(1, min(3, (int)($_GET['depth'] ?? 2)));

$validActions = ['cites', 'cited_by', 'implements', 'chain'];
if (!in_array($action, $validActions, true)) {
    dbnToolsError(
        'action must be one of: ' . implode(', ', $validActions),
        400,
        'invalid_action',
        ['actions' => $validActions]
    );
}
if ($docId <= 0) {
    dbnToolsError('doc_id must be a positive integer.', 400, 'missing_doc_id');
}

// The graph classes live in the ai-portal checkout; answer 503 when they are
// not present on this host.
$root      = dbnToolsAiPortalRoot();
$graphFile = $root . '/lib/ai/GraphClient.php';
$agentFile = $root . '/lib/ai/LegalGraphAgent.php';

if (!is_file($graphFile) || !is_file($agentFile)) {
    dbnToolsError('Graph backend not installed.', 503, 'graph_unavailable');
}
require_once $graphFile;
require_once $agentFile;

try {
    // include returns int 1 when the file has no return statement; only use
    // the result as configuration when it is actually an array.
    $loaded = file_exists('/etc/bnl/config.php') ? include '/etc/bnl/config.php' : [];
    $config = is_array($loaded) ? $loaded : [];
    $falkor = is_array($config['falkordb'] ?? null) ? $config['falkordb'] : [];

    // Config file values take precedence; environment values are the fallback.
    $host = (string)($falkor['host'] ?? dbnToolsEnv('DBN_FALKORDB_HOST', '10.0.2.10'));
    $port = (int)($falkor['port'] ?? (int)dbnToolsEnv('DBN_FALKORDB_PORT', '6379'));
    $pass = (string)($falkor['password'] ?? dbnToolsEnv('DBN_FALKORDB_PASSWORD', ''));

    $client = new GraphClient($host, $port, $pass);
    $agent  = new LegalGraphAgent($client);

    // "chain" is bounded by traversal depth, the other actions by row limit.
    $results = match ($action) {
        'cites'      => $agent->cites($docId, $limit),
        'cited_by'   => $agent->citedBy($docId, $limit),
        'implements' => $agent->implements($docId, $limit),
        'chain'      => $agent->chain($docId, $depth),
    };
} catch (Throwable $e) {
    // A backend failure is reported as an empty, successful result carrying a
    // warning, not as an HTTP error.
    dbnToolsRespond([
        'ok'      => true,
        'action'  => $action,
        'doc_id'  => $docId,
        'count'   => 0,
        'results' => [],
        'warning' => 'Graph backend unavailable: ' . $e->getMessage(),
    ]);
}

dbnToolsRespond([
    'ok'      => true,
    'action'  => $action,
    'doc_id'  => $docId,
    'count'   => count($results),
    'results' => $results,
]);
|
||||
@@ -0,0 +1,53 @@
|
||||
<?php
|
||||
/**
|
||||
* GET /api/dashboard/ingest-status.php?ids=1,2,3
|
||||
*
|
||||
* Returns per-doc status for polling during URL ingest (background) or to
|
||||
* surface error messages after a failed sync upload.
|
||||
*
|
||||
* Response:
|
||||
* { ok, statuses: [ {id, status, chunk_count, error_message}, ... ] }
|
||||
*/
|
||||
|
||||
declare(strict_types=1);

require_once dirname(__DIR__, 2) . '/includes/bootstrap.php';

dbnToolsRequireMethod('GET');
dbnToolsRequireAuth();

try {
    $tenant = dbnToolsEnsureDashboardTenant();
} catch (DbnToolsHttpException $e) {
    dbnToolsError($e->getMessage(), $e->status, $e->errorCode);
}
$clientId = (int)$tenant['client_id'];

// Parse ?ids=1,2,3 into a list of positive integers; anything else is dropped.
$requestedIds = [];
foreach (explode(',', (string)($_GET['ids'] ?? '')) as $piece) {
    $candidate = intval($piece);
    if ($candidate > 0) {
        $requestedIds[] = $candidate;
    }
}

if (!$requestedIds) {
    dbnToolsRespond(['ok' => true, 'statuses' => []]);
}

// At most 100 ids are looked up per request.
$requestedIds = array_slice($requestedIds, 0, 100);

$db = dbnToolsDb();
$marks = implode(',', array_fill(0, count($requestedIds), '?'));
$lookup = $db->prepare(
    "SELECT id, status, chunk_count, error_message
        FROM client_documents
        WHERE client_id = ? AND id IN ({$marks})"
);
$lookup->execute(array_merge([$clientId], $requestedIds));

// Only rows owned by this tenant come back; shape each into the response form.
$statuses = [];
foreach ($lookup->fetchAll(PDO::FETCH_ASSOC) as $record) {
    $statuses[] = [
        'id'            => (int)$record['id'],
        'status'        => (string)$record['status'],
        'chunk_count'   => (int)$record['chunk_count'],
        'error_message' => $record['error_message'] ?? null,
    ];
}

dbnToolsRespond([
    'ok'       => true,
    'statuses' => $statuses,
]);
|
||||
@@ -0,0 +1,136 @@
|
||||
<?php
|
||||
/**
|
||||
* POST /api/dashboard/save-from-tool.php
|
||||
*
|
||||
* Improved successor to /api/save-to-corpus.php — adds:
|
||||
* - tags as either CSV string or array
|
||||
* - source_tool slug recorded as import provenance
|
||||
* - chat-answer kind (records import_method='chat_answer')
|
||||
* - preview flag: if true, returns the proposed chunks WITHOUT persisting (dry-run)
|
||||
*
|
||||
* Request body (JSON, max 2 MB):
|
||||
* title: string (required)
|
||||
* content: string (required, min 30 chars)
|
||||
* source_tool: string (optional slug; default 'dashboard-save')
|
||||
* tags: string[] | string CSV (optional, max 20 tags, 32 chars each)
|
||||
* category: string (optional; default 'tool-output')
|
||||
* language: string (optional; default 'no')
|
||||
* author: string (optional)
|
||||
* kind: 'tool_output'|'chat_answer'|'manual' (default 'tool_output')
|
||||
* preview: bool (optional; if true, return chunk preview without saving)
|
||||
*
|
||||
* Response (saved):
|
||||
* { ok, document_id, chunks, status }
|
||||
* Response (preview):
|
||||
* { ok, preview:true, chunks: [...], word_count }
|
||||
*/
|
||||
|
||||
declare(strict_types=1);

require_once dirname(__DIR__, 2) . '/includes/bootstrap.php';

dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

try {
    $tenant = dbnToolsEnsureDashboardTenant();
} catch (DbnToolsHttpException $e) {
    dbnToolsError($e->getMessage(), $e->status, $e->errorCode);
}
$clientId = (int)$tenant['client_id'];
$corpusId = (int)$tenant['corpus_id'];

$input = dbnToolsJsonInput(2_000_000);

$title = trim((string)($input['title'] ?? ''));
if ($title === '') {
    dbnToolsError('title is required.', 400, 'missing_title');
}
if (mb_strlen($title, 'UTF-8') > 500) {
    dbnToolsError('title too long (max 500).', 422, 'title_too_long');
}

$content = trim((string)($input['content'] ?? ''));
if (mb_strlen($content, 'UTF-8') < 30) {
    dbnToolsError('content too short (min 30 chars).', 400, 'content_too_short');
}
if (mb_strlen($content, 'UTF-8') > 1_900_000) {
    dbnToolsError('content exceeds 2 MB.', 422, 'content_too_large');
}

// source_tool is reduced to a lowercase [a-z0-9-_] slug of at most 64 bytes.
$sourceTool = trim((string)($input['source_tool'] ?? 'dashboard-save'));
$sourceTool = substr(preg_replace('/[^a-z0-9\-_]/', '', strtolower($sourceTool)) ?: 'dashboard-save', 0, 64);

// Tags arrive either as an array or as a CSV string. Each tag is trimmed and
// cut to 32 *characters* (mb_substr, so a multibyte letter is never split);
// non-scalar array entries are skipped instead of becoming the text "Array".
// Commas inside an array entry are replaced so they cannot split the tag when
// the list is stored as CSV.
$rawTags = $input['tags'] ?? '';
$tagCandidates = is_array($rawTags) ? $rawTags : explode(',', (string)$rawTags);
$tagList = [];
foreach ($tagCandidates as $candidate) {
    if (!is_string($candidate) && !is_int($candidate) && !is_float($candidate)) {
        continue;
    }
    $tag = trim(str_replace(',', ' ', (string)$candidate));
    $tag = mb_substr($tag, 0, 32, 'UTF-8');
    if ($tag !== '') {
        $tagList[] = $tag;
    }
}
$tagList = array_slice($tagList, 0, 20);
$tagsCsv = implode(',', $tagList);

$category = strtolower(trim((string)($input['category'] ?? 'tool-output')));
$category = substr(preg_replace('/[^a-z0-9\-_]/', '', $category) ?: 'tool-output', 0, 50);

$language = trim((string)($input['language'] ?? 'no')) ?: 'no';
$author   = trim((string)($input['author'] ?? '')) ?: null;

// Unknown kinds fall back to 'tool_output'.
$kind = (string)($input['kind'] ?? 'tool_output');
$importMethod = match ($kind) {
    'chat_answer' => 'chat_answer',
    'manual'      => 'manual',
    default       => 'tool_output',
};

$preview = !empty($input['preview']);

// Unicode-aware word count: runs of letters, optionally joined by an
// apostrophe or hyphen. str_word_count() works on bytes/locale and splits
// words at non-ASCII letters such as æ, ø and å.
$wordPattern = '/\p{L}+(?:[\'’\-]\p{L}+)*/u';
$wordCount = (int)preg_match_all($wordPattern, $content);

dbnToolsBootCaveau();

try {
    if ($preview) {
        // Dry run: chunk the content and return a sample without persisting.
        require_once dbnToolsAiPortalRoot() . '/lib/ai/TextChunker.php';
        $chunker = new TextChunker();
        $chunks  = $chunker->chunk($content);
        $sample  = array_slice($chunks, 0, 8);
        dbnToolsRespond([
            'ok'           => true,
            'preview'      => true,
            'word_count'   => $wordCount,
            'chunks'       => array_map(static fn ($c) => [
                'section_title' => (string)($c['section_title'] ?? ''),
                'word_count'    => (int)preg_match_all($wordPattern, (string)($c['content'] ?? '')),
                'snippet'       => mb_substr((string)($c['content'] ?? ''), 0, 240, 'UTF-8'),
            ], $sample),
            'total_chunks' => count($chunks),
        ]);
    }

    $db = getDb();
    $ins = $db->prepare("
        INSERT INTO client_documents
            (client_id, corpus_id, title, source_type, content, category, language,
             tags, author, import_method, source_tool, word_count, status)
        VALUES (?, ?, ?, 'text', ?, ?, ?, ?, ?, ?, ?, ?, 'pending')
    ");
    $ins->execute([
        $clientId, $corpusId, $title, $content, $category, $language,
        $tagsCsv, $author, $importMethod, $sourceTool, $wordCount,
    ]);
    $docId = (int)$db->lastInsertId();

    $rag = new ClientRagPipeline($clientId);
    $chunks = $rag->ingestDocument($docId);

    dbnToolsRespond([
        'ok'          => true,
        'document_id' => $docId,
        'chunks'      => (int)$chunks,
        'status'      => 'ready',
    ], 201);
} catch (Throwable $e) {
    if (isset($docId)) {
        // The row exists but indexing failed: record the failure on the row
        // (best-effort) and report the document id to the caller.
        try {
            // mb_strcut limits by bytes without cutting a multibyte character.
            $db->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
               ->execute([mb_strcut($e->getMessage(), 0, 1000, 'UTF-8'), $docId]);
        } catch (Throwable $ignored) {
            /* non-fatal */
        }
        dbnToolsError(
            'Saved to corpus but indexing failed: ' . $e->getMessage(),
            500,
            'index_failed',
            ['document_id' => $docId]
        );
    }
    dbnToolsError('Save failed: ' . $e->getMessage(), 500, 'save_failed');
}
|
||||
@@ -0,0 +1,241 @@
|
||||
<?php
|
||||
/**
|
||||
* POST /api/dashboard/upload.php
|
||||
*
|
||||
* Three input modes:
|
||||
* - multipart/form-data with `file` field (PDF/DOCX/TXT, <= 8 MB)
|
||||
* - JSON body { "kind":"text", "title":..., "content":..., "category"?, "tags"?, "author"?, "language"? }
|
||||
* - JSON body { "kind":"url", "title":..., "url":... } (fetched via ClientUniversalScraper; queued)
|
||||
*
|
||||
* For file + text: writes pending row, runs ClientRagPipeline::ingestDocument() synchronously,
|
||||
* returns { ok, document_id, chunks, status }
|
||||
* For url: writes pending row, returns immediately with status:'pending' — a separate cron job
|
||||
* (run_client_one.php on the ai-portal) does the ingest.
|
||||
*
|
||||
* If file text extraction yields less than 200 chars, attempts OCR via `tesseract` shell util.
|
||||
*/
|
||||
|
||||
declare(strict_types=1);

require_once dirname(__DIR__, 2) . '/includes/bootstrap.php';

dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

try {
    $tenant = dbnToolsEnsureDashboardTenant();
} catch (DbnToolsHttpException $e) {
    dbnToolsError($e->getMessage(), $e->status, $e->errorCode);
}
$clientId = (int)$tenant['client_id'];
$corpusId = (int)$tenant['corpus_id'];

dbnToolsBootCaveau();
$db = getDb();

// A multipart request is a file upload; anything else is read as a JSON body
// whose "kind" selects text paste or URL import.
$contentType = (string)($_SERVER['CONTENT_TYPE'] ?? '');
$isMultipart = stripos($contentType, 'multipart/form-data') === 0;

try {
    if ($isMultipart) {
        $result = handleFileUpload($db, $clientId, $corpusId);
    } else {
        $input = dbnToolsJsonInput(2_500_000);
        $kind  = (string)($input['kind'] ?? 'text');

        if ($kind === 'text') {
            $result = handleTextPaste($db, $clientId, $corpusId, $input);
        } elseif ($kind === 'url') {
            $result = handleUrlImport($db, $clientId, $corpusId, $input);
        } else {
            $result = dbnToolsError('Unknown kind: ' . $kind, 400, 'unknown_kind');
        }
    }
} catch (DbnToolsHttpException $e) {
    // HTTP-level failures keep their own status, code and extra payload.
    dbnToolsError($e->getMessage(), $e->status, $e->errorCode, $e->extra);
} catch (Throwable $e) {
    dbnToolsError('Upload failed: ' . $e->getMessage(), 500, 'upload_failed');
}

dbnToolsRespond($result, 201);
|
||||
|
||||
|
||||
/**
 * Handles the multipart upload mode: extracts text from the uploaded file,
 * falls back to OCR for PDFs that yield under 200 characters, then stores and
 * ingests the document.
 *
 * Metadata (title, category, tags, author, language) is read from $_POST; the
 * title defaults to the file name without its extension.
 *
 * @param PDO $db       Database connection.
 * @param int $clientId Owning tenant.
 * @param int $corpusId Target corpus.
 * @return array Result of persistAndIngest().
 */
function handleFileUpload(PDO $db, int $clientId, int $corpusId): array
{
    if (empty($_FILES['file'])) {
        dbnToolsError('No file uploaded.', 400, 'missing_file');
    }

    $extracted = dbnToolsExtractUploadedFile($_FILES['file']);
    $body      = (string)$extracted['text'];
    $fileName  = (string)$extracted['filename'];
    $extension = strtolower(pathinfo($fileName, PATHINFO_EXTENSION));

    // Anything that is not a PDF or DOCX is stored as plain text.
    if ($extension === 'pdf') {
        $sourceType = 'pdf';
    } elseif ($extension === 'docx') {
        $sourceType = 'docx';
    } else {
        $sourceType = 'text';
    }

    // OCR output replaces the extracted text only when it is longer.
    $importMethod = 'dbn_upload';
    if ($extension === 'pdf' && mb_strlen($body, 'UTF-8') < 200) {
        $scanned = tryOcrPdf((string)($_FILES['file']['tmp_name'] ?? ''));
        if ($scanned !== null && mb_strlen($scanned, 'UTF-8') > mb_strlen($body, 'UTF-8')) {
            $body = $scanned;
            $importMethod = 'ocr_scan';
        }
    }

    $title = trim((string)($_POST['title'] ?? ''));
    if (!$title) {
        $title = pathinfo($fileName, PATHINFO_FILENAME);
    }
    $category = sanitizeCategory((string)($_POST['category'] ?? 'uncategorized'));
    $tags     = sanitizeTagsCsv((string)($_POST['tags'] ?? ''));
    $author   = trim((string)($_POST['author'] ?? '')) ?: null;
    $language = trim((string)($_POST['language'] ?? 'no')) ?: 'no';

    $document = [
        'title'             => $title,
        'source_type'       => $sourceType,
        'content'           => $body,
        'category'          => $category,
        'tags'              => $tags,
        'author'            => $author,
        'language'          => $language,
        'import_method'     => $importMethod,
        'original_filename' => $fileName,
        'file_size_bytes'   => (int)($_FILES['file']['size'] ?? 0),
        'source_tool'       => 'dashboard-upload',
    ];

    return persistAndIngest($db, $clientId, $corpusId, $document);
}
|
||||
|
||||
/**
 * Handles the JSON "text" mode: validates the pasted title and content, then
 * stores and ingests the document.
 *
 * Answers 400 for a missing title or content outside 30..2,000,000 characters
 * and 422 for a title longer than 500 characters. Tags may be given as a CSV
 * string or as an array of scalars.
 *
 * @param PDO                  $db       Database connection.
 * @param int                  $clientId Owning tenant.
 * @param int                  $corpusId Target corpus.
 * @param array<string, mixed> $input    Decoded JSON request body.
 * @return array Result of persistAndIngest().
 */
function handleTextPaste(PDO $db, int $clientId, int $corpusId, array $input): array
{
    $title   = trim((string)($input['title'] ?? ''));
    $content = trim((string)($input['content'] ?? ''));

    if ($title === '') {
        dbnToolsError('title is required.', 400, 'missing_title');
    }
    // Same 500-character title limit that the update and save-from-tool
    // endpoints enforce.
    if (mb_strlen($title, 'UTF-8') > 500) {
        dbnToolsError('title too long (max 500).', 422, 'title_too_long');
    }
    if (mb_strlen($content, 'UTF-8') < 30) {
        dbnToolsError('content too short (min 30 chars).', 400, 'content_too_short');
    }
    if (mb_strlen($content, 'UTF-8') > 2_000_000) {
        dbnToolsError('content exceeds 2 MB.', 400, 'content_too_large');
    }

    // Accept an array of tags as well as a CSV string. Casting an array to
    // string would produce the single tag "Array".
    $rawTags = $input['tags'] ?? '';
    if (is_array($rawTags)) {
        $rawTags = implode(',', array_filter($rawTags, 'is_scalar'));
    }

    return persistAndIngest($db, $clientId, $corpusId, [
        'title'         => $title,
        'source_type'   => 'text',
        'content'       => $content,
        'category'      => sanitizeCategory((string)($input['category'] ?? 'uncategorized')),
        'tags'          => sanitizeTagsCsv((string)$rawTags),
        'author'        => trim((string)($input['author'] ?? '')) ?: null,
        'language'      => trim((string)($input['language'] ?? 'no')) ?: 'no',
        'import_method' => 'manual',
        'source_tool'   => 'dashboard-paste',
    ]);
}
|
||||
|
||||
/**
 * Handles the JSON "url" mode: validates the URL and inserts a 'pending'
 * document row with empty content. Nothing is fetched here.
 *
 * Answers 400 when the URL is missing or malformed, or when its scheme is not
 * http/https. The title defaults to the URL itself.
 *
 * NOTE(review): only the scheme is restricted here, not the host. Confirm that
 * whatever fetches queued URLs rejects internal/private addresses.
 *
 * @param PDO                  $db       Database connection.
 * @param int                  $clientId Owning tenant.
 * @param int                  $corpusId Target corpus.
 * @param array<string, mixed> $input    Decoded JSON request body.
 * @return array{ok: bool, document_id: int, status: string, chunks: int, note: string}
 */
function handleUrlImport(PDO $db, int $clientId, int $corpusId, array $input): array
{
    $url   = trim((string)($input['url'] ?? ''));
    $title = trim((string)($input['title'] ?? ''));

    // FILTER_VALIDATE_URL always requires a scheme and host. The former
    // FILTER_FLAG_SCHEME_REQUIRED constant was removed in PHP 8.0, and
    // referencing it there throws "Undefined constant".
    if ($url === '' || filter_var($url, FILTER_VALIDATE_URL) === false) {
        dbnToolsError('Valid URL is required.', 400, 'invalid_url');
    }

    $scheme = strtolower((string)parse_url($url, PHP_URL_SCHEME));
    if (!in_array($scheme, ['http', 'https'], true)) {
        dbnToolsError('URL must use http or https.', 400, 'invalid_scheme');
    }

    if ($title === '') {
        $title = $url;
    }

    $stmt = $db->prepare("
        INSERT INTO client_documents
            (client_id, corpus_id, title, source_type, source_url, content,
             category, tags, language, import_method, source_tool, status)
        VALUES (?, ?, ?, 'url', ?, '', ?, ?, ?, 'url', 'dashboard-url', 'pending')
    ");
    $stmt->execute([
        $clientId,
        $corpusId,
        $title,
        $url,
        sanitizeCategory((string)($input['category'] ?? 'uncategorized')),
        sanitizeTagsCsv((string)($input['tags'] ?? '')),
        trim((string)($input['language'] ?? 'no')) ?: 'no',
    ]);

    return [
        'ok'          => true,
        'document_id' => (int)$db->lastInsertId(),
        'status'      => 'pending',
        'chunks'      => 0,
        'note'        => 'URL queued for background ingest.',
    ];
}
|
||||
|
||||
/**
 * Inserts a 'pending' document row and immediately runs ingestion for it.
 *
 * On success returns ok:true with the chunk count and status 'ready'. When
 * ingestion throws, the row is kept, marked status='error' with the (truncated)
 * message, and ok:false plus an 'index_failed' error is returned. The INSERT
 * itself is not guarded, so a failure there propagates to the caller.
 *
 * @param PDO                  $db       Database connection.
 * @param int                  $clientId Owning tenant.
 * @param int                  $corpusId Target corpus.
 * @param array<string, mixed> $doc      Requires title, source_type, content,
 *                                       category, tags, language, import_method
 *                                       and source_tool; original_filename,
 *                                       file_size_bytes and author are optional.
 * @return array<string, mixed> Response payload as described above.
 */
function persistAndIngest(PDO $db, int $clientId, int $corpusId, array $doc): array
{
    // Unicode-aware word count: runs of letters, optionally joined by an
    // apostrophe or hyphen. str_word_count() works on bytes/locale and splits
    // words at non-ASCII letters such as æ, ø and å.
    $wordCount = (int)preg_match_all('/\p{L}+(?:[\'’\-]\p{L}+)*/u', (string)$doc['content']);

    $stmt = $db->prepare("
        INSERT INTO client_documents
            (client_id, corpus_id, title, source_type, original_filename, file_size_bytes,
             content, category, tags, author, language,
             import_method, source_tool, word_count, status)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending')
    ");
    $stmt->execute([
        $clientId,
        $corpusId,
        $doc['title'],
        $doc['source_type'],
        $doc['original_filename'] ?? null,
        $doc['file_size_bytes'] ?? 0,
        $doc['content'],
        $doc['category'],
        $doc['tags'],
        $doc['author'] ?? null,
        $doc['language'],
        $doc['import_method'],
        $doc['source_tool'],
        $wordCount,
    ]);
    $docId = (int)$db->lastInsertId();

    try {
        $rag = new ClientRagPipeline($clientId);
        $chunks = $rag->ingestDocument($docId);

        return [
            'ok'          => true,
            'document_id' => $docId,
            'chunks'      => (int)$chunks,
            'status'      => 'ready',
            'word_count'  => $wordCount,
        ];
    } catch (Throwable $e) {
        // Recording the failure is best-effort: if this UPDATE throws as well,
        // the caller should still receive the original indexing error.
        try {
            // mb_strcut limits by bytes without cutting a multibyte character.
            $db->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
               ->execute([mb_strcut($e->getMessage(), 0, 1000, 'UTF-8'), $docId]);
        } catch (Throwable $ignored) {
            // non-fatal
        }

        return [
            'ok'          => false,
            'document_id' => $docId,
            'status'      => 'error',
            'error'       => [
                'code'    => 'index_failed',
                'message' => 'Saved, but indexing failed: ' . $e->getMessage(),
            ],
        ];
    }
}
|
||||
|
||||
/**
 * Normalises a category label to a lowercase slug of at most 50 bytes.
 *
 * The input is trimmed and lowercased, then every character outside
 * [a-z0-9-_] is removed. When the remainder is falsy (empty, or the single
 * character "0") the result is 'uncategorized'.
 *
 * @param string $cat Raw category from the request.
 * @return string Slug, never empty.
 */
function sanitizeCategory(string $cat): string
{
    $lowered = strtolower(trim($cat));
    $slug = preg_replace('/[^a-z0-9\-_]/', '', $lowered);

    if (!$slug) {
        $slug = 'uncategorized';
    }

    return substr($slug, 0, 50);
}
|
||||
|
||||
/**
 * Normalises a comma-separated tag string: trims each tag, drops empty ones,
 * cuts each tag to 32 characters and keeps at most the first 20.
 *
 * Truncation is character-based (mb_substr), so a multibyte letter is never
 * cut in half, which a byte-based cut could do and thereby produce invalid
 * UTF-8. Only truly empty tags are dropped, so the tag "0" is kept.
 *
 * @param string $raw Raw CSV tag list from the request.
 * @return string Cleaned CSV tag list ('' when no tag remains).
 */
function sanitizeTagsCsv(string $raw): string
{
    $tags = [];
    foreach (explode(',', $raw) as $piece) {
        $tag = mb_substr(trim($piece), 0, 32, 'UTF-8');
        if ($tag === '') {
            continue;
        }
        $tags[] = $tag;
        if (count($tags) === 20) {
            break;
        }
    }

    return implode(',', $tags);
}
|
||||
|
||||
/**
 * Attempts to OCR a PDF by piping `pdftoppm` output into `tesseract`
 * (languages nor+eng) through the shell.
 *
 * Returns null when the path is empty or unreadable, when shell_exec is not
 * available, when `tesseract` is not found on the PATH, or when the pipeline
 * produces no text.
 * NOTE(review): only `tesseract` is probed, not `pdftoppm`; a missing
 * `pdftoppm` presumably just yields empty output and therefore null — confirm.
 *
 * @param string $tmpPath Filesystem path of the uploaded PDF.
 * @return string|null Trimmed OCR text, or null when none was obtained.
 */
function tryOcrPdf(string $tmpPath): ?string
{
    $canRun = $tmpPath !== ''
        && is_readable($tmpPath)
        && function_exists('shell_exec');
    if (!$canRun) {
        return null;
    }

    $tesseractPath = @shell_exec('command -v tesseract 2>/dev/null');
    if (!$tesseractPath) {
        return null;
    }

    // Render the PDF to PNG at 200 dpi on stdout and feed it to tesseract.
    $pipeline = 'pdftoppm -r 200 ' . escapeshellarg($tmpPath) . ' - -png 2>/dev/null | '
        . 'tesseract -l nor+eng stdin stdout 2>/dev/null';
    $recognised = trim((string)@shell_exec($pipeline));

    return $recognised === '' ? null : $recognised;
}
|
||||
Reference in New Issue
Block a user