Files
dobetternorge-tools/api/save-to-corpus.php
T
daveadmin 56cd87dd7b redact: UX overhaul — engine simplification, credits, spinner, save-to-docs, badges
- Remove GPU/regex engine options; keep only azure_mini (1 credit) and azure_full (2 credits)
- Variable credit cost: engine-aware pre-check and charge in api/redact.php; PricingCatalog base = 1
- Fix ATTORNEY not preserved when keepOfficials=true: add to LLM prompt, generic-tag, pseudonym regexes
- Replace Azure credits hint with per-engine credit cost text (all 4 languages)
- Single-file upload only (was: up to 5); simplify status messages
- Clear previous redaction output and show pulsing spinner when a new run starts
- Add "Save to My Docs" button in redact output panel (corpus-save.js path)
- corpus-save.js: capture source_doc_ids from button dataset, pass in POST payload
- api/save-to-corpus.php: accept source_doc_ids, store first as source_url=corpus-doc:{id}
- doc-picker.js: show "✂ Redacted" badge for documents saved from the redact tool
- CSS: .redact-working spinner, doc-item__badge--redact pill styles

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-25 08:18:51 +02:00

103 lines
3.5 KiB
PHP

<?php
/**
* POST /api/save-to-corpus.php
*
* Save tool output text into the user's CaveauAI corpus.
* Uses dbnToolsBootCaveau() to call ClientRagPipeline directly via filesystem include.
*
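* Request body (JSON, max 500 KB):
* title string (required)
* content string (required, min 30 chars)
* source_tool string (optional, slug)
* tags string (optional, comma-separated)
* source_doc_ids array|string (optional; list or comma-separated IDs, the first one is stored as source_url "corpus-doc:{id}")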
*/
declare(strict_types=1);

require_once dirname(__DIR__) . '/includes/bootstrap.php';

// Request guards: HTTP method first, then authentication.
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

// The authenticated user must carry a positive client_id; a missing or
// non-positive value is rejected with 403 / no_workspace.
$user = dbnToolsAuthenticatedUser();
$clientId = isset($user['client_id']) ? (int)$user['client_id'] : 0;
if ($clientId < 1) {
    dbnToolsError(
        'No linked CaveauAI workspace. Log in via the CaveauAI portal first.',
        403,
        'no_workspace'
    );
}
// Decode the JSON request body (500_000 is the size cap handed to the helper).
$input = dbnToolsJsonInput(500_000);

// With strict_types enabled, trim()/strtolower() throw a TypeError when given
// a non-string. A JSON number/array/object in a text field is therefore
// treated as absent so the request ends in a regular 400 below instead of an
// uncaught error.
$readString = static function (string $key) use ($input): string {
    $value = $input[$key] ?? null;
    return is_string($value) ? $value : '';
};

$title = trim($readString('title'));
$content = trim($readString('content'));

// source_tool: lower-cased slug limited to [a-z0-9-_], at most 64 bytes;
// an empty result is stored as NULL.
$sourceToolSlug = preg_replace('/[^a-z0-9\-_]/', '', strtolower($readString('source_tool')));
$sourceTool = substr((string)$sourceToolSlug, 0, 64) ?: null;

// tags: comma-separated string -> JSON array of trimmed, non-empty tags.
// Only empty strings are dropped, so a tag that is literally "0" survives.
$tagList = array_values(array_filter(
    array_map('trim', explode(',', $readString('tags'))),
    static function (string $tag): bool {
        return $tag !== '';
    }
));
$tags = json_encode($tagList, JSON_UNESCAPED_UNICODE);
if ($tags === false) {
    // Encoding failed (e.g. malformed UTF-8); store an empty tag list.
    $tags = '[]';
}

// source_doc_ids: either a JSON array or a comma-separated string. Only the
// first entry is used and recorded as source_url "corpus-doc:{id}".
$rawSourceDocIds = $input['source_doc_ids'] ?? null;
$sourceDocIdArr = is_array($rawSourceDocIds)
    ? $rawSourceDocIds
    : (is_string($rawSourceDocIds) ? array_filter(array_map('trim', explode(',', $rawSourceDocIds))) : []);

// Validate strictly: a plain (int) cast would turn a nested array into 1 and
// "12abc" into 12, recording a reference to a document the caller never named.
$firstSourceDocId = 0;
$firstCandidate = reset($sourceDocIdArr);
if (is_int($firstCandidate) || is_string($firstCandidate)) {
    $validatedId = filter_var($firstCandidate, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
    if ($validatedId !== false) {
        $firstSourceDocId = $validatedId;
    }
}
$sourceUrl = $firstSourceDocId > 0 ? "corpus-doc:{$firstSourceDocId}" : null;

// Validation. dbnToolsError() is relied on to end the request (as the code
// that follows assumes valid input) — NOTE(review): confirm in bootstrap.
if ($title === '') {
    dbnToolsError('title is required.', 400, 'bad_request');
}
// Lengths are measured in bytes (strlen), after trimming.
if (strlen($content) < 30) {
    dbnToolsError('content too short (min 30 chars).', 400, 'bad_request');
}
// Defensive upper bound; presumably not reachable while the body cap above
// stays below 2 MB.
if (strlen($content) > 2_000_000) {
    dbnToolsError('content exceeds 2 MB limit.', 400, 'too_large');
}
// Load CaveauAI platform (getDb, ClientRagPipeline, etc.)
dbnToolsBootCaveau();

// Connect and resolve the client's default corpus. Any failure here (connect
// or query) is reported as 503. The underlying exception text is written to
// the server log only: driver messages can contain hostnames or account
// names and must not be echoed to the client.
try {
    $db = getDb();
    $stmt = $db->prepare('SELECT id FROM client_corpora WHERE client_id = ? AND is_default = 1 LIMIT 1');
    $stmt->execute([$clientId]);
    // No matching row yields a falsy fetch result, which maps to 0 here.
    $corpusId = (int)($stmt->fetchColumn() ?: 0);
} catch (Throwable $e) {
    error_log('save-to-corpus: database unavailable: ' . $e->getMessage());
    dbnToolsError('CaveauAI database unavailable.', 503, 'db_unavailable');
}

// Kept outside the try block so the error helper is never invoked from
// inside a catch-all.
if ($corpusId === 0) {
    dbnToolsError(
        'No default corpus found for your account. Set one up in the CaveauAI portal.',
        409,
        'no_corpus'
    );
}
// Unicode-aware word count. str_word_count() works on bytes and the current
// locale, so it breaks words at non-ASCII letters (e.g. æ/ø/å). A word here
// is a letter followed by letters, combining marks, apostrophes or hyphens.
$wordCount = preg_match_all('/\p{L}[\p{L}\p{M}\'\-]*/u', $content);
if ($wordCount === false) {
    // The /u pattern rejects malformed UTF-8; fall back to the byte-based count.
    $wordCount = str_word_count($content);
}

// Store the document with status 'pending'; indexing happens right after.
$ins = $db->prepare("
    INSERT INTO client_documents
        (client_id, corpus_id, title, source_type, content, category,
         tags, import_method, source_tool, source_url, word_count, status)
    VALUES (?, ?, ?, 'text', ?, 'tool-output', ?, 'tool_output', ?, ?, ?, 'pending')
");
$ins->execute([$clientId, $corpusId, $title, $content, $tags, $sourceTool, $sourceUrl, $wordCount]);
$docId = (int)$db->lastInsertId();

try {
    $rag = new ClientRagPipeline($clientId);
    $chunks = $rag->ingestDocument($docId);
} catch (Throwable $e) {
    // Document is saved but not indexed — mark error and return partial success.
    // The exception text goes to the server log and the document row, not to
    // the client.
    error_log(sprintf('save-to-corpus: indexing failed for document %d: %s', $docId, $e->getMessage()));
    try {
        $db->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
            ->execute([$e->getMessage(), $docId]);
    } catch (Throwable $markFailure) {
        // A failure while recording the error must not mask the index_failed
        // response below.
        error_log(sprintf(
            'save-to-corpus: could not mark document %d as error: %s',
            $docId,
            $markFailure->getMessage()
        ));
    }
    dbnToolsError(
        'Saved to corpus but indexing failed.',
        500,
        'index_failed',
        ['document_id' => $docId]
    );
}

// 201: document stored and indexed; 'chunks' is whatever ingestDocument() returned.
dbnToolsRespond(['ok' => true, 'document_id' => $docId, 'chunks' => $chunks], 201);