56cd87dd7b
- Remove GPU/regex engine options; keep only azure_mini (1 credit) and azure_full (2 credits)
- Variable credit cost: engine-aware pre-check and charge in api/redact.php; PricingCatalog base = 1
- Fix ATTORNEY not preserved when keepOfficials=true: add to LLM prompt, generic-tag, pseudonym regexes
- Replace Azure credits hint with per-engine credit cost text (all 4 languages)
- Single-file upload only (was: up to 5); simplify status messages
- Clear previous redaction output and show pulsing spinner when a new run starts
- Add "Save to My Docs" button in redact output panel (corpus-save.js path)
- corpus-save.js: capture source_doc_ids from button dataset, pass in POST payload
- api/save-to-corpus.php: accept source_doc_ids, store first as source_url=corpus-doc:{id}
- doc-picker.js: show "✂ Redacted" badge for documents saved from the redact tool
- CSS: .redact-working spinner, doc-item__badge--redact pill styles
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
103 lines
3.5 KiB
PHP
103 lines
3.5 KiB
PHP
<?php
|
|
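/**
 * POST /api/save-to-corpus.php
 *
 * Save tool output text into the user's CaveauAI corpus.
 * Uses dbnToolsBootCaveau() to call ClientRagPipeline directly via filesystem include.
 *
 * Request body (JSON, max 500 KB):
 *   title           string        (required)
 *   content         string        (required, min 30 chars)
 *   source_tool     string        (optional, slug)
 *   tags            string        (optional, comma-separated)
 *   source_doc_ids  array|string  (optional, list or comma-separated document IDs;
 *                                  the first ID is stored as source_url "corpus-doc:{id}")
 *
 * Success response (HTTP 201, JSON):
 *   ok           true
 *   document_id  int    (ID of the inserted client_documents row)
 *   chunks       mixed  (return value of ClientRagPipeline::ingestDocument())
 */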
|
|
|
|
declare(strict_types=1);
|
|
|
|
require_once dirname(__DIR__) . '/includes/bootstrap.php';
|
|
|
|
// Request guards. NOTE(review): these helpers presumably abort the request when
// it is not an authenticated POST — confirm in includes/bootstrap.php.
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

// The authenticated user's client_id identifies the workspace that every query
// below is scoped to; a missing or non-positive value is rejected with
// 403 / no_workspace.
$clientId = (int) (dbnToolsAuthenticatedUser()['client_id'] ?? 0);
if ($clientId < 1) {
    dbnToolsError('No linked CaveauAI workspace. Log in via the CaveauAI portal first.', 403, 'no_workspace');
}
|
|
|
|
// Decode the JSON request body; 500_000 is the size cap handed to the helper.
$input = dbnToolsJsonInput(500_000);

// This file runs with strict_types=1, so trim()/strtolower()/explode() throw a
// TypeError when given a non-string (e.g. {"title": 123} or {"tags": ["a"]}).
// Treat a wrongly-typed field as absent instead, so the ordinary validation
// further down answers with a 400 rather than an uncaught error.
$stringField = static fn (string $key): string => is_string($input[$key] ?? null) ? $input[$key] : '';

$title   = trim($stringField('title'));
$content = trim($stringField('content'));

// source_tool: lower-cased slug limited to [a-z0-9_-], at most 64 bytes; null when
// nothing usable remains. Compared against '' explicitly so the slug "0" survives
// (a truthiness test would discard it), and preg_replace()'s null-on-error result
// is mapped to '' before it reaches substr().
$sourceToolSlug = preg_replace('/[^a-z0-9\-_]/', '', strtolower($stringField('source_tool'))) ?? '';
$sourceToolSlug = substr($sourceToolSlug, 0, 64);
$sourceTool     = $sourceToolSlug !== '' ? $sourceToolSlug : null;

// tags: comma-separated string -> JSON array of trimmed, non-empty tags.
// The explicit callback keeps a tag named "0", which array_filter() without a
// callback would drop as falsy.
$rawTags = trim($stringField('tags'));
$tagList = array_values(array_filter(
    array_map('trim', explode(',', $rawTags)),
    static fn (string $tag): bool => $tag !== ''
));
$tags = json_encode($tagList, JSON_UNESCAPED_UNICODE);

// source_doc_ids: either a JSON array or a comma-separated string of document IDs.
$rawSourceDocIds = $input['source_doc_ids'] ?? null;
if (is_array($rawSourceDocIds)) {
    $sourceDocIdArr = $rawSourceDocIds;
} elseif (is_string($rawSourceDocIds)) {
    $sourceDocIdArr = explode(',', $rawSourceDocIds);
} else {
    $sourceDocIdArr = [];
}

// Use the first entry that is a plain positive integer. Entries are validated
// rather than cast blindly: (int) on a nested array evaluates to 1, which would
// record a reference to an unrelated document. The 18-digit cap keeps the value
// inside the 64-bit integer range.
$firstSourceDocId = 0;
foreach ($sourceDocIdArr as $entry) {
    if (is_int($entry)) {
        $candidate = (string) $entry;
    } elseif (is_string($entry)) {
        $candidate = trim($entry);
    } else {
        continue;
    }

    if (preg_match('/^[0-9]{1,18}$/D', $candidate) === 1 && (int) $candidate > 0) {
        $firstSourceDocId = (int) $candidate;
        break;
    }
}

// Stored in client_documents.source_url as "corpus-doc:{id}"; null when no usable ID was sent.
$sourceUrl = $firstSourceDocId > 0 ? "corpus-doc:{$firstSourceDocId}" : null;
|
|
|
|
// Validate the required fields before any database work is done.
if ('' === $title) {
    dbnToolsError('title is required.', 400, 'bad_request');
}

// strlen() counts bytes, so both content limits below are byte limits.
$contentBytes = strlen($content);
if ($contentBytes < 30) {
    dbnToolsError('content too short (min 30 chars).', 400, 'bad_request');
}
if ($contentBytes > 2_000_000) {
    dbnToolsError('content exceeds 2 MB limit.', 400, 'too_large');
}
|
|
|
|
// Load the CaveauAI platform (getDb, ClientRagPipeline, etc.).
dbnToolsBootCaveau();

try {
    $db = getDb();
} catch (Throwable $e) {
    // Connection-level exception messages can contain hostnames, usernames or
    // DSN fragments. Keep the detail in the server log and send the client a
    // generic message; HTTP status and error code are unchanged.
    error_log('save-to-corpus: getDb() failed: ' . $e->getMessage());
    dbnToolsError('CaveauAI database unavailable.', 503, 'db_unavailable');
}

// Resolve the default corpus for this client. The query is scoped by client_id,
// so only a corpus belonging to the authenticated workspace can be selected.
$stmt = $db->prepare('SELECT id FROM client_corpora WHERE client_id = ? AND is_default = 1 LIMIT 1');
$stmt->execute([$clientId]);

// fetchColumn() yields false when no row matches; normalise that to 0.
$corpusId = (int) ($stmt->fetchColumn() ?: 0);
if ($corpusId === 0) {
    dbnToolsError(
        'No default corpus found for your account. Set one up in the CaveauAI portal.',
        409,
        'no_corpus'
    );
}
|
|
|
|
// Word count stored alongside the document.
// str_word_count() works on bytes and uses the current locale's notion of a
// letter, so UTF-8 words containing non-ASCII letters (e.g. "blåbær") are split
// into several "words". Count runs of Unicode letters instead, allowing
// combining marks, apostrophes and hyphens inside a word, as str_word_count()
// does for ASCII text.
$wordCount = preg_match_all('/\p{L}[\p{L}\p{M}\'-]*/u', $content);
if ($wordCount === false) {
    // PCRE failure (e.g. malformed UTF-8): fall back to the byte-oriented count.
    $wordCount = str_word_count($content);
}

// Insert the document as 'pending'. All request-derived values are bound as
// parameters; source_type, category and import_method are fixed literals that
// mark the row as tool output.
$ins = $db->prepare("
    INSERT INTO client_documents
        (client_id, corpus_id, title, source_type, content, category,
         tags, import_method, source_tool, source_url, word_count, status)
    VALUES (?, ?, ?, 'text', ?, 'tool-output', ?, 'tool_output', ?, ?, ?, 'pending')
");
$ins->execute([$clientId, $corpusId, $title, $content, $tags, $sourceTool, $sourceUrl, $wordCount]);

// ID of the row just inserted; used for indexing and returned to the client.
$docId = (int) $db->lastInsertId();
|
|
|
|
// Index the freshly inserted document through the client's RAG pipeline.
try {
    $rag = new ClientRagPipeline($clientId);
    $chunks = $rag->ingestDocument($docId);
} catch (Throwable $e) {
    // The document row exists but was not indexed. The full exception message is
    // kept server-side (log + error_message column) and not echoed to the client,
    // because pipeline errors can carry internal URLs, paths or credentials.
    error_log("save-to-corpus: indexing failed for document {$docId}: " . $e->getMessage());

    try {
        $db->prepare("UPDATE client_documents SET status='error', error_message=? WHERE id=?")
            ->execute([$e->getMessage(), $docId]);
    } catch (Throwable $updateError) {
        // Failing to record the error must not replace the index_failed response.
        error_log(
            "save-to-corpus: could not mark document {$docId} as errored: " . $updateError->getMessage()
        );
    }

    // Partial success: the caller receives the document ID of the saved row.
    dbnToolsError(
        'Saved to corpus but indexing failed.',
        500,
        'index_failed',
        ['document_id' => $docId]
    );
}

// Full success: 201 with the new document ID and whatever ingestDocument() returned.
dbnToolsRespond(['ok' => true, 'document_id' => $docId, 'chunks' => $chunks], 201);
|