Add document upload to Redact tool
api/extract.php — new endpoint accepting .pdf/.docx/.txt up to 4 MB; pdftotext for PDFs, ZipArchive+DOMXPath for DOCX, mb_convert_encoding for TXT; truncates to 32 000 chars to stay within redact limit. index.php — drop/browse upload zone above the textarea, visible only in Redact mode. tools.js — setupUpload(), handleFileUpload(), resetUpload(); drag-and-drop and file picker both call the extract endpoint then populate the textarea. tools.css — upload zone, drag-over, file-info, clear button styles. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+137
@@ -0,0 +1,137 @@
|
||||
<?php
|
||||
declare(strict_types=1);
|
||||
|
||||
require_once __DIR__ . '/../includes/bootstrap.php';
|
||||
|
||||
// Endpoint guards: both helpers come from includes/bootstrap.php.
// NOTE(review): they are assumed to stop the request (exit or throw) when the
// check fails — confirm against bootstrap.php.
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

/** Largest accepted upload, in bytes (4 MB). */
const EXTRACT_MAX_BYTES = 4 * 1024 * 1024;

/** Maximum number of characters returned to the client; longer text is cut. */
const EXTRACT_TEXT_LIMIT = 32000;

/** Lower-case file extensions this endpoint can extract text from. */
const EXTRACT_ALLOWED_EXTS = ['txt', 'pdf', 'docx'];

try {
    // NOTE(review): every dbnToolsError() call below is followed by code that
    // only makes sense if dbnToolsError() never returns — confirm.
    if (empty($_FILES['file']) || !is_array($_FILES['file'])) {
        dbnToolsError('No file was uploaded.', 422, 'missing_file');
    }

    $file = $_FILES['file'];

    // A multi-file field ("file[]") makes every entry of $_FILES['file'] an
    // array. Casting such an 'error' array to int yields 1, which would be
    // misreported as UPLOAD_ERR_INI_SIZE, so reject that shape explicitly.
    if (is_array($file['error'] ?? null)) {
        dbnToolsError('Invalid file upload.', 400, 'invalid_upload');
    }

    $errCode = (int)($file['error'] ?? UPLOAD_ERR_NO_FILE);

    if ($errCode !== UPLOAD_ERR_OK) {
        $msg = match ($errCode) {
            UPLOAD_ERR_INI_SIZE, UPLOAD_ERR_FORM_SIZE => 'The file exceeds the allowed size limit.',
            UPLOAD_ERR_PARTIAL                        => 'The file was only partially uploaded.',
            UPLOAD_ERR_NO_FILE                        => 'No file was uploaded.',
            UPLOAD_ERR_NO_TMP_DIR                     => 'No temporary directory is available.',
            UPLOAD_ERR_CANT_WRITE                     => 'Unable to save the uploaded file.',
            default                                   => 'File upload failed.',
        };
        dbnToolsError($msg, 422, 'upload_error');
    }

    // basename() drops any directory components the client put in the name.
    $originalName = basename((string)($file['name'] ?? ''));
    $tmpPath      = (string)($file['tmp_name'] ?? '');
    $size         = (int)($file['size'] ?? 0);

    // Only accept paths PHP itself created for this request's upload.
    if (!is_uploaded_file($tmpPath)) {
        dbnToolsError('Invalid file upload.', 400, 'invalid_upload');
    }
    if ($size === 0) {
        dbnToolsError('The uploaded file is empty.', 422, 'file_empty');
    }
    if ($size > EXTRACT_MAX_BYTES) {
        dbnToolsError('File exceeds the 4 MB limit.', 413, 'file_too_large');
    }

    // The extractor is chosen from the file extension only; content is not sniffed.
    $ext = strtolower(pathinfo($originalName, PATHINFO_EXTENSION));
    if (!in_array($ext, EXTRACT_ALLOWED_EXTS, true)) {
        dbnToolsError('Unsupported file type. Upload a .pdf, .docx, or .txt file.', 422, 'unsupported_type');
    }

    // Exhaustive for EXTRACT_ALLOWED_EXTS; anything else was rejected above.
    $text = match ($ext) {
        'txt'  => extractTxt($tmpPath),
        'pdf'  => extractPdf($tmpPath),
        'docx' => extractDocx($tmpPath),
    };

    $text = trim($text);
    if ($text === '') {
        dbnToolsError('No text could be extracted from this file.', 422, 'no_text');
    }

    // Truncate by characters (not bytes) so multi-byte text is never split.
    $truncated = false;
    if (mb_strlen($text, 'UTF-8') > EXTRACT_TEXT_LIMIT) {
        $text      = mb_substr($text, 0, EXTRACT_TEXT_LIMIT, 'UTF-8');
        $truncated = true;
    }

    dbnToolsRespond([
        'ok'        => true,
        'text'      => $text,
        'filename'  => $originalName,
        'chars'     => mb_strlen($text, 'UTF-8'),
        'truncated' => $truncated,
    ]);
} catch (DbnToolsHttpException $e) {
    // Errors raised by the extractors carry their own status and error code.
    dbnToolsError($e->getMessage(), $e->status, $e->errorCode, $e->extra);
} catch (Throwable $e) {
    // Anything unexpected is logged server-side and reported generically.
    error_log('DBN extract error: ' . $e->getMessage());
    dbnToolsError('Text extraction failed.', 500, 'extract_error');
}
|
||||
|
||||
/**
 * Read a plain-text upload and return its contents as UTF-8.
 *
 * Encoding is resolved deterministically instead of relying on mbstring's
 * detection order:
 *  - a UTF-16 byte-order mark selects UTF-16LE / UTF-16BE;
 *  - a UTF-8 byte-order mark is stripped;
 *  - bytes that are already valid UTF-8 are returned unchanged;
 *  - anything else is decoded as Windows-1252, which agrees with ISO-8859-1
 *    for all printable characters and additionally maps 0x80–0x9F to the
 *    curly quotes, dashes and euro sign found in legacy Windows files.
 *
 * @param string $path Path of the file to read.
 *
 * @return string The file contents encoded as UTF-8.
 *
 * @throws DbnToolsHttpException When the file cannot be read.
 */
function extractTxt(string $path): string
{
    $content = file_get_contents($path);
    if ($content === false) {
        throw new DbnToolsHttpException('Unable to read the file.', 500, 'read_error');
    }

    // UTF-16 files are identified by their BOM.
    if (str_starts_with($content, "\xFF\xFE")) {
        return mb_convert_encoding(substr($content, 2), 'UTF-8', 'UTF-16LE');
    }
    if (str_starts_with($content, "\xFE\xFF")) {
        return mb_convert_encoding(substr($content, 2), 'UTF-8', 'UTF-16BE');
    }

    // Drop a UTF-8 BOM so it does not end up as an invisible first character.
    if (str_starts_with($content, "\xEF\xBB\xBF")) {
        $content = substr($content, 3);
    }

    if (mb_check_encoding($content, 'UTF-8')) {
        return $content;
    }

    // ISO-8859-1 accepts every byte sequence, so listing it ahead of
    // Windows-1252 in a detection list can hide the latter; decode as
    // Windows-1252 directly instead.
    return mb_convert_encoding($content, 'UTF-8', 'Windows-1252');
}
|
||||
|
||||
/**
 * Extract the text layer of a PDF by running the `pdftotext` command-line tool.
 *
 * The tool writes to stdout ("-") and its stderr is discarded. pdftotext ends
 * every page with a form feed (0x0C); those are converted to newlines because
 * PHP's default trim() does not strip form feeds, so an image-only PDF (whose
 * output is nothing but form feeds) would otherwise be treated as having text.
 *
 * @param string $path Path of the PDF file.
 *
 * @return string Extracted text with page breaks replaced by newlines.
 *
 * @throws DbnToolsHttpException When the command produces no usable text
 *                               (for example image-only or encrypted PDFs,
 *                               or when the command could not be run).
 */
function extractPdf(string $path): string
{
    // -enc UTF-8 pins the output encoding rather than relying on local configuration.
    $cmd    = 'pdftotext -enc UTF-8 ' . escapeshellarg($path) . ' - 2>/dev/null';
    $output = shell_exec($cmd);

    // shell_exec() yields null/false when the command fails or prints nothing.
    $text = is_string($output) ? str_replace("\f", "\n", $output) : '';

    if (trim($text) === '') {
        throw new DbnToolsHttpException(
            'PDF text extraction failed. The file may be image-only or encrypted.',
            422,
            'pdf_extract_failed'
        );
    }

    return $text;
}
|
||||
|
||||
/**
 * Extract the body text of a .docx file.
 *
 * Opens the file as a ZIP archive, parses `word/document.xml`, and returns one
 * line per WordprocessingML paragraph (`w:p`). Inside a paragraph, text runs
 * (`w:t`) are concatenated, `w:tab` becomes a tab and `w:br` / `w:cr` become a
 * newline. Content that belongs to a paragraph nested inside another one
 * (for example a text box) is attributed to the nested paragraph only, so it
 * is not emitted twice.
 *
 * @param string $path Path of the .docx file.
 *
 * @return string Paragraph texts joined with "\n"; may be empty.
 *
 * @throws DbnToolsHttpException When the archive cannot be opened, the main
 *                               document part is missing, too large, or not
 *                               well-formed XML.
 */
function extractDocx(string $path): string
{
    $wordNs = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main';
    // Cap on the uncompressed size of the document part: a small archive can
    // declare an enormous member (zip bomb) that would exhaust memory.
    $maxXmlBytes = 32 * 1024 * 1024;

    $zip = new ZipArchive();
    if ($zip->open($path) !== true) {
        throw new DbnToolsHttpException('Unable to open the .docx file.', 422, 'docx_open_failed');
    }

    try {
        $stat = $zip->statName('word/document.xml');
        if ($stat !== false && (int)($stat['size'] ?? 0) > $maxXmlBytes) {
            throw new DbnToolsHttpException('The .docx document is too large to process.', 413, 'docx_too_large');
        }
        $xml = $zip->getFromName('word/document.xml');
    } finally {
        $zip->close();
    }

    // loadXML() throws a ValueError on an empty string, so treat it as missing content.
    if ($xml === false || $xml === '') {
        throw new DbnToolsHttpException('No document content found in this .docx file.', 422, 'docx_no_content');
    }

    $doc = new DOMDocument();
    // Collect parser errors quietly, then restore the caller's libxml setting.
    $previous = libxml_use_internal_errors(true);
    try {
        // LIBXML_NONET: never fetch external resources for an uploaded document.
        $loaded = $doc->loadXML($xml, LIBXML_NONET);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }
    if ($loaded === false) {
        throw new DbnToolsHttpException('Unable to read the .docx document content.', 422, 'docx_invalid_xml');
    }

    $xpath = new DOMXPath($doc);
    $xpath->registerNamespace('w', $wordNs);

    $paragraphs = [];
    foreach ($xpath->query('//w:p') as $para) {
        $pieces = [];
        // Tabs and breaks are matched only as children of a run (w:r) so that
        // tab-stop definitions under w:pPr/w:tabs are not mistaken for text.
        foreach ($xpath->query('.//w:t | .//w:r/w:tab | .//w:r/w:br | .//w:r/w:cr', $para) as $node) {
            // Find the closest enclosing paragraph of this node.
            $owner = $node->parentNode;
            while ($owner !== null && !($owner->localName === 'p' && $owner->namespaceURI === $wordNs)) {
                $owner = $owner->parentNode;
            }
            // Skip nodes owned by a nested paragraph; that paragraph gets its own line.
            if ($owner === null || !$owner->isSameNode($para)) {
                continue;
            }

            $pieces[] = match ($node->localName) {
                't'     => $node->textContent,
                'tab'   => "\t",
                default => "\n",
            };
        }
        $paragraphs[] = implode('', $pieces);
    }

    return implode("\n", $paragraphs);
}
|
||||
@@ -873,3 +873,93 @@ p {
|
||||
overflow-x: auto;
|
||||
color: #374151;
|
||||
}
|
||||
|
||||
/* ─── Upload zone (Redact tool) ──────────────────────────────────────────── */

/* Dashed drop target. Positioned so the absolutely-placed file input stays inside it. */
.upload-zone {
  position: relative;
  padding: 18px 14px;
  border: 2px dashed var(--line);
  border-radius: 8px;
  text-align: center;
  cursor: pointer;
  transition: border-color 0.15s, background 0.15s;
}

/* Hover and active drag share the teal border; only the fill differs. */
.upload-zone:hover,
.upload-zone.is-drag-over {
  border-color: var(--teal);
}

.upload-zone:hover {
  background: #f7fdfb;
}

/* Declared after :hover so the drag fill wins when both states apply. */
.upload-zone.is-drag-over {
  background: var(--soft-teal);
}

/* The native file input takes no space, is invisible and ignores the pointer. */
#uploadInput {
  position: absolute;
  width: 0;
  height: 0;
  opacity: 0;
  pointer-events: none;
}

/* Decorative glyph above the prompt text. */
.upload-icon {
  display: block;
  margin-bottom: 6px;
  font-size: 1.8rem;
  line-height: 1;
  color: var(--teal);
  opacity: 0.55;
}

.upload-prompt p {
  margin: 4px 0 0;
  font-size: 0.88rem;
  color: var(--muted);
}

/* "browse" label styled as an inline link. */
.upload-browse {
  font-weight: 700;
  color: var(--teal);
  cursor: pointer;
  text-decoration: underline;
  text-underline-offset: 2px;
}

/* !important is required to beat the more specific `.upload-prompt p` font-size. */
.upload-hint {
  font-size: 0.76rem !important;
  opacity: 0.7;
}

/* Row shown once a file has been loaded: file name plus clear button. */
.upload-file {
  display: flex;
  justify-content: center;
  align-items: center;
  gap: 10px;
  min-height: 48px;
}

/* Long file names may wrap at any character. */
.upload-filename {
  font-size: 0.88rem;
  font-weight: 600;
  color: var(--ink);
  word-break: break-all;
}

/* The transparent border reserves space so the hover border causes no layout shift. */
.upload-clear {
  flex-shrink: 0;
  padding: 2px 7px;
  border: 1px solid transparent;
  border-radius: 4px;
  background: transparent;
  font-size: 1.25rem;
  line-height: 1;
  color: var(--muted);
}

.upload-clear:hover {
  border-color: #f5c6aa;
  background: var(--soft-coral);
  color: var(--coral);
}
|
||||
|
||||
@@ -80,6 +80,12 @@ document.addEventListener('DOMContentLoaded', () => {
|
||||
traceList: document.querySelector('#traceList'),
|
||||
healthButton: document.querySelector('#healthButton'),
|
||||
healthPill: document.querySelector('#healthPill'),
|
||||
uploadZone: document.querySelector('#uploadZone'),
|
||||
uploadInput: document.querySelector('#uploadInput'),
|
||||
uploadPrompt: document.querySelector('#uploadPrompt'),
|
||||
uploadFileInfo: document.querySelector('#uploadFileInfo'),
|
||||
uploadFileName: document.querySelector('#uploadFileName'),
|
||||
uploadClear: document.querySelector('#uploadClear'),
|
||||
});
|
||||
|
||||
els.tabs.forEach((button) => {
|
||||
@@ -88,6 +94,7 @@ document.addEventListener('DOMContentLoaded', () => {
|
||||
els.form.addEventListener('submit', runTool);
|
||||
els.passcodeForm.addEventListener('submit', submitPasscode);
|
||||
els.healthButton.addEventListener('click', checkHealth);
|
||||
setupUpload();
|
||||
setTool(state.activeTool);
|
||||
|
||||
if (state.authenticated) {
|
||||
@@ -114,6 +121,8 @@ function setTool(toolName) {
|
||||
els.input.placeholder = tool.placeholder;
|
||||
els.languageControl.classList.toggle('is-hidden', !tool.usesLanguage);
|
||||
els.redactionControl.classList.toggle('is-hidden', toolName !== 'redact');
|
||||
els.uploadZone.classList.toggle('is-hidden', toolName !== 'redact');
|
||||
resetUpload();
|
||||
els.status.textContent = '';
|
||||
renderTrace([]);
|
||||
}
|
||||
@@ -186,6 +195,92 @@ async function runTool(event) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Return the upload zone to its initial state: no file selected, the prompt
 * visible, the file-info row hidden and emptied, and no drag highlight.
 * Does nothing when the file input element is not present.
 */
function resetUpload() {
  const {
    uploadInput: fileInput,
    uploadPrompt: prompt,
    uploadFileInfo: fileInfo,
    uploadFileName: fileLabel,
    uploadZone: zone,
  } = els;

  if (!fileInput) {
    return;
  }

  // Clearing the value forgets the previously chosen file.
  fileInput.value = '';

  prompt.classList.remove('is-hidden');
  fileInfo.classList.add('is-hidden');
  fileLabel.textContent = '';
  zone.classList.remove('is-drag-over');
}
|
||||
|
||||
/**
 * Wire up the upload zone: drag-and-drop, click-to-browse, the hidden file
 * input's change event, and the clear button. Both drop and file-picker
 * selection hand the first chosen file to handleFileUpload().
 * Does nothing when the upload zone or its file input is not present.
 */
function setupUpload() {
  if (!els.uploadZone || !els.uploadInput) return;

  els.uploadZone.addEventListener('dragover', (e) => {
    // preventDefault marks the zone as a valid drop target.
    e.preventDefault();
    els.uploadZone.classList.add('is-drag-over');
  });

  els.uploadZone.addEventListener('dragleave', (e) => {
    // Ignore dragleave events fired when moving between the zone's own children.
    if (!els.uploadZone.contains(e.relatedTarget)) {
      els.uploadZone.classList.remove('is-drag-over');
    }
  });

  els.uploadZone.addEventListener('drop', (e) => {
    // Stop the browser from navigating to / opening the dropped file.
    e.preventDefault();
    els.uploadZone.classList.remove('is-drag-over');
    const file = e.dataTransfer?.files?.[0];
    if (file) handleFileUpload(file);
  });

  els.uploadZone.addEventListener('click', (e) => {
    // The clear button has its own handler.
    if (e.target === els.uploadClear || els.uploadClear?.contains(e.target)) return;
    // A <label for="uploadInput"> opens the picker natively.
    if (e.target.closest?.('label')) return;
    // Clicks dispatched on the input itself (by the label or by the call
    // below) bubble up to the zone; do not re-trigger the picker for them.
    if (e.target === els.uploadInput) return;
    els.uploadInput.click();
  });

  els.uploadInput.addEventListener('change', () => {
    const file = els.uploadInput.files?.[0];
    // Clear the selection right away: otherwise picking the same file again
    // after a successful upload would not fire another change event. The
    // File object obtained above stays usable.
    els.uploadInput.value = '';
    if (file) handleFileUpload(file);
  });

  els.uploadClear?.addEventListener('click', () => {
    resetUpload();
    els.status.textContent = '';
  });
}
|
||||
|
||||
/**
 * Upload a user-selected file to api/extract.php and load the returned text
 * into the tool's input field.
 *
 * Files whose extension is not pdf, docx or txt are rejected client-side with
 * a status message. While the request runs the UI is marked busy via
 * setBusy(). On success the input is filled, the file name is shown and the
 * status line reports the character count (and whether the server truncated
 * the text). On any failure the error message is shown and the upload zone is
 * reset.
 *
 * @param {File} file The file chosen via drag-and-drop or the file picker.
 * @returns {Promise<void>}
 */
async function handleFileUpload(file) {
  const allowed = ['pdf', 'docx', 'txt'];

  // Require a real extension: a file literally named "pdf" has none, and the
  // server, which uses the text after the last dot, would reject it anyway.
  const dot = file.name.lastIndexOf('.');
  const ext = dot === -1 ? '' : file.name.slice(dot + 1).toLowerCase();
  if (!allowed.includes(ext)) {
    els.status.textContent = 'Unsupported file type. Use .pdf, .docx, or .txt.';
    return;
  }

  els.status.textContent = `Extracting ${file.name}…`;
  setBusy(true);

  try {
    const formData = new FormData();
    formData.append('file', file);

    const resp = await fetch('api/extract.php', {
      method: 'POST',
      credentials: 'same-origin',
      body: formData,
    });
    // A non-JSON or literal `null` body is treated as an empty object.
    const data = (await resp.json().catch(() => null)) ?? {};

    if (!resp.ok || !data.ok) {
      throw new Error(data.error?.message || `Extraction failed (HTTP ${resp.status}).`);
    }
    // Validate the payload before touching the UI so a malformed response
    // cannot leave the input half-updated.
    if (typeof data.text !== 'string') {
      throw new Error(`Extraction failed (HTTP ${resp.status}).`);
    }
    // Fall back to counting code points when the server omits the count.
    const chars = Number.isFinite(data.chars) ? data.chars : [...data.text].length;

    els.input.value = data.text;
    els.uploadFileName.textContent = file.name;
    els.uploadPrompt.classList.add('is-hidden');
    els.uploadFileInfo.classList.remove('is-hidden');

    const note = data.truncated ? ' (truncated to 32 000 chars)' : '';
    els.status.textContent = `Extracted ${chars.toLocaleString()} chars from ${file.name}${note}.`;
  } catch (err) {
    els.status.textContent = err.message;
    resetUpload();
  } finally {
    setBusy(false);
  }
}
|
||||
|
||||
async function checkHealth() {
|
||||
els.healthPill.textContent = 'Checking...';
|
||||
try {
|
||||
|
||||
@@ -218,6 +218,19 @@ $authenticated = dbnToolsIsAuthenticated();
|
||||
<label><input type="radio" name="redactionRegion" value="global"> Global</label>
|
||||
</div>
|
||||
|
||||
<div class="upload-zone is-hidden" id="uploadZone" role="region" aria-label="File upload">
|
||||
<input type="file" id="uploadInput" accept=".pdf,.docx,.txt" aria-label="Choose a file">
|
||||
<div id="uploadPrompt" class="upload-prompt">
|
||||
<span class="upload-icon" aria-hidden="true">⇧</span>
|
||||
<p>Drop a <strong>.pdf</strong>, <strong>.docx</strong>, or <strong>.txt</strong>, or <label for="uploadInput" class="upload-browse">browse</label></p>
|
||||
<p class="upload-hint">Text is extracted and never stored.</p>
|
||||
</div>
|
||||
<div id="uploadFileInfo" class="upload-file is-hidden">
|
||||
<span id="uploadFileName" class="upload-filename"></span>
|
||||
<button type="button" id="uploadClear" class="upload-clear" aria-label="Clear uploaded file">×</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<label class="input-label" for="toolInput" id="inputLabel">Question</label>
|
||||
<textarea id="toolInput" name="toolInput" rows="10" required></textarea>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user