Add document upload to Redact tool

api/extract.php — new endpoint accepting .pdf/.docx/.txt up to 4 MB;
pdftotext for PDFs, ZipArchive+DOMXPath for DOCX, mb_convert_encoding
for TXT; truncates to 32 000 chars to stay within redact limit.

index.php — drop/browse upload zone above the textarea, visible only
in Redact mode.

tools.js — setupUpload(), handleFileUpload(), resetUpload(); drag-and-drop
and file picker both call the extract endpoint then populate the textarea.

tools.css — upload zone, drag-over, file-info, clear button styles.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-13 06:52:14 +02:00
parent 3c8d7ebc34
commit bbe5307c03
4 changed files with 335 additions and 0 deletions
+137
View File
@@ -0,0 +1,137 @@
<?php
/**
 * Text-extraction endpoint for the Redact tool.
 *
 * Accepts one multipart upload in the "file" field (.txt, .pdf or .docx, at
 * most EXTRACT_MAX_BYTES bytes), extracts its text, trims it, cuts it to
 * EXTRACT_TEXT_LIMIT characters and responds with
 * {ok, text, filename, chars, truncated}.
 *
 * NOTE(review): the control flow below relies on dbnToolsError() never
 * returning (it exits or throws). It is defined outside this file — confirm.
 */
declare(strict_types=1);

require_once __DIR__ . '/../includes/bootstrap.php';

dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

const EXTRACT_MAX_BYTES = 4 * 1024 * 1024;
const EXTRACT_TEXT_LIMIT = 32000;
const EXTRACT_ALLOWED_EXTS = ['txt', 'pdf', 'docx'];

try {
    if (empty($_FILES['file']) || !is_array($_FILES['file'])) {
        dbnToolsError('No file was uploaded.', 422, 'missing_file');
    }

    $file = $_FILES['file'];

    // A field submitted as "file[]" turns every entry of $_FILES['file'] into
    // an array. Casting that to int would yield 1 (UPLOAD_ERR_INI_SIZE) and
    // report a misleading "too large" error, so reject it explicitly.
    if (is_array($file['error'] ?? null)) {
        dbnToolsError('Upload one file at a time.', 422, 'multiple_files');
    }

    $errCode = (int)($file['error'] ?? UPLOAD_ERR_NO_FILE);
    if ($errCode !== UPLOAD_ERR_OK) {
        // UPLOAD_ERR_NO_FILE is what PHP reports when the field is present but
        // no file was chosen; answer it the same way as a missing field.
        [$msg, $code] = match ($errCode) {
            UPLOAD_ERR_INI_SIZE, UPLOAD_ERR_FORM_SIZE => ['The file exceeds the allowed size limit.', 'upload_error'],
            UPLOAD_ERR_PARTIAL => ['The file was only partially uploaded. Please try again.', 'upload_error'],
            UPLOAD_ERR_NO_FILE => ['No file was uploaded.', 'missing_file'],
            UPLOAD_ERR_NO_TMP_DIR => ['No temporary directory is available.', 'upload_error'],
            UPLOAD_ERR_CANT_WRITE => ['Unable to save the uploaded file.', 'upload_error'],
            default => ['File upload failed.', 'upload_error'],
        };
        dbnToolsError($msg, 422, $code);
    }

    // basename() drops any directory part a client may have put in the name.
    $originalName = basename((string)($file['name'] ?? ''));
    $tmpPath = (string)($file['tmp_name'] ?? '');
    $size = (int)($file['size'] ?? 0);

    // Only touch paths that PHP itself created for this request's upload.
    if (!is_uploaded_file($tmpPath)) {
        dbnToolsError('Invalid file upload.', 400, 'invalid_upload');
    }
    if ($size === 0) {
        dbnToolsError('The uploaded file is empty.', 422, 'file_empty');
    }
    if ($size > EXTRACT_MAX_BYTES) {
        dbnToolsError('File exceeds the 4 MB limit.', 413, 'file_too_large');
    }

    $ext = strtolower(pathinfo($originalName, PATHINFO_EXTENSION));
    if (!in_array($ext, EXTRACT_ALLOWED_EXTS, true)) {
        dbnToolsError('Unsupported file type. Upload a .pdf, .docx, or .txt file.', 422, 'unsupported_type');
    }

    // Exhaustive for EXTRACT_ALLOWED_EXTS; anything else was rejected above.
    $text = match ($ext) {
        'txt' => extractTxt($tmpPath),
        'pdf' => extractPdf($tmpPath),
        'docx' => extractDocx($tmpPath),
    };

    $text = trim($text);
    if ($text === '') {
        dbnToolsError('No text could be extracted from this file.', 422, 'no_text');
    }

    // Count and cut in characters, not bytes, so multi-byte text is never split.
    $truncated = false;
    if (mb_strlen($text, 'UTF-8') > EXTRACT_TEXT_LIMIT) {
        $text = mb_substr($text, 0, EXTRACT_TEXT_LIMIT, 'UTF-8');
        $truncated = true;
    }

    dbnToolsRespond([
        'ok' => true,
        'text' => $text,
        'filename' => $originalName,
        'chars' => mb_strlen($text, 'UTF-8'),
        'truncated' => $truncated,
    ]);
} catch (DbnToolsHttpException $e) {
    // Errors raised by the extractors carry their own status and error code.
    dbnToolsError($e->getMessage(), $e->status, $e->errorCode, $e->extra);
} catch (Throwable $e) {
    // Anything unexpected is logged server-side and hidden from the client.
    error_log('DBN extract error: ' . $e->getMessage());
    dbnToolsError('Text extraction failed.', 500, 'extract_error');
}
/**
 * Reads a plain-text upload and returns its contents as UTF-8.
 *
 * Decoding is deterministic rather than auto-detected from a candidate list:
 * list detection is heuristic, and because ISO-8859-1 accepts every byte
 * sequence it can win over Windows-1252 and turn curly quotes and dashes into
 * C1 control characters.
 *
 *  - a UTF-8 byte-order mark is stripped;
 *  - a UTF-16 byte-order mark selects UTF-16LE / UTF-16BE;
 *  - content that is valid UTF-8 is returned unchanged;
 *  - everything else is decoded as Windows-1252, which agrees with
 *    ISO-8859-1 for all printable characters.
 *
 * @param string $path Path of the file to read.
 *
 * @return string The file contents encoded as UTF-8.
 *
 * @throws DbnToolsHttpException With status 500 when the file cannot be read.
 */
function extractTxt(string $path): string
{
    $content = file_get_contents($path);
    if ($content === false) {
        throw new DbnToolsHttpException('Unable to read the file.', 500, 'read_error');
    }

    if (str_starts_with($content, "\xEF\xBB\xBF")) {
        // UTF-8 BOM: drop it so it does not end up as an invisible first character.
        $content = substr($content, 3);
    } elseif (str_starts_with($content, "\xFF\xFE")) {
        return mb_convert_encoding(substr($content, 2), 'UTF-8', 'UTF-16LE');
    } elseif (str_starts_with($content, "\xFE\xFF")) {
        return mb_convert_encoding(substr($content, 2), 'UTF-8', 'UTF-16BE');
    }

    if (mb_check_encoding($content, 'UTF-8')) {
        return $content;
    }

    return mb_convert_encoding($content, 'UTF-8', 'Windows-1252');
}
/**
 * Extracts the text layer of a PDF by running the `pdftotext` command-line
 * tool and reading its standard output.
 *
 * The output encoding is pinned with `-enc UTF-8` because the caller measures
 * and truncates the text as UTF-8, and not every pdftotext build defaults to
 * it (xpdf builds default to Latin1).
 *
 * pdftotext separates pages with form-feed characters. PHP's trim() does not
 * strip form feeds, so a scanned (image-only) PDF would otherwise yield a
 * "non-empty" result made only of page separators. They are converted to
 * newlines before the emptiness check.
 *
 * @param string $path Path of the PDF file; shell-escaped before use.
 *
 * @return string Extracted text with page breaks represented as newlines.
 *
 * @throws DbnToolsHttpException With status 422 when the command produces no
 *                               usable text (including when it cannot be run;
 *                               stderr is discarded so the cases look alike).
 */
function extractPdf(string $path): string
{
    $cmd = 'pdftotext -enc UTF-8 ' . escapeshellarg($path) . ' - 2>/dev/null';
    $output = shell_exec($cmd);

    // shell_exec() yields null/false on failure or when nothing was printed.
    $text = is_string($output) ? str_replace("\f", "\n", $output) : '';

    if (trim($text) === '') {
        throw new DbnToolsHttpException(
            'PDF text extraction failed. The file may be image-only or encrypted.',
            422,
            'pdf_extract_failed'
        );
    }

    return $text;
}
/**
 * Extracts the body text of a .docx file.
 *
 * Opens the file as a zip archive, parses word/document.xml and joins the
 * text of every paragraph (w:p) with newlines. Within a paragraph the text
 * runs (w:t) are concatenated in document order; tabs (w:tab) and line breaks
 * (w:br, w:cr) that sit directly inside a run are emitted as "\t" and "\n" so
 * neighbouring words are not glued together. Tab-stop definitions in paragraph
 * properties also use the w:tab element name and are deliberately excluded by
 * requiring a w:r parent.
 *
 * @param string $path Path of the .docx file.
 *
 * @return string Paragraph texts separated by "\n" (may be empty).
 *
 * @throws DbnToolsHttpException With status 422 when the archive cannot be
 *                               opened, has no document part, or the document
 *                               part is not well-formed XML.
 */
function extractDocx(string $path): string
{
    $zip = new ZipArchive();
    if ($zip->open($path) !== true) {
        throw new DbnToolsHttpException('Unable to open the .docx file.', 422, 'docx_open_failed');
    }
    $xml = $zip->getFromName('word/document.xml');
    $zip->close();

    // An empty part is treated like a missing one: DOMDocument::loadXML()
    // raises a ValueError for an empty string on PHP 8, which the endpoint
    // would otherwise report as a 500.
    if ($xml === false || $xml === '') {
        throw new DbnToolsHttpException('No document content found in this .docx file.', 422, 'docx_no_content');
    }

    // Collect libxml errors quietly while parsing, then restore the previous
    // setting so the process-wide libxml state is left as it was found.
    $previousSetting = libxml_use_internal_errors(true);
    try {
        $doc = new DOMDocument();
        // LIBXML_NONET: never touch the network while parsing an untrusted upload.
        $loaded = $doc->loadXML($xml, LIBXML_NONET);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);
    }
    if ($loaded === false) {
        throw new DbnToolsHttpException('The .docx file is damaged and could not be read.', 422, 'docx_invalid_xml');
    }

    $xpath = new DOMXPath($doc);
    $xpath->registerNamespace('w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main');

    // One location path (no union) so nodes arrive in document order.
    $inlineQuery = './/*[self::w:t or (parent::w:r and (self::w:tab or self::w:br or self::w:cr))]';

    $paragraphs = [];
    foreach ($xpath->query('//w:p') as $para) {
        $pieces = [];
        foreach ($xpath->query($inlineQuery, $para) as $node) {
            $pieces[] = match ($node->localName) {
                't' => $node->textContent,
                'tab' => "\t",
                default => "\n", // w:br and w:cr
            };
        }
        $paragraphs[] = implode('', $pieces);
    }

    return implode("\n", $paragraphs);
}
+90
View File
@@ -873,3 +873,93 @@ p {
overflow-x: auto;
color: #374151;
}
/* ─── Upload zone (Redact tool) ──────────────────────────────────────────── */
/* Dashed drop target; the whole box is clickable. */
.upload-zone {
  border: 2px dashed var(--line);
  border-radius: 8px;
  padding: 18px 14px;
  text-align: center;
  cursor: pointer;
  transition: border-color 0.15s, background 0.15s;
  position: relative;
}
.upload-zone:hover {
  border-color: var(--teal);
  background: #f7fdfb;
}
/* Applied by the script while a dragged file is over the zone. */
.upload-zone.is-drag-over {
  border-color: var(--teal);
  background: var(--soft-teal);
}
/* The native file input is hidden visually but stays focusable, so keyboard
   users can still reach it. */
#uploadInput {
  position: absolute;
  width: 0;
  height: 0;
  opacity: 0;
  pointer-events: none;
}
/* Because the input itself is invisible, show its keyboard focus on the
   visible text that follows it (the "browse" label, or the file name once a
   file has been loaded). */
#uploadInput:focus-visible ~ .upload-prompt .upload-browse,
#uploadInput:focus-visible ~ .upload-file .upload-filename {
  outline: 2px solid var(--teal);
  outline-offset: 2px;
  border-radius: 2px;
}
.upload-icon {
  display: block;
  font-size: 1.8rem;
  line-height: 1;
  color: var(--teal);
  opacity: 0.55;
  margin-bottom: 6px;
}
.upload-prompt p {
  margin: 4px 0 0;
  color: var(--muted);
  font-size: 0.88rem;
}
.upload-browse {
  color: var(--teal);
  font-weight: 700;
  cursor: pointer;
  text-decoration: underline;
  text-underline-offset: 2px;
}
/* !important is needed because `.upload-prompt p` is more specific than this
   single-class selector and would otherwise win. */
.upload-hint {
  font-size: 0.76rem !important;
  opacity: 0.7;
}
/* Row shown after a successful upload: file name plus clear button. */
.upload-file {
  display: flex;
  align-items: center;
  justify-content: center;
  gap: 10px;
  min-height: 48px;
}
.upload-filename {
  font-size: 0.88rem;
  font-weight: 600;
  color: var(--ink);
  word-break: break-all; /* long file names must not overflow the zone */
}
.upload-clear {
  flex-shrink: 0;
  background: transparent;
  color: var(--muted);
  font-size: 1.25rem;
  line-height: 1;
  padding: 2px 7px;
  border-radius: 4px;
  border: 1px solid transparent; /* reserves the space used by the hover border */
}
.upload-clear:hover {
  background: var(--soft-coral);
  color: var(--coral);
  border-color: #f5c6aa;
}
+95
View File
@@ -80,6 +80,12 @@ document.addEventListener('DOMContentLoaded', () => {
traceList: document.querySelector('#traceList'),
healthButton: document.querySelector('#healthButton'),
healthPill: document.querySelector('#healthPill'),
uploadZone: document.querySelector('#uploadZone'),
uploadInput: document.querySelector('#uploadInput'),
uploadPrompt: document.querySelector('#uploadPrompt'),
uploadFileInfo: document.querySelector('#uploadFileInfo'),
uploadFileName: document.querySelector('#uploadFileName'),
uploadClear: document.querySelector('#uploadClear'),
});
els.tabs.forEach((button) => {
@@ -88,6 +94,7 @@ document.addEventListener('DOMContentLoaded', () => {
els.form.addEventListener('submit', runTool);
els.passcodeForm.addEventListener('submit', submitPasscode);
els.healthButton.addEventListener('click', checkHealth);
setupUpload();
setTool(state.activeTool);
if (state.authenticated) {
@@ -114,6 +121,8 @@ function setTool(toolName) {
els.input.placeholder = tool.placeholder;
els.languageControl.classList.toggle('is-hidden', !tool.usesLanguage);
els.redactionControl.classList.toggle('is-hidden', toolName !== 'redact');
els.uploadZone.classList.toggle('is-hidden', toolName !== 'redact');
resetUpload();
els.status.textContent = '';
renderTrace([]);
}
@@ -186,6 +195,92 @@ async function runTool(event) {
}
}
/**
 * Puts the upload zone back into its initial, empty state: clears the file
 * input's selection, shows the drop prompt, hides and empties the file-name
 * row, and removes any drag-over highlight. Does nothing when the upload
 * input is not present in the page.
 */
function resetUpload() {
  const { uploadInput, uploadPrompt, uploadFileInfo, uploadFileName, uploadZone } = els;
  if (!uploadInput) {
    return;
  }

  // Forget the previous selection.
  uploadInput.value = '';

  // Swap the file-name row back to the prompt.
  uploadPrompt.classList.remove('is-hidden');
  uploadFileInfo.classList.add('is-hidden');
  uploadFileName.textContent = '';

  uploadZone.classList.remove('is-drag-over');
}
/**
 * Wires up the upload zone: drag-and-drop, click-to-browse, the hidden file
 * input's change event, and the clear button. Both the drop and the picker
 * path hand the first file to handleFileUpload().
 *
 * Does nothing when the zone or its file input is missing from the page.
 */
function setupUpload() {
  const { uploadZone, uploadInput, uploadClear } = els;
  if (!uploadZone || !uploadInput) return;

  uploadZone.addEventListener('dragover', (e) => {
    // preventDefault() is what marks the zone as a valid drop target.
    e.preventDefault();
    if (e.dataTransfer) e.dataTransfer.dropEffect = 'copy';
    uploadZone.classList.add('is-drag-over');
  });

  uploadZone.addEventListener('dragleave', (e) => {
    // dragleave also fires when moving onto a child element; only drop the
    // highlight once the pointer has really left the zone.
    if (!uploadZone.contains(e.relatedTarget)) {
      uploadZone.classList.remove('is-drag-over');
    }
  });

  uploadZone.addEventListener('drop', (e) => {
    e.preventDefault();
    uploadZone.classList.remove('is-drag-over');
    const file = e.dataTransfer?.files?.[0];
    if (file) handleFileUpload(file);
  });

  uploadZone.addEventListener('click', (e) => {
    // The clear button has its own handler.
    if (uploadClear?.contains(e.target)) return;
    // The <label for="uploadInput"> opens the picker natively.
    if (e.target.tagName === 'LABEL') return;
    // The click dispatched on the input itself (by the label, or by the
    // click() call below) bubbles up to this handler. Ignore it so click()
    // is not re-entered, which depending on the browser could request the
    // file picker a second time.
    if (e.target === uploadInput) return;
    uploadInput.click();
  });

  uploadInput.addEventListener('change', () => {
    const file = uploadInput.files?.[0];
    // Clear the selection straight away so that picking the same file again
    // later still fires a change event. The File object stays usable.
    uploadInput.value = '';
    if (file) handleFileUpload(file);
  });

  uploadClear?.addEventListener('click', () => {
    resetUpload();
    els.status.textContent = '';
  });
}
/**
 * Sends one file to api/extract.php and, on success, puts the extracted text
 * into the tool's textarea and shows the file name in the upload zone.
 *
 * The file is checked locally first (extension, empty, larger than 4 MB) so
 * obviously unacceptable files are rejected without an upload. The messages
 * mirror the ones the endpoint returns for the same conditions. Progress and
 * errors are reported through the status element; on a failed request the
 * upload zone is reset.
 *
 * @param {File} file The file chosen in the picker or dropped on the zone.
 * @returns {Promise<void>}
 */
async function handleFileUpload(file) {
  const allowed = ['pdf', 'docx', 'txt'];
  const maxBytes = 4 * 1024 * 1024; // keep in sync with the endpoint's limit

  // Only text after the last dot counts as the extension; a name without any
  // dot has none (split('.').pop() would return the whole name instead).
  const dot = file.name.lastIndexOf('.');
  const ext = dot >= 0 ? file.name.slice(dot + 1).toLowerCase() : '';
  if (!allowed.includes(ext)) {
    els.status.textContent = 'Unsupported file type. Use .pdf, .docx, or .txt.';
    return;
  }
  if (file.size === 0) {
    els.status.textContent = 'The uploaded file is empty.';
    return;
  }
  if (file.size > maxBytes) {
    els.status.textContent = 'File exceeds the 4 MB limit.';
    return;
  }

  els.status.textContent = `Extracting ${file.name}`;
  setBusy(true);
  try {
    const formData = new FormData();
    formData.append('file', file);
    const resp = await fetch('api/extract.php', {
      method: 'POST',
      credentials: 'same-origin',
      body: formData,
    });

    // A non-JSON body (or a literal null) is treated as an empty object so
    // the checks below produce a readable error instead of a TypeError.
    const data = (await resp.json().catch(() => null)) ?? {};
    if (!resp.ok || !data.ok) {
      throw new Error(data.error?.message || `Extraction failed (HTTP ${resp.status}).`);
    }
    // Validate the payload before touching the textarea, so a malformed
    // response can never write "undefined" into it.
    if (typeof data.text !== 'string') {
      throw new Error('Extraction failed: unexpected server response.');
    }
    const chars = Number.isFinite(data.chars) ? data.chars : data.text.length;

    els.input.value = data.text;
    els.uploadFileName.textContent = file.name;
    els.uploadPrompt.classList.add('is-hidden');
    els.uploadFileInfo.classList.remove('is-hidden');
    const note = data.truncated ? ' (truncated to 32000 chars)' : '';
    els.status.textContent = `Extracted ${chars.toLocaleString()} chars from ${file.name}${note}.`;
  } catch (err) {
    els.status.textContent = err.message;
    resetUpload();
  } finally {
    setBusy(false);
  }
}
async function checkHealth() {
els.healthPill.textContent = 'Checking...';
try {
+13
View File
@@ -218,6 +218,19 @@ $authenticated = dbnToolsIsAuthenticated();
<label><input type="radio" name="redactionRegion" value="global"> Global</label>
</div>
<!-- Upload zone for the Redact tool. It starts hidden (is-hidden); tools.js
     toggles that class so the zone is shown only while the Redact tool is
     active. -->
<div class="upload-zone is-hidden" id="uploadZone" role="region" aria-label="File upload">
<!-- Native file picker, hidden visually by tools.css; it is opened through
     the "browse" label below or by a script-triggered click. -->
<input type="file" id="uploadInput" accept=".pdf,.docx,.txt" aria-label="Choose a file">
<!-- Empty state: drop / browse prompt. -->
<div id="uploadPrompt" class="upload-prompt">
<span class="upload-icon" aria-hidden="true">&#8679;</span>
<p>Drop a <strong>.pdf</strong>, <strong>.docx</strong>, or <strong>.txt</strong>, or <label for="uploadInput" class="upload-browse">browse</label></p>
<p class="upload-hint">Text is extracted and never stored.</p>
</div>
<!-- Loaded state: file name plus a button that resets the zone. The name is
     filled in by tools.js after a successful extraction. -->
<div id="uploadFileInfo" class="upload-file is-hidden">
<span id="uploadFileName" class="upload-filename"></span>
<button type="button" id="uploadClear" class="upload-clear" aria-label="Clear uploaded file">&times;</button>
</div>
</div>
<label class="input-label" for="toolInput" id="inputLabel">Question</label>
<textarea id="toolInput" name="toolInput" rows="10" required></textarea>