From bbe5307c0370c6119bcc9f19133bcbcbe6e7c448 Mon Sep 17 00:00:00 2001 From: davegilligan Date: Wed, 13 May 2026 06:52:14 +0200 Subject: [PATCH] Add document upload to Redact tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit api/extract.php — new endpoint accepting .pdf/.docx/.txt up to 4 MB; pdftotext for PDFs, ZipArchive+DOMXPath for DOCX, mb_convert_encoding for TXT; truncates to 32 000 chars to stay within redact limit. index.php — drop/browse upload zone above the textarea, visible only in Redact mode. tools.js — setupUpload(), handleFileUpload(), resetUpload(); drag-and-drop and file picker both call the extract endpoint then populate the textarea. tools.css — upload zone, drag-over, file-info, clear button styles. Co-Authored-By: Claude Sonnet 4.6 --- api/extract.php | 137 +++++++++++++++++++++++++++++++++++++++++++ assets/css/tools.css | 90 ++++++++++++++++++++++++++++ assets/js/tools.js | 95 ++++++++++++++++++++++++++++++ index.php | 13 ++++ 4 files changed, 335 insertions(+) create mode 100644 api/extract.php diff --git a/api/extract.php b/api/extract.php new file mode 100644 index 0000000..c3e8c38 --- /dev/null +++ b/api/extract.php @@ -0,0 +1,137 @@ + 'The file exceeds the allowed size limit.', + UPLOAD_ERR_NO_TMP_DIR => 'No temporary directory is available.', + UPLOAD_ERR_CANT_WRITE => 'Unable to save the uploaded file.', + default => 'File upload failed.', + }; + dbnToolsError($msg, 422, 'upload_error'); + } + + $originalName = basename((string)($file['name'] ?? '')); + $tmpPath = (string)($file['tmp_name'] ?? ''); + $size = (int)($file['size'] ?? 
0); + + if (!is_uploaded_file($tmpPath)) { + dbnToolsError('Invalid file upload.', 400, 'invalid_upload'); + } + if ($size === 0) { + dbnToolsError('The uploaded file is empty.', 422, 'file_empty'); + } + if ($size > EXTRACT_MAX_BYTES) { + dbnToolsError('File exceeds the 4 MB limit.', 413, 'file_too_large'); + } + + $ext = strtolower(pathinfo($originalName, PATHINFO_EXTENSION)); + if (!in_array($ext, EXTRACT_ALLOWED_EXTS, true)) { + dbnToolsError('Unsupported file type. Upload a .pdf, .docx, or .txt file.', 422, 'unsupported_type'); + } + + $text = match ($ext) { + 'txt' => extractTxt($tmpPath), + 'pdf' => extractPdf($tmpPath), + 'docx' => extractDocx($tmpPath), + }; + + $text = trim($text); + if ($text === '') { + dbnToolsError('No text could be extracted from this file.', 422, 'no_text'); + } + + $truncated = false; + if (mb_strlen($text, 'UTF-8') > EXTRACT_TEXT_LIMIT) { + $text = mb_substr($text, 0, EXTRACT_TEXT_LIMIT, 'UTF-8'); + $truncated = true; + } + + dbnToolsRespond([ + 'ok' => true, + 'text' => $text, + 'filename' => $originalName, + 'chars' => mb_strlen($text, 'UTF-8'), + 'truncated' => $truncated, + ]); +} catch (DbnToolsHttpException $e) { + dbnToolsError($e->getMessage(), $e->status, $e->errorCode, $e->extra); +} catch (Throwable $e) { + error_log('DBN extract error: ' . $e->getMessage()); + dbnToolsError('Text extraction failed.', 500, 'extract_error'); +}
+
+/**
+ * Read an uploaded plain-text file and return its contents as UTF-8.
+ *
+ * The encoding is decided deterministically instead of being left to
+ * mbstring's detection over a candidate list:
+ *   1. a byte-order mark (UTF-8, UTF-16LE, UTF-16BE) wins and is removed;
+ *   2. otherwise bytes that are already valid UTF-8 are returned untouched;
+ *   3. anything else is decoded as Windows-1252.
+ *
+ * @param string $path Filesystem path of the uploaded file.
+ * @return string The file contents encoded as UTF-8, without a leading BOM.
+ * @throws DbnToolsHttpException When the file cannot be read.
+ */
+function extractTxt(string $path): string
+{
+    $content = file_get_contents($path);
+    if ($content === false) {
+        throw new DbnToolsHttpException('Unable to read the file.', 500, 'read_error');
+    }
+
+    // A BOM identifies the encoding unambiguously. It must also be dropped:
+    // trim() does not treat U+FEFF as whitespace, so it would otherwise
+    // survive as an invisible first character of the extracted text.
+    if (str_starts_with($content, "\xEF\xBB\xBF")) {
+        $content = substr($content, 3);
+    } elseif (str_starts_with($content, "\xFF\xFE")) {
+        return mb_convert_encoding(substr($content, 2), 'UTF-8', 'UTF-16LE');
+    } elseif (str_starts_with($content, "\xFE\xFF")) {
+        return mb_convert_encoding(substr($content, 2), 'UTF-8', 'UTF-16BE');
+    }
+
+    if (mb_check_encoding($content, 'UTF-8')) {
+        return $content;
+    }
+
+    // Windows-1252 rather than ISO-8859-1: both agree on 0xA0-0xFF, but
+    // 0x80-0x9F are printable punctuation (curly quotes, dashes, the euro
+    // sign) in Windows-1252 and only C1 control codes in ISO-8859-1.
+    return mb_convert_encoding($content, 'UTF-8', 'Windows-1252');
+}
+
+function extractPdf(string $path): string +{ + $cmd = 'pdftotext ' . escapeshellarg($path) . ' - 2>/dev/null'; + $output = shell_exec($cmd); + if ($output === null || $output === false || trim($output) === '') { + throw new DbnToolsHttpException( + 'PDF text extraction failed. 
The file may be image-only or encrypted.', + 422, + 'pdf_extract_failed' + ); + } + return $output; +}
+
+/**
+ * Extract the text of a .docx file, one output line per paragraph.
+ *
+ * Reads word/document.xml from the archive, then for every w:p element
+ * concatenates the text of its w:t descendants; paragraphs are joined
+ * with "\n".
+ *
+ * @param string $path Filesystem path of the uploaded .docx archive.
+ * @return string Extracted text; empty when the document has no text runs.
+ * @throws DbnToolsHttpException When the archive cannot be opened, has no
+ *                               usable word/document.xml, that part is too
+ *                               large, or it is not well-formed XML.
+ */
+function extractDocx(string $path): string
+{
+    $part = 'word/document.xml';
+    // The upload is untrusted and XML deflates extremely well, so a small
+    // archive can declare a huge part. Refuse it before inflating it.
+    $maxXmlBytes = 20 * 1024 * 1024;
+
+    $zip = new ZipArchive();
+    if ($zip->open($path) !== true) {
+        throw new DbnToolsHttpException('Unable to open the .docx file.', 422, 'docx_open_failed');
+    }
+
+    try {
+        $stat = $zip->statName($part);
+        if ($stat !== false && $stat['size'] > $maxXmlBytes) {
+            throw new DbnToolsHttpException('The .docx document is too large to process.', 413, 'docx_too_large');
+        }
+        $xml = $zip->getFromName($part);
+    } finally {
+        // Release the archive handle on every path, including the throw above.
+        $zip->close();
+    }
+
+    // An empty part is rejected here because DOMDocument::loadXML('') throws
+    // ValueError, which the request handler would report as a 500.
+    if ($xml === false || $xml === '') {
+        throw new DbnToolsHttpException('No document content found in this .docx file.', 422, 'docx_no_content');
+    }
+
+    $doc = new DOMDocument();
+    $previous = libxml_use_internal_errors(true);
+    try {
+        // LIBXML_NONET: never fetch anything over the network while parsing.
+        $loaded = $doc->loadXML($xml, LIBXML_NONET);
+    } finally {
+        libxml_clear_errors();
+        // Hand the process-wide libxml error mode back the way it was found.
+        libxml_use_internal_errors($previous);
+    }
+    if ($loaded === false) {
+        throw new DbnToolsHttpException('The .docx document content could not be parsed.', 422, 'docx_parse_failed');
+    }
+
+    $xpath = new DOMXPath($doc);
+    $xpath->registerNamespace('w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main');
+
+    $paragraphs = [];
+    foreach ($xpath->query('//w:p') as $para) {
+        $runs = [];
+        foreach ($xpath->query('.//w:t', $para) as $t) {
+            $runs[] = $t->textContent;
+        }
+        $paragraphs[] = implode('', $runs);
+    }
+
+    return implode("\n", $paragraphs);
+}
diff --git a/assets/css/tools.css b/assets/css/tools.css index 6c364bc..24c5223 100644 --- a/assets/css/tools.css +++ b/assets/css/tools.css @@ -873,3 +873,93 @@ p { overflow-x: auto; color: #374151; } + +/* ─── Upload zone (Redact tool) ──────────────────────────────────────────── */ + +.upload-zone { + border: 2px dashed var(--line); + border-radius: 8px; + padding: 18px 14px; + text-align: center; + cursor: pointer; + transition: border-color 0.15s, background 0.15s; + position: relative; +} + +.upload-zone:hover { + border-color: var(--teal); + background: #f7fdfb; +} + +.upload-zone.is-drag-over { + border-color: var(--teal); + background: var(--soft-teal); +} + +#uploadInput { + position: absolute; + width: 0; + height: 0; + opacity: 0; + pointer-events: none; +} + +.upload-icon { + display: block; + font-size: 1.8rem; + line-height: 1; + color: var(--teal); + opacity: 0.55; + margin-bottom: 6px; +} + +.upload-prompt p { + margin: 4px 0 0; + 
color: var(--muted); + font-size: 0.88rem; +} + +.upload-browse { + color: var(--teal); + font-weight: 700; + cursor: pointer; + text-decoration: underline; + text-underline-offset: 2px; +} + +.upload-hint { + font-size: 0.76rem !important; + opacity: 0.7; +} + +.upload-file { + display: flex; + align-items: center; + justify-content: center; + gap: 10px; + min-height: 48px; +} + +.upload-filename { + font-size: 0.88rem; + font-weight: 600; + color: var(--ink); + word-break: break-all; +} + +.upload-clear { + flex-shrink: 0; + background: transparent; + color: var(--muted); + font-size: 1.25rem; + line-height: 1; + padding: 2px 7px; + border-radius: 4px; + border: 1px solid transparent; +} + +.upload-clear:hover { + background: var(--soft-coral); + color: var(--coral); + border-color: #f5c6aa; +} diff --git a/assets/js/tools.js b/assets/js/tools.js index b8aafed..2be7308 100644 --- a/assets/js/tools.js +++ b/assets/js/tools.js @@ -80,6 +80,12 @@ document.addEventListener('DOMContentLoaded', () => { traceList: document.querySelector('#traceList'), healthButton: document.querySelector('#healthButton'), healthPill: document.querySelector('#healthPill'), + uploadZone: document.querySelector('#uploadZone'), + uploadInput: document.querySelector('#uploadInput'), + uploadPrompt: document.querySelector('#uploadPrompt'), + uploadFileInfo: document.querySelector('#uploadFileInfo'), + uploadFileName: document.querySelector('#uploadFileName'), + uploadClear: document.querySelector('#uploadClear'), }); els.tabs.forEach((button) => { @@ -88,6 +94,7 @@ document.addEventListener('DOMContentLoaded', () => { els.form.addEventListener('submit', runTool); els.passcodeForm.addEventListener('submit', submitPasscode); els.healthButton.addEventListener('click', checkHealth); + setupUpload(); setTool(state.activeTool); if (state.authenticated) { @@ -114,6 +121,8 @@ function setTool(toolName) { els.input.placeholder = tool.placeholder; els.languageControl.classList.toggle('is-hidden', 
!tool.usesLanguage); els.redactionControl.classList.toggle('is-hidden', toolName !== 'redact'); + els.uploadZone.classList.toggle('is-hidden', toolName !== 'redact'); + resetUpload(); els.status.textContent = ''; renderTrace([]); } @@ -186,6 +195,92 @@ async function runTool(event) { } } +function resetUpload() { + if (!els.uploadInput) return; + els.uploadInput.value = ''; + els.uploadPrompt.classList.remove('is-hidden'); + els.uploadFileInfo.classList.add('is-hidden'); + els.uploadFileName.textContent = ''; + els.uploadZone.classList.remove('is-drag-over'); +} + +function setupUpload() { + els.uploadZone.addEventListener('dragover', (e) => { + e.preventDefault(); + els.uploadZone.classList.add('is-drag-over'); + }); + + els.uploadZone.addEventListener('dragleave', (e) => { + if (!els.uploadZone.contains(e.relatedTarget)) { + els.uploadZone.classList.remove('is-drag-over'); + } + }); + + els.uploadZone.addEventListener('drop', (e) => { + e.preventDefault(); + els.uploadZone.classList.remove('is-drag-over'); + const file = e.dataTransfer?.files?.[0]; + if (file) handleFileUpload(file); + }); + + els.uploadZone.addEventListener('click', (e) => { + if (e.target === els.uploadClear || els.uploadClear?.contains(e.target)) return; + if (e.target.tagName === 'LABEL') return; + els.uploadInput.click(); + }); + + els.uploadInput.addEventListener('change', () => { + const file = els.uploadInput.files?.[0]; + if (file) handleFileUpload(file); + }); + + els.uploadClear.addEventListener('click', () => { + resetUpload(); + els.status.textContent = ''; + }); +} + +async function handleFileUpload(file) { + const allowed = ['pdf', 'docx', 'txt']; + const ext = file.name.split('.').pop().toLowerCase(); + if (!allowed.includes(ext)) { + els.status.textContent = 'Unsupported file type. 
Use .pdf, .docx, or .txt.'; + return; + } + + els.status.textContent = `Extracting ${file.name}…`; + setBusy(true); + + try { + const formData = new FormData(); + formData.append('file', file); + + const resp = await fetch('api/extract.php', { + method: 'POST', + credentials: 'same-origin', + body: formData, + }); + const data = await resp.json().catch(() => ({})); + + if (!resp.ok || !data.ok) { + throw new Error(data.error?.message || `Extraction failed (HTTP ${resp.status}).`); + } + + els.input.value = data.text; + els.uploadFileName.textContent = file.name; + els.uploadPrompt.classList.add('is-hidden'); + els.uploadFileInfo.classList.remove('is-hidden'); + + const note = data.truncated ? ' (truncated to 32 000 chars)' : ''; + els.status.textContent = `Extracted ${data.chars.toLocaleString()} chars from ${file.name}${note}.`; + } catch (err) { + els.status.textContent = err.message; + resetUpload(); + } finally { + setBusy(false); + } +} + async function checkHealth() { els.healthPill.textContent = 'Checking...'; try { diff --git a/index.php b/index.php index ddb731b..e28cf25 100644 --- a/index.php +++ b/index.php @@ -218,6 +218,19 @@ $authenticated = dbnToolsIsAuthenticated(); + +