Add document upload to Redact tool
api/extract.php — new endpoint accepting .pdf/.docx/.txt up to 4 MB; pdftotext for PDFs, ZipArchive+DOMXPath for DOCX, mb_convert_encoding for TXT; truncates to 32 000 chars to stay within redact limit. index.php — drop/browse upload zone above the textarea, visible only in Redact mode. tools.js — setupUpload(), handleFileUpload(), resetUpload(); drag-and-drop and file picker both call the extract endpoint then populate the textarea. tools.css — upload zone, drag-over, file-info, clear button styles. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+137
@@ -0,0 +1,137 @@
|
|||||||
|
<?php
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
require_once __DIR__ . '/../includes/bootstrap.php';
|
||||||
|
|
||||||
|
// Gate the endpoint: POST requests only, and only after the auth check passes.
dbnToolsRequireMethod('POST');
dbnToolsRequireAuth();

// Largest accepted upload, in bytes (4 MB).
const EXTRACT_MAX_BYTES = 4 * 1024 * 1024;
// Maximum number of characters of extracted text returned to the client.
const EXTRACT_TEXT_LIMIT = 32000;
// Lowercase file extensions this endpoint knows how to extract text from.
const EXTRACT_ALLOWED_EXTS = ['txt', 'pdf', 'docx'];

// NOTE(review): the guard clauses below rely on dbnToolsError() ending the
// request (or throwing) — confirm against includes/bootstrap.php.
try {
    // 1. The multipart field "file" must be present and non-empty.
    $upload = $_FILES['file'] ?? null;
    if (!is_array($upload) || $upload === []) {
        dbnToolsError('No file was uploaded.', 422, 'missing_file');
    }

    // 2. Translate PHP's upload error codes into user-facing messages.
    $uploadStatus = (int)($upload['error'] ?? UPLOAD_ERR_NO_FILE);
    if ($uploadStatus !== UPLOAD_ERR_OK) {
        $knownFailures = [
            UPLOAD_ERR_INI_SIZE   => 'The file exceeds the allowed size limit.',
            UPLOAD_ERR_FORM_SIZE  => 'The file exceeds the allowed size limit.',
            UPLOAD_ERR_NO_TMP_DIR => 'No temporary directory is available.',
            UPLOAD_ERR_CANT_WRITE => 'Unable to save the uploaded file.',
        ];
        dbnToolsError($knownFailures[$uploadStatus] ?? 'File upload failed.', 422, 'upload_error');
    }

    // basename() drops any directory components a client may have sent.
    $originalName = basename((string)($upload['name'] ?? ''));
    $tmpPath      = (string)($upload['tmp_name'] ?? '');
    $byteCount    = (int)($upload['size'] ?? 0);

    // 3. Only operate on files PHP itself received via HTTP upload.
    if (!is_uploaded_file($tmpPath)) {
        dbnToolsError('Invalid file upload.', 400, 'invalid_upload');
    }

    // 4. Size bounds.
    if ($byteCount === 0) {
        dbnToolsError('The uploaded file is empty.', 422, 'file_empty');
    }
    if ($byteCount > EXTRACT_MAX_BYTES) {
        dbnToolsError('File exceeds the 4 MB limit.', 413, 'file_too_large');
    }

    // 5. The extractor is chosen purely from the (lowercased) file extension.
    $extension = strtolower(pathinfo($originalName, PATHINFO_EXTENSION));
    if (!in_array($extension, EXTRACT_ALLOWED_EXTS, true)) {
        dbnToolsError('Unsupported file type. Upload a .pdf, .docx, or .txt file.', 422, 'unsupported_type');
    }

    $extracted = match ($extension) {
        'txt'  => extractTxt($tmpPath),
        'pdf'  => extractPdf($tmpPath),
        'docx' => extractDocx($tmpPath),
    };

    $extracted = trim($extracted);
    if ($extracted === '') {
        dbnToolsError('No text could be extracted from this file.', 422, 'no_text');
    }

    // 6. Cap the payload at EXTRACT_TEXT_LIMIT characters (not bytes).
    $wasTruncated = mb_strlen($extracted, 'UTF-8') > EXTRACT_TEXT_LIMIT;
    if ($wasTruncated) {
        $extracted = mb_substr($extracted, 0, EXTRACT_TEXT_LIMIT, 'UTF-8');
    }

    dbnToolsRespond([
        'ok'        => true,
        'text'      => $extracted,
        'filename'  => $originalName,
        'chars'     => mb_strlen($extracted, 'UTF-8'),
        'truncated' => $wasTruncated,
    ]);
} catch (DbnToolsHttpException $httpError) {
    // Extractor failures carry their own status and error code; relay them as-is.
    dbnToolsError($httpError->getMessage(), $httpError->status, $httpError->errorCode, $httpError->extra);
} catch (Throwable $unexpected) {
    // Anything else is logged server-side and reported generically.
    error_log('DBN extract error: ' . $unexpected->getMessage());
    dbnToolsError('Text extraction failed.', 500, 'extract_error');
}
|
||||||
|
|
||||||
|
/**
 * Reads a plain-text upload and returns its contents as UTF-8.
 *
 * Decoding is deterministic rather than guessed from a candidate list:
 *  - a UTF-16 byte-order mark selects UTF-16LE / UTF-16BE;
 *  - a UTF-8 byte-order mark is stripped;
 *  - bytes that are already valid UTF-8 are returned unchanged;
 *  - everything else is decoded as Windows-1252, which covers the printable
 *    range of ISO-8859-1 and additionally maps 0x80-0x9F to typographic
 *    punctuation (curly quotes, dashes, the euro sign) instead of C1 controls.
 *
 * @param string $path Filesystem path of the file to read.
 *
 * @return string The file contents encoded as UTF-8, without a BOM.
 *
 * @throws DbnToolsHttpException With status 500 when the file cannot be read.
 */
function extractTxt(string $path): string
{
    $content = file_get_contents($path);
    if ($content === false) {
        throw new DbnToolsHttpException('Unable to read the file.', 500, 'read_error');
    }

    // UTF-16 with a BOM (e.g. files saved as "Unicode" by Windows editors).
    if (str_starts_with($content, "\xFF\xFE")) {
        return mb_convert_encoding(substr($content, 2), 'UTF-8', 'UTF-16LE');
    }
    if (str_starts_with($content, "\xFE\xFF")) {
        return mb_convert_encoding(substr($content, 2), 'UTF-8', 'UTF-16BE');
    }

    // Drop a UTF-8 BOM so it does not end up as an invisible first character.
    if (str_starts_with($content, "\xEF\xBB\xBF")) {
        $content = substr($content, 3);
    }

    if (mb_check_encoding($content, 'UTF-8')) {
        return $content;
    }

    // Not valid UTF-8: treat as a legacy single-byte Western encoding.
    return mb_convert_encoding($content, 'UTF-8', 'Windows-1252');
}
|
||||||
|
|
||||||
|
/**
 * Extracts the text layer of a PDF by running the `pdftotext` command-line tool.
 *
 * The output encoding is pinned to UTF-8 so the result does not depend on
 * which pdftotext build is installed. pdftotext separates pages with form
 * feed characters; these are converted to newlines, both so that they do not
 * leak into the result and so that a PDF whose output consists only of page
 * breaks (no text layer) is correctly reported as a failure — PHP's default
 * trim() does not treat "\f" as whitespace.
 *
 * @param string $path Filesystem path of the PDF to read.
 *
 * @return string Extracted text with page breaks replaced by newlines.
 *
 * @throws DbnToolsHttpException With status 422 when the command fails or
 *                               yields no text.
 */
function extractPdf(string $path): string
{
    // "-" sends the text to stdout; stderr is discarded.
    $cmd = 'pdftotext -enc UTF-8 ' . escapeshellarg($path) . ' - 2>/dev/null';
    $output = shell_exec($cmd);

    // shell_exec() yields null/false on failure or when nothing was printed.
    $text = is_string($output) ? str_replace("\f", "\n", $output) : '';

    if (trim($text) === '') {
        throw new DbnToolsHttpException(
            'PDF text extraction failed. The file may be image-only or encrypted.',
            422,
            'pdf_extract_failed'
        );
    }

    return $text;
}
|
||||||
|
|
||||||
|
/**
 * Extracts the body text of a .docx file.
 *
 * Opens the file as a ZIP archive, parses `word/document.xml`, and returns
 * one line per `w:p` paragraph. Within a paragraph, `w:t` text is
 * concatenated in document order; tab (`w:tab`) and line-break (`w:br`,
 * `w:cr`) elements that sit directly inside a run are emitted as "\t" and
 * "\n" so adjacent words are not glued together. Tab-stop definitions in
 * paragraph properties are deliberately not matched.
 *
 * @param string $path Filesystem path of the .docx file.
 *
 * @return string Paragraph texts joined with "\n".
 *
 * @throws DbnToolsHttpException With status 422 when the archive cannot be
 *                               opened, has no usable document part, or the
 *                               document part is not well-formed XML.
 */
function extractDocx(string $path): string
{
    $zip = new ZipArchive();
    if ($zip->open($path) !== true) {
        throw new DbnToolsHttpException('Unable to open the .docx file.', 422, 'docx_open_failed');
    }

    $xml = $zip->getFromName('word/document.xml');
    $zip->close();

    // An empty part is treated like a missing one; DOMDocument::loadXML()
    // would otherwise raise a ValueError on an empty string.
    if ($xml === false || $xml === '') {
        throw new DbnToolsHttpException('No document content found in this .docx file.', 422, 'docx_no_content');
    }

    // Collect libxml errors silently while parsing, then restore whatever
    // error mode was active before so this call leaves no global side effect.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $doc = new DOMDocument();
        // LIBXML_NONET: never fetch network resources for untrusted XML.
        $loaded = $doc->loadXML($xml, LIBXML_NONET);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    if ($loaded !== true) {
        throw new DbnToolsHttpException('The .docx document content could not be parsed.', 422, 'docx_invalid_xml');
    }

    $xpath = new DOMXPath($doc);
    $xpath->registerNamespace('w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main');

    $paragraphs = [];
    foreach ($xpath->query('//w:p') as $para) {
        $pieces = [];
        // The union is returned in document order, so text, tabs and breaks
        // stay interleaved exactly as they appear in the paragraph.
        foreach ($xpath->query('.//w:t | .//w:r/w:tab | .//w:r/w:br | .//w:r/w:cr', $para) as $node) {
            $pieces[] = match ($node->localName) {
                'tab'      => "\t",
                'br', 'cr' => "\n",
                default    => $node->textContent,
            };
        }
        $paragraphs[] = implode('', $pieces);
    }

    return implode("\n", $paragraphs);
}
|
||||||
@@ -873,3 +873,93 @@ p {
|
|||||||
overflow-x: auto;
|
overflow-x: auto;
|
||||||
color: #374151;
|
color: #374151;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ─── Upload zone (Redact tool) ──────────────────────────────────────────── */

/* Dashed drop target; the whole box is clickable. */
.upload-zone {
  position: relative;
  padding: 18px 14px;
  border: 2px dashed var(--line);
  border-radius: 8px;
  text-align: center;
  cursor: pointer;
  transition: border-color 0.15s, background 0.15s;
}

.upload-zone:hover {
  background: #f7fdfb;
  border-color: var(--teal);
}

/* Applied by the script while a file is being dragged over the zone. */
.upload-zone.is-drag-over {
  background: var(--soft-teal);
  border-color: var(--teal);
}

/* The native file input is kept in the DOM but takes no space and no pointer events. */
#uploadInput {
  position: absolute;
  width: 0;
  height: 0;
  opacity: 0;
  pointer-events: none;
}

.upload-icon {
  display: block;
  margin-bottom: 6px;
  font-size: 1.8rem;
  line-height: 1;
  color: var(--teal);
  opacity: 0.55;
}

.upload-prompt p {
  margin: 4px 0 0;
  font-size: 0.88rem;
  color: var(--muted);
}

/* The "browse" label, styled to read as a link. */
.upload-browse {
  font-weight: 700;
  color: var(--teal);
  cursor: pointer;
  text-decoration: underline;
  text-underline-offset: 2px;
}

/* !important lets this win over the more specific `.upload-prompt p` rule above. */
.upload-hint {
  font-size: 0.76rem !important;
  opacity: 0.7;
}

/* Row shown once a file has been extracted: file name plus clear button. */
.upload-file {
  display: flex;
  justify-content: center;
  align-items: center;
  gap: 10px;
  min-height: 48px;
}

.upload-filename {
  font-size: 0.88rem;
  font-weight: 600;
  color: var(--ink);
  word-break: break-all;
}

.upload-clear {
  flex-shrink: 0;
  padding: 2px 7px;
  border: 1px solid transparent;
  border-radius: 4px;
  background: transparent;
  font-size: 1.25rem;
  line-height: 1;
  color: var(--muted);
}

.upload-clear:hover {
  border-color: #f5c6aa;
  background: var(--soft-coral);
  color: var(--coral);
}
|
||||||
|
|||||||
@@ -80,6 +80,12 @@ document.addEventListener('DOMContentLoaded', () => {
|
|||||||
traceList: document.querySelector('#traceList'),
|
traceList: document.querySelector('#traceList'),
|
||||||
healthButton: document.querySelector('#healthButton'),
|
healthButton: document.querySelector('#healthButton'),
|
||||||
healthPill: document.querySelector('#healthPill'),
|
healthPill: document.querySelector('#healthPill'),
|
||||||
|
uploadZone: document.querySelector('#uploadZone'),
|
||||||
|
uploadInput: document.querySelector('#uploadInput'),
|
||||||
|
uploadPrompt: document.querySelector('#uploadPrompt'),
|
||||||
|
uploadFileInfo: document.querySelector('#uploadFileInfo'),
|
||||||
|
uploadFileName: document.querySelector('#uploadFileName'),
|
||||||
|
uploadClear: document.querySelector('#uploadClear'),
|
||||||
});
|
});
|
||||||
|
|
||||||
els.tabs.forEach((button) => {
|
els.tabs.forEach((button) => {
|
||||||
@@ -88,6 +94,7 @@ document.addEventListener('DOMContentLoaded', () => {
|
|||||||
els.form.addEventListener('submit', runTool);
|
els.form.addEventListener('submit', runTool);
|
||||||
els.passcodeForm.addEventListener('submit', submitPasscode);
|
els.passcodeForm.addEventListener('submit', submitPasscode);
|
||||||
els.healthButton.addEventListener('click', checkHealth);
|
els.healthButton.addEventListener('click', checkHealth);
|
||||||
|
setupUpload();
|
||||||
setTool(state.activeTool);
|
setTool(state.activeTool);
|
||||||
|
|
||||||
if (state.authenticated) {
|
if (state.authenticated) {
|
||||||
@@ -114,6 +121,8 @@ function setTool(toolName) {
|
|||||||
els.input.placeholder = tool.placeholder;
|
els.input.placeholder = tool.placeholder;
|
||||||
els.languageControl.classList.toggle('is-hidden', !tool.usesLanguage);
|
els.languageControl.classList.toggle('is-hidden', !tool.usesLanguage);
|
||||||
els.redactionControl.classList.toggle('is-hidden', toolName !== 'redact');
|
els.redactionControl.classList.toggle('is-hidden', toolName !== 'redact');
|
||||||
|
els.uploadZone.classList.toggle('is-hidden', toolName !== 'redact');
|
||||||
|
resetUpload();
|
||||||
els.status.textContent = '';
|
els.status.textContent = '';
|
||||||
renderTrace([]);
|
renderTrace([]);
|
||||||
}
|
}
|
||||||
@@ -186,6 +195,92 @@ async function runTool(event) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Puts the upload zone back into its initial "no file selected" state:
 * clears the file input, shows the prompt, hides and empties the file-info
 * row, and removes any drag-over highlight. Does nothing when the file
 * input element is not present on the page.
 */
function resetUpload() {
  const {
    uploadInput,
    uploadPrompt,
    uploadFileInfo,
    uploadFileName,
    uploadZone,
  } = els;

  if (!uploadInput) {
    return;
  }

  uploadInput.value = '';
  uploadPrompt.classList.remove('is-hidden');
  uploadFileInfo.classList.add('is-hidden');
  uploadFileName.textContent = '';
  uploadZone.classList.remove('is-drag-over');
}
|
||||||
|
|
||||||
|
/**
 * Wires up the upload zone: drag-and-drop, click-to-browse, the native file
 * input's change event, and the clear button. Selected or dropped files are
 * passed to handleFileUpload().
 *
 * Does nothing when the upload zone or its file input is missing from the
 * page, mirroring the guard in resetUpload().
 */
function setupUpload() {
  const { uploadZone, uploadInput, uploadClear } = els;
  if (!uploadZone || !uploadInput) return;

  // preventDefault() on dragover is what marks the zone as a valid drop target.
  uploadZone.addEventListener('dragover', (e) => {
    e.preventDefault();
    uploadZone.classList.add('is-drag-over');
  });

  // dragleave also fires when moving between child elements; only drop the
  // highlight once the pointer has actually left the zone.
  uploadZone.addEventListener('dragleave', (e) => {
    if (!uploadZone.contains(e.relatedTarget)) {
      uploadZone.classList.remove('is-drag-over');
    }
  });

  uploadZone.addEventListener('drop', (e) => {
    e.preventDefault();
    uploadZone.classList.remove('is-drag-over');
    const file = e.dataTransfer?.files?.[0];
    if (file) handleFileUpload(file);
  });

  uploadZone.addEventListener('click', (e) => {
    // Clicks dispatched on the input itself (forwarded by the <label for>,
    // or our own programmatic click bubbling back up) must not trigger a
    // second click(), or the file picker can be requested twice.
    if (e.target === uploadInput) return;
    // The clear button has its own handler.
    if (uploadClear?.contains(e.target)) return;
    // The "browse" label already opens the picker natively.
    if (e.target.closest?.('label')) return;
    uploadInput.click();
  });

  uploadInput.addEventListener('change', () => {
    const file = uploadInput.files?.[0];
    if (file) handleFileUpload(file);
  });

  uploadClear?.addEventListener('click', () => {
    resetUpload();
    els.status.textContent = '';
  });
}
|
||||||
|
|
||||||
|
/**
 * Sends a file to the extract endpoint and, on success, places the extracted
 * text into the tool's textarea and shows the file name in the upload zone.
 *
 * The file is pre-checked in the browser (extension, empty file, 4 MB cap —
 * the same limits api/extract.php enforces) so obviously invalid files are
 * rejected without an upload. Progress and errors are reported through the
 * status element; on a failed extraction the upload zone is reset.
 *
 * @param {File} file The file chosen via the picker or dropped onto the zone.
 * @returns {Promise<void>}
 */
async function handleFileUpload(file) {
  const allowed = ['pdf', 'docx', 'txt'];
  const maxBytes = 4 * 1024 * 1024; // keep in sync with EXTRACT_MAX_BYTES on the server

  // Clearing the input lets the user pick the same file again later; without
  // it the browser sees no change and fires no `change` event.
  const clearPicker = () => {
    if (els.uploadInput) els.uploadInput.value = '';
  };

  // Take the extension only when the name actually contains a dot, so a file
  // literally named "pdf" is not mistaken for a PDF.
  const dot = file.name.lastIndexOf('.');
  const ext = dot === -1 ? '' : file.name.slice(dot + 1).toLowerCase();

  let rejection = '';
  if (!allowed.includes(ext)) {
    rejection = 'Unsupported file type. Use .pdf, .docx, or .txt.';
  } else if (file.size === 0) {
    rejection = 'The uploaded file is empty.';
  } else if (file.size > maxBytes) {
    rejection = 'File exceeds the 4 MB limit.';
  }
  if (rejection) {
    clearPicker();
    els.status.textContent = rejection;
    return;
  }

  els.status.textContent = `Extracting ${file.name}…`;
  setBusy(true);

  try {
    const formData = new FormData();
    formData.append('file', file);

    const resp = await fetch('api/extract.php', {
      method: 'POST',
      credentials: 'same-origin',
      body: formData,
    });
    // A non-JSON body (e.g. a proxy error page) is treated as an empty payload.
    const data = await resp.json().catch(() => ({}));

    if (!resp.ok || !data.ok || typeof data.text !== 'string') {
      throw new Error(data.error?.message || `Extraction failed (HTTP ${resp.status}).`);
    }

    els.input.value = data.text;
    els.uploadFileName.textContent = file.name;
    els.uploadPrompt.classList.add('is-hidden');
    els.uploadFileInfo.classList.remove('is-hidden');

    // Fall back to the text length if the server omitted the count.
    const chars = typeof data.chars === 'number' ? data.chars : data.text.length;
    const note = data.truncated ? ' (truncated to 32 000 chars)' : '';
    els.status.textContent = `Extracted ${chars.toLocaleString()} chars from ${file.name}${note}.`;
  } catch (err) {
    els.status.textContent = err.message;
    resetUpload();
  } finally {
    clearPicker();
    setBusy(false);
  }
}
|
||||||
|
|
||||||
async function checkHealth() {
|
async function checkHealth() {
|
||||||
els.healthPill.textContent = 'Checking...';
|
els.healthPill.textContent = 'Checking...';
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -218,6 +218,19 @@ $authenticated = dbnToolsIsAuthenticated();
|
|||||||
<label><input type="radio" name="redactionRegion" value="global"> Global</label>
|
<label><input type="radio" name="redactionRegion" value="global"> Global</label>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="upload-zone is-hidden" id="uploadZone" role="region" aria-label="File upload">
|
||||||
|
<input type="file" id="uploadInput" accept=".pdf,.docx,.txt" aria-label="Choose a file">
|
||||||
|
<div id="uploadPrompt" class="upload-prompt">
|
||||||
|
<span class="upload-icon" aria-hidden="true">⇧</span>
|
||||||
|
<p>Drop a <strong>.pdf</strong>, <strong>.docx</strong>, or <strong>.txt</strong>, or <label for="uploadInput" class="upload-browse">browse</label></p>
|
||||||
|
<p class="upload-hint">Text is extracted and never stored.</p>
|
||||||
|
</div>
|
||||||
|
<div id="uploadFileInfo" class="upload-file is-hidden">
|
||||||
|
<span id="uploadFileName" class="upload-filename"></span>
|
||||||
|
<button type="button" id="uploadClear" class="upload-clear" aria-label="Clear uploaded file">×</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<label class="input-label" for="toolInput" id="inputLabel">Question</label>
|
<label class="input-label" for="toolInput" id="inputLabel">Question</label>
|
||||||
<textarea id="toolInput" name="toolInput" rows="10" required></textarea>
|
<textarea id="toolInput" name="toolInput" rows="10" required></textarea>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user