feat(transcribe): GPT cleanup pass + advanced options i18n

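Adds an optional post-transcription cleanup pass via GPT-4o / GPT-4o-mini (selected with the `post_model` form field) that fixes mishearing errors, punctuation, and domain terms; the response reports the model that was used in `cleaned_by`. Speaker role labelling now accepts a deployment parameter (default: gpt-4o-mini). Adds i18n strings for the advanced options panel (task, VAD filter, Whisper model, AI cleanup) in all four languages (English, Norwegian, Ukrainian, Polish). Updates BvjAnalyzerAgent (larger document excerpts, higher party and timeline limits, stricter corpus-only citation rules in the prompts) and DeepResearchAgent (passes `key_signals` to query expansion and synthesis, lowers the default temperature to 0.10).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>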
2026-05-18 07:23:01 +02:00
parent e32ee60e78
commit c4362738c1
5 changed files with 345 additions and 112 deletions
@@ -27,6 +27,11 @@ $task        = ($_POST['task'] ?? 'transcribe') === 'translate' ? 'translate' :
 $vadFilter   = !empty($_POST['vad_filter']) && $_POST['vad_filter'] !== '0';
 $initPrompt  = substr(trim((string)($_POST['initial_prompt'] ?? '')), 0, 500);

+$allowedPostModels = ['', 'gpt-4o-mini', 'gpt-4o'];
+$postModel = in_array($_POST['post_model'] ?? '', $allowedPostModels, true)
+    ? (string)($_POST['post_model'] ?? '')
+    : '';
+
 // ── Validate upload ───────────────────────────────────────────────────────────

 if (empty($_FILES['audio']) || $_FILES['audio']['error'] !== UPLOAD_ERR_OK) {
@@ -114,6 +119,17 @@ if ($timeOffset > 0.0 && !empty($result['segments'])) {
    unset($seg);
 }

+// ── Optional GPT cleanup pass ─────────────────────────────────────────────────
+
+$cleanedBy = null;
+if ($postModel !== '' && !empty($result['text'])) {
+    $cleaned = dbnCleanupTranscript($result['text'], $language, $initPrompt, $postModel);
+    if ($cleaned !== null) {
+        $result['text'] = $cleaned;
+        $cleanedBy = $postModel;
+    }
+}
+
 // ── Speaker role labelling (diarize + multiple speakers only) ─────────────────

 $segments    = $result['segments']    ?? [];
@@ -126,7 +142,8 @@ if ($numDetected < 2 && $segments) {

 $speakerRoles = null;
 if ($diarize && $numDetected > 1 && $segments) {
-    $speakerRoles = dbnLabelSpeakerRoles($segments);
+    $labelDeployment = $postModel ?: 'gpt-4o-mini';
+    $speakerRoles = dbnLabelSpeakerRoles($segments, $labelDeployment);
 }

 // ── Friendly engine label ─────────────────────────────────────────────────────
@@ -161,6 +178,7 @@ dbnToolsRespond([
    'model'         => $engineLabel,
    'engine'        => $engineUsed,
    'latency_ms'    => $latencyMs,
+    'cleaned_by'    => $cleanedBy,
 ]);


@@ -313,7 +331,7 @@ function transcribeViaWhisperGpu(array $file, string $language, bool $diarize, i
 }


-function dbnLabelSpeakerRoles(array $segments): array
+function dbnLabelSpeakerRoles(array $segments, string $deployment = 'gpt-4o-mini'): array
 {
    $sample = array_slice(
        array_values(array_filter($segments, fn($s) => isset($s['speaker']))),
@@ -322,16 +340,11 @@ function dbnLabelSpeakerRoles(array $segments): array
    if (!$sample) return [];

    $lines  = array_map(fn($s) => "[{$s['speaker']}] " . trim((string)($s['text'] ?? '')), $sample);
-    $azure  = new DbnAzureOpenAiGateway();
-    $system = 'You are analyzing a legal proceeding transcript. '
-        . 'Based on the first segments, identify the role of each speaker. '
-        . 'Common roles in Norwegian legal proceedings: dommer (judge), advokat (lawyer), '
-        . 'forelder (parent), barn (child), sakkyndig (expert witness), '
-        . 'saksbehandler (caseworker), tolk (interpreter), vitne (witness), '
-        . 'prosessfullmektig (counsel). '
+    $azure  = (new DbnAzureOpenAiGateway())->withDeployment($deployment);
+    $system = 'Label speakers in this Norwegian legal transcript. '
        . 'Return ONLY valid JSON: {"SPEAKER_00":"dommer","SPEAKER_01":"forelder"}. '
-        . 'Use Norwegian role names. Use "ukjent" if role cannot be determined. '
-        . 'Only include speakers present in the input.';
+        . 'Norwegian role names only — dommer, advokat, forelder, barn, sakkyndig, '
+        . 'saksbehandler, tolk, vitne, prosessfullmektig. Use "ukjent" if unclear.';

    try {
        $text    = $azure->chatText([
@@ -345,3 +358,36 @@ function dbnLabelSpeakerRoles(array $segments): array
        return [];
    }
 }
+
+
+function dbnCleanupTranscript(string $text, string $language, string $vocabulary, string $deployment): ?string
+{
+    $langName = match($language) {
+        'no', 'nb', 'nn' => 'Norwegian',
+        'en'             => 'English',
+        'pl'             => 'Polish',
+        'uk'             => 'Ukrainian',
+        'sv'             => 'Swedish',
+        'da'             => 'Danish',
+        'de'             => 'German',
+        'fr'             => 'French',
+        default          => 'Norwegian',
+    };
+    $vocabHint = $vocabulary !== '' ? " Domain terms to preserve correctly: {$vocabulary}." : '';
+    $system = "Fix transcription errors in this {$langName} text.{$vocabHint} "
+        . "Correct mishearing errors, run-on sentences, and punctuation. "
+        . "Preserve all meaning and the original language exactly. "
+        . "Return only the corrected transcript text, no commentary.";
+
+    try {
+        $azure = (new DbnAzureOpenAiGateway())->withDeployment($deployment);
+        $result = $azure->chatText(
+            [['role' => 'system', 'content' => $system],
+             ['role' => 'user',   'content' => $text]],
+            ['temperature' => 0.1, 'max_tokens' => 4096]
+        );
+        return ($result !== '' && $result !== null) ? $result : null;
+    } catch (Throwable) {
+        return null;
+    }
+}
@@ -441,6 +441,20 @@ const TRANSCRIBE_I18N = {
    traceProcessingDetail: () => 'Processing audio. Large files may take 1–3 minutes.',
    traceStillLabel: (clip) => `${clip} — still processing…`,
    traceStillDetail: (e) => { const m = Math.floor(e / 60), s = e % 60; return m > 0 ? `${m}m ${s}s elapsed — working through the audio.` : `${e}s elapsed — processing.`; },
+    advancedOptions: 'Advanced options',
+    task: 'Task',
+    taskTranscribe: 'Transcribe',
+    taskTranslate: 'Translate to English',
+    vadFilter: 'VAD filter',
+    vadFilterLabel: 'Remove silence / noise',
+    vadFilterHint: 'Improves accuracy on recordings with long pauses.',
+    whisperModel: 'Whisper model',
+    whisperModelHint: 'Used when Azure/GCP unavailable. large-v3 is the default.',
+    postModel: 'AI cleanup',
+    postModelNone: 'None',
+    postModelMini: 'GPT-4o Mini',
+    postModelFull: 'GPT-4o',
+    postModelHint: 'Fixes errors, punctuation, and domain terms after transcription.',
  },
  no: {
    transcribeLang: 'Språk i lydfil',
@@ -481,6 +495,20 @@ const TRANSCRIBE_I18N = {
    traceProcessingLabel: (clip) => `${clip} — transkriberer`,
    traceProcessingDetail: () => 'Behandler lyden. Store filer tar 1–3 minutter.',    traceStillLabel: (clip) => `${clip} — behandler fortsatt…`,
    traceStillDetail: (e) => { const m = Math.floor(e / 60), s = e % 60; return m > 0 ? `${m} min ${s}s gått — jobber gjennom lyden.` : `${e}s gått — behandler.`; },
+    advancedOptions: 'Avanserte valg',
+    task: 'Oppgave',
+    taskTranscribe: 'Transkriber',
+    taskTranslate: 'Oversett til engelsk',
+    vadFilter: 'VAD-filter',
+    vadFilterLabel: 'Fjern stillhet / støy',
+    vadFilterHint: 'Forbedrer nøyaktigheten ved opptak med lange pauser.',
+    whisperModel: 'Whisper-modell',
+    whisperModelHint: 'Brukes når Azure/GCP ikke er tilgjengelig. large-v3 er standard.',
+    postModel: 'AI-opprydding',
+    postModelNone: 'Ingen',
+    postModelMini: 'GPT-4o Mini',
+    postModelFull: 'GPT-4o',
+    postModelHint: 'Retter feil, tegnsetting og fagtermer etter transkripsjon.',
  },
  uk: {
    transcribeLang: 'Мова аудіо',
@@ -521,6 +549,20 @@ const TRANSCRIBE_I18N = {
    traceProcessingLabel: (clip) => `${clip} — транскрибування`,
    traceProcessingDetail: () => 'Обробка аудіо. Великі файли займають 1–3 хвилини.',    traceStillLabel: (clip) => `${clip} — ще обробляється…`,
    traceStillDetail: (e) => { const m = Math.floor(e / 60), s = e % 60; return m > 0 ? `Минуло ${m} хв ${s} с — обробка.` : `Минуло ${e} с — обробка.`; },
+    advancedOptions: 'Розширені параметри',
+    task: 'Завдання',
+    taskTranscribe: 'Транскрибувати',
+    taskTranslate: 'Перекласти на англійську',
+    vadFilter: 'VAD-фільтр',
+    vadFilterLabel: 'Видалити тишу / шум',
+    vadFilterHint: 'Покращує точність для записів з довгими паузами.',
+    whisperModel: 'Модель Whisper',
+    whisperModelHint: 'Використовується, якщо Azure/GCP недоступні. large-v3 за замовчуванням.',
+    postModel: 'AI-очищення',
+    postModelNone: 'Без',
+    postModelMini: 'GPT-4o Mini',
+    postModelFull: 'GPT-4o',
+    postModelHint: 'Виправляє помилки, пунктуацію та терміни після транскрипції.',
  },
  pl: {
    transcribeLang: 'Język audio',
@@ -561,6 +603,20 @@ const TRANSCRIBE_I18N = {
    traceProcessingLabel: (clip) => `${clip} — transkrybowanie`,
    traceProcessingDetail: () => 'Przetwarzanie audio. Duże pliki zajmują 1–3 minuty.',    traceStillLabel: (clip) => `${clip} — nadal przetwarza…`,
    traceStillDetail: (e) => { const m = Math.floor(e / 60), s = e % 60; return m > 0 ? `Minęło ${m} min ${s} s — przetwarzanie audio.` : `Minęło ${e} s — przetwarzanie.`; },
+    advancedOptions: 'Opcje zaawansowane',
+    task: 'Zadanie',
+    taskTranscribe: 'Transkrypcja',
+    taskTranslate: 'Przetłumacz na angielski',
+    vadFilter: 'Filtr VAD',
+    vadFilterLabel: 'Usuń ciszę / szum',
+    vadFilterHint: 'Poprawia dokładność nagrań z długimi przerwami.',
+    whisperModel: 'Model Whisper',
+    whisperModelHint: 'Używany gdy Azure/GCP niedostępne. large-v3 jest domyślny.',
+    postModel: 'Korekta AI',
+    postModelNone: 'Brak',
+    postModelMini: 'GPT-4o Mini',
+    postModelFull: 'GPT-4o',
+    postModelHint: 'Poprawia błędy, interpunkcję i terminy po transkrypcji.',
  },
 };

@@ -1515,6 +1571,10 @@ async function runTranscribe() {
      formData.append('time_offset', String(cumulativeOffset));
      if (vadFilter) formData.append('vad_filter', '1');
      if (initPrompt) formData.append('initial_prompt', initPrompt);
+      const whisperModel = document.getElementById('whisperModelSelect')?.value;
+      if (whisperModel) formData.append('model', whisperModel);
+      const postModel = document.querySelector('input[name="post_model"]:checked')?.value;
+      if (postModel) formData.append('post_model', postModel);
      if (diarize) {
        formData.append('diarize', '1');
        if (numSpeakers >= 2) formData.append('num_speakers', String(numSpeakers));
@@ -1650,6 +1710,7 @@ function renderTranscriptResults(data) {
  if (data.language) traceMeta.push({ label: `Language: ${data.language}`, detail: '', status: 'complete' });
  if (data.num_speakers > 1) traceMeta.push({ label: `Speakers detected: ${data.num_speakers}`, detail: Object.entries(speakerRoles).map(([id, r]) => `${id}: ${r}`).join(', ') || '', status: 'complete' });
  if (data.model) traceMeta.push({ label: data.model, detail: '', status: 'complete' });
+  if (data.cleaned_by) traceMeta.push({ label: `Cleaned by ${data.cleaned_by}`, detail: '', status: 'complete' });
  renderTrace(traceMeta.length ? traceMeta : [{ label: 'Transcribed', detail: '', status: 'complete' }]);
 }

@@ -493,7 +493,7 @@ PROMPT;
    private function extractParties(string $docText, string $language): array
    {
        $locale  = dbnToolsLanguageName($language);
-        $excerpt = mb_substr($docText, 0, 12000, 'UTF-8');
+        $excerpt = mb_substr($docText, 0, 20000, 'UTF-8');

        $prompt = <<<PROMPT
 You are analysing a Norwegian child welfare (Barnevernet) document.
@@ -502,15 +502,16 @@ Identify ALL named parties — every person or institution referred to by name o
 Respond in {$locale}. Return a JSON object with a single key "parties" containing an array of objects.
 Each object must have these four fields:
 - "name": full name or institution name (string)
- "role": their role in the case, e.g. Biological mother, Child, Barnevernarbeider, Saksbehandler, Melder, Politi, Lege, Advokat, Foster carer, Rusklinikk
+- "role": their role in the case, e.g. Biological mother, Biological father, Child, Barnevernarbeider, Saksbehandler, Leder, Melder, Politi, Lege, Psykolog, Advokat, Talsperson for barnet, Tilsynsfører, Sakkyndig, Foster carer (fosterforelder), Rusklinikk, Statsforvalter
 - "organization": employer or institution if mentioned, otherwise null
- "relationship_to_child": relationship to the child in the document, e.g. Mother, Father, Caseworker, Melder, or null
+- "relationship_to_child": relationship to the child in the document, e.g. Mother, Father, Sibling, Caseworker, Melder, Supervisor, or null

 Rules:
 - Include every named person and named institution — even peripheral ones.
 - Include Barnevernvakta (bvv) as an institution even if no individual caseworkers are named.
+- If a name appears to be redacted or anonymised (e.g. "mor", "far", "barnet", initials like "A.B."), include them with role inferred from context.
 - Do not invent parties not present in the text.
- Maximum 20 parties.
+- Maximum 25 parties.

 Document text:
 {$excerpt}
@@ -520,14 +521,14 @@ PROMPT;
            $raw = $this->azure->chatText([
                ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
                ['role' => 'user',   'content' => $prompt],
-            ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 1500, 'timeout' => 40]);
+            ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 2000, 'timeout' => 45]);
            $json = $this->azure->decodeJsonObject($raw);
            if (is_array($json) && is_array($json['parties'] ?? null)) {
-                return array_slice($json['parties'], 0, 20);
+                return array_slice($json['parties'], 0, 25);
            }
            // Fallback: model returned an array at root level instead of {parties:[...]}
            if (is_array($json) && isset($json[0]['name'])) {
-                return array_slice($json, 0, 20);
+                return array_slice($json, 0, 25);
            }
            error_log('BVJ extractParties unexpected structure: ' . substr($raw, 0, 300));
        } catch (Throwable $e) {
@@ -541,7 +542,7 @@ PROMPT;
    private function extractTimeline(string $docText, string $language): array
    {
        $locale  = dbnToolsLanguageName($language);
-        $excerpt = mb_substr($docText, 0, 12000, 'UTF-8');
+        $excerpt = mb_substr($docText, 0, 20000, 'UTF-8');

        $prompt = <<<PROMPT
 Build a chronological timeline from this Norwegian child welfare (Barnevernet) document in {$locale}.
@@ -557,14 +558,24 @@ IMPORTANT — Norwegian date and time formats to recognise:
 - Diary/log format: lines beginning with a date or time are always events.
 - Two-digit years: interpret as 20YY (20 → 2020, 21 → 2021).

+Barnevernet-specific events that are ALWAYS high significance:
+- Akuttvedtak (emergency placement) under §4-6 or §4-25
+- Omsorgsovertakelse (care order) under §4-12
+- Police involvement or assistance (politibistand)
+- Formal decision (vedtak) or court order (kjennelse)
+- Deadline breaches: bekymringsmelding not processed within 7 days; investigation not opened within 6 weeks
+- Forhandlingsmøte (negotiation hearing) or Fylkesnemnda hearing
+- Supervised contact visits (samvær) being reduced or denied
+- Placement in foster care or institution (fosterhjem, institusjon)
+
 For each event provide:
 - "date": ISO 8601 date (YYYY-MM-DD) if determinable, otherwise best-effort description
 - "time_of_day": HH:MM if present, otherwise null
 - "actor": person, institution, or party involved
 - "action": concise description (≤ 80 chars) of what happened
- "significance": high (acute measure, removal, police involvement, formal decision) | medium (home visit, phone call, meeting) | low (minor update, note)
+- "significance": high (acute measure, removal, police involvement, formal decision, statutory deadline breach) | medium (home visit, phone call, meeting, assessment) | low (minor update, note)

-Sort chronologically. Maximum 30 events.
+Sort chronologically. Maximum 40 events.

 Document text:
 {$excerpt}
@@ -579,10 +590,10 @@ PROMPT;
            $raw = $this->azure->chatText([
                ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
                ['role' => 'user',   'content' => $prompt],
-            ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 3000, 'timeout' => 45]);
+            ], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 4000, 'timeout' => 55]);
            $json = $this->azure->decodeJsonObject($raw);
            if (is_array($json) && is_array($json['events'] ?? null)) {
-                return array_slice($json['events'], 0, 30);
+                return array_slice($json['events'], 0, 40);
            }
        } catch (Throwable $e) {
            error_log('BVJ extractTimeline failed: ' . $e->getMessage());
@@ -602,50 +613,82 @@ PROMPT;
    ): array {
        $locale    = dbnToolsLanguageName($language);
        $docType   = $docMeta['doc_type'] ?? 'BVJ document';
+        $docDate   = $docMeta['doc_date'] ?? 'unknown date';
+        $authority = $docMeta['issuing_authority'] ?? 'the municipality';
        $roleStr   = $advocateRole !== '' ? $advocateRole : 'the affected party';

-        // Summarise the top events to give the model context
+        // Summarise high-significance events first, then others
+        $highEvents  = array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') === 'high'));
+        $otherEvents = array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') !== 'high'));
+        $topEvents   = array_slice(array_merge($highEvents, $otherEvents), 0, 12);
        $eventSummary = '';
-        $highEvents = array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') === 'high');
-        $topEvents = array_slice(array_merge(array_values($highEvents),
-            array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') !== 'high'))), 0, 8);
        foreach ($topEvents as $ev) {
-            $eventSummary .= sprintf("- %s: %s (%s)\n", $ev['date'] ?? '?', $ev['action'] ?? '', $ev['actor'] ?? '');
+            $sig = ($ev['significance'] ?? 'low') === 'high' ? '[HIGH] ' : '';
+            $eventSummary .= sprintf("- %s %s%s (%s)\n",
+                $ev['date'] ?? '?', $sig, $ev['action'] ?? '', $ev['actor'] ?? '');
        }

        // Summarise parties
        $partyList = '';
-        foreach (array_slice($parties, 0, 8) as $p) {
-            $partyList .= sprintf("- %s (%s)\n", $p['name'] ?? '', $p['role'] ?? '');
+        foreach (array_slice($parties, 0, 10) as $p) {
+            $org = !empty($p['organization']) ? ' at ' . $p['organization'] : '';
+            $partyList .= sprintf("- %s (%s%s)\n", $p['name'] ?? '?', $p['role'] ?? '?', $org);
        }

+        $angleGuidance = match (true) {
+            $count >= 5 => <<<ANGLES
+Cover these five distinct legal angles (one per question):
+1. Statutory rights and obligations under Barnevernloven (e.g. §4-2, §4-6, §4-12) specific to the measures taken
+2. ECHR Article 8 proportionality and procedural safeguards — cite the specific measures and dates from this case
+3. Procedural obligations BVV must fulfil (advance notice, documentation, hearing rights) — anchor to documented events
+4. Bufdir/Statsforvalter guidance on investigation standards and thresholds for intervention
+5. Norwegian appellate court decisions on comparable measures and family circumstances
+ANGLES,
+            $count === 4 => <<<ANGLES
+Cover these four distinct legal angles (one per question):
+1. Statutory rights under Barnevernloven anchored to the specific measures and dates in this case
+2. ECHR Article 8 — proportionality of the specific intervention and any procedural violations
+3. BVV's procedural obligations — documentation, notice, and hearing rights — as evidenced by the timeline
+4. Bufdir guidance and Norwegian court decisions on comparable fact patterns
+ANGLES,
+            default => <<<ANGLES
+Cover three distinct legal angles (one per question):
+1. Statutory rights under Barnevernloven for the specific type of measure documented
+2. ECHR Article 8 proportionality and procedural safeguards
+3. BVV's procedural obligations and whether the documented timeline shows any breach
+ANGLES,
+        };
+
        $prompt = <<<PROMPT
 You are a Norwegian family-law research assistant building a case for: {$roleStr}.

-A {$docType} has been uploaded. Key events:
+Case facts extracted from the uploaded document:
+- Document type: {$docType}
+- Date: {$docDate}
+- Issuing authority: {$authority}
+- Key events (chronological):
 {$eventSummary}
-Key parties:
+- Key parties:
 {$partyList}

-Generate exactly {$count} targeted sub-questions to research the legal corpus for arguments that SUPPORT {$roleStr}'s position. Each question should explore a different angle:
-1. Statutory rights and obligations (Barnevernloven, Barneloven)
-2. ECHR Article 8 and 9 precedents vs Norway
-3. Procedural requirements BVV must follow (notice, documentation, proportionality)
-4. Bufdir guidance on case handling standards
-5. Norwegian court decisions on similar fact patterns
+Generate exactly {$count} sub-questions to search the Norwegian legal corpus for arguments that SUPPORT {$roleStr}'s position.
+
+{$angleGuidance}
+
+CRITICAL: Every question MUST embed specific facts from this case — use the actual authority name, document date, type of measure, and parties where relevant. Generic questions ("What are parental rights?") are useless for retrieval. Specific questions ("What notice requirements must {$authority} meet before issuing an emergency placement under Barnevernloven §4-6?") are highly effective.

 Return JSON only in {$locale}:
 {
  "sub_questions": [
-    {"id":"q1","question":"...","rationale":"how this angle strengthens {$roleStr}'s position (≤ 120 chars)"}
+    {"id":"q1","question":"...","rationale":"why this angle strengthens {$roleStr}'s position (≤ 120 chars)"}
  ]
 }

 Rules:
- Exactly {$count} sub-questions, no more no fewer.
- Every question must be answerable from Norwegian family-law, child-welfare, or ECHR sources.
- Each question must cover a DIFFERENT legal angle.
- Questions must be self-contained without needing the raw document.
+- Exactly {$count} sub-questions.
+- Each question targets a DIFFERENT legal angle.
+- Include specific case details (authority, date, measure type) in each question.
+- Questions must be self-contained and answerable from Norwegian family-law, child-welfare, or ECHR sources.
 - Respond in {$locale}.
 PROMPT;

@@ -734,16 +777,16 @@ PROMPT;

        // Build parties summary (top 12)
        $partiesSummary = '';
-        foreach (array_slice($parties, 0, 8) as $i => $p) {
+        foreach (array_slice($parties, 0, 12) as $i => $p) {
            $org = $p['organization'] ? ' (' . $p['organization'] . ')' : '';
            $rel = $p['relationship_to_child'] ? ' — rel: ' . $p['relationship_to_child'] : '';
            $partiesSummary .= sprintf("%d. %s — %s%s%s\n", $i + 1, $p['name'] ?? '', $p['role'] ?? '', $org, $rel);
        }

-        // Build timeline summary (top 15 most significant events)
+        // Build timeline summary (top 20 most significant events)
        $highEvents = array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') === 'high'));
        $otherEvents = array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') !== 'high'));
-        $topEvents = array_slice(array_merge($highEvents, $otherEvents), 0, 15);
+        $topEvents = array_slice(array_merge($highEvents, $otherEvents), 0, 20);
        $timelineSummary = '';
        foreach ($topEvents as $ev) {
            $time = $ev['time_of_day'] ? ' kl.' . $ev['time_of_day'] : '';
@@ -783,14 +826,17 @@ PROMPT;
            ? "\n== ADDITIONAL CONTEXT FROM ADVOCATE ==\n{$additionalNotes}\n"
            : '';

-        $docExcerpt = mb_substr($docText, 0, 3000, 'UTF-8');
+        $docExcerpt = mb_substr($docText, 0, 8000, 'UTF-8');

        $prompt = <<<PROMPT
-You are Do Better Norge Legal Tools producing a structured Barnevernet case analysis brief.
-You are representing: {$roleStr}
+You are Do Better Norge Legal Tools. Produce a structured Barnevernet case analysis for: {$roleStr}.
+
+HALLUCINATION RULES — READ FIRST:
+- You may ONLY cite statute sections (§), ECHR article numbers, ECHR application numbers, case names, and Bufdir/Statsforvalter circular references that appear verbatim in the numbered corpus sources below.
+- Do NOT cite statute sections, case names, or ECHR applications from your training memory — they may be misremembered or no longer in force.
+- If no source supports a claim, omit the claim rather than invent support.
+- Every factual legal claim in advocacy_brief MUST end with at least one [n] or [DOC] citation. Unsupported claims are a liability for the client.

-Ground every claim in the numbered corpus sources below using [n] markers, OR in the uploaded document using [DOC].
-Do NOT invent statutes, paragraph numbers, case names, ECHR applications, dates, or parties.
 Return valid JSON only. No markdown fences.

 == DOCUMENT METADATA ==
@@ -805,51 +851,74 @@ Child: {$childInfo}
 == TIMELINE (from document) ==
 {$timelineSummary}

-== CORPUS SOURCES ({$sourceCount} numbered) ==
+== CORPUS SOURCES ({$sourceCount} numbered — cite as [n]) ==
 {$sourcesText}
 {$notesSection}
 {$subQText}

-== DOCUMENT EXCERPT (first 3000 chars — use [DOC] to cite) ==
+== DOCUMENT EXCERPT (first 8000 chars — cite as [DOC]) ==
 {$docExcerpt}

-Return JSON in {$locale}:
+== ADVOCACY BRIEF FORMAT ==
+Write the advocacy_brief as a Markdown document with these sections:
+
+## Case Overview
+Summarise what happened: document type, issuing authority, key events from the timeline. Every factual statement must cite [DOC].
+
+## {$roleStr}'s Core Legal Position
+The strongest statutory and ECHR arguments in favour of {$roleStr}. Cite [n] for each legal point. Only cite statutes and cases that appear in the corpus sources above.
+
+## Procedural Compliance Issues
+Where BVV/the authority may have failed their own procedural obligations. Ground each point in a specific documented action from [DOC] and the applicable statute or guidance from [n].
+
+## Client Strengths
+3-6 factual and legal advantages for {$roleStr}, each anchored with [n] or [DOC].
+
+## Counter-Arguments and Responses
+The most likely opposing arguments and how to rebut them. Cite [n] for rebuttal sources.
+
+## Recommended Next Steps
+2-4 concrete legal actions {$roleStr} should take now.
+
+End with one line: "*This brief is AI-assisted and for discussion purposes only — verify all legal references with a qualified Norwegian family-law lawyer.*"
+
+Target length: 600-1000 words.
+
+== JSON OUTPUT ==
 {
-  "advocacy_brief": "Partisan legal brief in Markdown. Structure:\n## Case Overview\n(What happened according to [DOC] — doc type, authority, key events)\n\n## {$roleStr}'s Core Legal Position\n(Strongest statutory and ECHR arguments — cite [n] and [DOC])\n\n## Procedural Compliance Issues\n(Where BVV may have failed their own procedural obligations — cite [DOC][n])\n\n## Client Strengths\n(Factual and legal advantages for {$roleStr} — cite [n][DOC])\n\n## Counter-Arguments and Responses\n(Likely opposing arguments and how to rebut — cite [n])\n\n## Recommended Next Steps\n(Concrete legal actions)\n\nEnd with a one-line disclaimer. Length: 500-1000 words.",
+  "advocacy_brief": "<the Markdown brief following the format above>",

  "procedural_red_flags": [
    {
      "description": "Concise description of the potential procedural violation",
-      "legal_basis": "Statute or ECHR article potentially violated, e.g. Barnevernloven §6-1, ECHR Art.8",
-      "severity": "high",
+      "legal_basis": "Statute or ECHR article from a corpus source — e.g. Barnevernloven §4-2 [3]",
+      "severity": "high|medium|low",
      "source_refs": ["[n]", "[DOC]"],
-      "what_to_check": "Specific document text or action requiring legal verification"
+      "what_to_check": "Exact document text or action to verify with a lawyer"
    }
  ],

-  "client_strengths": ["3-6 items anchored with [n] or [DOC]"],
-  "opposing_weaknesses": ["2-5 vulnerabilities in BVV or opposing party position — omit if unsupported by sources"],
-  "what_we_found": "2-sentence plain-language summary of the most critical finding",
-  "what_remains_uncertain": ["3-5 specific gaps — missing information, unclear authority, conflicting sources"],
-  "next_practical_step": "The single most important concrete legal action for {$roleStr}"
+  "client_strengths": ["3-6 items, each ending with [n] or [DOC]"],
+  "opposing_weaknesses": ["2-5 documented vulnerabilities in BVV or opposing position — OMIT if not supported by at least one [n]"],
+  "what_we_found": "2-sentence plain-language summary of the single most critical finding",
+  "what_remains_uncertain": ["3-5 specific information gaps or legal questions that need clarification"],
+  "next_practical_step": "The single most important concrete legal action for {$roleStr} to take within the next 7 days"
 }

 Rules:
- Every factual claim in advocacy_brief must end with [n] or [DOC].
- procedural_red_flags must be grounded in documented BVV actions — no speculation.
- severity: high = likely violation of a codified right; medium = procedural irregularity; low = best-practice gap.
- If no corpus source supports a claimed weakness, omit it from opposing_weaknesses.
- Cite statute sections and ECHR articles as they appear in the corpus excerpts.
+- severity: high = likely violation of a codified statutory right or ECHR guarantee; medium = procedural irregularity; low = best-practice gap only.
+- procedural_red_flags must be grounded in documented BVV actions visible in [DOC] or the timeline.
+- If fewer than 2 corpus sources support opposing_weaknesses, return an empty array.
 - Respond in {$locale}.
 PROMPT;

-        $sysPrompt = 'You return valid JSON only. No markdown fences.';
+        $sysPrompt = 'You return valid JSON only. No markdown fences. Every legal citation must come from the provided corpus sources, not from training memory.';

        $messages = [
            ['role' => 'system', 'content' => $sysPrompt],
            ['role' => 'user',   'content' => $prompt],
        ];
-        $opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3000, 'timeout' => 200];
+        $opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 4500, 'timeout' => 240];

        $deployLabel = match ($engine) {
            'gpu'        => 'GPU (cuttlefish)',
@@ -91,7 +91,7 @@ final class DbnDeepResearchAgent
        // STEP 2: Query expansion
        $emitRunning('expansion', 'Query expansion', 'Generating sub-questions…');
        $stepStart = microtime(true);
-        $expansion = $this->expandQueries($seedDescription, $interpretation['brief'], $controls['sub_q_count'], $language, $advocateRole);
+        $expansion = $this->expandQueries($seedDescription, $interpretation['brief'], $interpretation['key_signals'], $controls['sub_q_count'], $language, $advocateRole);
        $this->stepTimings['expansion'] = $this->elapsedMs($stepStart);
        $subQuestions = $expansion['questions'];
        $expansionStatus = $expansion['fallback'] ? 'warning' : 'complete';
@@ -323,7 +323,8 @@ final class DbnDeepResearchAgent
            $controls['temperature'],
            $advocateRole,
            $priorContext,
-            $branchNotes
+            $branchNotes,
+            $interpretation['key_signals'] ?? []
        );
        $this->stepTimings['synthesis'] = $this->elapsedMs($stepStart);
        $emitStep(
@@ -406,7 +407,7 @@ final class DbnDeepResearchAgent
            'chunk_limit'          => max(4, min(10, (int)($controls['chunk_limit'] ?? 6))),
            'similarity_threshold' => max(0.2, min(0.6, (float)($controls['similarity_threshold'] ?? 0.30))),
            'reranker_top_k'       => max(8, min(14, (int)($controls['reranker_top_k'] ?? 12))),
-            'temperature'          => max(0.05, min(0.4, (float)($controls['temperature'] ?? 0.15))),
+            'temperature'          => max(0.05, min(0.4, (float)($controls['temperature'] ?? 0.10))),
        ];
    }

@@ -472,7 +473,7 @@ Input:

 In {$locale}, produce JSON with:
 {
-  "brief": "1-3 sentence description of what the user is trying to research (≤ 220 chars)",
+  "brief": "1-3 sentence description of what the user is trying to research (≤ 300 chars)",
  "key_signals": ["short keywords or terms that should drive retrieval"]
 }
 PROMPT;
@@ -483,19 +484,20 @@ PROMPT;
            if ($language === 'no' || $advocateRole !== '') {
                $resp = dbnToolsCallGpuLlm([$sysMsg, $userMsg], [
                    'model' => 'dbn-legal-agent', 'json' => true,
-                    'temperature' => 0.1, 'max_tokens' => 400, 'timeout' => 40,
+                    'temperature' => 0.1, 'max_tokens' => 500, 'timeout' => 40,
                ]);
                $raw = (string)($resp['choices'][0]['message']['content'] ?? '');
            } else {
                $raw = $this->azure->chatText([$sysMsg, $userMsg],
-                    ['json' => true, 'temperature' => 0.1, 'max_tokens' => 400, 'timeout' => 30]);
+                    ['json' => true, 'temperature' => 0.1, 'max_tokens' => 500, 'timeout' => 30]);
            }
            $json = $this->azure->decodeJsonObject($raw);
            if (is_array($json) && !empty($json['brief'])) {
-                $signals = $json['key_signals'] ?? [];
-                $signalText = is_array($signals) ? implode(', ', array_slice($signals, 0, 6)) : '';
+                $signals = is_array($json['key_signals'] ?? null) ? array_slice($json['key_signals'], 0, 8) : [];
+                $signalText = $signals ? implode(', ', $signals) : '';
                return [
                    'brief'       => (string)$json['brief'],
+                    'key_signals' => $signals,
                    'detail'      => sprintf('Research focus: %s%s', (string)$json['brief'], $signalText ? ' — signals: ' . $signalText : ''),
                ];
            }
@@ -505,13 +507,17 @@ PROMPT;

        return [
            'brief'       => '',
+            'key_signals' => [],
            'detail'      => 'Interpretation step skipped — proceeding with raw seed input.',
        ];
    }

-    private function expandQueries(string $seedDescription, string $brief, int $targetCount, string $language, string $advocateRole = ''): array
+    private function expandQueries(string $seedDescription, string $brief, array $keySignals, int $targetCount, string $language, string $advocateRole = ''): array
    {
        $locale = dbnToolsLanguageName($language);
+        $anchorsLine = !empty($keySignals)
+            ? "\nKey retrieval anchors (incorporate these terms into your sub-questions where relevant):\n" . implode(', ', $keySignals) . "\n"
+            : '';

        if ($advocateRole !== '') {
            $prompt = <<<PROMPT
@@ -521,10 +527,11 @@ Generate exactly {$targetCount} targeted sub-questions designed to find:
 2. Procedural rights and obligations the opposing party must satisfy — failures here help {$advocateRole}.
 3. Case law that exposes weaknesses in the opposing party's likely arguments.
 4. Specific articles, paragraphs, or judgments {$advocateRole}'s representative should cite.
+5. Specific documentation and procedural obligations Barnevernet or the opposing authority must fulfil — procedural or evidentiary failures that Norwegian courts have used to rule in favour of parents or children.

 Research brief:
 {$brief}
-
+{$anchorsLine}
 Raw input:
 {$seedDescription}

@@ -538,7 +545,8 @@ Return JSON only in {$locale}:
 Rules:
 - Exactly {$targetCount} sub-questions, no more, no fewer.
 - Every question must be answerable from Norwegian family-law, child-welfare, or ECHR/Hague sources.
-- Each question must cover a DIFFERENT angle (supporting statute, procedural right, opposing weakness, ECHR precedent, evidentiary frame).
+- Each question must cover a DIFFERENT angle (supporting statute, procedural right, opposing weakness, ECHR precedent, evidentiary frame, Barnevernet procedural obligation).
+- Each sub-question must reference a DIFFERENT legal instrument, statute section, or ECHR article — do not repeat the same §-reference or case name across sub-questions.
 - Sub-questions must be self-contained — readable without the raw input.
 - Write the questions in {$locale}.
 PROMPT;
@@ -548,7 +556,7 @@ You are decomposing a Do Better Norge legal-research request into {$targetCount}

 Research brief:
 {$brief}
-
+{$anchorsLine}
 Raw input:
 {$seedDescription}

@@ -563,6 +571,7 @@ Rules:
 - Exactly {$targetCount} sub-questions, no more, no fewer.
 - Each sub-question must be answerable with Norwegian family-law, child-welfare, or ECHR sources.
 - Each sub-question must explore a DIFFERENT angle (statute interpretation, procedural fairness, ECHR case law, evidence/factual frame, comparative authority).
+- Each sub-question must reference a DIFFERENT legal instrument, statute section, or ECHR article — do not repeat the same §-reference or case name across sub-questions.
 - Sub-questions must be self-contained — readable without seeing the seed text.
 - Write the questions in {$locale}.
 PROMPT;
@@ -667,7 +676,7 @@ PROMPT;
                'title'             => 'uploaded: ' . $entry['meta']['filename'],
                'section'           => null,
                'package_or_corpus' => 'Your upload',
-                'excerpt'           => dbnToolsExcerpt($entry['meta']['text'], 620),
+                'excerpt'           => dbnToolsExcerpt($entry['meta']['text'], 950),
                'chunk_text'        => $entry['meta']['text'],
                'similarity'        => round($sim, 4),
                'reranker_score'    => null,
@@ -709,7 +718,7 @@ PROMPT;
            'title'             => (string)($chunk['document_title'] ?? $chunk['title'] ?? 'Untitled source'),
            'section'           => $chunk['section_title'] ?? null,
            'package_or_corpus' => (string)($chunk['source_name'] ?? $chunk['source_type'] ?? 'Do Better Legal'),
-            'excerpt'           => dbnToolsExcerpt((string)($chunk['content'] ?? ''), 620),
+            'excerpt'           => dbnToolsExcerpt((string)($chunk['content'] ?? ''), 950),
            'chunk_text'        => (string)($chunk['content'] ?? ''),
            'similarity'        => $similarity,
            'reranker_score'    => $rerankerScore,
@@ -940,7 +949,8 @@ PROMPT;
        float   $temperature,
        string  $advocateRole = '',
        ?array  $priorContext = null,
-        string  $branchNotes = ''
+        string  $branchNotes = '',
+        array   $keySignals = []
    ): array {
        $locale = dbnToolsLanguageName($language);

@@ -1014,41 +1024,49 @@ PROMPT;
            ? '400-900 words, minimum 4 paragraphs, with clear paragraph breaks. Cover EACH sub-question above in its own paragraph.'
            : '250-450 words, 2-3 short paragraphs. Note when evidence is thin.';

+        $keySignalsLine = !empty($keySignals)
+            ? "\nKey retrieval signals (statutory/factual terms that drove corpus search — ground your brief in these where sources permit):\n" . implode(', ', $keySignals) . "\n"
+            : '';
+
        if ($advocateRole !== '') {
            $prompt = <<<PROMPT
 You are Do Better Norge Legal Tools producing a legal preparation brief in {$locale}.
 Your client: {$advocateRole}
 {$priorContextSection}
-You MUST ground every claim in the numbered sources below using inline `[n]` citation markers. Do NOT invent statutes, paragraph numbers, case names, dates, or parties.
-
 User input:
 {$seedDescription}

 Research brief:
 {$brief}
+{$keySignalsLine}
 {$subQText}

 Sources ({$sourceCount} numbered):
 {$sourcesText}

-Return JSON only in {$locale}:
-{
-  "brief_markdown": "Partisan but factually grounded advocate brief. {$lengthGuidance} Structure: (1) {$advocateRole}'s core legal position, (2) Strongest supporting arguments with [n] citations, (3) Identified weaknesses in the opposing party's position with [n] citations, (4) Procedural rights and obligations {$advocateRole} should assert. End with a one-line caveat that this is legal preparation support, not final legal advice.",
-  "client_strengths": ["3-6 strings — the strongest factual/legal points for {$advocateRole}, each anchored to at least one [n] source"],
-  "opposing_weaknesses": ["2-5 strings — vulnerabilities in the opposing position supported by retrieved sources. Omit this array entirely if evidence is thin — do NOT invent weaknesses."],
-  "what_we_found": "2-sentence summary of the most relevant retrieved authority for {$advocateRole}",
-  "what_remains_uncertain": ["3-5 gaps where evidence is insufficient or law is unclear — be honest"],
-  "next_practical_step": "one concrete action for {$advocateRole} to take next (legal filing, evidence gathering, consultation type, etc.)"
-}
-
-Rules:
-- Every factual claim in `brief_markdown` must end with one or more `[n]` markers.
-- If no source supports a point, omit the point — DO NOT speculate.
-- Prefer citing statute sections (e.g. "Barneloven §43") and case names verbatim from source excerpts.
+Rules — read ALL of these before writing a single word of output:
+- Every factual claim must end with one or more `[n]` markers. A citation is valid ONLY when that source's excerpt explicitly states or directly implies the claim — do not cite a source merely because it is on the same topic.
+- Do NOT invent statute sections, case names, paragraph numbers, dates, or parties. Copy statute references (e.g. §43, §4-12) and ECHR citations verbatim from the excerpt text — never infer a section number that does not appear in an excerpt.
+- If no source supports a point, omit the point entirely — do NOT speculate.
+- Legal hierarchy: when multiple sources support a claim, prefer the highest-authority source — statute (Barneloven/Barnevernsloven/etc.) > Høyesterett decision > ECHR Grand Chamber > ECHR regular chamber > lower courts > Bufdir guidance.
+- Citation self-check: before writing each [n] marker, confirm that source [n] exists in the list and its excerpt actually supports the specific claim being made.
 - When multiple sources support the same point, cite all of them (e.g. `[2,4]`).
-- `opposing_weaknesses` must be omitted or empty when no retrieved source actually supports the identified weakness.
+- `opposing_weaknesses`: OMIT this field by default. Populate it only when ≥2 retrieved sources explicitly support the identified weakness. Do not speculate or infer weaknesses from thin evidence.
+- `brief_markdown` must be {$lengthGuidance} Structure it as: (1) {$advocateRole}'s core legal position, (2) Strongest supporting arguments with [n] citations, (3) Procedural rights and obligations {$advocateRole} should assert, (4) Opposing weaknesses — only if `opposing_weaknesses` is non-empty. End with a one-line caveat that this is legal preparation support, not final legal advice.
+- `client_strengths`: 3-6 items, each must include at least one [n] citation.
+- `what_remains_uncertain`: 3-5 honest gaps where evidence is insufficient or law is unclear.
 - Respond in {$locale}.
 - Output valid JSON only — no markdown fences around the JSON object itself.
+
+Return JSON:
+{
+  "brief_markdown": "<advocate brief>",
+  "client_strengths": ["<strength with [n]>"],
+  "opposing_weaknesses": ["<weakness with [n]>"],
+  "what_we_found": "<2-sentence summary of the most relevant retrieved authority for {$advocateRole}>",
+  "what_remains_uncertain": ["<gap>"],
+  "next_practical_step": "<one concrete action for {$advocateRole} to take next>"
+}
 PROMPT;
        } else {
            $prompt = <<<PROMPT
@@ -1074,8 +1092,9 @@ Return JSON only in {$locale}:

 Rules:
 - Every factual claim in `brief_markdown` must end with one or more `[n]` markers.
+- A `[n]` citation is only valid when the excerpt for source [n] explicitly states or directly implies the claim — do not cite a source merely because it is on the same topic.
 - If no source supports a point, omit the point — DO NOT speculate.
-- Prefer pinpointing statute sections (e.g. "Barneloven §43") and case names verbatim from the source excerpts.
+- Copy statute section numbers (e.g. §43, §4-12) and ECHR case citations verbatim from the excerpt text — never rephrase or infer a section number that does not appear in an excerpt.
 - When multiple sources support the same point, cite all of them (e.g. `[2,4]`).
 - Respond in {$locale}.
 - Output valid JSON only — no markdown fences around the JSON object itself.
@@ -1083,10 +1102,11 @@ PROMPT;
        }

        $messages = [
-            ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
+            ['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences. Every legal claim must be supported by a source from the numbered list. Do not invent statute sections, case names, paragraph numbers, or dates. If no source supports a point, omit it entirely.'],
            ['role' => 'user',   'content' => $prompt],
        ];
-        $opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3200, 'timeout' => 180];
+        $synthTemp = ($advocateRole !== '') ? min($temperature, 0.20) : $temperature;
+        $opts = ['json' => true, 'temperature' => $synthTemp, 'max_tokens' => 4000, 'timeout' => 180];

        try {
            if ($engine === 'dbn_legal') {
@@ -48,6 +48,43 @@ require_once __DIR__ . '/includes/layout.php';
                        <p class="upload-hint" data-i18n="vocabHint">Helps Whisper recognise technical terms. Not included in the transcript.</p>
                    </div>

+                    <details id="advancedOptions" class="expert-field">
+                        <summary data-i18n="advancedOptions">Advanced options</summary>
+
+                        <div class="control-row" id="taskControl">
+                            <span class="control-label" data-i18n="task">Task</span>
+                            <label><input type="radio" name="task" value="transcribe" checked> <span data-i18n="taskTranscribe">Transcribe</span></label>
+                            <label><input type="radio" name="task" value="translate"> <span data-i18n="taskTranslate">Translate to English</span></label>
+                        </div>
+
+                        <div class="control-row">
+                            <span class="control-label" data-i18n="vadFilter">VAD filter</span>
+                            <label><input type="checkbox" id="vadFilterCheck" name="vad_filter"> <span data-i18n="vadFilterLabel">Remove silence / noise</span></label>
+                            <small class="control-hint" data-i18n="vadFilterHint">Improves accuracy on recordings with long pauses.</small>
+                        </div>
+
+                        <div class="control-row" id="whisperModelControl">
+                            <span class="control-label" data-i18n="whisperModel">Whisper model</span>
+                            <select id="whisperModelSelect" name="whisper_model">
+                                <option value="large-v3" selected>large-v3 (best)</option>
+                                <option value="large-v2">large-v2</option>
+                                <option value="medium">medium (faster)</option>
+                                <option value="small">small</option>
+                                <option value="base">base</option>
+                                <option value="tiny">tiny</option>
+                            </select>
+                            <small class="control-hint" data-i18n="whisperModelHint">Used when Azure/GCP unavailable. large-v3 is the default.</small>
+                        </div>
+
+                        <div class="control-row" id="postModelControl">
+                            <span class="control-label" data-i18n="postModel">AI cleanup</span>
+                            <label><input type="radio" name="post_model" value="" checked> <span data-i18n="postModelNone">None</span></label>
+                            <label><input type="radio" name="post_model" value="gpt-4o-mini"> <span data-i18n="postModelMini">GPT-4o Mini</span></label>
+                            <label><input type="radio" name="post_model" value="gpt-4o"> <span data-i18n="postModelFull">GPT-4o</span></label>
+                            <small class="control-hint" data-i18n="postModelHint">Fixes errors, punctuation, and domain terms after transcription.</small>
+                        </div>
+                    </details>
+
                    <div class="upload-zone" id="audioZone" role="region" aria-label="Audio upload" data-i18n-aria="uploadAria">
                        <input type="file" id="audioInput" accept="audio/*,video/mp4,video/webm" multiple aria-label="Choose audio files">
                        <div id="audioPrompt" class="upload-prompt">