feat(transcribe): GPT cleanup pass + advanced options i18n
Adds optional post-transcription cleanup via GPT-4o/GPT-4o-mini to fix mishearing errors, punctuation, and domain terms. Speaker role labelling now accepts a deployment param. Adds i18n strings for advanced options panel (task, VAD filter, Whisper model, AI cleanup) in all four languages. Updates BvjAnalyzerAgent and DeepResearchAgent. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+57
-11
@@ -27,6 +27,11 @@ $task = ($_POST['task'] ?? 'transcribe') === 'translate' ? 'translate' :
|
||||
$vadFilter = !empty($_POST['vad_filter']) && $_POST['vad_filter'] !== '0';
|
||||
// Optional prompt / vocabulary hint supplied by the client, capped at 500 bytes.
// mb_strcut() keeps the same byte budget as substr() but cuts on a character
// boundary, so a multi-byte letter (æ/ø/å, Cyrillic, Polish diacritics) is never
// split in half and the value stays valid UTF-8.
$initPrompt = mb_strcut(trim((string)($_POST['initial_prompt'] ?? '')), 0, 500, 'UTF-8');
|
||||
|
||||
$allowedPostModels = ['', 'gpt-4o-mini', 'gpt-4o'];
|
||||
$postModel = in_array($_POST['post_model'] ?? '', $allowedPostModels, true)
|
||||
? (string)($_POST['post_model'] ?? '')
|
||||
: '';
|
||||
|
||||
// ── Validate upload ───────────────────────────────────────────────────────────
|
||||
|
||||
if (empty($_FILES['audio']) || $_FILES['audio']['error'] !== UPLOAD_ERR_OK) {
|
||||
@@ -114,6 +119,17 @@ if ($timeOffset > 0.0 && !empty($result['segments'])) {
|
||||
unset($seg);
|
||||
}
|
||||
|
||||
// ── Optional GPT cleanup pass ─────────────────────────────────────────────────
|
||||
|
||||
$cleanedBy = null;
|
||||
if ($postModel !== '' && !empty($result['text'])) {
|
||||
$cleaned = dbnCleanupTranscript($result['text'], $language, $initPrompt, $postModel);
|
||||
if ($cleaned !== null) {
|
||||
$result['text'] = $cleaned;
|
||||
$cleanedBy = $postModel;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Speaker role labelling (diarize + multiple speakers only) ─────────────────
|
||||
|
||||
$segments = $result['segments'] ?? [];
|
||||
@@ -126,7 +142,8 @@ if ($numDetected < 2 && $segments) {
|
||||
|
||||
$speakerRoles = null;
|
||||
if ($diarize && $numDetected > 1 && $segments) {
|
||||
$speakerRoles = dbnLabelSpeakerRoles($segments);
|
||||
$labelDeployment = $postModel ?: 'gpt-4o-mini';
|
||||
$speakerRoles = dbnLabelSpeakerRoles($segments, $labelDeployment);
|
||||
}
|
||||
|
||||
// ── Friendly engine label ─────────────────────────────────────────────────────
|
||||
@@ -161,6 +178,7 @@ dbnToolsRespond([
|
||||
'model' => $engineLabel,
|
||||
'engine' => $engineUsed,
|
||||
'latency_ms' => $latencyMs,
|
||||
'cleaned_by' => $cleanedBy,
|
||||
]);
|
||||
|
||||
|
||||
@@ -313,7 +331,7 @@ function transcribeViaWhisperGpu(array $file, string $language, bool $diarize, i
|
||||
}
|
||||
|
||||
|
||||
function dbnLabelSpeakerRoles(array $segments): array
|
||||
function dbnLabelSpeakerRoles(array $segments, string $deployment = 'gpt-4o-mini'): array
|
||||
{
|
||||
$sample = array_slice(
|
||||
array_values(array_filter($segments, fn($s) => isset($s['speaker']))),
|
||||
@@ -322,16 +340,11 @@ function dbnLabelSpeakerRoles(array $segments): array
|
||||
if (!$sample) return [];
|
||||
|
||||
$lines = array_map(fn($s) => "[{$s['speaker']}] " . trim((string)($s['text'] ?? '')), $sample);
|
||||
$azure = new DbnAzureOpenAiGateway();
|
||||
$system = 'You are analyzing a legal proceeding transcript. '
|
||||
. 'Based on the first segments, identify the role of each speaker. '
|
||||
. 'Common roles in Norwegian legal proceedings: dommer (judge), advokat (lawyer), '
|
||||
. 'forelder (parent), barn (child), sakkyndig (expert witness), '
|
||||
. 'saksbehandler (caseworker), tolk (interpreter), vitne (witness), '
|
||||
. 'prosessfullmektig (counsel). '
|
||||
$azure = (new DbnAzureOpenAiGateway())->withDeployment($deployment);
|
||||
$system = 'Label speakers in this Norwegian legal transcript. '
|
||||
. 'Return ONLY valid JSON: {"SPEAKER_00":"dommer","SPEAKER_01":"forelder"}. '
|
||||
. 'Use Norwegian role names. Use "ukjent" if role cannot be determined. '
|
||||
. 'Only include speakers present in the input.';
|
||||
. 'Norwegian role names only — dommer, advokat, forelder, barn, sakkyndig, '
|
||||
. 'saksbehandler, tolk, vitne, prosessfullmektig. Use "ukjent" if unclear.';
|
||||
|
||||
try {
|
||||
$text = $azure->chatText([
|
||||
@@ -345,3 +358,36 @@ function dbnLabelSpeakerRoles(array $segments): array
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Run the optional GPT clean-up pass over a raw transcript.
 *
 * Sends the transcript to the given deployment with a system prompt asking it to
 * fix mishearing errors, run-on sentences and punctuation while preserving the
 * meaning and the original language.
 *
 * @param string $text        Raw transcript text to clean.
 * @param string $language    Language code; unrecognised codes are described to the
 *                            model as Norwegian.
 * @param string $vocabulary  Optional domain terms to preserve; '' adds no hint.
 * @param string $deployment  Deployment name passed to the gateway's withDeployment().
 *
 * @return string|null The cleaned transcript, or null when the call fails, the
 *                     reply is empty, or the reply looks truncated. Callers keep
 *                     the original text when null is returned.
 */
function dbnCleanupTranscript(string $text, string $language, string $vocabulary, string $deployment): ?string
{
    $langName = match ($language) {
        'no', 'nb', 'nn' => 'Norwegian',
        'en' => 'English',
        'pl' => 'Polish',
        'uk' => 'Ukrainian',
        'sv' => 'Swedish',
        'da' => 'Danish',
        'de' => 'German',
        'fr' => 'French',
        default => 'Norwegian',
    };

    $vocabHint = $vocabulary !== '' ? " Domain terms to preserve correctly: {$vocabulary}." : '';

    $system = "Fix transcription errors in this {$langName} text.{$vocabHint} "
        . "Correct mishearing errors, run-on sentences, and punctuation. "
        . "Preserve all meaning and the original language exactly. "
        . "Return only the corrected transcript text, no commentary.";

    try {
        $azure  = (new DbnAzureOpenAiGateway())->withDeployment($deployment);
        $result = $azure->chatText(
            [
                ['role' => 'system', 'content' => $system],
                ['role' => 'user', 'content' => $text],
            ],
            ['temperature' => 0.1, 'max_tokens' => 4096]
        );
    } catch (Throwable $e) {
        // Clean-up is best-effort: log the reason and let the caller keep the raw transcript.
        error_log('dbnCleanupTranscript failed: ' . $e->getMessage());
        return null;
    }

    $cleaned = is_string($result) ? trim($result) : '';
    if ($cleaned === '') {
        return null;
    }

    // The reply is capped at max_tokens, so a long transcript can come back cut off.
    // A clean-up pass should barely change the length; a reply under half the size of
    // the input is treated as truncated rather than allowed to replace the transcript.
    if (strlen($cleaned) < 0.5 * strlen(trim($text))) {
        error_log('dbnCleanupTranscript discarded: reply much shorter than input (possible truncation)');
        return null;
    }

    return $cleaned;
}
|
||||
|
||||
@@ -441,6 +441,20 @@ const TRANSCRIBE_I18N = {
|
||||
traceProcessingDetail: () => 'Processing audio. Large files may take 1–3 minutes.',
|
||||
traceStillLabel: (clip) => `${clip} — still processing…`,
|
||||
traceStillDetail: (e) => { const m = Math.floor(e / 60), s = e % 60; return m > 0 ? `${m}m ${s}s elapsed — working through the audio.` : `${e}s elapsed — processing.`; },
|
||||
advancedOptions: 'Advanced options',
|
||||
task: 'Task',
|
||||
taskTranscribe: 'Transcribe',
|
||||
taskTranslate: 'Translate to English',
|
||||
vadFilter: 'VAD filter',
|
||||
vadFilterLabel: 'Remove silence / noise',
|
||||
vadFilterHint: 'Improves accuracy on recordings with long pauses.',
|
||||
whisperModel: 'Whisper model',
|
||||
whisperModelHint: 'Used when Azure/GCP unavailable. large-v3 is the default.',
|
||||
postModel: 'AI cleanup',
|
||||
postModelNone: 'None',
|
||||
postModelMini: 'GPT-4o Mini',
|
||||
postModelFull: 'GPT-4o',
|
||||
postModelHint: 'Fixes errors, punctuation, and domain terms after transcription.',
|
||||
},
|
||||
no: {
|
||||
transcribeLang: 'Språk i lydfil',
|
||||
@@ -481,6 +495,20 @@ const TRANSCRIBE_I18N = {
|
||||
traceProcessingLabel: (clip) => `${clip} — transkriberer`,
|
||||
traceProcessingDetail: () => 'Behandler lyden. Store filer tar 1–3 minutter.', traceStillLabel: (clip) => `${clip} — behandler fortsatt…`,
|
||||
traceStillDetail: (e) => { const m = Math.floor(e / 60), s = e % 60; return m > 0 ? `${m} min ${s}s gått — jobber gjennom lyden.` : `${e}s gått — behandler.`; },
|
||||
advancedOptions: 'Avanserte valg',
|
||||
task: 'Oppgave',
|
||||
taskTranscribe: 'Transkriber',
|
||||
taskTranslate: 'Oversett til engelsk',
|
||||
vadFilter: 'VAD-filter',
|
||||
vadFilterLabel: 'Fjern stillhet / støy',
|
||||
vadFilterHint: 'Forbedrer nøyaktigheten ved opptak med lange pauser.',
|
||||
whisperModel: 'Whisper-modell',
|
||||
whisperModelHint: 'Brukes når Azure/GCP ikke er tilgjengelig. large-v3 er standard.',
|
||||
postModel: 'AI-opprydding',
|
||||
postModelNone: 'Ingen',
|
||||
postModelMini: 'GPT-4o Mini',
|
||||
postModelFull: 'GPT-4o',
|
||||
postModelHint: 'Retter feil, tegnsetting og fagtermer etter transkripsjon.',
|
||||
},
|
||||
uk: {
|
||||
transcribeLang: 'Мова аудіо',
|
||||
@@ -521,6 +549,20 @@ const TRANSCRIBE_I18N = {
|
||||
traceProcessingLabel: (clip) => `${clip} — транскрибування`,
|
||||
traceProcessingDetail: () => 'Обробка аудіо. Великі файли займають 1–3 хвилини.', traceStillLabel: (clip) => `${clip} — ще обробляється…`,
|
||||
traceStillDetail: (e) => { const m = Math.floor(e / 60), s = e % 60; return m > 0 ? `Минуло ${m} хв ${s} с — обробка.` : `Минуло ${e} с — обробка.`; },
|
||||
advancedOptions: 'Розширені параметри',
|
||||
task: 'Завдання',
|
||||
taskTranscribe: 'Транскрибувати',
|
||||
taskTranslate: 'Перекласти на англійську',
|
||||
vadFilter: 'VAD-фільтр',
|
||||
vadFilterLabel: 'Видалити тишу / шум',
|
||||
vadFilterHint: 'Покращує точність для записів з довгими паузами.',
|
||||
whisperModel: 'Модель Whisper',
|
||||
whisperModelHint: 'Використовується, якщо Azure/GCP недоступні. large-v3 за замовчуванням.',
|
||||
postModel: 'AI-очищення',
|
||||
postModelNone: 'Без',
|
||||
postModelMini: 'GPT-4o Mini',
|
||||
postModelFull: 'GPT-4o',
|
||||
postModelHint: 'Виправляє помилки, пунктуацію та терміни після транскрипції.',
|
||||
},
|
||||
pl: {
|
||||
transcribeLang: 'Język audio',
|
||||
@@ -561,6 +603,20 @@ const TRANSCRIBE_I18N = {
|
||||
traceProcessingLabel: (clip) => `${clip} — transkrybowanie`,
|
||||
traceProcessingDetail: () => 'Przetwarzanie audio. Duże pliki zajmują 1–3 minuty.', traceStillLabel: (clip) => `${clip} — nadal przetwarza…`,
|
||||
traceStillDetail: (e) => { const m = Math.floor(e / 60), s = e % 60; return m > 0 ? `Minęło ${m} min ${s} s — przetwarzanie audio.` : `Minęło ${e} s — przetwarzanie.`; },
|
||||
advancedOptions: 'Opcje zaawansowane',
|
||||
task: 'Zadanie',
|
||||
taskTranscribe: 'Transkrypcja',
|
||||
taskTranslate: 'Przetłumacz na angielski',
|
||||
vadFilter: 'Filtr VAD',
|
||||
vadFilterLabel: 'Usuń ciszę / szum',
|
||||
vadFilterHint: 'Poprawia dokładność nagrań z długimi przerwami.',
|
||||
whisperModel: 'Model Whisper',
|
||||
whisperModelHint: 'Używany gdy Azure/GCP niedostępne. large-v3 jest domyślny.',
|
||||
postModel: 'Korekta AI',
|
||||
postModelNone: 'Brak',
|
||||
postModelMini: 'GPT-4o Mini',
|
||||
postModelFull: 'GPT-4o',
|
||||
postModelHint: 'Poprawia błędy, interpunkcję i terminy po transkrypcji.',
|
||||
},
|
||||
};
|
||||
|
||||
@@ -1515,6 +1571,10 @@ async function runTranscribe() {
|
||||
formData.append('time_offset', String(cumulativeOffset));
|
||||
if (vadFilter) formData.append('vad_filter', '1');
|
||||
if (initPrompt) formData.append('initial_prompt', initPrompt);
|
||||
const whisperModel = document.getElementById('whisperModelSelect')?.value;
|
||||
if (whisperModel) formData.append('model', whisperModel);
|
||||
const postModel = document.querySelector('input[name="post_model"]:checked')?.value;
|
||||
if (postModel) formData.append('post_model', postModel);
|
||||
if (diarize) {
|
||||
formData.append('diarize', '1');
|
||||
if (numSpeakers >= 2) formData.append('num_speakers', String(numSpeakers));
|
||||
@@ -1650,6 +1710,7 @@ function renderTranscriptResults(data) {
|
||||
if (data.language) traceMeta.push({ label: `Language: ${data.language}`, detail: '', status: 'complete' });
|
||||
if (data.num_speakers > 1) traceMeta.push({ label: `Speakers detected: ${data.num_speakers}`, detail: Object.entries(speakerRoles).map(([id, r]) => `${id}: ${r}`).join(', ') || '', status: 'complete' });
|
||||
if (data.model) traceMeta.push({ label: data.model, detail: '', status: 'complete' });
|
||||
if (data.cleaned_by) traceMeta.push({ label: `Cleaned by ${data.cleaned_by}`, detail: '', status: 'complete' });
|
||||
renderTrace(traceMeta.length ? traceMeta : [{ label: 'Transcribed', detail: '', status: 'complete' }]);
|
||||
}
|
||||
|
||||
|
||||
+128
-59
@@ -493,7 +493,7 @@ PROMPT;
|
||||
private function extractParties(string $docText, string $language): array
|
||||
{
|
||||
$locale = dbnToolsLanguageName($language);
|
||||
$excerpt = mb_substr($docText, 0, 12000, 'UTF-8');
|
||||
$excerpt = mb_substr($docText, 0, 20000, 'UTF-8');
|
||||
|
||||
$prompt = <<<PROMPT
|
||||
You are analysing a Norwegian child welfare (Barnevernet) document.
|
||||
@@ -502,15 +502,16 @@ Identify ALL named parties — every person or institution referred to by name o
|
||||
Respond in {$locale}. Return a JSON object with a single key "parties" containing an array of objects.
|
||||
Each object must have these four fields:
|
||||
- "name": full name or institution name (string)
|
||||
- "role": their role in the case, e.g. Biological mother, Child, Barnevernarbeider, Saksbehandler, Melder, Politi, Lege, Advokat, Foster carer, Rusklinikk
|
||||
- "role": their role in the case, e.g. Biological mother, Biological father, Child, Barnevernarbeider, Saksbehandler, Leder, Melder, Politi, Lege, Psykolog, Advokat, Talsperson for barnet, Tilsynsfører, Sakkyndig, Foster carer (fosterforelder), Rusklinikk, Statsforvalter
|
||||
- "organization": employer or institution if mentioned, otherwise null
|
||||
- "relationship_to_child": relationship to the child in the document, e.g. Mother, Father, Caseworker, Melder, or null
|
||||
- "relationship_to_child": relationship to the child in the document, e.g. Mother, Father, Sibling, Caseworker, Melder, Supervisor, or null
|
||||
|
||||
Rules:
|
||||
- Include every named person and named institution — even peripheral ones.
|
||||
- Include Barnevernvakta (bvv) as an institution even if no individual caseworkers are named.
|
||||
- If a name appears to be redacted or anonymised (e.g. "mor", "far", "barnet", initials like "A.B."), include them with role inferred from context.
|
||||
- Do not invent parties not present in the text.
|
||||
- Maximum 20 parties.
|
||||
- Maximum 25 parties.
|
||||
|
||||
Document text:
|
||||
{$excerpt}
|
||||
@@ -520,14 +521,14 @@ PROMPT;
|
||||
$raw = $this->azure->chatText([
|
||||
['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
|
||||
['role' => 'user', 'content' => $prompt],
|
||||
], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 1500, 'timeout' => 40]);
|
||||
], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 2000, 'timeout' => 45]);
|
||||
$json = $this->azure->decodeJsonObject($raw);
|
||||
if (is_array($json) && is_array($json['parties'] ?? null)) {
|
||||
return array_slice($json['parties'], 0, 20);
|
||||
return array_slice($json['parties'], 0, 25);
|
||||
}
|
||||
// Fallback: model returned an array at root level instead of {parties:[...]}
|
||||
if (is_array($json) && isset($json[0]['name'])) {
|
||||
return array_slice($json, 0, 20);
|
||||
return array_slice($json, 0, 25);
|
||||
}
|
||||
error_log('BVJ extractParties unexpected structure: ' . substr($raw, 0, 300));
|
||||
} catch (Throwable $e) {
|
||||
@@ -541,7 +542,7 @@ PROMPT;
|
||||
private function extractTimeline(string $docText, string $language): array
|
||||
{
|
||||
$locale = dbnToolsLanguageName($language);
|
||||
$excerpt = mb_substr($docText, 0, 12000, 'UTF-8');
|
||||
$excerpt = mb_substr($docText, 0, 20000, 'UTF-8');
|
||||
|
||||
$prompt = <<<PROMPT
|
||||
Build a chronological timeline from this Norwegian child welfare (Barnevernet) document in {$locale}.
|
||||
@@ -557,14 +558,24 @@ IMPORTANT — Norwegian date and time formats to recognise:
|
||||
- Diary/log format: lines beginning with a date or time are always events.
|
||||
- Two-digit years: interpret as 20YY (20 → 2020, 21 → 2021).
|
||||
|
||||
Barnevernet-specific events that are ALWAYS high significance:
|
||||
- Akuttvedtak (emergency placement) under §4-6 or §4-25
|
||||
- Omsorgsovertakelse (care order) under §4-12
|
||||
- Police involvement or assistance (politibistand)
|
||||
- Formal decision (vedtak) or court order (kjennelse)
|
||||
- Deadline breaches: bekymringsmelding not processed within 7 days; investigation not opened within 6 weeks
|
||||
- Forhandlingsmøte (negotiation hearing) or Fylkesnemnda hearing
|
||||
- Supervised contact visits (samvær) being reduced or denied
|
||||
- Placement in foster care or institution (fosterhjem, institusjon)
|
||||
|
||||
For each event provide:
|
||||
- "date": ISO 8601 date (YYYY-MM-DD) if determinable, otherwise best-effort description
|
||||
- "time_of_day": HH:MM if present, otherwise null
|
||||
- "actor": person, institution, or party involved
|
||||
- "action": concise description (≤ 80 chars) of what happened
|
||||
- "significance": high (acute measure, removal, police involvement, formal decision) | medium (home visit, phone call, meeting) | low (minor update, note)
|
||||
- "significance": high (acute measure, removal, police involvement, formal decision, statutory deadline breach) | medium (home visit, phone call, meeting, assessment) | low (minor update, note)
|
||||
|
||||
Sort chronologically. Maximum 30 events.
|
||||
Sort chronologically. Maximum 40 events.
|
||||
|
||||
Document text:
|
||||
{$excerpt}
|
||||
@@ -579,10 +590,10 @@ PROMPT;
|
||||
$raw = $this->azure->chatText([
|
||||
['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
|
||||
['role' => 'user', 'content' => $prompt],
|
||||
], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 3000, 'timeout' => 45]);
|
||||
], ['json' => true, 'temperature' => 0.05, 'max_tokens' => 4000, 'timeout' => 55]);
|
||||
$json = $this->azure->decodeJsonObject($raw);
|
||||
if (is_array($json) && is_array($json['events'] ?? null)) {
|
||||
return array_slice($json['events'], 0, 30);
|
||||
return array_slice($json['events'], 0, 40);
|
||||
}
|
||||
} catch (Throwable $e) {
|
||||
error_log('BVJ extractTimeline failed: ' . $e->getMessage());
|
||||
@@ -602,50 +613,82 @@ PROMPT;
|
||||
): array {
|
||||
$locale = dbnToolsLanguageName($language);
|
||||
$docType = $docMeta['doc_type'] ?? 'BVJ document';
|
||||
$docDate = $docMeta['doc_date'] ?? 'unknown date';
|
||||
$authority = $docMeta['issuing_authority'] ?? 'the municipality';
|
||||
$roleStr = $advocateRole !== '' ? $advocateRole : 'the affected party';
|
||||
|
||||
// Summarise the top events to give the model context
|
||||
// Summarise high-significance events first, then others
|
||||
$highEvents = array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') === 'high'));
|
||||
$otherEvents = array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') !== 'high'));
|
||||
$topEvents = array_slice(array_merge($highEvents, $otherEvents), 0, 12);
|
||||
$eventSummary = '';
|
||||
$highEvents = array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') === 'high');
|
||||
$topEvents = array_slice(array_merge(array_values($highEvents),
|
||||
array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') !== 'high'))), 0, 8);
|
||||
foreach ($topEvents as $ev) {
|
||||
$eventSummary .= sprintf("- %s: %s (%s)\n", $ev['date'] ?? '?', $ev['action'] ?? '', $ev['actor'] ?? '');
|
||||
$sig = ($ev['significance'] ?? 'low') === 'high' ? '[HIGH] ' : '';
|
||||
$eventSummary .= sprintf("- %s %s%s (%s)\n",
|
||||
$ev['date'] ?? '?', $sig, $ev['action'] ?? '', $ev['actor'] ?? '');
|
||||
}
|
||||
|
||||
// Summarise parties
|
||||
$partyList = '';
|
||||
foreach (array_slice($parties, 0, 8) as $p) {
|
||||
$partyList .= sprintf("- %s (%s)\n", $p['name'] ?? '', $p['role'] ?? '');
|
||||
foreach (array_slice($parties, 0, 10) as $p) {
|
||||
$org = !empty($p['organization']) ? ' at ' . $p['organization'] : '';
|
||||
$partyList .= sprintf("- %s (%s%s)\n", $p['name'] ?? '?', $p['role'] ?? '?', $org);
|
||||
}
|
||||
|
||||
$angleGuidance = match (true) {
|
||||
$count >= 5 => <<<ANGLES
|
||||
Cover these five distinct legal angles (one per question):
|
||||
1. Statutory rights and obligations under Barnevernloven (e.g. §4-2, §4-6, §4-12) specific to the measures taken
|
||||
2. ECHR Article 8 proportionality and procedural safeguards — cite the specific measures and dates from this case
|
||||
3. Procedural obligations BVV must fulfil (advance notice, documentation, hearing rights) — anchor to documented events
|
||||
4. Bufdir/Statsforvalter guidance on investigation standards and thresholds for intervention
|
||||
5. Norwegian appellate court decisions on comparable measures and family circumstances
|
||||
ANGLES,
|
||||
$count === 4 => <<<ANGLES
|
||||
Cover these four distinct legal angles (one per question):
|
||||
1. Statutory rights under Barnevernloven anchored to the specific measures and dates in this case
|
||||
2. ECHR Article 8 — proportionality of the specific intervention and any procedural violations
|
||||
3. BVV's procedural obligations — documentation, notice, and hearing rights — as evidenced by the timeline
|
||||
4. Bufdir guidance and Norwegian court decisions on comparable fact patterns
|
||||
ANGLES,
|
||||
default => <<<ANGLES
|
||||
Cover three distinct legal angles (one per question):
|
||||
1. Statutory rights under Barnevernloven for the specific type of measure documented
|
||||
2. ECHR Article 8 proportionality and procedural safeguards
|
||||
3. BVV's procedural obligations and whether the documented timeline shows any breach
|
||||
ANGLES,
|
||||
};
|
||||
|
||||
$prompt = <<<PROMPT
|
||||
You are a Norwegian family-law research assistant building a case for: {$roleStr}.
|
||||
|
||||
A {$docType} has been uploaded. Key events:
|
||||
Case facts extracted from the uploaded document:
|
||||
- Document type: {$docType}
|
||||
- Date: {$docDate}
|
||||
- Issuing authority: {$authority}
|
||||
- Key events (chronological):
|
||||
{$eventSummary}
|
||||
Key parties:
|
||||
- Key parties:
|
||||
{$partyList}
|
||||
|
||||
Generate exactly {$count} targeted sub-questions to research the legal corpus for arguments that SUPPORT {$roleStr}'s position. Each question should explore a different angle:
|
||||
1. Statutory rights and obligations (Barnevernloven, Barneloven)
|
||||
2. ECHR Article 8 and 9 precedents vs Norway
|
||||
3. Procedural requirements BVV must follow (notice, documentation, proportionality)
|
||||
4. Bufdir guidance on case handling standards
|
||||
5. Norwegian court decisions on similar fact patterns
|
||||
Generate exactly {$count} sub-questions to search the Norwegian legal corpus for arguments that SUPPORT {$roleStr}'s position.
|
||||
|
||||
{$angleGuidance}
|
||||
|
||||
CRITICAL: Every question MUST embed specific facts from this case — use the actual authority name, document date, type of measure, and parties where relevant. Generic questions ("What are parental rights?") are useless for retrieval. Specific questions ("What notice requirements must {$authority} meet before issuing an emergency placement under Barnevernloven §4-6?") are highly effective.
|
||||
|
||||
Return JSON only in {$locale}:
|
||||
{
|
||||
"sub_questions": [
|
||||
{"id":"q1","question":"...","rationale":"how this angle strengthens {$roleStr}'s position (≤ 120 chars)"}
|
||||
{"id":"q1","question":"...","rationale":"why this angle strengthens {$roleStr}'s position (≤ 120 chars)"}
|
||||
]
|
||||
}
|
||||
|
||||
Rules:
|
||||
- Exactly {$count} sub-questions, no more no fewer.
|
||||
- Every question must be answerable from Norwegian family-law, child-welfare, or ECHR sources.
|
||||
- Each question must cover a DIFFERENT legal angle.
|
||||
- Questions must be self-contained without needing the raw document.
|
||||
- Exactly {$count} sub-questions.
|
||||
- Each question targets a DIFFERENT legal angle.
|
||||
- Include specific case details (authority, date, measure type) in each question.
|
||||
- Questions must be self-contained and answerable from Norwegian family-law, child-welfare, or ECHR sources.
|
||||
- Respond in {$locale}.
|
||||
PROMPT;
|
||||
|
||||
@@ -734,16 +777,16 @@ PROMPT;
|
||||
|
||||
// Build parties summary (top 8)
|
||||
$partiesSummary = '';
|
||||
foreach (array_slice($parties, 0, 8) as $i => $p) {
|
||||
foreach (array_slice($parties, 0, 12) as $i => $p) {
|
||||
$org = $p['organization'] ? ' (' . $p['organization'] . ')' : '';
|
||||
$rel = $p['relationship_to_child'] ? ' — rel: ' . $p['relationship_to_child'] : '';
|
||||
$partiesSummary .= sprintf("%d. %s — %s%s%s\n", $i + 1, $p['name'] ?? '', $p['role'] ?? '', $org, $rel);
|
||||
}
|
||||
|
||||
// Build timeline summary (top 15 most significant events)
|
||||
// Build timeline summary (top 20 most significant events)
|
||||
$highEvents = array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') === 'high'));
|
||||
$otherEvents = array_values(array_filter($timelineEvents, fn($e) => ($e['significance'] ?? '') !== 'high'));
|
||||
$topEvents = array_slice(array_merge($highEvents, $otherEvents), 0, 15);
|
||||
$topEvents = array_slice(array_merge($highEvents, $otherEvents), 0, 20);
|
||||
$timelineSummary = '';
|
||||
foreach ($topEvents as $ev) {
|
||||
$time = $ev['time_of_day'] ? ' kl.' . $ev['time_of_day'] : '';
|
||||
@@ -783,14 +826,17 @@ PROMPT;
|
||||
? "\n== ADDITIONAL CONTEXT FROM ADVOCATE ==\n{$additionalNotes}\n"
|
||||
: '';
|
||||
|
||||
$docExcerpt = mb_substr($docText, 0, 3000, 'UTF-8');
|
||||
$docExcerpt = mb_substr($docText, 0, 8000, 'UTF-8');
|
||||
|
||||
$prompt = <<<PROMPT
|
||||
You are Do Better Norge Legal Tools producing a structured Barnevernet case analysis brief.
|
||||
You are representing: {$roleStr}
|
||||
You are Do Better Norge Legal Tools. Produce a structured Barnevernet case analysis for: {$roleStr}.
|
||||
|
||||
HALLUCINATION RULES — READ FIRST:
|
||||
- You may ONLY cite statute sections (§), ECHR article numbers, ECHR application numbers, case names, and Bufdir/Statsforvalter circular references that appear verbatim in the numbered corpus sources below.
|
||||
- Do NOT cite statute sections, case names, or ECHR applications from your training memory — they may be misremembered or no longer in force.
|
||||
- If no source supports a claim, omit the claim rather than invent support.
|
||||
- Every factual legal claim in advocacy_brief MUST end with at least one [n] or [DOC] citation. Unsupported claims are a liability for the client.
|
||||
|
||||
Ground every claim in the numbered corpus sources below using [n] markers, OR in the uploaded document using [DOC].
|
||||
Do NOT invent statutes, paragraph numbers, case names, ECHR applications, dates, or parties.
|
||||
Return valid JSON only. No markdown fences.
|
||||
|
||||
== DOCUMENT METADATA ==
|
||||
@@ -805,51 +851,74 @@ Child: {$childInfo}
|
||||
== TIMELINE (from document) ==
|
||||
{$timelineSummary}
|
||||
|
||||
== CORPUS SOURCES ({$sourceCount} numbered) ==
|
||||
== CORPUS SOURCES ({$sourceCount} numbered — cite as [n]) ==
|
||||
{$sourcesText}
|
||||
{$notesSection}
|
||||
{$subQText}
|
||||
|
||||
== DOCUMENT EXCERPT (first 3000 chars — use [DOC] to cite) ==
|
||||
== DOCUMENT EXCERPT (first 8000 chars — cite as [DOC]) ==
|
||||
{$docExcerpt}
|
||||
|
||||
Return JSON in {$locale}:
|
||||
== ADVOCACY BRIEF FORMAT ==
|
||||
Write the advocacy_brief as a Markdown document with these sections:
|
||||
|
||||
## Case Overview
|
||||
Summarise what happened: document type, issuing authority, key events from the timeline. Every factual statement must cite [DOC].
|
||||
|
||||
## {$roleStr}'s Core Legal Position
|
||||
The strongest statutory and ECHR arguments in favour of {$roleStr}. Cite [n] for each legal point. Only cite statutes and cases that appear in the corpus sources above.
|
||||
|
||||
## Procedural Compliance Issues
|
||||
Where BVV/the authority may have failed their own procedural obligations. Ground each point in a specific documented action from [DOC] and the applicable statute or guidance from [n].
|
||||
|
||||
## Client Strengths
|
||||
3-6 factual and legal advantages for {$roleStr}, each anchored with [n] or [DOC].
|
||||
|
||||
## Counter-Arguments and Responses
|
||||
The most likely opposing arguments and how to rebut them. Cite [n] for rebuttal sources.
|
||||
|
||||
## Recommended Next Steps
|
||||
2-4 concrete legal actions {$roleStr} should take now.
|
||||
|
||||
End with one line: "*This brief is AI-assisted and for discussion purposes only — verify all legal references with a qualified Norwegian family-law lawyer.*"
|
||||
|
||||
Target length: 600-1000 words.
|
||||
|
||||
== JSON OUTPUT ==
|
||||
{
|
||||
"advocacy_brief": "Partisan legal brief in Markdown. Structure:\n## Case Overview\n(What happened according to [DOC] — doc type, authority, key events)\n\n## {$roleStr}'s Core Legal Position\n(Strongest statutory and ECHR arguments — cite [n] and [DOC])\n\n## Procedural Compliance Issues\n(Where BVV may have failed their own procedural obligations — cite [DOC][n])\n\n## Client Strengths\n(Factual and legal advantages for {$roleStr} — cite [n][DOC])\n\n## Counter-Arguments and Responses\n(Likely opposing arguments and how to rebut — cite [n])\n\n## Recommended Next Steps\n(Concrete legal actions)\n\nEnd with a one-line disclaimer. Length: 500-1000 words.",
|
||||
"advocacy_brief": "<the Markdown brief following the format above>",
|
||||
|
||||
"procedural_red_flags": [
|
||||
{
|
||||
"description": "Concise description of the potential procedural violation",
|
||||
"legal_basis": "Statute or ECHR article potentially violated, e.g. Barnevernloven §6-1, ECHR Art.8",
|
||||
"severity": "high",
|
||||
"legal_basis": "Statute or ECHR article from a corpus source — e.g. Barnevernloven §4-2 [3]",
|
||||
"severity": "high|medium|low",
|
||||
"source_refs": ["[n]", "[DOC]"],
|
||||
"what_to_check": "Specific document text or action requiring legal verification"
|
||||
"what_to_check": "Exact document text or action to verify with a lawyer"
|
||||
}
|
||||
],
|
||||
|
||||
"client_strengths": ["3-6 items anchored with [n] or [DOC]"],
|
||||
"opposing_weaknesses": ["2-5 vulnerabilities in BVV or opposing party position — omit if unsupported by sources"],
|
||||
"what_we_found": "2-sentence plain-language summary of the most critical finding",
|
||||
"what_remains_uncertain": ["3-5 specific gaps — missing information, unclear authority, conflicting sources"],
|
||||
"next_practical_step": "The single most important concrete legal action for {$roleStr}"
|
||||
"client_strengths": ["3-6 items, each ending with [n] or [DOC]"],
|
||||
"opposing_weaknesses": ["2-5 documented vulnerabilities in BVV or opposing position — OMIT if not supported by at least one [n]"],
|
||||
"what_we_found": "2-sentence plain-language summary of the single most critical finding",
|
||||
"what_remains_uncertain": ["3-5 specific information gaps or legal questions that need clarification"],
|
||||
"next_practical_step": "The single most important concrete legal action for {$roleStr} to take within the next 7 days"
|
||||
}
|
||||
|
||||
Rules:
|
||||
- Every factual claim in advocacy_brief must end with [n] or [DOC].
|
||||
- procedural_red_flags must be grounded in documented BVV actions — no speculation.
|
||||
- severity: high = likely violation of a codified right; medium = procedural irregularity; low = best-practice gap.
|
||||
- If no corpus source supports a claimed weakness, omit it from opposing_weaknesses.
|
||||
- Cite statute sections and ECHR articles as they appear in the corpus excerpts.
|
||||
- severity: high = likely violation of a codified statutory right or ECHR guarantee; medium = procedural irregularity; low = best-practice gap only.
|
||||
- procedural_red_flags must be grounded in documented BVV actions visible in [DOC] or the timeline.
|
||||
- If fewer than 2 corpus sources support opposing_weaknesses, return an empty array.
|
||||
- Respond in {$locale}.
|
||||
PROMPT;
|
||||
|
||||
$sysPrompt = 'You return valid JSON only. No markdown fences.';
|
||||
$sysPrompt = 'You return valid JSON only. No markdown fences. Every legal citation must come from the provided corpus sources, not from training memory.';
|
||||
|
||||
$messages = [
|
||||
['role' => 'system', 'content' => $sysPrompt],
|
||||
['role' => 'user', 'content' => $prompt],
|
||||
];
|
||||
$opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3000, 'timeout' => 200];
|
||||
$opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 4500, 'timeout' => 240];
|
||||
|
||||
$deployLabel = match ($engine) {
|
||||
'gpu' => 'GPU (cuttlefish)',
|
||||
|
||||
@@ -91,7 +91,7 @@ final class DbnDeepResearchAgent
|
||||
// STEP 2: Query expansion
|
||||
$emitRunning('expansion', 'Query expansion', 'Generating sub-questions…');
|
||||
$stepStart = microtime(true);
|
||||
$expansion = $this->expandQueries($seedDescription, $interpretation['brief'], $controls['sub_q_count'], $language, $advocateRole);
|
||||
$expansion = $this->expandQueries($seedDescription, $interpretation['brief'], $interpretation['key_signals'], $controls['sub_q_count'], $language, $advocateRole);
|
||||
$this->stepTimings['expansion'] = $this->elapsedMs($stepStart);
|
||||
$subQuestions = $expansion['questions'];
|
||||
$expansionStatus = $expansion['fallback'] ? 'warning' : 'complete';
|
||||
@@ -323,7 +323,8 @@ final class DbnDeepResearchAgent
|
||||
$controls['temperature'],
|
||||
$advocateRole,
|
||||
$priorContext,
|
||||
$branchNotes
|
||||
$branchNotes,
|
||||
$interpretation['key_signals'] ?? []
|
||||
);
|
||||
$this->stepTimings['synthesis'] = $this->elapsedMs($stepStart);
|
||||
$emitStep(
|
||||
@@ -406,7 +407,7 @@ final class DbnDeepResearchAgent
|
||||
'chunk_limit' => max(4, min(10, (int)($controls['chunk_limit'] ?? 6))),
|
||||
'similarity_threshold' => max(0.2, min(0.6, (float)($controls['similarity_threshold'] ?? 0.30))),
|
||||
'reranker_top_k' => max(8, min(14, (int)($controls['reranker_top_k'] ?? 12))),
|
||||
'temperature' => max(0.05, min(0.4, (float)($controls['temperature'] ?? 0.15))),
|
||||
'temperature' => max(0.05, min(0.4, (float)($controls['temperature'] ?? 0.10))),
|
||||
];
|
||||
}
|
||||
|
||||
@@ -472,7 +473,7 @@ Input:
|
||||
|
||||
In {$locale}, produce JSON with:
|
||||
{
|
||||
"brief": "1-3 sentence description of what the user is trying to research (≤ 220 chars)",
|
||||
"brief": "1-3 sentence description of what the user is trying to research (≤ 300 chars)",
|
||||
"key_signals": ["short keywords or terms that should drive retrieval"]
|
||||
}
|
||||
PROMPT;
|
||||
@@ -483,19 +484,20 @@ PROMPT;
|
||||
if ($language === 'no' || $advocateRole !== '') {
|
||||
$resp = dbnToolsCallGpuLlm([$sysMsg, $userMsg], [
|
||||
'model' => 'dbn-legal-agent', 'json' => true,
|
||||
'temperature' => 0.1, 'max_tokens' => 400, 'timeout' => 40,
|
||||
'temperature' => 0.1, 'max_tokens' => 500, 'timeout' => 40,
|
||||
]);
|
||||
$raw = (string)($resp['choices'][0]['message']['content'] ?? '');
|
||||
} else {
|
||||
$raw = $this->azure->chatText([$sysMsg, $userMsg],
|
||||
['json' => true, 'temperature' => 0.1, 'max_tokens' => 400, 'timeout' => 30]);
|
||||
['json' => true, 'temperature' => 0.1, 'max_tokens' => 500, 'timeout' => 30]);
|
||||
}
|
||||
$json = $this->azure->decodeJsonObject($raw);
|
||||
if (is_array($json) && !empty($json['brief'])) {
|
||||
$signals = $json['key_signals'] ?? [];
|
||||
$signalText = is_array($signals) ? implode(', ', array_slice($signals, 0, 6)) : '';
|
||||
$signals = is_array($json['key_signals'] ?? null) ? array_slice($json['key_signals'], 0, 8) : [];
|
||||
$signalText = $signals ? implode(', ', $signals) : '';
|
||||
return [
|
||||
'brief' => (string)$json['brief'],
|
||||
'key_signals' => $signals,
|
||||
'detail' => sprintf('Research focus: %s%s', (string)$json['brief'], $signalText ? ' — signals: ' . $signalText : ''),
|
||||
];
|
||||
}
|
||||
@@ -505,13 +507,17 @@ PROMPT;
|
||||
|
||||
return [
|
||||
'brief' => '',
|
||||
'key_signals' => [],
|
||||
'detail' => 'Interpretation step skipped — proceeding with raw seed input.',
|
||||
];
|
||||
}
|
||||
|
||||
private function expandQueries(string $seedDescription, string $brief, int $targetCount, string $language, string $advocateRole = ''): array
|
||||
private function expandQueries(string $seedDescription, string $brief, array $keySignals, int $targetCount, string $language, string $advocateRole = ''): array
|
||||
{
|
||||
$locale = dbnToolsLanguageName($language);
|
||||
$anchorsLine = !empty($keySignals)
|
||||
? "\nKey retrieval anchors (incorporate these terms into your sub-questions where relevant):\n" . implode(', ', $keySignals) . "\n"
|
||||
: '';
|
||||
|
||||
if ($advocateRole !== '') {
|
||||
$prompt = <<<PROMPT
|
||||
@@ -521,10 +527,11 @@ Generate exactly {$targetCount} targeted sub-questions designed to find:
|
||||
2. Procedural rights and obligations the opposing party must satisfy — failures here help {$advocateRole}.
|
||||
3. Case law that exposes weaknesses in the opposing party's likely arguments.
|
||||
4. Specific articles, paragraphs, or judgments {$advocateRole}'s representative should cite.
|
||||
5. Specific documentation and procedural obligations Barnevernet or the opposing authority must fulfil — procedural or evidentiary failures that Norwegian courts have used to rule in favour of parents or children.
|
||||
|
||||
Research brief:
|
||||
{$brief}
|
||||
|
||||
{$anchorsLine}
|
||||
Raw input:
|
||||
{$seedDescription}
|
||||
|
||||
@@ -538,7 +545,8 @@ Return JSON only in {$locale}:
|
||||
Rules:
|
||||
- Exactly {$targetCount} sub-questions, no more, no fewer.
|
||||
- Every question must be answerable from Norwegian family-law, child-welfare, or ECHR/Hague sources.
|
||||
- Each question must cover a DIFFERENT angle (supporting statute, procedural right, opposing weakness, ECHR precedent, evidentiary frame).
|
||||
- Each question must cover a DIFFERENT angle (supporting statute, procedural right, opposing weakness, ECHR precedent, evidentiary frame, Barnevernet procedural obligation).
|
||||
- Each sub-question must reference a DIFFERENT legal instrument, statute section, or ECHR article — do not repeat the same §-reference or case name across sub-questions.
|
||||
- Sub-questions must be self-contained — readable without the raw input.
|
||||
- Write the questions in {$locale}.
|
||||
PROMPT;
|
||||
@@ -548,7 +556,7 @@ You are decomposing a Do Better Norge legal-research request into {$targetCount}
|
||||
|
||||
Research brief:
|
||||
{$brief}
|
||||
|
||||
{$anchorsLine}
|
||||
Raw input:
|
||||
{$seedDescription}
|
||||
|
||||
@@ -563,6 +571,7 @@ Rules:
|
||||
- Exactly {$targetCount} sub-questions, no more, no fewer.
|
||||
- Each sub-question must be answerable with Norwegian family-law, child-welfare, or ECHR sources.
|
||||
- Each sub-question must explore a DIFFERENT angle (statute interpretation, procedural fairness, ECHR case law, evidence/factual frame, comparative authority).
|
||||
- Each sub-question must reference a DIFFERENT legal instrument, statute section, or ECHR article — do not repeat the same §-reference or case name across sub-questions.
|
||||
- Sub-questions must be self-contained — readable without seeing the seed text.
|
||||
- Write the questions in {$locale}.
|
||||
PROMPT;
|
||||
@@ -667,7 +676,7 @@ PROMPT;
|
||||
'title' => 'uploaded: ' . $entry['meta']['filename'],
|
||||
'section' => null,
|
||||
'package_or_corpus' => 'Your upload',
|
||||
'excerpt' => dbnToolsExcerpt($entry['meta']['text'], 620),
|
||||
'excerpt' => dbnToolsExcerpt($entry['meta']['text'], 950),
|
||||
'chunk_text' => $entry['meta']['text'],
|
||||
'similarity' => round($sim, 4),
|
||||
'reranker_score' => null,
|
||||
@@ -709,7 +718,7 @@ PROMPT;
|
||||
'title' => (string)($chunk['document_title'] ?? $chunk['title'] ?? 'Untitled source'),
|
||||
'section' => $chunk['section_title'] ?? null,
|
||||
'package_or_corpus' => (string)($chunk['source_name'] ?? $chunk['source_type'] ?? 'Do Better Legal'),
|
||||
'excerpt' => dbnToolsExcerpt((string)($chunk['content'] ?? ''), 620),
|
||||
'excerpt' => dbnToolsExcerpt((string)($chunk['content'] ?? ''), 950),
|
||||
'chunk_text' => (string)($chunk['content'] ?? ''),
|
||||
'similarity' => $similarity,
|
||||
'reranker_score' => $rerankerScore,
|
||||
@@ -940,7 +949,8 @@ PROMPT;
|
||||
float $temperature,
|
||||
string $advocateRole = '',
|
||||
?array $priorContext = null,
|
||||
string $branchNotes = ''
|
||||
string $branchNotes = '',
|
||||
array $keySignals = []
|
||||
): array {
|
||||
$locale = dbnToolsLanguageName($language);
|
||||
|
||||
@@ -1014,41 +1024,49 @@ PROMPT;
|
||||
? '400-900 words, minimum 4 paragraphs, with clear paragraph breaks. Cover EACH sub-question above in its own paragraph.'
|
||||
: '250-450 words, 2-3 short paragraphs. Note when evidence is thin.';
|
||||
|
||||
$keySignalsLine = !empty($keySignals)
|
||||
? "\nKey retrieval signals (statutory/factual terms that drove corpus search — ground your brief in these where sources permit):\n" . implode(', ', $keySignals) . "\n"
|
||||
: '';
|
||||
|
||||
if ($advocateRole !== '') {
|
||||
$prompt = <<<PROMPT
|
||||
You are Do Better Norge Legal Tools producing a legal preparation brief in {$locale}.
|
||||
Your client: {$advocateRole}
|
||||
{$priorContextSection}
|
||||
You MUST ground every claim in the numbered sources below using inline `[n]` citation markers. Do NOT invent statutes, paragraph numbers, case names, dates, or parties.
|
||||
|
||||
User input:
|
||||
{$seedDescription}
|
||||
|
||||
Research brief:
|
||||
{$brief}
|
||||
{$keySignalsLine}
|
||||
{$subQText}
|
||||
|
||||
Sources ({$sourceCount} numbered):
|
||||
{$sourcesText}
|
||||
|
||||
Return JSON only in {$locale}:
|
||||
{
|
||||
"brief_markdown": "Partisan but factually grounded advocate brief. {$lengthGuidance} Structure: (1) {$advocateRole}'s core legal position, (2) Strongest supporting arguments with [n] citations, (3) Identified weaknesses in the opposing party's position with [n] citations, (4) Procedural rights and obligations {$advocateRole} should assert. End with a one-line caveat that this is legal preparation support, not final legal advice.",
|
||||
"client_strengths": ["3-6 strings — the strongest factual/legal points for {$advocateRole}, each anchored to at least one [n] source"],
|
||||
"opposing_weaknesses": ["2-5 strings — vulnerabilities in the opposing position supported by retrieved sources. Omit this array entirely if evidence is thin — do NOT invent weaknesses."],
|
||||
"what_we_found": "2-sentence summary of the most relevant retrieved authority for {$advocateRole}",
|
||||
"what_remains_uncertain": ["3-5 gaps where evidence is insufficient or law is unclear — be honest"],
|
||||
"next_practical_step": "one concrete action for {$advocateRole} to take next (legal filing, evidence gathering, consultation type, etc.)"
|
||||
}
|
||||
|
||||
Rules:
|
||||
- Every factual claim in `brief_markdown` must end with one or more `[n]` markers.
|
||||
- If no source supports a point, omit the point — DO NOT speculate.
|
||||
- Prefer citing statute sections (e.g. "Barneloven §43") and case names verbatim from source excerpts.
|
||||
Rules — read ALL of these before writing a single word of output:
|
||||
- Every factual claim must end with one or more `[n]` markers. A citation is valid ONLY when that source's excerpt explicitly states or directly implies the claim — do not cite a source merely because it is on the same topic.
|
||||
- Do NOT invent statute sections, case names, paragraph numbers, dates, or parties. Copy statute references (e.g. §43, §4-12) and ECHR citations verbatim from the excerpt text — never infer a section number that does not appear in an excerpt.
|
||||
- If no source supports a point, omit the point entirely — do NOT speculate.
|
||||
- Legal hierarchy: when multiple sources support a claim, prefer the highest-authority source — statute (Barneloven/Barnevernsloven/etc.) > Høyesterett decision > ECHR Grand Chamber > ECHR regular chamber > lower courts > Bufdir guidance.
|
||||
- Citation self-check: before writing each [n] marker, confirm that source [n] exists in the list and its excerpt actually supports the specific claim being made.
|
||||
- When multiple sources support the same point, cite all of them (e.g. `[2,4]`).
|
||||
- `opposing_weaknesses` must be omitted or empty when no retrieved source actually supports the identified weakness.
|
||||
- `opposing_weaknesses`: OMIT this field by default. Populate it only when ≥2 retrieved sources explicitly support the identified weakness. Do not speculate or infer weaknesses from thin evidence.
|
||||
- `brief_markdown` must be {$lengthGuidance} Structure it as: (1) {$advocateRole}'s core legal position, (2) Strongest supporting arguments with [n] citations, (3) Procedural rights and obligations {$advocateRole} should assert, (4) Opposing weaknesses — only if `opposing_weaknesses` is non-empty. End with a one-line caveat that this is legal preparation support, not final legal advice.
|
||||
- `client_strengths`: 3-6 items, each must include at least one [n] citation.
|
||||
- `what_remains_uncertain`: 3-5 honest gaps where evidence is insufficient or law is unclear.
|
||||
- Respond in {$locale}.
|
||||
- Output valid JSON only — no markdown fences around the JSON object itself.
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"brief_markdown": "<advocate brief>",
|
||||
"client_strengths": ["<strength with [n]>"],
|
||||
"opposing_weaknesses": ["<weakness with [n]>"],
|
||||
"what_we_found": "<2-sentence summary of the most relevant retrieved authority for {$advocateRole}>",
|
||||
"what_remains_uncertain": ["<gap>"],
|
||||
"next_practical_step": "<one concrete action for {$advocateRole} to take next>"
|
||||
}
|
||||
PROMPT;
|
||||
} else {
|
||||
$prompt = <<<PROMPT
|
||||
@@ -1074,8 +1092,9 @@ Return JSON only in {$locale}:
|
||||
|
||||
Rules:
|
||||
- Every factual claim in `brief_markdown` must end with one or more `[n]` markers.
|
||||
- A `[n]` citation is only valid when the excerpt for source [n] explicitly states or directly implies the claim — do not cite a source merely because it is on the same topic.
|
||||
- If no source supports a point, omit the point — DO NOT speculate.
|
||||
- Prefer pinpointing statute sections (e.g. "Barneloven §43") and case names verbatim from the source excerpts.
|
||||
- Copy statute section numbers (e.g. §43, §4-12) and ECHR case citations verbatim from the excerpt text — never rephrase or infer a section number that does not appear in an excerpt.
|
||||
- When multiple sources support the same point, cite all of them (e.g. `[2,4]`).
|
||||
- Respond in {$locale}.
|
||||
- Output valid JSON only — no markdown fences around the JSON object itself.
|
||||
@@ -1083,10 +1102,11 @@ PROMPT;
|
||||
}
|
||||
|
||||
$messages = [
|
||||
['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences.'],
|
||||
['role' => 'system', 'content' => 'You return valid JSON only. No markdown fences. Every legal claim must be supported by a source from the numbered list. Do not invent statute sections, case names, paragraph numbers, or dates. If no source supports a point, omit it entirely.'],
|
||||
['role' => 'user', 'content' => $prompt],
|
||||
];
|
||||
$opts = ['json' => true, 'temperature' => $temperature, 'max_tokens' => 3200, 'timeout' => 180];
|
||||
$synthTemp = ($advocateRole !== '') ? min($temperature, 0.20) : $temperature;
|
||||
$opts = ['json' => true, 'temperature' => $synthTemp, 'max_tokens' => 4000, 'timeout' => 180];
|
||||
|
||||
try {
|
||||
if ($engine === 'dbn_legal') {
|
||||
|
||||
@@ -48,6 +48,43 @@ require_once __DIR__ . '/includes/layout.php';
|
||||
<p class="upload-hint" data-i18n="vocabHint">Helps Whisper recognise technical terms. Not included in the transcript.</p>
|
||||
</div>
|
||||
|
||||
<details id="advancedOptions" class="expert-field">
|
||||
<summary data-i18n="advancedOptions">Advanced options</summary>
|
||||
|
||||
<div class="control-row" id="taskControl">
|
||||
<span class="control-label" data-i18n="task">Task</span>
|
||||
<label><input type="radio" name="task" value="transcribe" checked> <span data-i18n="taskTranscribe">Transcribe</span></label>
|
||||
<label><input type="radio" name="task" value="translate"> <span data-i18n="taskTranslate">Translate to English</span></label>
|
||||
</div>
|
||||
|
||||
<div class="control-row">
|
||||
<span class="control-label" data-i18n="vadFilter">VAD filter</span>
|
||||
<label><input type="checkbox" id="vadFilterCheck" name="vad_filter"> <span data-i18n="vadFilterLabel">Remove silence / noise</span></label>
|
||||
<small class="control-hint" data-i18n="vadFilterHint">Improves accuracy on recordings with long pauses.</small>
|
||||
</div>
|
||||
|
||||
<div class="control-row" id="whisperModelControl">
|
||||
<span class="control-label" data-i18n="whisperModel">Whisper model</span>
|
||||
<select id="whisperModelSelect" name="whisper_model">
|
||||
<option value="large-v3" selected>large-v3 (best)</option>
|
||||
<option value="large-v2">large-v2</option>
|
||||
<option value="medium">medium (faster)</option>
|
||||
<option value="small">small</option>
|
||||
<option value="base">base</option>
|
||||
<option value="tiny">tiny</option>
|
||||
</select>
|
||||
<small class="control-hint" data-i18n="whisperModelHint">Used when Azure/GCP unavailable. large-v3 is the default.</small>
|
||||
</div>
|
||||
|
||||
<div class="control-row" id="postModelControl">
|
||||
<span class="control-label" data-i18n="postModel">AI cleanup</span>
|
||||
<label><input type="radio" name="post_model" value="" checked> <span data-i18n="postModelNone">None</span></label>
|
||||
<label><input type="radio" name="post_model" value="gpt-4o-mini"> <span data-i18n="postModelMini">GPT-4o Mini</span></label>
|
||||
<label><input type="radio" name="post_model" value="gpt-4o"> <span data-i18n="postModelFull">GPT-4o</span></label>
|
||||
<small class="control-hint" data-i18n="postModelHint">Fixes errors, punctuation, and domain terms after transcription.</small>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<div class="upload-zone" id="audioZone" role="region" aria-label="Audio upload" data-i18n-aria="uploadAria">
|
||||
<input type="file" id="audioInput" accept="audio/*,video/mp4,video/webm" multiple aria-label="Choose audio files">
|
||||
<div id="audioPrompt" class="upload-prompt">
|
||||
|
||||
Reference in New Issue
Block a user