ba9cddf9a1
- Stripe: StripeClient.php, checkout/portal/webhook endpoints, idempotent event handling - FreeTier: tier-aware credits (free/light/pro/pro_plus), bonus_balance, hourly caps per tier - pricing.php + billing.php: 4-tier cards, 3 topups, Customer Portal, balance breakdown - Min Sak: CaseStore.php, AzureDocIntelligence.php, AzureSearchAdmin.php — per-user hybrid RAG - api/case/: upload, list, delete, ingest-callback (HMAC-auth'd from n8n) - award-survey-credits: inter-site HMAC endpoint for dobetternorge.no survey bonus - dashboard.php: tier badge, balance breakdown card, Min Sak CTA, survey CTA - KorrespondAgent + all 3 other agents: use_my_case toggle wired to dbnToolsCaseContext() - bootstrap.php: dbnToolsCaseContext(), dbnToolsIntersiteSecret(), dbnToolsCurrentTier() Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
286 lines
11 KiB
PHP
286 lines
11 KiB
PHP
<?php
|
|
declare(strict_types=1);
|
|
|
|
require_once __DIR__ . '/AzureSearchAdmin.php';
|
|
require_once __DIR__ . '/FreeTier.php';
|
|
|
|
/**
|
|
* Build Your Own Case — per-user private RAG corpus.
|
|
*
|
|
* Storage layout:
|
|
* - PDFs: /home/dobetternorge/uploads/case_{user_id}/{doc_id}.pdf (chloe filesystem)
|
|
* - Vectors: Azure AI Search index "case-{user_id}" (hybrid BM25 + vector + nb.microsoft analyzer)
|
|
* - Metadata: MySQL case_documents rows
|
|
*
|
|
* Family plan note: members share owner's caveau_client_id but each query is still scoped
|
|
* to the OWNER's user_id (and thus their index). Family members' UI-bound user_id resolves
|
|
* to owner_user_id via caseResolveClientId.
|
|
*/
|
|
final class CaseStore
|
|
{
|
|
/**
 * Base directory under which the per-user case folders are created.
 *
 * Reads the DBN_CASE_STORAGE_ROOT environment variable; when it is unset or
 * empty the built-in default path is used. Trailing slashes are stripped so
 * callers can append "/case_{id}" directly.
 */
public static function storageRoot(): string
{
    $configured = getenv('DBN_CASE_STORAGE_ROOT');

    // getenv() yields false when the variable does not exist at all.
    if ($configured === false || $configured === '') {
        $configured = '/home/dobetternorge/uploads';
    }

    return rtrim($configured, '/');
}
|
|
|
|
/**
 * Map a session user to the user_id whose case corpus they should work with.
 *
 * Looks up case_seats for a row where the user is a member whose seat has
 * been accepted and not revoked. If such a row carries a positive
 * owner_user_id, that owner id is returned; in every other case (no row,
 * or the user is not a member) the given $userId is returned unchanged.
 */
public static function caseResolveClientId(int $userId): int
{
    $lookup = dbnmDb()->prepare(
        'SELECT owner_user_id FROM case_seats
         WHERE member_user_id = ? AND accepted_at IS NOT NULL AND revoked_at IS NULL
         LIMIT 1'
    );
    $lookup->execute([$userId]);

    // fetchColumn() gives false when there is no matching seat; treat that as 0.
    $seatOwner = (int)($lookup->fetchColumn() ?: 0);
    if ($seatOwner > 0) {
        return $seatOwner;
    }

    return $userId;
}
|
|
|
|
/**
 * Make sure the storage directory and the Azure Search index exist for a user.
 *
 * Safe to call repeatedly: the directory is only created when missing, and
 * index creation is delegated to AzureSearchAdmin::ensureUserIndex().
 * Neither step throws from here — failures are written to the error log so
 * that the caller can still proceed (a later file move will fail visibly if
 * the directory is really unavailable).
 *
 * @return array Keys: 'storage_path' (the per-user directory path) and
 *               'index_name' (the value returned by ensureUserIndex(), or ''
 *               when index provisioning failed).
 */
public static function caseProvisionUser(int $userId): array
{
    $rootDir = self::storageRoot() . '/case_' . $userId;

    // 0750: owner rwx, group rx, world none.
    // mkdir() may fail simply because a concurrent request created the
    // directory first, so re-check is_dir() before reporting an error.
    if (!is_dir($rootDir) && !@mkdir($rootDir, 0750, true) && !is_dir($rootDir)) {
        error_log('[CaseStore::caseProvisionUser] could not create storage dir: ' . $rootDir);
    }

    $indexName = '';
    try {
        $admin = new AzureSearchAdmin();
        $indexName = $admin->ensureUserIndex($userId);
    } catch (Throwable $e) {
        // Best effort: the upload can still be stored; indexing is retried elsewhere or fails later.
        error_log('[CaseStore::caseProvisionUser] index create failed: ' . $e->getMessage());
    }

    return ['storage_path' => $rootDir, 'index_name' => $indexName];
}
|
|
|
|
/**
 * Register an uploaded file: check quota, insert the case_documents row,
 * move the file into the user's case directory and bump storage usage.
 *
 * The file is stored as "{storage dir}/{doc_id}.pdf"; the sanitized original
 * filename is kept only as metadata.
 *
 * @param int    $userId    Owner of the document (quota is read for this user).
 * @param string $filename  Original client filename; sanitized before storing.
 * @param string $tempPath  Path of the uploaded temp file to move.
 * @param int    $sizeBytes Size of the file in bytes, as reported by the caller.
 *
 * @return array Keys: 'doc_id', 'filename', 'storage_path', 'size_bytes'.
 *
 * @throws RuntimeException When the size is negative, the tier has no storage
 *                          quota, the file does not fit in the remaining
 *                          quota, or the file cannot be stored on disk.
 */
public static function registerUpload(int $userId, string $filename, string $tempPath, int $sizeBytes): array
{
    // A negative size would sail through the quota comparison below and then
    // *decrease* storage_used_bytes, so reject it up front.
    if ($sizeBytes < 0) {
        throw new RuntimeException('Ugyldig filstørrelse.');
    }

    // Quota check
    $detail = FreeTier::balanceDetail($userId);
    $quota  = (int)$detail['storage_quota_bytes'];
    $used   = (int)$detail['storage_used_bytes'];
    if ($quota === 0) {
        throw new RuntimeException('Min Sak er ikke tilgjengelig på gratis-nivå. Oppgrader for å laste opp dokumenter.');
    }
    if ($used + $sizeBytes > $quota) {
        $remainMb = max(0, ($quota - $used) / 1048576);
        throw new RuntimeException(sprintf('Du har %.1f MB lagring igjen, men filen er %.1f MB.', $remainMb, $sizeBytes / 1048576));
    }

    // Provision (idempotent)
    $bundle = self::caseProvisionUser($userId);
    $dir    = $bundle['storage_path'];

    // Sanitize filename: anything outside [A-Za-z0-9._-] becomes "_", capped at 100 chars.
    $safeName = preg_replace('/[^A-Za-z0-9._\-]/', '_', $filename);
    $safeName = mb_substr((string)$safeName, 0, 100);

    // Insert first so the auto-increment id can be used as the on-disk filename.
    $db = dbnmDb();
    $db->prepare(
        'INSERT INTO case_documents
         (user_id, filename, storage_path, size_bytes, ocr_status, qdrant_collection, azure_index_name, uploaded_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, NOW())'
    )->execute([
        $userId, $safeName, '', $sizeBytes, 'pending',
        'case_user_' . $userId,
        AzureSearchAdmin::indexName($userId),
    ]);
    $docId = (int)$db->lastInsertId();

    $finalPath = $dir . '/' . $docId . '.pdf';
    if (!@rename($tempPath, $finalPath)) {
        // Fallback: copy + unlink (rename can fail across filesystems).
        if (!@copy($tempPath, $finalPath)) {
            $db->prepare('DELETE FROM case_documents WHERE id = ?')->execute([$docId]);
            throw new RuntimeException('Kunne ikke lagre filen på serveren.');
        }
        @unlink($tempPath);
    }
    @chmod($finalPath, 0640);

    // Save final path + bump storage usage. If either statement throws, undo
    // the file move and the row so no orphaned file or half-initialised
    // document is left behind, then let the original error propagate.
    try {
        $db->prepare('UPDATE case_documents SET storage_path = ? WHERE id = ?')
            ->execute([$finalPath, $docId]);
        $db->prepare('UPDATE user_tool_credits SET storage_used_bytes = storage_used_bytes + ? WHERE user_id = ?')
            ->execute([$sizeBytes, $userId]);
    } catch (Throwable $e) {
        @unlink($finalPath);
        try {
            $db->prepare('DELETE FROM case_documents WHERE id = ?')->execute([$docId]);
        } catch (Throwable $cleanupError) {
            error_log('[CaseStore::registerUpload] cleanup of doc ' . $docId . ' failed: ' . $cleanupError->getMessage());
        }
        throw $e;
    }

    return [
        'doc_id'       => $docId,
        'filename'     => $safeName,
        'storage_path' => $finalPath,
        'size_bytes'   => $sizeBytes,
    ];
}
|
|
|
|
/**
 * Notify the n8n ingest webhook that a new document is ready for OCR + indexing.
 *
 * The webhook URL comes from N8N_CASE_INGEST_WEBHOOK. The request is a JSON
 * POST carrying doc_id, user_id and the callback URL, with a 5 second timeout.
 *
 * @return bool true only when the request completed without a cURL error and
 *              the response status was 2xx; false when the webhook is not
 *              configured, the request could not be built, or delivery failed.
 *              Failures are logged; nothing is thrown.
 */
public static function caseEnqueueIngest(int $docId, int $userId): bool
{
    $webhookUrl = getenv('N8N_CASE_INGEST_WEBHOOK') ?: '';
    if ($webhookUrl === '') {
        error_log('[CaseStore] N8N_CASE_INGEST_WEBHOOK not configured — leaving doc ' . $docId . ' as pending');
        return false;
    }

    $payload = json_encode([
        'doc_id'       => $docId,
        'user_id'      => $userId,
        'callback_url' => 'https://tools.dobetternorge.no/api/case/ingest-callback.php',
    ], JSON_UNESCAPED_UNICODE);
    if ($payload === false) {
        error_log('[CaseStore::caseEnqueueIngest] could not encode payload for doc ' . $docId);
        return false;
    }

    // curl_init() returns false on failure; passing that on would raise a
    // TypeError instead of the boolean this method promises.
    $ch = curl_init($webhookUrl);
    if ($ch === false) {
        error_log('[CaseStore::caseEnqueueIngest] curl_init failed for doc ' . $docId);
        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => 5,
    ]);
    curl_exec($ch);
    $errno  = curl_errno($ch);
    $status = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
    curl_close($ch);

    $delivered = $errno === 0 && $status >= 200 && $status < 300;
    if (!$delivered) {
        // Without this the document silently stays "pending" with no hint why.
        error_log('[CaseStore::caseEnqueueIngest] webhook failed for doc ' . $docId
            . ' (curl errno ' . $errno . ', HTTP ' . $status . ')');
    }

    return $delivered;
}
|
|
|
|
/**
 * Run a hybrid (text + vector) search against one user's case index.
 *
 * The query is embedded via embedQuery() and then handed, together with the
 * raw query text, to AzureSearchAdmin::hybridSearch().
 *
 * IMPORTANT: $effectiveOwnerUserId must already be the effective owner id
 * (see caseResolveClientId); it is passed to the search layer as-is and is
 * what selects whose documents are searched.
 *
 * Never throws: any failure is logged and yields an empty result.
 *
 * @return array List of hits, each with the keys chunk_text, filename, page,
 *               doc_id, doc_type, score and reranker_score. Empty when the
 *               input is invalid, embedding fails, or the search errors out.
 */
public static function caseHybridSearch(int $effectiveOwnerUserId, string $query, int $k = 5): array
{
    if ($effectiveOwnerUserId <= 0) {
        return [];
    }
    if (trim($query) === '') {
        return [];
    }

    try {
        $embedding = self::embedQuery($query);
        if (empty($embedding)) {
            return [];
        }

        $search   = new AzureSearchAdmin();
        $response = $search->hybridSearch($effectiveOwnerUserId, $query, $embedding, $k);

        // Normalise every raw hit to a fixed shape with safe defaults.
        $results = [];
        foreach (($response['value'] ?? []) as $row) {
            $results[] = [
                'chunk_text'     => (string)($row['chunk_text'] ?? ''),
                'filename'       => (string)($row['filename'] ?? ''),
                'page'           => (int)($row['page'] ?? 0),
                'doc_id'         => (int)($row['doc_id'] ?? 0),
                'doc_type'       => (string)($row['doc_type'] ?? ''),
                'score'          => (float)($row['@search.score'] ?? 0),
                'reranker_score' => (float)($row['@search.rerankerScore'] ?? 0),
            ];
        }

        return $results;
    } catch (Throwable $err) {
        error_log('[CaseStore::caseHybridSearch] failed: ' . $err->getMessage());
        return [];
    }
}
|
|
|
|
/**
 * Embed a string via the LiteLLM embeddings endpoint
 * (model "azure-text-embedding-3-small").
 *
 * Configuration comes from the environment:
 *  - LITELLM_BASE_URL: endpoint base (defaults to the internal LiteLLM host).
 *  - LITELLM_API_KEY:  bearer token. REQUIRED — there is deliberately no
 *    built-in fallback, because credentials must never live in source code.
 *
 * The input is truncated to 8000 characters before sending.
 *
 * @return array The embedding as a list of floats, or [] when the key is
 *               missing, the request cannot be built, the endpoint does not
 *               answer with HTTP 200, or the response has no embedding.
 */
public static function embedQuery(string $text): array
{
    $base = rtrim(getenv('LITELLM_BASE_URL') ?: 'http://10.0.1.10:4000', '/');

    $key = getenv('LITELLM_API_KEY') ?: '';
    if ($key === '') {
        // Fail closed rather than falling back to a secret baked into the codebase.
        error_log('[CaseStore::embedQuery] LITELLM_API_KEY not configured — cannot embed query');
        return [];
    }

    $payload = json_encode([
        'model' => 'azure-text-embedding-3-small',
        'input' => mb_substr($text, 0, 8000),
    ], JSON_UNESCAPED_UNICODE);
    if ($payload === false) {
        // Typically malformed UTF-8 in the query text.
        error_log('[CaseStore::embedQuery] could not encode request: ' . json_last_error_msg());
        return [];
    }

    $ch = curl_init($base . '/v1/embeddings');
    if ($ch === false) {
        error_log('[CaseStore::embedQuery] curl_init failed');
        return [];
    }

    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => [
            'Authorization: Bearer ' . $key,
            'Content-Type: application/json',
        ],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => 15,
    ]);
    $raw    = curl_exec($ch);
    $status = (int)curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
    curl_close($ch);

    if ($status !== 200 || !is_string($raw)) {
        return [];
    }

    $data = json_decode($raw, true);
    $vec  = $data['data'][0]['embedding'] ?? null;

    return is_array($vec) ? array_map('floatval', $vec) : [];
}
|
|
|
|
/**
 * Render search hits as a text section for injection into an agent's system prompt.
 *
 * Each chunk must provide the keys filename, page, doc_type and chunk_text
 * (the shape produced by caseHybridSearch). Entries are numbered from the
 * array key + 1, the doc type is shown only when non-empty, and each chunk
 * text is cut to 1500 characters.
 *
 * @return string The formatted section, or '' when there are no chunks.
 */
public static function formatChunksForPrompt(array $chunks): string
{
    if ($chunks === []) {
        return '';
    }

    $parts = ["\n\n## Brukerens egne dokumenter (private sak):\n"];

    foreach ($chunks as $index => $chunk) {
        $ordinal    = $index + 1;
        $name       = $chunk['filename'];
        $pageNo     = $chunk['page'];
        $typeSuffix = '';
        if ($chunk['doc_type'] !== '') {
            $typeSuffix = ' · ' . $chunk['doc_type'];
        }
        $excerpt = mb_substr($chunk['chunk_text'], 0, 1500);

        $parts[] = sprintf("\n[%d] %s · side %d%s\n%s\n", $ordinal, $name, $pageNo, $typeSuffix, $excerpt);
    }

    $parts[] = "\n— slutt på brukerens dokumenter —\n";

    return implode('', $parts);
}
|
|
|
|
/**
 * Soft-delete a document: mark the row deleted, remove its vectors from the
 * Azure index, refund the storage quota and remove the file from disk.
 *
 * The soft-delete is performed as a conditional UPDATE (only rows that are
 * still not deleted) and acts as an atomic claim: when two requests race to
 * delete the same document, only the one whose UPDATE affected a row goes on
 * to refund storage, so the quota can never be refunded twice.
 *
 * Azure removal is best effort — a failure there is logged and does not
 * abort the delete.
 *
 * @return bool true when this call deleted the document; false when no
 *              matching, not-yet-deleted document exists for this user.
 */
public static function deleteDocument(int $userId, int $docId): bool
{
    $db   = dbnmDb();
    $stmt = $db->prepare('SELECT id, storage_path, size_bytes FROM case_documents WHERE id = ? AND user_id = ? AND deleted_at IS NULL LIMIT 1');
    $stmt->execute([$docId, $userId]);
    $doc = $stmt->fetch(PDO::FETCH_ASSOC);
    if (!$doc) {
        return false;
    }

    // Mark deleted in DB — conditional, so a concurrent delete of the same
    // document affects zero rows here and bails out before any refund.
    $claim = $db->prepare('UPDATE case_documents SET deleted_at = NOW() WHERE id = ? AND user_id = ? AND deleted_at IS NULL');
    $claim->execute([$docId, $userId]);
    if ($claim->rowCount() === 0) {
        return false;
    }

    // Remove from Azure index
    try {
        $admin = new AzureSearchAdmin();
        $admin->deleteDoc($userId, $docId);
    } catch (Throwable $e) {
        error_log('[CaseStore::deleteDocument] azure delete: ' . $e->getMessage());
    }

    // Refund storage (GREATEST guards against the counter going negative).
    $db->prepare('UPDATE user_tool_credits SET storage_used_bytes = GREATEST(0, storage_used_bytes - ?) WHERE user_id = ?')
        ->execute([(int)$doc['size_bytes'], $userId]);

    // Remove file from disk
    if (!empty($doc['storage_path']) && is_file($doc['storage_path'])) {
        @unlink($doc['storage_path']);
    }

    return true;
}
|
|
|
|
/**
 * List a user's documents that have not been soft-deleted, newest upload first.
 *
 * @return array Rows from case_documents as associative arrays (id, filename,
 *               size_bytes, page_count, doc_type, detected_date, ocr_status,
 *               ocr_error, uploaded_at, indexed_at); [] when there are none.
 */
public static function listDocs(int $userId): array
{
    $query = dbnmDb()->prepare(
        'SELECT id, filename, size_bytes, page_count, doc_type, detected_date, ocr_status, ocr_error, uploaded_at, indexed_at
         FROM case_documents WHERE user_id = ? AND deleted_at IS NULL ORDER BY uploaded_at DESC'
    );
    $query->execute([$userId]);

    $rows = $query->fetchAll(PDO::FETCH_ASSOC);
    if (!$rows) {
        // Covers both an empty result set and a false return.
        return [];
    }

    return $rows;
}
|
|
}
|