Commit multilingual editorial frontend work

2026-04-07 07:36:27 +02:00
parent 0eae030142
commit 77f57cf528
119 changed files with 5255 additions and 220 deletions
@@ -1,6 +1,10 @@
 # build output
 dist/
 dist-*.tar.gz
 dist-*.tar
 release-backups/
 tmp/
 images/facebook-060426/
 # generated types
 .astro/
@@ -6,7 +6,9 @@
    "node": ">=22.12.0"
  },
  "scripts": {
    "predev": "node scripts/sync-family-lab.mjs",
    "dev": "astro dev",
    "prebuild": "node scripts/sync-family-lab.mjs",
    "build": "astro build",
    "preview": "astro preview",
    "astro": "astro"
@@ -0,0 +1,364 @@
 #!/usr/bin/env python3
 from __future__ import annotations
 import argparse
 import csv
 import hashlib
 import json
 import re
 from dataclasses import asdict, dataclass
 from pathlib import Path
 from typing import Iterable
 # File suffixes (compared lower-cased) that build_rows treats as images;
 # anything else in the source folder is skipped.
 SUPPORTED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
 # Whole-stem overrides used by humanize_stem: the cleaned, lower-cased stem is
 # looked up here *before* underscores/dashes are turned into spaces, so keys
 # such as "jr_nova" match the raw filename stem.
 TOKEN_REPLACEMENTS = {
    "jr": "Dave Jr",
    "nova": "Villanova",
    "meanddave": "Dave And Dave Jr",
    "meanddave1": "Dave And Dave Jr",
    "meandjr": "Dave And Dave Jr",
    "meandjr2": "Dave And Dave Jr",
    "withdavejr": "With Dave Jr",
    "withdavejr1": "With Dave Jr",
    "witherin": "With Erin",
    "withkevin": "With Kevin",
    "withmom": "With Mom",
    "withgrandma": "With Grandma",
    "jr_brigid": "Dave Jr And Brigid",
    "jr_grandma": "Dave Jr And Grandma",
    "jr_nova": "Dave Jr Villanova",
    "jr_trivia": "Dave Jr Trivia",
    "four_jr": "Dave Jr At Four",
    "four_jr2": "Dave Jr At Four",
    "shirt_nova": "Shirt Villanova",
    "me_nova": "Me Villanova",
    "citizen": "Citizen",
    # NOTE(review): presumably matches a misspelled source filename - confirm
    # before "correcting" the key.
    "meseum": "Museum",
    "me_trivia": "Me Trivia",
 }
 # Matches a stem that starts with six or more digits followed by an underscore;
 # is_named_file treats such stems as not human-named.
 KNOWN_FACEBOOK_PREFIX_RE = re.compile(r"^\d{6,}_")
 # (keyword set, description) pairs.
 # NOTE(review): no function visible in this file reads DESCRIPTION_RULES;
 # infer_description uses its own inline token checks. Confirm whether this
 # table is still needed.
 DESCRIPTION_RULES = [
    ({"villanova", "nova", "jr"}, "Dave Jr in Villanova gear."),
    ({"meanddave", "meandjr", "withdavejr"}, "Dave with Dave Jr."),
    ({"dave", "dad", "me"}, "Family moment with Dave."),
    ({"jr"}, "Family snapshot featuring Dave Jr."),
    ({"grandma", "mom", "kevin", "erin", "noreen"}, "Family portrait moment."),
    ({"gift", "gifts"}, "Gift-opening or celebration moment."),
    ({"school"}, "School-related family snapshot."),
    ({"thanksgiving"}, "Holiday family gathering moment."),
    ({"trivia"}, "Trivia and music related family snapshot."),
    ({"poland"}, "Travel memory from Poland."),
    ({"museum"}, "Museum or outing snapshot."),
    ({"fireman"}, "Dress-up or costume moment."),
 ]
@dataclass
 class MediaRow:
    """One manifest entry describing a single image file in the source folder."""
    # File name including extension (no directory part).
    filename: str
    # URL prefix (trailing slash removed) + "/" + filename.
    relative_url: str
    # Human-readable title derived from the file stem by humanize_stem.
    title: str
    # Starter caption produced by infer_description.
    description: str
    # Sorted, de-duplicated tag names produced by infer_tags.
    tags: list[str]
    # Hex SHA-1 digest of the file contents.
    sha1: str
    # File size as reported by stat().st_size.
    file_size: int
    # Name of the first file seen with the same SHA-1, or None for the first one.
    duplicate_of: str | None = None
    # Result of is_named_file for this file and title.
    named_file: bool = False
 def slugify(text: str) -> str:
     """Return a lower-case, dash-separated slug of ``text``.

     After lower-casing, every run of characters outside ``a-z0-9`` collapses
     into a single dash; dashes at either end are removed.
     """
     dashed = re.sub(r"[^a-z0-9]+", "-", text.lower())
     return dashed.strip("-")
 def humanize_stem(stem: str) -> str:
     """Turn a file stem into a display title.

     Steps, in order:
       1. trim whitespace and drop any leading run of digits;
       2. drop everything from the first ``_`` followed by six or more digits;
       3. remove the literal copy markers ``(1)`` through ``(7)``;
       4. if the remaining lower-cased stem is a key of ``TOKEN_REPLACEMENTS``,
          return that replacement as-is;
       5. otherwise split on underscores, dashes and whitespace and capitalise
          each word, expanding ``jr``/``jr.`` to ``Dave Jr`` and
          ``nova``/``villanova`` to ``Villanova``.

     Returns ``"Untitled Photo"`` when nothing is left after cleaning.
     """
     base = re.sub(r"^\d+", "", stem.strip())
     base = re.sub(r"_\d{6,}.*$", "", base)
     # Markers are removed one after another, lowest number first.
     for copy_number in range(1, 8):
         base = base.replace(f"({copy_number})", "")

     key = base.lower().strip()
     if key in TOKEN_REPLACEMENTS:
         return TOKEN_REPLACEMENTS[key]

     spaced = re.sub(r"\s+", " ", key.replace("_", " ").replace("-", " ")).strip()
     if not spaced:
         return "Untitled Photo"

     # Words that expand to a fixed phrase; every other word is capitalised.
     expansions = {
         "jr": "Dave Jr",
         "jr.": "Dave Jr",
         "nova": "Villanova",
         "villanova": "Villanova",
     }
     return " ".join(expansions.get(word, word.capitalize()) for word in spaced.split())
 def is_named_file(filename: str, title: str) -> bool:
     """Report whether ``filename`` looks like it was named by a person.

     Returns ``False`` when the title is the ``"Untitled Photo"`` placeholder,
     when the lower-cased stem starts with ``unnamed``, or when the stem matches
     ``KNOWN_FACEBOOK_PREFIX_RE``; otherwise ``True``.
     """
     stem = Path(filename).stem.lower()
     looks_generated = (
         title == "Untitled Photo"
         or stem.startswith("unnamed")
         or KNOWN_FACEBOOK_PREFIX_RE.match(stem) is not None
     )
     return not looks_generated
 def infer_tags(filename: str, title: str) -> list[str]:
     """Guess tags for a photo from its file name and title.

     The file name and title are joined with a space and lower-cased; each rule
     fires when any of its keywords occurs as a *substring* of that text (so
     ``"me "`` also matches inside ``"home movie"``). Returns the sorted set of
     tags from all rules that fired, or ``["family"]`` when none did.
     """
     haystack = f"{filename} {title}".lower()
     # (keywords, tags contributed when any keyword is present)
     keyword_rules = (
         (("jr", "dave jr", "baby"), ("child", "dave-jr")),
         (("me ", " me", "meand", " meand", "dave and"), ("dave",)),
         (("villanova", "nova", "rangers"), ("villanova", "sports")),
         (("gift", "gifts", "thanksgiving", "christmas", "may17"), ("holiday",)),
         (("grandma", "mom", "kevin", "erin", "noreen", "brigid"), ("family",)),
         (("trivia", "music", "marvin gaye"), ("music",)),
         (("school", "museum", "poland", "citizen"), ("outing",)),
         (("fireman", "dress", "beret", "shirt", "glasses"), ("dress-up",)),
         (("haircut", "slide", "tree", "balls", "eating"), ("playtime",)),
     )
     found: set[str] = set()
     for keywords, labels in keyword_rules:
         if any(keyword in haystack for keyword in keywords):
             found.update(labels)
     return sorted(found) if found else ["family"]
 def infer_description(filename: str, title: str) -> str:
     """Pick a starter caption for a photo from its file name and title.

     The file name and title are joined with a space and lower-cased. Rules are
     tried in order and the first one with a keyword occurring as a substring
     of that text wins. When no rule matches, the caption depends on
     ``is_named_file``: files that do not look human-named get the
     "awaiting fuller annotation" wording.
     """
     haystack = f"{filename} {title}".lower()
     named_file = is_named_file(filename, title)
     # Order matters: earlier rules take precedence over later ones.
     ordered_rules = (
         (("meanddave", "me and dave", "meandjr", "dave and dave jr"), "Dave with Dave Jr."),
         (("withdavejr", "with dave jr"), "Family snapshot with Dave Jr."),
         (("jr_nova", "villanova", "shirt villanova", "nova dave"), "Villanova-flavored family snapshot."),
         (("gift", "gifts", "thanksgiving", "may17"), "Family celebration moment."),
         (("fireman", "dress", "beret", "glasses"), "Dress-up family snapshot."),
         (("school", "museum", "poland", "citizen"), "Family outing or travel snapshot."),
         (("haircut", "slide", "tree", "balls", "eating"), "Everyday family moment."),
         (("grandma", "mom", "kevin", "erin", "noreen", "brigid"), "Family portrait moment."),
         (("jr", "dave jr"), "Family snapshot featuring Dave Jr."),
     )
     for keywords, caption in ordered_rules:
         if any(keyword in haystack for keyword in keywords):
             return caption
     if named_file:
         return "Family archive snapshot."
     return "Family archive snapshot awaiting fuller annotation."
 def sha1_for_file(path: Path) -> str:
     """Return the hex SHA-1 digest of the file at ``path``.

     The file is read in 1 MiB chunks so it is never loaded whole.
     """
     chunk_size = 1024 * 1024
     hasher = hashlib.sha1()
     with path.open("rb") as stream:
         while block := stream.read(chunk_size):
             hasher.update(block)
     return hasher.hexdigest()
 def build_rows(source_dir: Path, url_prefix: str) -> list[MediaRow]:
     """Scan ``source_dir`` and return one ``MediaRow`` per supported image.

     Entries are visited in case-insensitive name order. Anything that is not a
     regular file, or whose lower-cased suffix is not in
     ``SUPPORTED_EXTENSIONS``, is skipped. The first file seen with a given
     SHA-1 is the original; later files with the same digest carry its name in
     ``duplicate_of``.
     """
     base_url = url_prefix.rstrip("/")
     original_name_by_digest: dict[str, str] = {}
     manifest: list[MediaRow] = []

     entries = sorted(source_dir.iterdir(), key=lambda entry: entry.name.lower())
     for entry in entries:
         is_image = entry.is_file() and entry.suffix.lower() in SUPPORTED_EXTENSIONS
         if not is_image:
             continue

         title = humanize_stem(entry.stem)
         description = infer_description(entry.name, title)
         tags = infer_tags(entry.name, title)
         digest = sha1_for_file(entry)

         if digest in original_name_by_digest:
             duplicate_of = original_name_by_digest[digest]
         else:
             # First occurrence of this content: remember it as the original.
             duplicate_of = None
             original_name_by_digest[digest] = entry.name

         manifest.append(
             MediaRow(
                 filename=entry.name,
                 relative_url=f"{base_url}/{entry.name}",
                 title=title,
                 description=description,
                 tags=tags,
                 sha1=digest,
                 file_size=entry.stat().st_size,
                 duplicate_of=duplicate_of,
                 named_file=is_named_file(entry.name, title),
             )
         )
     return manifest
 def write_json(rows: Iterable[MediaRow], target: Path) -> None:
     """Write ``rows`` to ``target`` as a JSON array of objects.

     Each row is converted with ``dataclasses.asdict``; the output is indented
     by two spaces, ASCII-escaped, and written as UTF-8.
     """
     serialised = json.dumps(list(map(asdict, rows)), indent=2, ensure_ascii=True)
     target.write_text(serialised, encoding="utf-8")
 def write_csv(rows: Iterable[MediaRow], target: Path) -> None:
     """Write ``rows`` to ``target`` as a UTF-8 CSV file with a header row.

     Columns follow the ``MediaRow`` field order. The ``tags`` list is stored
     as an ASCII-escaped JSON string so it fits in a single cell.
     """
     columns = (
         "filename",
         "relative_url",
         "title",
         "description",
         "tags",
         "sha1",
         "file_size",
         "duplicate_of",
         "named_file",
     )

     def flattened_records():
         # Yield one plain dict per row with the tag list serialised to JSON.
         for row in rows:
             record = asdict(row)
             record["tags"] = json.dumps(record["tags"], ensure_ascii=True)
             yield record

     with target.open("w", newline="", encoding="utf-8") as handle:
         writer = csv.DictWriter(handle, fieldnames=columns)
         writer.writeheader()
         writer.writerows(flattened_records())
 def sql_escape(value: str) -> str:
     """Escape ``value`` for use inside a single-quoted SQL string literal.

     Each backslash is doubled and each single quote is doubled; all other
     characters pass through unchanged.
     """
     return value.translate({ord("\\"): "\\\\", ord("'"): "''"})
 def choose_cover_url(rows: list[MediaRow]) -> str:
     """Pick the URL of the image to use as the album cover.

     Preference order:
       1. among human-named, non-duplicate rows: titles containing
          "dave and dave jr" first, then titles containing "dave jr", then the
          lowest lower-cased file name;
       2. otherwise the first non-duplicate row;
       3. otherwise the first row.

     Raises ``IndexError`` when ``rows`` is empty.
     """

     def rank(row):
         # False sorts before True, so a matching title ranks ahead.
         lowered_title = row.title.lower()
         return (
             "dave and dave jr" not in lowered_title,
             "dave jr" not in lowered_title,
             row.filename.lower(),
         )

     originals = [row for row in rows if not row.duplicate_of]
     named_originals = [row for row in originals if row.named_file]
     if named_originals:
         return min(named_originals, key=rank).relative_url
     if originals:
         return originals[0].relative_url
     return rows[0].relative_url
 def write_sql(rows: list[MediaRow], target: Path, slug: str, album_title: str, cover_url: str, unique_only: bool) -> None:
     """Write a SQL seed script for the album and its images to ``target``.

     The script upserts one ``media_folders`` row (``INSERT ... ON DUPLICATE KEY
     UPDATE``), resolves its id into ``@folder_id``, then upserts one ``media``
     row per image. When ``unique_only`` is true, rows with ``duplicate_of`` set
     are left out. All interpolated values go through ``sql_escape``.
     """

     def quoted(value: str) -> str:
         # One indented, single-quoted, comma-terminated VALUES line.
         return f"  '{sql_escape(value)}',"

     selected = [row for row in rows if not row.duplicate_of] if unique_only else rows

     statements = [
         "-- Generated by build_facebook_album_manifest.py",
         f"SET @folder_slug = '{sql_escape(slug)}';",
         f"SET @cover_url = '{sql_escape(cover_url)}';",
         "",
         "INSERT INTO media_folders (slug, name_en, name_nb, description_en, description_nb, cover_image_url, active, sort_order)",
         "VALUES (",
         "  @folder_slug,",
         # The same title is used for both the English and Norwegian name.
         quoted(album_title),
         quoted(album_title),
         "  'Imported Facebook family archive with starter captions generated from filenames. Descriptions should be reviewed and enriched.',",
         "  'Importert familiearkiv fra Facebook med starttekster generert fra filnavn. Beskrivelsene bor gjennomgaas og forbedres.',",
         "  @cover_url,",
         "  1,",
         "  0",
         ")",
         "ON DUPLICATE KEY UPDATE",
         "  name_en = VALUES(name_en),",
         "  name_nb = VALUES(name_nb),",
         "  description_en = VALUES(description_en),",
         "  description_nb = VALUES(description_nb),",
         "  cover_image_url = VALUES(cover_image_url);",
         "",
         "SET @folder_id = (SELECT id FROM media_folders WHERE slug = @folder_slug LIMIT 1);",
         "",
     ]

     for row in selected:
         statements += [
             "INSERT INTO media (type, filename, url, title, description, category, folder_id, tags, credit, created_at)",
             "VALUES (",
             "  'image',",
             quoted(row.filename),
             quoted(row.relative_url),
             quoted(row.title),
             quoted(row.description),
             "  'family',",
             "  @folder_id,",
             quoted(json.dumps(row.tags, ensure_ascii=True)),
             "  'Facebook archive import',",
             "  NOW()",
             ")",
             "ON DUPLICATE KEY UPDATE",
             "  title = VALUES(title),",
             "  description = VALUES(description),",
             "  folder_id = VALUES(folder_id),",
             "  tags = VALUES(tags),",
             "  credit = VALUES(credit);",
             "",
         ]

     # Every line, including the last, is newline-terminated.
     target.write_text("".join(f"{line}\n" for line in statements), encoding="utf-8")
 def main() -> None:
     """Build the JSON, CSV and SQL manifests for one photo folder.

     Command-line options:
       --source              image folder to scan (required)
       --slug                media folder slug; also the output file-name stem
       --title               album title written into the SQL seed
       --url-prefix          URL prefix under which the images are deployed
       --output-dir          directory that receives the three generated files
       --include-duplicates  keep byte-identical duplicates in the SQL seed

     Raises:
         SystemExit: with status 2 (via ``parser.error``) when ``--source`` is
             not an existing directory, or with a message when the folder
             contains no supported images.
     """
     parser = argparse.ArgumentParser(description="Build a JSON/CSV/SQL manifest for a photo folder.")
     parser.add_argument("--source", required=True, help="Path to the source image folder.")
     parser.add_argument("--slug", default="facebook-060426", help="Target media folder slug.")
     parser.add_argument("--title", default="Facebook Archive / June 4 2026", help="Folder title.")
     parser.add_argument("--url-prefix", default="/uploads/facebook-060426", help="URL prefix for deployed images.")
     # NOTE(review): this default is an absolute Windows path; on other
     # machines pass --output-dir explicitly.
     parser.add_argument("--output-dir", default="C:\\wamp64\\www\\davegilligan-new\\tmp", help="Directory for generated files.")
     parser.add_argument("--include-duplicates", action="store_true", help="Include duplicate hashes in the SQL seed.")
     args = parser.parse_args()

     # Validate the input folder before touching the filesystem, so a typo in
     # --source yields a usage error instead of a traceback and does not leave
     # an empty output directory behind.
     source_dir = Path(args.source)
     if not source_dir.is_dir():
         parser.error(f"--source is not a directory: {source_dir}")

     output_dir = Path(args.output_dir)
     output_dir.mkdir(parents=True, exist_ok=True)

     rows = build_rows(source_dir, args.url_prefix)
     if not rows:
         raise SystemExit("No supported images found.")

     cover_url = choose_cover_url(rows)
     stem = slugify(args.slug)
     write_json(rows, output_dir / f"{stem}-manifest.json")
     write_csv(rows, output_dir / f"{stem}-manifest.csv")
     write_sql(
         rows,
         output_dir / f"{stem}-seed.sql",
         args.slug,
         args.title,
         cover_url,
         unique_only=not args.include_duplicates,
     )

     duplicate_count = sum(1 for row in rows if row.duplicate_of)
     # The seed holds every row only when duplicates were explicitly requested.
     sql_row_count = len(rows) if args.include_duplicates else len(rows) - duplicate_count
     print(f"Generated manifest for {len(rows)} files")
     print(f"Duplicate files detected: {duplicate_count}")
     print(f"Unique rows for SQL seed: {sql_row_count}")
     print(f"Cover image: {cover_url}")
     print(f"Output directory: {output_dir}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,65 @@
 import { mkdir, copyFile, readFile, readdir, writeFile } from "node:fs/promises";
 import { existsSync } from "node:fs";
 import path from "node:path";
 // All paths are resolved against the directory the script is started from.
 const root = process.cwd();
 // Folder holding the original photos.
 const sourceDir = path.join(root, "images", "facebook-060426");
 // Optional JSON manifest; when it is absent, rows are derived from file names.
 const manifestPath = path.join(root, "tmp", "facebook-060426-manifest.json");
 // Destination folder for the copied photos and the published manifest.
 const targetDir = path.join(root, "public", "images", "family-lab", "facebook-060426");
 const targetManifest = path.join(targetDir, "manifest.json");
 // Lower-case file extensions treated as images.
 const supported = new Set([".jpg", ".jpeg", ".png", ".webp", ".gif"]);
 /**
  * Derive a display title from a file name.
  *
  * Drops the extension and a leading "<digits>_<alphanumerics>_" prefix, turns
  * runs of underscores/dashes into single spaces and trims the result.
  * Returns "Untitled Photo" when nothing is left.
  */
 function titleFromFilename(filename) {
   const { name: stem } = path.parse(filename);
   const withoutPrefix = stem.replace(/^\d+_[0-9a-z]+_/i, "");
   const spaced = withoutPrefix
     .replace(/[_-]+/g, " ")
     .replace(/\s+/g, " ")
     .trim();
   return spaced === "" ? "Untitled Photo" : spaced;
 }
 /**
  * Build placeholder manifest rows from bare file names.
  *
  * Every row gets a title from titleFromFilename, a generic description, the
  * single tag "family" and no duplicate marker. `named_file` is false for
  * names starting with six or more digits plus "_" or with "unnamed"
  * (case-insensitive).
  */
 function fallbackRowsFromFiles(files) {
   const numericPrefix = /^\d{6,}_/;
   const unnamedPrefix = /^unnamed/i;
   return files.map((filename) => {
     const looksGenerated = numericPrefix.test(filename) || unnamedPrefix.test(filename);
     return {
       filename,
       title: titleFromFilename(filename),
       description: "Family archive snapshot awaiting fuller annotation.",
       tags: ["family"],
       duplicate_of: null,
       named_file: !looksGenerated,
     };
   });
 }
 /**
  * Copy the unique photos into the public folder and publish their manifest.
  *
  * Rows come from the JSON manifest when it exists, otherwise they are derived
  * from the file names found in the source folder. Rows marked as duplicates
  * are dropped, each remaining row gets a public `src` URL, its file is copied
  * to the target folder and the resulting list is written as manifest.json.
  *
  * When the source folder does not exist the sync is skipped with a warning
  * and nothing is created or copied.
  */
 async function main() {
   // The source folder is listed in .gitignore, so a fresh checkout will not
   // have it. This script appears to run as an npm predev/prebuild hook;
   // skipping keeps those commands usable instead of failing in readdir().
   if (!existsSync(sourceDir)) {
     console.warn(`Family lab sync skipped: source folder not found (${sourceDir})`);
     return;
   }

   await mkdir(targetDir, { recursive: true });

   const sourceFiles = (await readdir(sourceDir))
     .filter((filename) => supported.has(path.extname(filename).toLowerCase()))
     .sort((a, b) => a.localeCompare(b));

   // Prefer the generated manifest; fall back to rows built from file names.
   const rows = existsSync(manifestPath)
     ? JSON.parse(await readFile(manifestPath, "utf-8"))
     : fallbackRowsFromFiles(sourceFiles);

   const uniqueRows = rows
     .filter((row) => !row.duplicate_of)
     .map((row) => ({
       ...row,
       src: `/images/family-lab/facebook-060426/${row.filename}`,
     }));

   for (const row of uniqueRows) {
     await copyFile(path.join(sourceDir, row.filename), path.join(targetDir, row.filename));
   }

   await writeFile(targetManifest, `${JSON.stringify(uniqueRows, null, 2)}\n`, "utf-8");
   console.log(`Family lab synced: ${uniqueRows.length} unique photos`);
 }
 // Run the sync; on any failure print the error and exit with status 1.
 const reportAndExit = (error) => {
   console.error(error);
   process.exit(1);
 };
 main().catch(reportAndExit);
--- a/Show More
+++ b/Show More