#!/usr/bin/env python3
"""Build a manifest for IIW S7 character design plates.

This scans 02_Elements/DESIGN/01_CH and records the official production
character/reference sheets as training/evaluation candidates. It does not render
or modify source artwork.
"""
from __future__ import annotations

import argparse
import csv
import json
import re
import struct
from collections import defaultdict
from pathlib import Path
from typing import Any

# Base directory: the parent of the directory that contains this script.
# The default input/output paths below are anchored here, and rel() reports
# paths relative to it.
ROOT = Path(__file__).resolve().parents[1]
# Default folder scanned for character plates (overridable via --character-root).
DEFAULT_CH_ROOT = ROOT / "iiw-totallyspies/02_Elements/DESIGN/01_CH"
# Default output locations (overridable via --output-json / --output-csv).
DEFAULT_JSON = ROOT / "materials/training-data/iiw_character_plates_manifest.json"
DEFAULT_CSV = ROOT / "docs/internal/iiw-character-plate-inventory.csv"

# Lower-cased file suffixes that build() treats as plates.
IMAGE_EXTS = {".psd", ".psb", ".png", ".jpg", ".jpeg"}
# Lower-cased file names that build() always ignores.
SKIP_NAMES = {".ds_store", "thumbs.db"}

# Maps the first folder level under the character root to a display label.
# build() falls back to the raw folder name for folders not listed here.
# NOTE(review): "06_LINE_UP" and "06_TOBY" share the same numeric prefix.
CHARACTER_FOLDERS = {
    "01_ALEX": "Alex",
    "02_CLOVER": "Clover",
    "03_SAM": "Sam",
    "04_SPIES": "Spies group",
    "05_ZERLINA": "Zerlina",
    "06_LINE_UP": "Lineup",
    "06_TOBY": "Toby",
    "07_GLITTERSTAR": "Glitterstar",
    "08_MANDY": "Mandy",
    "09_JERRY": "Jerry",
    "10_WOOHP_AGENTS": "WOOHP agents",
    "11_CYBERCHAC": "Cyberchac",
    "12_ANIMALS": "Animals",
    "13_GUEST": "Guests",
    "14_EXTRA": "Extras",
    "15_WORKER": "Workers",
    "16_MONSTER": "Monsters",
}

# Character labels consulted by training_priority() when ranking a plate.
MAIN_PRIORITY = {"Alex", "Clover", "Sam"}
SECONDARY_PRIORITY = {"Zerlina", "Toby", "Jerry", "Mandy", "Glitterstar", "Cyberchac", "WOOHP agents"}


def rel(path: Path) -> str:
    """Return *path* as a POSIX string, relative to ``ROOT`` when possible.

    Paths that do not live under ``ROOT`` are returned unchanged (still in
    POSIX form).
    """
    try:
        shown = path.relative_to(ROOT)
    except ValueError:
        # Not under ROOT: keep the path as given.
        shown = path
    return shown.as_posix()


def file_size_human(n: int) -> str:
    """Format a byte count as a short human-readable string.

    Values below 1024 are shown as whole bytes (``"512 B"``); larger values
    are scaled by 1024 and shown with one decimal in KB, MB or GB. GB is the
    largest unit, so anything bigger stays in GB.
    """
    amount = float(n)
    if amount < 1024:
        return f"{int(amount)} B"
    for suffix in ("KB", "MB"):
        amount /= 1024
        if amount < 1024:
            return f"{amount:.1f} {suffix}"
    # Everything that did not fit in MB is reported in GB, however large.
    return f"{amount / 1024:.1f} GB"


def png_dims(path: Path) -> tuple[int | None, int | None]:
    """Read ``(width, height)`` from a PNG file header.

    Only the first 24 bytes are read (signature, IHDR chunk header, width and
    height), so large files are not loaded into memory.

    Returns ``(None, None)`` when the file cannot be read, is shorter than
    24 bytes, lacks the PNG signature, or does not start with an IHDR chunk.
    """
    try:
        with path.open("rb") as handle:
            header = handle.read(24)
    except OSError:
        # Best effort: an unreadable file simply has unknown dimensions.
        return None, None
    if len(header) < 24 or header[:8] != b"\x89PNG\r\n\x1a\n":
        return None, None
    # Bytes 12-15 hold the type of the first chunk; width/height are only
    # meaningful at offsets 16-23 when that chunk is IHDR.
    if header[12:16] != b"IHDR":
        return None, None
    width, height = struct.unpack(">II", header[16:24])
    return width, height


def jpg_dims(path: Path) -> tuple[int | None, int | None]:
    """Read ``(width, height)`` from the first start-of-frame segment of a JPEG.

    The file is walked marker by marker; segments other than a start-of-frame
    are skipped using their length field.

    Returns ``(None, None)`` when the file cannot be read, does not start
    with the JPEG SOI marker, or ends before a complete frame header is found.
    """
    # SOF0-SOF15, excluding DHT (0xC4), JPG (0xC8) and DAC (0xCC), which share
    # the 0xC0-0xCF range but are not frame headers.
    sof_markers = frozenset(range(0xC0, 0xD0)) - {0xC4, 0xC8, 0xCC}
    # Markers with no length field: stuffed 0x00, TEM, RST0-RST7, SOI, EOI.
    # Reading a "length" after these would skip over real data.
    standalone = frozenset({0x00, 0x01, 0xD8, 0xD9, *range(0xD0, 0xD8)})
    try:
        with path.open("rb") as handle:
            if handle.read(2) != b"\xff\xd8":
                return None, None
            while True:
                byte = handle.read(1)
                if not byte:
                    return None, None
                if byte != b"\xff":
                    continue
                marker = handle.read(1)
                # Any number of 0xFF fill bytes may precede the marker code.
                while marker == b"\xff":
                    marker = handle.read(1)
                if not marker:
                    return None, None
                code = marker[0]
                if code in standalone:
                    continue
                length_bytes = handle.read(2)
                if len(length_bytes) != 2:
                    return None, None
                if code in sof_markers:
                    # Frame header payload: precision (1), height (2), width (2).
                    frame = handle.read(5)
                    if len(frame) < 5:
                        return None, None
                    height, width = struct.unpack(">HH", frame[1:5])
                    return width, height
                # The length field counts its own two bytes.
                length = struct.unpack(">H", length_bytes)[0]
                handle.seek(max(length - 2, 0), 1)
    except (OSError, struct.error):
        # Best effort: an unreadable file simply has unknown dimensions.
        return None, None


def psd_dims(path: Path) -> tuple[int | None, int | None, dict[str, Any]]:
    """Read dimensions and basic header fields from a PSD/PSB file.

    Only the 26-byte file header is read, so multi-gigabyte documents are
    not loaded into memory.

    Returns ``(width, height, info)`` where ``info`` has the keys
    ``psd_version``, ``channels``, ``depth`` and ``color_mode``. Returns
    ``(None, None, {})`` when the file cannot be read, is shorter than the
    header, or lacks the ``8BPS`` signature.
    """
    try:
        with path.open("rb") as handle:
            header = handle.read(26)
    except OSError:
        # Best effort: an unreadable file simply has unknown dimensions.
        return None, None, {}
    if len(header) < 26 or header[:4] != b"8BPS":
        return None, None, {}
    # Layout: signature (4), version (2), reserved (6), channels (2),
    # height (4), width (4), depth (2), colour mode (2) - all big-endian.
    _, version, channels, height, width, depth, mode = struct.unpack(">4sH6xHIIHH", header)
    return width, height, {"psd_version": version, "channels": channels, "depth": depth, "color_mode": mode}


def dims(path: Path) -> tuple[int | None, int | None, dict[str, Any]]:
    """Dispatch to the header reader that matches the file suffix.

    Returns ``(width, height, extra)``. ``extra`` carries the additional
    header fields reported by ``psd_dims`` for PSD/PSB files and is empty for
    every other format. Unknown suffixes yield ``(None, None, {})``.
    """
    suffix = path.suffix.lower()
    if suffix in {".psd", ".psb"}:
        return psd_dims(path)
    if suffix == ".png":
        width, height = png_dims(path)
    elif suffix in {".jpg", ".jpeg"}:
        width, height = jpg_dims(path)
    else:
        width, height = None, None
    return width, height, {}


def production_code(name: str) -> str:
    """Return the first three-digit run found in *name*, or ``""`` if none.

    The optional ``TS``/``TS7``/``TS_`` prefix and separator in the pattern
    are matched case-insensitively but are not required, so the first three
    consecutive digits anywhere in the name are returned (including the
    leading digits of a longer number).
    """
    pattern = re.compile(r"(?:TS7?|TS_)?[_-]?(\d{3})", re.I)
    found = pattern.search(name)
    if found is None:
        return ""
    return found.group(1)


def clean_tokens(name: str) -> list[str]:
    """Split a file name into alphanumeric tokens, minus leading prefixes.

    The suffix is removed first. A leading ``TS``/``TS7`` + three-digit code
    and then a leading ``CH``/``PR``/``BG``/``CC`` tag (each followed by ``_``
    or ``-``) are stripped case-insensitively. What remains is split on any
    run of non-alphanumeric characters; empty pieces are discarded.
    """
    stem = Path(name).stem
    for leading in (r"^TS7?[_-]?\d{3}[_-]", r"^(CH|PR|BG|CC)[_-]"):
        stem = re.sub(leading, "", stem, flags=re.I)
    return list(filter(None, re.split(r"[^A-Za-z0-9]+", stem)))


def title_from_name(name: str) -> str:
    """Build a readable title from a file name.

    Tokens come from ``clean_tokens``. Noise words (colour markers, "full",
    "copie", "simplified", "wip"), version tags (``v`` followed by digits)
    and purely numeric tokens are dropped; the rest are joined with single
    spaces in their original spelling.

    Noise words and version tags are matched case-insensitively, so
    ``color``/``Color``/``COLOR`` and ``v6``/``V06`` are all removed rather
    than only the spellings that happened to be enumerated.
    """
    noise = {"color", "colour", "colo", "full", "copie", "simplified", "wip"}
    kept = []
    for token in clean_tokens(name):
        folded = token.lower()
        if folded in noise:
            continue
        # Covers both bare numbers ("02") and version tags ("v2", "V010").
        if re.fullmatch(r"v?\d+", folded):
            continue
        kept.append(token)
    return " ".join(kept)


def asset_type(name: str, parent: str) -> str:
    """Classify a plate by keywords found in its file name.

    Matching is by case-insensitive substring and the first rule that hits
    wins, in this order: head/expression, turnaround, lineup, outfit, pose,
    detail. The lineup rule also fires when *parent* contains ``line_up``.
    Names that match nothing are ``"character_design_asset"``.
    """
    low = name.lower()

    def mentions(*needles: str) -> bool:
        # True when any keyword occurs anywhere in the lower-cased name.
        return any(needle in low for needle in needles)

    if mentions("heads", "head", "closeup", "close-up", "mouth", "expression"):
        label = "head_expression_sheet"
    elif mentions("turn", "turnaround", "charte", "chart"):
        label = "turnaround_sheet"
    elif mentions("lineup", "lineupr") or "line_up" in parent.lower():
        label = "lineup_sheet"
    elif mentions("outfit", "suit", "pyjama", "pajama", "dress", "wedding", "snowboard", "winter", "camping", "sportswear", "casual", "armor", "armors", "wingsuit", "moto", "training"):
        label = "outfit_sheet"
    elif mentions("pose", "action", "hero"):
        label = "pose_sheet"
    elif mentions("hand", "hands", "arm", "hair", "feet", "shoe", "boots", "necklace", "bracelet"):
        label = "detail_reference"
    else:
        label = "character_design_asset"
    return label


def outfit_hint(name: str) -> str:
    """Guess an outfit label from a file name.

    The cleaned title is searched (case-insensitively, by substring) for a
    list of known outfit phrases; the first phrase in list order that occurs
    is returned. Failing that, the whole title is returned when it mentions
    "outfit" or "suit"; otherwise the result is ``""``.
    """
    title = title_from_name(name)
    lowered = title.lower()
    known = (
        "casual outfit", "spy suit", "spie", "spies", "snowboard", "winter", "camping", "wedding", "pyjama", "pajama", "sportswear", "race outfit", "training suit", "moto suit", "vintage", "reporter", "scientist", "bodyguard", "botanist", "luxury", "event outfit", "surf", "diving", "armor", "wingsuit", "prison", "villain outfit",
    )
    matched = next((phrase for phrase in known if phrase in lowered), None)
    if matched is not None:
        return matched
    return title if ("outfit" in lowered or "suit" in lowered) else ""


def view_hint(name: str) -> str:
    """List the view keywords that occur in *name* as a comma-joined string.

    Keywords are checked by case-insensitive substring in a fixed order.
    ``close-up`` is reported as ``closeup``, and each label appears at most
    once. Returns ``""`` when nothing matches.
    """
    lowered = name.lower()
    keywords = ("front", "back", "side", "profile", "turn", "full", "closeup", "close-up", "head", "hands", "hand", "body")
    found: list[str] = []
    for keyword in keywords:
        if keyword not in lowered:
            continue
        label = keyword.replace("close-up", "closeup")
        if label not in found:
            found.append(label)
    return ",".join(found)


def normalized_stem(path: Path) -> str:
    """Return a lower-cased stem used as a grouping key for related files.

    Any whitespace-prefixed ``copie`` word is removed, then a trailing
    ``_v<digits>`` or ``-v<digits>`` version tag is dropped.
    """
    copy_marker = re.compile(r"\s+copie\b")
    version_tail = re.compile(r"[_-](v\d+)$")
    lowered = path.stem.lower()
    return version_tail.sub("", copy_marker.sub("", lowered))


def training_priority(character: str, typ: str, ext: str, name: str) -> str:
    """Rank a plate as ``"high"``, ``"medium"``, ``"low"`` or ``"eval_or_low"``.

    Rules are applied in order and the first match wins:

    1. main characters with a turnaround, head, outfit or lineup sheet: high
    2. secondary characters with a turnaround, head or outfit sheet: medium
    3. guest/extra/animal/worker/monster groups: low
    4. names containing ``old-design``, ``old_design`` or ``wip``: eval_or_low
    5. anything else: medium for main characters, low otherwise

    *ext* is accepted but not used.
    """
    core_sheets = {"turnaround_sheet", "head_expression_sheet", "outfit_sheet"}
    is_main = character in MAIN_PRIORITY

    if is_main and (typ in core_sheets or typ == "lineup_sheet"):
        return "high"
    if character in SECONDARY_PRIORITY and typ in core_sheets:
        return "medium"
    if character in {"Guests", "Extras", "Animals", "Workers", "Monsters"}:
        return "low"

    lowered = name.lower()
    if "old-design" in lowered or "old_design" in lowered or "wip" in lowered:
        return "eval_or_low"
    if is_main:
        return "medium"
    return "low"


def caption(character: str, typ: str, title: str, outfit: str, production: str) -> str:
    """Compose a comma-separated training caption ending with a full stop.

    The caption always starts with the character/show phrase and ends with
    the two fixed style phrases. In between, the outfit, the asset type (with
    underscores shown as spaces), the title and the production code are
    included when non-empty. The title is left out when it already occurs,
    case-insensitively, in the text assembled before it.
    """
    lead = f"{character} from Totally Spies season 7"
    middle: list[str] = []
    if outfit:
        middle.append(outfit)
    if typ:
        middle.append(typ.replace("_", " "))
    # Avoid repeating a title that the earlier pieces already spell out.
    so_far = " ".join([lead, *middle]).lower()
    if title and title.lower() not in so_far:
        middle.append(title)
    if production:
        middle.append(f"production code {production}")
    closing = ["clean production colour reference", "2D animated style"]
    return ", ".join([lead, *middle, *closing]) + "."


def find_paired_exports(files: list[Path]) -> dict[Path, list[Path]]:
    """Map each PSD/PSB file to the flat exports that share its stem key.

    Files are grouped by ``normalized_stem``. Every ``.psd``/``.psb`` entry
    gets a sorted list of the ``.jpg``/``.jpeg``/``.png`` files in its group
    (possibly empty). Other files do not appear as keys.
    """
    layered_exts = {".psd", ".psb"}
    export_exts = {".jpg", ".jpeg", ".png"}

    # Only exports can ever be listed as partners, so index just those.
    exports_by_key: dict[str, list[Path]] = defaultdict(list)
    for candidate in files:
        if candidate.suffix.lower() in export_exts:
            exports_by_key[normalized_stem(candidate)].append(candidate)

    paired: dict[Path, list[Path]] = {}
    for source in files:
        if source.suffix.lower() in layered_exts:
            paired[source] = sorted(exports_by_key.get(normalized_stem(source), []))
    return paired


def build(ch_root: Path) -> list[dict[str, Any]]:
    """Scan *ch_root* and return one manifest row per plate file.

    Files are found recursively, kept when their suffix is in ``IMAGE_EXTS``
    and their name is neither in ``SKIP_NAMES`` nor starts with ``._``, and
    processed in sorted path order. The character comes from the first folder
    level (via ``CHARACTER_FOLDERS``, falling back to the folder name); files
    directly under the root are assigned by name to Alex, Clover or Sam, else
    to "Lineup". Extra header fields reported for PSD/PSB files are merged
    into the row.

    Raises:
        SystemExit: if *ch_root* does not exist.
    """
    if not ch_root.exists():
        raise SystemExit(f"Missing character root: {ch_root}")

    def is_plate(candidate: Path) -> bool:
        # Regular image file that is not OS metadata.
        if not candidate.is_file():
            return False
        if candidate.suffix.lower() not in IMAGE_EXTS:
            return False
        return candidate.name.lower() not in SKIP_NAMES and not candidate.name.startswith("._")

    files = sorted(filter(is_plate, ch_root.rglob("*")))
    pairs = find_paired_exports(files)
    layered_exts = {".psd", ".psb"}

    rows: list[dict[str, Any]] = []
    for path in files:
        filename = path.name
        parts = path.relative_to(ch_root).parts
        if len(parts) > 1:
            folder = parts[0]
            character = CHARACTER_FOLDERS.get(folder, folder)
        else:
            # Loose file directly under the root: infer the character from its name.
            folder = "01_CH_ROOT"
            lowered = filename.lower()
            character = next(
                (label for label in ("Alex", "Clover", "Sam") if label.lower() in lowered),
                "Lineup",
            )

        ext = path.suffix.lower()
        width, height, extra = dims(path)
        prod = production_code(filename)
        typ = asset_type(filename, folder)
        title = title_from_name(filename)
        outfit = outfit_hint(filename)
        priority = training_priority(character, typ, ext, filename)
        exports = pairs.get(path, []) if ext in layered_exts else []
        size = path.stat().st_size

        row: dict[str, Any] = {
            "character": character,
            "folder": folder,
            "source_path": rel(path),
            "filename": filename,
            "extension": ext,
            "source_format": ext.lstrip("."),
            "size_bytes": size,
            "size_human": file_size_human(size),
            "width": width or "",
            "height": height or "",
            "production_code": prod,
            "asset_type": typ,
            "title_hint": title,
            "outfit_hint": outfit,
            "view_hint": view_hint(filename),
            "paired_exports": [rel(export) for export in exports],
            "training_priority": priority,
            "caption": caption(character, typ, title, outfit, prod),
        }
        row.update(extra)
        rows.append(row)
    return rows


def write_json(rows: list[dict[str, Any]], output: Path) -> None:
    """Write the manifest rows to *output* as an indented JSON document.

    Parent directories are created as needed. The payload wraps *rows* under
    ``"plates"`` together with a schema tag, the source root, the row count
    and usage notes, and the file ends with a newline.

    The file is always encoded as UTF-8: the payload is serialised with
    ``ensure_ascii=False``, so relying on the platform default encoding could
    fail or produce a non-UTF-8 file when a name contains non-ASCII text.
    """
    output.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        "schema": "iiw_character_plates/v1",
        "source_root": "iiw-totallyspies/02_Elements/DESIGN/01_CH",
        "count": len(rows),
        "notes": [
            "Derived training images should be PNG, not JPEG, to preserve clean line art and flat colours.",
            "Do not over-weight design sheets in episode-frame/video training; use them as identity anchors.",
        ],
        "plates": rows,
    }
    output.write_text(json.dumps(payload, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")


def write_csv(rows: list[dict[str, Any]], output: Path) -> None:
    """Write the inventory columns of *rows* to *output* as UTF-8 CSV.

    Parent directories are created as needed. A header line is written
    first. Row keys outside the fixed column list are ignored, and columns
    missing from a row are left empty.
    """
    columns = [
        "character", "folder", "source_path", "filename", "source_format", "size_bytes", "size_human", "width", "height", "production_code", "asset_type", "title_hint", "outfit_hint", "view_hint", "training_priority", "paired_exports", "caption"
    ]
    output.parent.mkdir(parents=True, exist_ok=True)
    with output.open("w", newline="", encoding="utf-8") as sink:
        table = csv.DictWriter(sink, fieldnames=columns, extrasaction="ignore")
        table.writeheader()
        for record in rows:
            table.writerow(record)


def main() -> None:
    """Command-line entry point: build the manifest and write both outputs.

    Accepts ``--character-root``, ``--output-json`` and ``--output-csv`` (all
    paths, defaulting to the module-level defaults), then prints the written
    locations followed by a per-character plate count in sorted order.
    """
    cli = argparse.ArgumentParser()
    for flag, fallback in (
        ("--character-root", DEFAULT_CH_ROOT),
        ("--output-json", DEFAULT_JSON),
        ("--output-csv", DEFAULT_CSV),
    ):
        cli.add_argument(flag, type=Path, default=fallback)
    opts = cli.parse_args()

    plates = build(opts.character_root)
    write_json(plates, opts.output_json)
    write_csv(plates, opts.output_csv)

    tally: dict[str, int] = {}
    for plate in plates:
        who = plate["character"]
        tally[who] = tally.get(who, 0) + 1

    print(f"Wrote {opts.output_json} ({len(plates)} plates)")
    print(f"Wrote {opts.output_csv}")
    for who in sorted(tally):
        print(f"  {who}: {tally[who]}")


# Run the manifest build only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()