#!/usr/bin/env python3
from __future__ import annotations

import argparse
import csv
import json
import os
import re
import shutil
import subprocess
from pathlib import Path

# Directory two levels above this file's resolved location (the parent of the
# directory containing this script) -- presumably the repository root.
ROOT = Path(__file__).resolve().parents[1]
# Base directory for all episode material handled by this script.
SOURCE_EPISODES_ROOT = ROOT / 'materials/source-episodes'
# Destination tree for copied episodes; also the tree scanned by inventory_rows().
INGESTED_ROOT = SOURCE_EPISODES_ROOT / 'ingested'
# Created by ensure_structure(); not otherwise used in this part of the file.
PROXIES_ROOT = SOURCE_EPISODES_ROOT / 'proxies'
# Default CSV path for the --inventory-output option.
INVENTORY = ROOT / 'docs/internal/source-episode-inventory.csv'
# Environment variable consulted by main() when --source-root is not given.
SOURCE_ENV = 'TOTALLY_SPIES_EPISODE_SOURCE_ROOT'
# Lower-case suffixes that collect_videos() treats as video files.
VIDEO_EXTENSIONS = {'.mp4', '.mov', '.m4v', '.mkv'}
# CSV column names, in output order; inventory_rows() emits exactly these keys.
FIELDS = [
    'episode_id',
    'source_basename',
    'source_rel_path',
    'local_rel_path',
    'video_codec',
    'resolution',
    'fps',
    'duration_seconds',
    'frame_count',
    'size_bytes',
    'ingest_status',
    'notes',
]


def ensure_structure() -> None:
    """Create the ingested, proxies and inventory directories if they are missing."""
    required_dirs = [INGESTED_ROOT, PROXIES_ROOT, INVENTORY.parent]
    for directory in required_dirs:
        # parents=True builds intermediate directories; exist_ok=True makes
        # repeated runs a no-op.
        directory.mkdir(parents=True, exist_ok=True)


def slugify(name: str) -> str:
    """Turn an arbitrary name into a lower-case, hyphen-separated identifier.

    Surrounding whitespace is stripped and the text is lower-cased; every run
    of characters outside ASCII letters and digits then becomes a single
    hyphen, and leading/trailing hyphens are removed.

    Args:
        name: Text to convert (for example a file stem).

    Returns:
        The slug, or ``'episode'`` when nothing usable remains.
    """
    lowered = name.strip().lower()
    slug = re.sub(r'[^a-zA-Z0-9]+', '-', lowered).strip('-')
    if slug:
        return slug
    return 'episode'



def collect_videos(root: Path) -> list[Path]:
    """Return every video file found anywhere below ``root``, sorted by path.

    A path qualifies when it is a regular file, its name does not start with
    a dot, and its lower-cased suffix is listed in ``VIDEO_EXTENSIONS``.

    Args:
        root: Directory to search recursively.

    Returns:
        The matching paths in sorted order.
    """
    found: list[Path] = []
    for candidate in root.rglob('*'):
        if not candidate.is_file():
            continue
        if candidate.name.startswith('.'):
            continue
        if candidate.suffix.lower() in VIDEO_EXTENSIONS:
            found.append(candidate)
    found.sort()
    return found



def ffprobe(path: Path) -> dict:
    """Run ``ffprobe`` on ``path`` and return its JSON report as a dict.

    Only the stream and format entries named in the ``-show_entries``
    argument are requested.

    Args:
        path: Media file to inspect.

    Returns:
        The decoded JSON written to ffprobe's standard output.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero
            status (the call uses ``check=True``).
    """
    requested_entries = 'stream=codec_name,width,height,r_frame_rate,nb_frames:format=duration,size,bit_rate'
    command = ['ffprobe', '-v', 'error', '-show_entries', requested_entries, '-of', 'json', str(path)]
    completed = subprocess.run(command, check=True, capture_output=True, text=True)
    return json.loads(completed.stdout)



def fps(value: str) -> str:
    """Convert a ``num/den`` frame-rate string to a two-decimal number string.

    Args:
        value: A frame rate such as ``'30000/1001'``, a plain value such as
            ``'25'``, or an empty string.

    Returns:
        ``''`` for empty input; ``value`` unchanged when it contains no
        ``/``; the quotient formatted with two decimals when both parts are
        numeric and the denominator is non-zero; otherwise ``value``
        unchanged.
    """
    if not value:
        return ''
    if '/' not in value:
        return value
    num, den = value.split('/', 1)
    try:
        return f'{float(num) / float(den):.2f}'
    except (ValueError, ZeroDivisionError):
        # Covers any spelling of a zero denominator ('0', '00', '0.0') as well
        # as missing or non-numeric parts: hand the raw text back instead of
        # aborting the whole inventory run.
        return value



def copy_from_source(source_root: Path) -> int:
    """Copy every video under ``source_root`` into the ingested tree.

    The directory layout below ``source_root`` is reproduced below
    ``INGESTED_ROOT``; file metadata is preserved via ``shutil.copy2``.

    Args:
        source_root: Directory holding the episode files. ``~`` is expanded
            and the path is resolved before use.

    Returns:
        The number of files copied.

    Raises:
        SystemExit: If ``source_root`` is the ingested directory itself, or
            if no video files are found to copy.
    """
    source_root = source_root.expanduser().resolve()
    ingested_root = INGESTED_ROOT.resolve()
    if source_root == ingested_root:
        raise SystemExit('Source root must not be materials/source-episodes/ingested')

    videos = collect_videos(source_root)
    if source_root in ingested_root.parents:
        # The source tree contains the ingested directory. Without this filter
        # every previously ingested file would be copied again to
        # ingested/<path-to-ingested>/..., nesting one level deeper per run.
        videos = [video for video in videos if ingested_root not in video.parents]
    if not videos:
        raise SystemExit(f'No video files found under {source_root}')

    copied = 0
    for src in videos:
        dst = INGESTED_ROOT / src.relative_to(source_root)
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)
        copied += 1
    return copied



def _pick_video_stream(streams: list[dict]) -> dict:
    """Return the first stream that reports both width and height, else a fallback."""
    for stream in streams:
        if 'width' in stream and 'height' in stream:
            return stream
    # No stream carries dimensions: fall back to the first stream, or to an
    # empty mapping when the probe listed no streams at all.
    return streams[0] if streams else {}


def _format_duration(raw: object) -> str:
    """Format a duration value with two decimals, using '0.00' if it is not numeric."""
    try:
        return f'{float(raw):.2f}'
    except (TypeError, ValueError):
        return '0.00'


def inventory_rows() -> list[dict[str, str]]:
    """Probe every ingested video and build one inventory row per file.

    Each file returned by ``collect_videos(INGESTED_ROOT)`` is inspected with
    ``ffprobe()``. Codec, resolution, frame rate and frame count come from the
    first stream that reports a width and height, because the probe is not
    restricted to video streams and a container may list audio first.
    Duration and size come from the format section.

    Returns:
        One dict per file, keyed by the column names in ``FIELDS``. Values
        that the probe does not report are empty strings, except
        ``duration_seconds``, which falls back to ``'0.00'``.
    """
    rows: list[dict[str, str]] = []
    for path in collect_videos(INGESTED_ROOT):
        payload = ffprobe(path)
        # "or" also covers an explicit empty list or null, which would
        # otherwise raise on indexing or attribute access.
        stream = _pick_video_stream(payload.get('streams') or [])
        fmt = payload.get('format') or {}
        width = stream.get('width', '')
        height = stream.get('height', '')
        # Leave the column empty rather than writing a bare 'x' when the
        # dimensions are unknown.
        resolution = f'{width}x{height}' if width != '' and height != '' else ''
        rows.append({
            'episode_id': slugify(path.stem),
            'source_basename': path.name,
            'source_rel_path': path.relative_to(INGESTED_ROOT).as_posix(),
            'local_rel_path': path.relative_to(ROOT).as_posix(),
            'video_codec': str(stream.get('codec_name', '')),
            'resolution': resolution,
            'fps': fps(str(stream.get('r_frame_rate', ''))),
            'duration_seconds': _format_duration(fmt.get('duration', 0.0)),
            'frame_count': str(stream.get('nb_frames', '')),
            'size_bytes': str(fmt.get('size', '')),
            'ingest_status': 'ingested',
            'notes': '',
        })
    return rows



def write_inventory(rows: list[dict[str, str]], output: Path) -> None:
    """Write ``rows`` to ``output`` as a UTF-8 CSV file with a header line.

    The parent directory of ``output`` is created when missing, and any
    existing file is overwritten. Columns follow the order of ``FIELDS``.

    Args:
        rows: Row dicts keyed by the names in ``FIELDS``.
        output: Destination CSV path.
    """
    destination_dir = output.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    # newline='' leaves line-ending handling to the csv module.
    with open(output, 'w', newline='', encoding='utf-8') as csv_file:
        sheet = csv.DictWriter(csv_file, fieldnames=FIELDS)
        sheet.writeheader()
        for row in rows:
            sheet.writerow(row)



def main() -> None:
    """Command-line entry point: optionally copy episodes, then inventory them.

    The source root comes from ``--source-root`` or, when that is absent,
    from the environment variable named by ``SOURCE_ENV``. With ``--check``
    nothing is copied or written; the ingested files are only probed and
    counted. Otherwise files are copied when a source root is known, and the
    inventory CSV is written to ``--inventory-output``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--source-root',
        type=Path,
        help=f'Optional owned-episode source root. Defaults to ${SOURCE_ENV} when set.',
    )
    cli.add_argument('--inventory-output', type=Path, default=INVENTORY)
    cli.add_argument('--check', action='store_true')
    options = cli.parse_args()

    ensure_structure()

    source_root = options.source_root
    if source_root is None:
        # Fall back to the environment; a blank value counts as unset.
        env_value = os.environ.get(SOURCE_ENV, '').strip()
        if env_value:
            source_root = Path(env_value).expanduser()

    if options.check:
        rows = inventory_rows()
        print(f'OK: {len(rows)} ingested episode files')
        return

    copied = copy_from_source(source_root) if source_root is not None else 0
    rows = inventory_rows()
    write_inventory(rows, options.inventory_output)
    print(f'Wrote {options.inventory_output} ({len(rows)} rows, copied {copied} files)')


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
