#!/usr/bin/env python3
from __future__ import annotations

import argparse
import csv
import subprocess
from pathlib import Path

# Project root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# Default location of the request CSV (overridable with --requests).
REQUESTS = ROOT / 'docs' / 'internal' / 'reference-extraction-requests.csv'

# Columns that must appear in the CSV header; load_requests() refuses a file
# that lacks any of them.
REQUIRED_FIELDS = [
    # Identity and workflow state of the request.
    'request_id', 'status',
    # Source video, relative to ROOT.
    'episode_rel_path',
    # Descriptive asset metadata.
    'asset_category', 'asset_family', 'asset_name',
    # Seek position handed to ffmpeg.
    'timestamp',
    # Where and under which name the frame is written.
    'output_rel_dir', 'filename_stub',
    'notes',
]

# Status values (compared after strip + lower-casing) that mark a row as
# still awaiting extraction.
READY_STATUSES = {'pending', 'queued', 'ready'}



def load_requests(path: Path) -> list[dict[str, str]]:
    """Read the extraction-request CSV at *path* and return its data rows.

    Each row is a dict keyed by the CSV header. Cells missing from a short
    row are filled with ``''`` so every required column maps to a string.

    Raises:
        SystemExit: if the header lacks any column named in REQUIRED_FIELDS.
    """
    # 'utf-8-sig' drops a leading byte-order mark (common in spreadsheet
    # exports) and reads BOM-less UTF-8 unchanged. With plain 'utf-8' the BOM
    # sticks to the first header name and 'request_id' is reported missing.
    with path.open(newline='', encoding='utf-8-sig') as handle:
        # restval='' keeps short rows from yielding None values, which would
        # break the .strip() calls applied to every field later on.
        reader = csv.DictReader(handle, restval='')
        header = reader.fieldnames or []  # fieldnames is None for an empty file
        missing = [field for field in REQUIRED_FIELDS if field not in header]
        if missing:
            raise SystemExit(f'Missing CSV columns in {path}: {", ".join(missing)}')
        return list(reader)



def selected_requests(rows: list[dict[str, str]], request_ids: set[str]) -> list[dict[str, str]]:
    """Return the rows that are ready for extraction, in their original order.

    A row is kept when its status (stripped, lower-cased) is one of
    READY_STATUSES. If *request_ids* is non-empty, the row's stripped
    ``request_id`` must additionally be a member of that set.
    """

    def is_wanted(candidate: dict[str, str]) -> bool:
        identifier = candidate['request_id'].strip()
        if request_ids and identifier not in request_ids:
            return False
        return candidate['status'].strip().lower() in READY_STATUSES

    return [candidate for candidate in rows if is_wanted(candidate)]



def safe_timestamp(value: str) -> str:
    """Return *value* trimmed, with ':' turned into '-' and '.' into '_'.

    The result is used as part of an output file name.
    """
    cleaned = value.strip()
    for unsafe, substitute in ((':', '-'), ('.', '_')):
        cleaned = cleaned.replace(unsafe, substitute)
    return cleaned



def output_path(row: dict[str, str]) -> Path:
    """Build the PNG destination path for a request row.

    The file lives in ``ROOT / output_rel_dir`` and is named
    ``<stub>_<safe timestamp>.png``, where the stub is ``filename_stub`` or,
    when that is blank, the ``request_id``.
    """
    target_dir = ROOT / row['output_rel_dir'].strip()
    stem = row['filename_stub'].strip()
    if not stem:
        # No explicit stub given: fall back to the request id.
        stem = row['request_id'].strip()
    stamp = safe_timestamp(row['timestamp'])
    return target_dir / f'{stem}_{stamp}.png'



def validate_request(row: dict[str, str]) -> None:
    """Check that a request row has everything needed to extract a frame.

    Verifies, in order, that the episode file exists under ROOT, that a
    timestamp is given, and that an output directory is given.

    Raises:
        SystemExit: on the first failed check, naming the offending request.
    """
    episode_rel_path = row['episode_rel_path'].strip()
    episode_path = ROOT / episode_rel_path
    # A blank episode_rel_path resolves to ROOT itself, which exists as a
    # directory; require a non-empty path that points at a regular file so the
    # problem is reported here rather than as an ffmpeg failure later.
    if not episode_rel_path or not episode_path.is_file():
        raise SystemExit(f'Missing episode file for request {row["request_id"]}: {episode_path}')
    if not row['timestamp'].strip():
        raise SystemExit(f'Missing timestamp for request {row["request_id"]}')
    if not row['output_rel_dir'].strip():
        raise SystemExit(f'Missing output_rel_dir for request {row["request_id"]}')



def extract_frame(row: dict[str, str], overwrite: bool) -> Path:
    """Extract one frame for *row* with ffmpeg and return the output path.

    The row is validated first, the destination directory is created if
    needed, and ffmpeg is asked for a single video frame at the row's
    timestamp.

    Args:
        row: A request row as returned by load_requests().
        overwrite: Pass ``-y`` to ffmpeg (replace an existing output file)
            when true, ``-n`` (refuse to overwrite) otherwise.

    Raises:
        SystemExit: if validation fails, ffmpeg cannot be started, or ffmpeg
            exits with a non-zero status (which includes the ``-n`` refusal
            when the output file already exists).
    """
    validate_request(row)
    episode_path = ROOT / row['episode_rel_path'].strip()
    destination = output_path(row)
    destination.parent.mkdir(parents=True, exist_ok=True)
    cmd = [
        'ffmpeg',
        '-v', 'error',
        '-y' if overwrite else '-n',
        # -ss is placed before -i so it applies to the input.
        '-ss', row['timestamp'].strip(),
        '-i', str(episode_path),
        '-frames:v', '1',
        str(destination),
    ]
    # Report failures the same way the validation checks do (SystemExit with
    # the request id) instead of letting a raw traceback escape.
    try:
        subprocess.run(cmd, check=True)
    except FileNotFoundError as exc:
        raise SystemExit(
            f'ffmpeg executable not found while processing request {row["request_id"]}'
        ) from exc
    except subprocess.CalledProcessError as exc:
        raise SystemExit(
            f'ffmpeg failed (exit code {exc.returncode}) for request '
            f'{row["request_id"]}: {destination}'
        ) from exc
    return destination



def main() -> None:
    """Command-line entry point.

    Options:
        --requests PATH    CSV file to read (defaults to REQUESTS).
        --request-id ID    Restrict processing to this id; may be repeated.
        --overwrite        Replace output files that already exist.
        --check            Only validate the selected requests and report how
                           many there are; no frames are extracted.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--requests', type=Path, default=REQUESTS)
    cli.add_argument('--request-id', action='append', default=[])
    cli.add_argument('--overwrite', action='store_true')
    cli.add_argument('--check', action='store_true')
    options = cli.parse_args()

    all_rows = load_requests(options.requests)
    wanted_ids = set(options.request_id)
    pending = selected_requests(all_rows, wanted_ids)

    if options.check:
        # Validation-only mode: the first invalid request aborts the run.
        for request in pending:
            validate_request(request)
        print(f'OK: {len(pending)} queued extraction requests')
    elif not pending:
        print('No pending extraction requests.')
    else:
        for request in pending:
            written = extract_frame(request, overwrite=options.overwrite)
            print(f'Wrote {written.relative_to(ROOT)}')


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
