#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Any


def run(cmd: list[str]) -> None:
    """Execute *cmd* as a child process and block until it exits.

    The command is given as an argument list, so no shell is involved.

    Raises:
        subprocess.CalledProcessError: If the process exits with a non-zero
            status.
    """
    subprocess.check_call(cmd)


def extract_audio(source_video: Path, wav_path: Path) -> None:
    """Convert the audio of *source_video* into a WAV file at *wav_path*.

    ffmpeg is asked for a single audio channel (``-ac 1``) at a 16000 Hz
    sample rate (``-ar 16000``); ``-y`` lets it overwrite an existing output
    file without prompting. Failures propagate from ``run``.
    """
    ffmpeg_cmd = ["ffmpeg", "-y", "-i", str(source_video)]
    ffmpeg_cmd += ["-ac", "1", "-ar", "16000"]
    ffmpeg_cmd.append(str(wav_path))
    run(ffmpeg_cmd)


def load_json_relaxed(path: Path) -> dict[str, Any]:
    """Parse the JSON document stored at *path*, tolerating bad bytes.

    The file is decoded explicitly as UTF-8 instead of with the platform's
    locale encoding, so non-ASCII transcript text is read the same way on
    every system. Byte sequences that are not valid UTF-8 are replaced with
    U+FFFD rather than aborting the load.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded JSON value.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    text = path.read_text(encoding="utf-8", errors="replace")
    return json.loads(text)


def load_corrections(path: Path | None) -> dict[str, Any]:
    """Load the optional corrections file.

    Args:
        path: Location of a JSON corrections file, or ``None`` when no file
            was supplied.

    Returns:
        The parsed JSON content. When *path* is ``None`` or does not exist,
        an empty corrections structure with the keys ``"replacements"`` and
        ``"segment_overrides"`` is returned instead.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    if path is None or not path.exists():
        return {"replacements": {}, "segment_overrides": []}
    # Decode as UTF-8 explicitly so non-ASCII correction text does not depend
    # on the platform's locale encoding.
    return json.loads(path.read_text(encoding="utf-8"))


def apply_corrections(segments: list[dict[str, Any]], corrections: dict[str, Any]) -> list[dict[str, Any]]:
    """Return copies of *segments* with text corrections applied.

    Two kinds of correction are read from *corrections*:

    * ``"replacements"``: a mapping of substring -> replacement, applied to
      every segment's text in mapping order.
    * ``"segment_overrides"``: a list of dicts; an override that has both
      ``"start"`` and ``"text"`` replaces the text of any segment whose
      ``"start"`` lies within 0.05 of the override's ``"start"``. When
      several overrides match one segment, the last one wins.

    Args:
        segments: Segment dicts, each with at least ``"start"`` and
            ``"text"``. The input dicts are not modified.
        corrections: Corrections structure; missing or null entries are
            treated as empty.

    Returns:
        A new list of segment dicts whose ``"text"`` is corrected and
        stripped of surrounding whitespace.
    """
    # ``or`` also covers keys that are present but null in the corrections.
    replacements: dict[str, str] = corrections.get("replacements") or {}
    overrides: list[dict[str, Any]] = corrections.get("segment_overrides") or []

    # An empty search string would make str.replace insert the replacement
    # between every character, so such entries are ignored.
    active_replacements = [(old, new) for old, new in replacements.items() if old]

    # Parse override start times once up front. Overrides lacking a start or
    # a text can never change a segment, so they are dropped here.
    timed_overrides: list[tuple[float, str]] = []
    for override in overrides:
        if override.get("start") is None or "text" not in override:
            continue
        timed_overrides.append((float(override["start"]), str(override["text"]).strip()))

    corrected = []
    for segment in segments:
        text = segment["text"]
        for old, new in active_replacements:
            text = text.replace(old, new)

        new_segment = {**segment, "text": text.strip()}
        segment_start = float(new_segment["start"])
        for override_start, override_text in timed_overrides:
            # Tolerance absorbs small rounding differences in timestamps.
            if abs(override_start - segment_start) < 0.05:
                new_segment["text"] = override_text
        corrected.append(new_segment)
    return corrected


def normalize_segments(raw: dict[str, Any]) -> list[dict[str, Any]]:
    """Flatten the raw transcription JSON into simple segment dicts.

    Every entry under ``raw["transcription"]`` whose text is non-empty after
    stripping becomes a dict with ``"start"``, ``"end"`` and ``"text"``. The
    times come from the entry's ``offsets["from"]`` / ``offsets["to"]``
    values divided by 1000 and rounded to three decimals; a missing offset
    counts as 0.
    """

    def _scaled(value: Any) -> float:
        # Divide the raw offset by 1000 and keep three decimal places.
        return round(float(value) / 1000.0, 3)

    normalized = []
    for entry in raw.get("transcription", []):
        spoken = str(entry.get("text", "")).strip()
        if spoken:
            timing = entry.get("offsets", {})
            normalized.append({
                "start": _scaled(timing.get("from", 0)),
                "end": _scaled(timing.get("to", 0)),
                "text": spoken,
            })
    return normalized


def main() -> None:
    """Command-line entry point: transcribe a video's audio and write JSON.

    Steps: parse arguments, check that the inputs and the external tools are
    available, extract the audio with ffmpeg, transcribe it with
    ``whisper-cli``, normalize the segments, apply optional corrections and
    write the resulting payload to ``--output``.

    Raises:
        SystemExit: If the source video, the model file, or one of the
            required executables cannot be found.
        subprocess.CalledProcessError: If ffmpeg or whisper-cli exits with a
            non-zero status.
    """
    parser = argparse.ArgumentParser(description="Transcribe source video audio locally with whisper.cpp")
    parser.add_argument("--source-video", required=True, type=Path)
    parser.add_argument("--model-path", required=True, type=Path)
    parser.add_argument("--output", required=True, type=Path)
    parser.add_argument("--language", default="en")
    parser.add_argument("--threads", type=int, default=4)
    parser.add_argument("--corrections", type=Path)
    args = parser.parse_args()

    if not args.source_video.exists():
        raise SystemExit(f"Source video not found: {args.source_video}")
    if not args.model_path.exists():
        raise SystemExit(f"Model not found: {args.model_path}")
    # Fail early with a readable message instead of a FileNotFoundError
    # traceback from subprocess when an external tool is not installed.
    for tool in ("ffmpeg", "whisper-cli"):
        if shutil.which(tool) is None:
            raise SystemExit(f"Required executable not found on PATH: {tool}")

    with tempfile.TemporaryDirectory() as tmp:
        tmpdir = Path(tmp)
        wav_path = tmpdir / "audio.wav"
        out_prefix = tmpdir / "transcript"
        extract_audio(args.source_video, wav_path)
        run([
            "whisper-cli",
            "-m", str(args.model_path),
            "-f", str(wav_path),
            "-l", args.language,
            "-t", str(args.threads),
            "-ojf",
            "-of", str(out_prefix),
            "-np",
        ])
        # The transcript is read from "<out_prefix>.json" and must be loaded
        # before the temporary directory is removed.
        raw = load_json_relaxed(out_prefix.with_suffix(".json"))

    segments = normalize_segments(raw)
    corrections = load_corrections(args.corrections)
    segments = apply_corrections(segments, corrections)
    full_text = " ".join(seg["text"] for seg in segments).strip()

    payload = {
        "source_video": args.source_video.name,
        "language": args.language,
        "model_path": str(args.model_path),
        "segment_count": len(segments),
        "full_text": full_text,
        "segments": segments,
    }
    # Create the destination directory on demand so a fresh output path works.
    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text(json.dumps(payload, indent=2) + "\n")
    print(f"Wrote transcript to {args.output}")


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
