#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import re
from pathlib import Path
from typing import Any


def overlap(a0: float, a1: float, b0: float, b1: float) -> bool:
    """Return True when intervals (a0, a1) and (b0, b1) share a span of positive length.

    The comparison is strict, so intervals that merely touch at an endpoint,
    or that are empty, are reported as not overlapping.
    """
    latest_start = max(a0, b0)
    earliest_end = min(a1, b1)
    return latest_start < earliest_end


def build_matchers(story_bible: dict[str, Any]) -> list[dict[str, Any]]:
    """Compile one whole-word regex matcher per unique alias in the story bible.

    Reads ``story_bible["alias_index"]`` (treated as empty when the key is
    absent). Every entry must provide the ``"alias"`` and ``"alias_lower"``
    keys; a missing key raises ``KeyError``. Entries whose ``alias_lower`` is
    empty, or repeats one already seen, are skipped, so the first entry for a
    given lowercase alias wins.

    Returns:
        A list of dicts, each a shallow copy of the source entry plus a
        ``"pattern"`` key holding the compiled regex for ``alias_lower``.
    """
    # ``[^\W_]`` means "any Unicode letter or digit". For ASCII text this is
    # exactly ``[A-Za-z0-9]`` (underscore still counts as a boundary), but it
    # also stops an alias from matching inside a word that contains non-ASCII
    # letters, e.g. "ana" inside "mañana".
    not_after_alnum = r"(?<![^\W_])"
    not_before_alnum = r"(?![^\W_])"

    seen: set[str] = set()
    matchers: list[dict[str, Any]] = []
    for entry in story_bible.get("alias_index", []):
        # "alias" is not used to build the pattern, but it is a required key:
        # fail here rather than letting an incomplete matcher through.
        if "alias" not in entry:
            raise KeyError("alias")
        alias_lower = entry["alias_lower"]
        if not alias_lower or alias_lower in seen:
            continue
        seen.add(alias_lower)
        pattern = re.compile(not_after_alnum + re.escape(alias_lower) + not_before_alnum)
        matchers.append({**entry, "pattern": pattern})
    return matchers


def match_entities(text: str, matchers: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the entities whose alias pattern occurs in ``text``.

    ``text`` is lowercased before each matcher's compiled ``"pattern"`` is
    searched against it. An entity is identified by its ``(bucket, id)`` pair
    and reported once, using the first matcher that hits, in matcher order.
    Every result is tagged with ``"evidence": "transcript"``.
    """
    haystack = text.lower()
    # Dicts preserve insertion order, so this both de-duplicates by
    # (bucket, id) and keeps results in first-hit order.
    hits: dict[tuple[Any, Any], dict[str, Any]] = {}
    for candidate in matchers:
        if not candidate["pattern"].search(haystack):
            continue
        identity = (candidate["bucket"], candidate["id"])
        if identity in hits:
            continue
        bucket, entity_id = identity
        hits[identity] = {
            "bucket": bucket,
            "id": entity_id,
            "name": candidate["name"],
            "alias": candidate["alias"],
            "evidence": "transcript",
        }
    return list(hits.values())


def build_context_summary(story_bible: dict[str, Any], transcript_excerpt: str, entities: list[dict[str, Any]]) -> list[str]:
    """Assemble the context lines for one clip.

    Starts from a copy of ``story_bible["prompt_summary"]`` (empty when the
    key is absent), then appends a transcript line when ``transcript_excerpt``
    is non-empty and a candidate-entity line when ``entities`` is non-empty.
    The story bible's own list is never mutated.
    """
    summary = list(story_bible.get("prompt_summary", []))
    if transcript_excerpt:
        summary.append(f"Transcript excerpt: {transcript_excerpt}")
    if not entities:
        return summary
    labels = [f"{entity['name']} ({entity['bucket']})" for entity in entities]
    summary.append("Candidate canon entities: " + ", ".join(labels))
    return summary


def main() -> None:
    """Attach transcript and story-bible context to every clip in a manifest.

    Command-line arguments (all required):
        --training-data-dir  directory containing ``manifest.json``
        --transcript         JSON file with a ``"segments"`` list
        --story-bible        JSON file passed to ``build_matchers`` and
                             ``build_context_summary``
        --shot-reference     JSON file whose ``"prompt_summary"`` is copied
                             onto each clip
        --output             path of the scene-context JSON file to write

    For each clip in the manifest, the clip window is ``start`` to
    ``start + duration``. Transcript segments overlapping that window are
    joined into an excerpt, which is matched against the story-bible aliases.
    The results are written back onto the clip (``manifest.json`` is rewritten
    in place) and collected into the scene-context file at ``--output``.

    Raises:
        KeyError: if a manifest clip has no ``"clip"`` key.
    """
    parser = argparse.ArgumentParser(description="Attach transcript and canon context to each clip")
    parser.add_argument("--training-data-dir", required=True, type=Path)
    parser.add_argument("--transcript", required=True, type=Path)
    parser.add_argument("--story-bible", required=True, type=Path)
    parser.add_argument("--shot-reference", required=True, type=Path)
    parser.add_argument("--output", required=True, type=Path)
    args = parser.parse_args()

    manifest_path = args.training_data_dir / "manifest.json"
    # Decode explicitly as UTF-8: without an encoding, read_text() uses the
    # platform locale, which misreads non-ASCII JSON on non-UTF-8 systems.
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    transcript = json.loads(args.transcript.read_text(encoding="utf-8"))
    story_bible = json.loads(args.story_bible.read_text(encoding="utf-8"))
    shot_reference = json.loads(args.shot_reference.read_text(encoding="utf-8"))
    matchers = build_matchers(story_bible)

    # These lookups do not depend on the clip, so do them once.
    all_segments = transcript.get("segments", [])
    shot_prompt_summary = shot_reference.get("prompt_summary", [])

    scene_context = {
        "story_bible": str(args.story_bible),
        "shot_reference": str(args.shot_reference),
        "transcript": str(args.transcript),
        "clips": [],
    }

    for clip in manifest.get("clips", []):
        start = float(clip.get("start", 0.0))
        end = start + float(clip.get("duration", 0.0))
        segments = [
            seg for seg in all_segments
            if overlap(start, end, float(seg.get("start", 0.0)), float(seg.get("end", 0.0)))
        ]
        # Segments with missing or empty text add nothing to the excerpt.
        transcript_excerpt = " ".join(seg["text"].strip() for seg in segments if seg.get("text")).strip()
        entities = match_entities(transcript_excerpt, matchers)
        context_summary = build_context_summary(story_bible, transcript_excerpt, entities)
        scene_context["clips"].append({
            "clip": clip["clip"],
            "start": start,
            "end": round(end, 3),
            "transcript_excerpt": transcript_excerpt,
            "transcript_segments": segments,
            "candidate_entities": entities,
            "story_context": context_summary,
            "shot_prompt_summary": shot_prompt_summary,
        })

        # Annotate the manifest entry in place; the manifest is rewritten below.
        clip["transcript"] = transcript_excerpt
        clip["candidate_entities"] = entities
        clip["story_context"] = context_summary
        clip["shot_prompt_summary"] = shot_prompt_summary

    manifest_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8")
    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text(json.dumps(scene_context, indent=2) + "\n", encoding="utf-8")
    print(f"Wrote scene context to {args.output}")


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
