/** Title matcher: fuzzy title matching + ID generation */

import type { ScrapedTitle } from '../types';
import { slugify } from './parser.ts';

// Leading article followed by whitespace: English "the"/"a"/"an" and the
// transliterated prefixes "al-"/"el-".
// NOTE(review): the "al-"/"el-" alternatives also require trailing whitespace,
// so a joined form such as "Al-Risalah" is not stripped — confirm this is intended.
const TITLE_ARTICLE_RE = /^(the|a|an|al-|el-)\s+/i;
// Trailing parenthesised four-digit year, e.g. "Dune (2021)".
const TITLE_YEAR_SUFFIX_RE = /\s*\(\d{4}\)\s*$/;
// Trailing parenthesised language/version tag, e.g. "Dune (Dubbed)".
const TITLE_LANGUAGE_SUFFIX_RE = /\s*\((english|arabic|french|dubbed|subtitled)\)\s*$/i;
// Image file extension at the end of a poster file name.
const IMAGE_EXTENSION_RE = /\.(avif|gif|jpe?g|png|webp)$/i;
// Generic words removed from poster file names before fingerprints are compared.
// Case-sensitive: it is applied to an already lower-cased string.
const IMAGE_NOISE_TOKEN_RE =
  /\b(backdrop|cover|film|hero|image|img|large|medium|movie|original|poster|public|small|thumb|thumbnail|upload|uploads)\b/g;
// Whole-string IMDb title id: "tt" followed by at least five digits.
const IMDB_ID_RE = /^tt\d{5,}$/i;

/** Row shape returned by the `titles` SELECT statements issued in `matchMovie`. */
interface TitleCandidateRow {
  id: string;
  title_en: string;
  title_ar?: string | null;
  title_original?: string | null;
  // Only the first four characters are read, as the release year.
  release_date?: string | null;
  imdb_id?: string | null;
  // Compared numerically against the scraped title's `duration_min`.
  duration_min?: number | null;
  // Delimited genre string; split on `,`, `/`, `|` or `;` before comparison.
  genre_raw?: string | null;
  poster_url?: string | null;
  // Delimited cast string; split on `,`, `/`, `|` or `;` before comparison.
  cast_list?: string | null;
  // 1 when a `title_chain_refs` row with `confirmed = 1` exists for this title, else 0.
  confirmed_ref?: number | null;
}

/**
 * Scraped-title fields consulted by the matcher: the title variants plus the
 * optional corroborating metadata, extended with an optional cast list.
 */
type MatchMovieInput = Pick<
  ScrapedTitle,
  | 'title_en'
  | 'title_ar'
  | 'title_original'
  | 'genre'
  | 'duration_min'
  | 'poster_url'
  | 'imdb_id'
  | 'release_date'
> & {
  // Cast member names; compared against the candidate's `cast_list`.
  cast?: string[];
};

/** Internal result of scoring one candidate row against a scraped title. */
interface ScoredMovieCandidate {
  id: string;
  title_en: string;
  // Final score, clamped to [0, 1]; exactly 1 for an IMDb id match.
  confidence: number;
  confirmed: boolean;
  // Number of metadata checks that corroborated the match.
  signalCount: number;
  // Title-only similarity; used as the last tie-breaker when ranking candidates.
  titleScore: number;
}

/** Public result of `matchMovie`: the chosen title row and how strongly it matched. */
export interface MatchMovieResult {
  id: string;
  title_en: string;
  // Score in [0, 1] of the chosen candidate.
  confidence: number;
  // Whether the match is strong enough to be treated as confirmed.
  confirmed: boolean;
  // Number of metadata checks that corroborated the match.
  signalCount: number;
}

/**
 * Removes trailing "(YYYY)" year tags and "(language)" tags from a title.
 *
 * Stripping repeats until the title stops changing, so stacked suffixes are
 * removed in any order ("Dune (2021) (Dubbed)", "Film (Arabic) (Subtitled)").
 * A single year-then-language pass would leave "Dune (2021)" behind.
 *
 * @param title Raw title text.
 * @returns The trimmed title without trailing year/language suffixes.
 */
function stripSuffix(title: string): string {
  let current = title.trim();
  // Every productive pass shortens the string, so the loop always terminates.
  for (;;) {
    const next = current
      .replace(TITLE_YEAR_SUFFIX_RE, '')
      .replace(TITLE_LANGUAGE_SUFFIX_RE, '')
      .trim();
    if (next === current) return current;
    current = next;
  }
}

/**
 * Canonical comparison form of a title: suffixes stripped, lower-cased,
 * leading article dropped, "&" spelled out as "and", everything except
 * letters, digits and whitespace blanked, whitespace collapsed.
 */
function normalizeTitle(title: string): string {
  const lowered = stripSuffix(title).toLowerCase();
  const withoutArticle = lowered.replace(TITLE_ARTICLE_RE, '');
  const ampersandSpelledOut = withoutArticle.replace(/&/g, ' and ');
  const alphanumericOnly = ampersandSpelledOut.replace(/[^\p{L}\p{N}\s]/gu, ' ');
  return alphanumericOnly.replace(/\s+/g, ' ').trim();
}

/** Splits the normalized form of a title into its non-empty word tokens. */
function tokenizeTitle(title: string): string[] {
  const tokens: string[] = [];
  for (const piece of normalizeTitle(title).split(' ')) {
    const token = piece.trim();
    if (token) tokens.push(token);
  }
  return tokens;
}

/**
 * Levenshtein edit distance between two strings, with unit cost for insert,
 * delete and substitute. Compares UTF-16 code units.
 *
 * Only the previous and current rows of the distance table are kept.
 */
function levenshtein(a: string, b: string): number {
  const rows = a.length;
  const cols = b.length;

  // Row 0: turning the empty prefix of `a` into b[0..col) costs `col` inserts.
  let previous: number[] = Array.from({ length: cols + 1 }, (_, col) => col);

  for (let row = 1; row <= rows; row++) {
    // Column 0: turning a[0..row) into the empty string costs `row` deletes.
    const current: number[] = [row];
    for (let col = 1; col <= cols; col++) {
      if (a[row - 1] === b[col - 1]) {
        current.push(previous[col - 1]);
      } else {
        current.push(1 + Math.min(previous[col], current[col - 1], previous[col - 1]));
      }
    }
    previous = current;
  }

  return previous[cols];
}

/**
 * Similarity of two titles in [0, 1]: 1 minus the edit distance between their
 * normalized forms divided by the longer normalized length.
 *
 * @returns 1.0 when the normalized titles are identical (including both empty).
 */
export function titleSimilarity(a: string, b: string): number {
  const left = normalizeTitle(a);
  const right = normalizeTitle(b);
  if (left === right) {
    return 1.0;
  }
  // The strings differ, so at least one is non-empty and the divisor is positive.
  const longest = left.length >= right.length ? left.length : right.length;
  const distance = levenshtein(left, right);
  return 1 - distance / longest;
}

/**
 * Reduces one word to a rough, Metaphone-inspired phonetic code.
 *
 * The spelling rewrites run in a fixed order (later rules see the output of
 * earlier ones). Vowels after the first character are then dropped and runs
 * of a repeated character are collapsed to one.
 */
function metaphoneToken(token: string): string {
  const lowered = token.toLowerCase();
  if (lowered === '') return lowered;

  // Order matters: e.g. a leading "x" becomes "s" before the general x -> ks rule.
  const rewrites: Array<[RegExp, string]> = [
    [/^[kgp]n/, 'n'],
    [/^wr/, 'r'],
    [/^x/, 's'],
    [/^wh/, 'w'],
    [/mb$/g, 'm'],
    [/ph/g, 'f'],
    [/ght/g, 't'],
    [/tch/g, 'ch'],
    [/dg(?=[eiy])/g, 'j'],
    [/c(?=[eiy])/g, 's'],
    [/ck/g, 'k'],
    [/q/g, 'k'],
    [/x/g, 'ks'],
    [/v/g, 'f'],
    [/z/g, 's'],
    [/sch/g, 'sk'],
    [/tion/g, 'shun'],
  ];

  let code = lowered;
  for (const [pattern, replacement] of rewrites) {
    code = code.replace(pattern, replacement);
  }

  if (code.length > 1) {
    // Keep the leading character even when it is a vowel.
    const head = code.charAt(0);
    const consonantTail = code.slice(1).replace(/[aeiouy]/g, '');
    code = head + consonantTail;
  }

  return code.replace(/(.)\1+/g, '$1');
}

/** Space-joined phonetic codes of a title's tokens; empty codes are skipped. */
function titleMetaphoneSignature(title: string): string {
  const codes: string[] = [];
  for (const token of tokenizeTitle(title)) {
    const code = metaphoneToken(token);
    if (code) codes.push(code);
  }
  return codes.join(' ');
}

/**
 * Phonetic similarity of two titles in [0, 1], from the edit distance between
 * their metaphone signatures.
 *
 * The signatures are compared directly. Routing them through `titleSimilarity`
 * would normalize them a second time, and a leading code that happens to
 * spell an article would be stripped: "Anna Karenina" has signature "an krn",
 * which would then score 1.0 against "Karenina" ("krn").
 *
 * @returns 1.0 when both signatures are empty or equal, 0 when exactly one is
 *   empty, otherwise 1 - distance / longer signature length.
 */
export function titlePhoneticSimilarity(a: string, b: string): number {
  const left = titleMetaphoneSignature(a);
  const right = titleMetaphoneSignature(b);
  if (!left && !right) return 1.0;
  if (!left || !right) return 0;
  if (left === right) return 1.0;
  return 1 - levenshtein(left, right) / Math.max(left.length, right.length);
}

/**
 * Fraction of distinct title tokens the two titles share, in [0, 1].
 *
 * Both sides are reduced to sets of distinct tokens, so the result does not
 * depend on argument order. Counting every left-hand occurrence made
 * "la la land" vs "la land" score 1.0 one way round and 0.67 the other.
 *
 * @returns 1.0 when neither title has tokens, 0 when only one has tokens,
 *   otherwise shared distinct tokens / larger distinct-token count.
 */
function titleTokenOverlap(a: string, b: string): number {
  const left = new Set(tokenizeTitle(a));
  const right = new Set(tokenizeTitle(b));
  if (left.size === 0 && right.size === 0) return 1.0;
  if (left.size === 0 || right.size === 0) return 0;

  let shared = 0;
  for (const token of left) {
    if (right.has(token)) shared += 1;
  }
  return shared / Math.max(left.size, right.size);
}

/**
 * Returns the trimmed, lower-cased IMDb id when it has the "tt" + digits
 * shape, and null for missing or malformed values.
 */
function normalizeImdbId(value: string | null | undefined): string | null {
  if (!value) {
    return null;
  }
  const candidate = value.trim().toLowerCase();
  if (IMDB_ID_RE.test(candidate)) {
    return candidate;
  }
  return null;
}

/**
 * Counts the distinct normalized names that appear in both lists.
 * Each side may be an array or a delimited string.
 *
 * @returns 0 when either side has no usable names.
 */
export function sharedNameOverlapCount(
  left: string[] | string | null | undefined,
  right: string[] | string | null | undefined,
): number {
  const leftNames = normalizeNameList(left);
  const rightNames = new Set(normalizeNameList(right));
  if (leftNames.length === 0 || rightNames.size === 0) return 0;

  let overlap = 0;
  for (const name of leftNames) {
    if (rightNames.has(name)) overlap += 1;
  }
  return overlap;
}

/**
 * Counts the distinct normalized genres that appear in both lists.
 * Each side may be an array or a delimited string.
 *
 * @returns 0 when either side has no usable genres.
 */
export function genreOverlapCount(
  left: string[] | string | null | undefined,
  right: string[] | string | null | undefined,
): number {
  const leftGenres = normalizeGenreList(left);
  const rightGenres = new Set(normalizeGenreList(right));
  if (leftGenres.length === 0 || rightGenres.size === 0) return 0;

  let overlap = 0;
  for (const genre of leftGenres) {
    if (rightGenres.has(genre)) overlap += 1;
  }
  return overlap;
}

/**
 * Compares two poster URLs by the fingerprints derived from their file names.
 *
 * @returns A similarity in [0, 1], or null when either URL yields no fingerprint.
 */
export function posterFingerprintSimilarity(
  left: string | null | undefined,
  right: string | null | undefined,
): number | null {
  const leftPrint = extractPosterFingerprint(left);
  const rightPrint = extractPosterFingerprint(right);
  if (leftPrint === null || rightPrint === null) {
    return null;
  }
  return titleSimilarity(leftPrint, rightPrint);
}

/**
 * Turns an array or a delimited string of names into a de-duplicated list of
 * normalized names, in first-seen order.
 *
 * Strings are split on `,`, `/`, `|` and `;`. Each entry is lower-cased,
 * anything other than letters, digits and whitespace is blanked, whitespace
 * is collapsed, and empty entries are dropped.
 */
function normalizeNameList(value: string[] | string | null | undefined): string[] {
  let rawNames: string[] = [];
  if (Array.isArray(value)) {
    rawNames = value;
  } else if (typeof value === 'string') {
    rawNames = value.split(/[,/|;]+/g);
  }

  const cleaned = rawNames.map((entry) =>
    entry
      .toLowerCase()
      .replace(/[^\p{L}\p{N}\s]/gu, ' ')
      .replace(/\s+/g, ' ')
      .trim(),
  );

  // A Set keeps first-insertion order, which gives the de-duplicated sequence.
  return Array.from(new Set(cleaned)).filter((name) => name !== '');
}

/**
 * Turns an array or a delimited string of genres into a de-duplicated list of
 * normalized genres, in first-seen order.
 *
 * Strings are split on `,`, `/`, `|` and `;`. Each entry is lower-cased, "&"
 * becomes "and", anything other than letters, digits and whitespace is
 * blanked, whitespace is collapsed, and empty entries are dropped.
 */
function normalizeGenreList(value: string[] | string | null | undefined): string[] {
  let rawGenres: string[] = [];
  if (Array.isArray(value)) {
    rawGenres = value;
  } else if (typeof value === 'string') {
    rawGenres = value.split(/[,/|;]+/g);
  }

  const cleaned = rawGenres.map((entry) =>
    entry
      .toLowerCase()
      .replace(/&/g, ' and ')
      .replace(/[^\p{L}\p{N}\s]/gu, ' ')
      .replace(/\s+/g, ' ')
      .trim(),
  );

  // A Set keeps first-insertion order, which gives the de-duplicated sequence.
  return Array.from(new Set(cleaned)).filter((genre) => genre !== '');
}

/**
 * Derives a comparable fingerprint from a poster URL's file name.
 *
 * Takes the last path segment, percent-decodes it, lower-cases it, and removes
 * the image extension, separator characters, "WxH" size tokens and generic
 * noise words such as "poster" or "thumb".
 *
 * @param value Poster URL or path; may be relative.
 * @returns The fingerprint, or null when the input is empty or nothing remains.
 */
function extractPosterFingerprint(value: string | null | undefined): string | null {
  if (!value) return null;

  let path = value.trim();
  try {
    // The base URL only lets relative paths parse; its host is never used.
    path = new URL(path, 'https://cultroll.local').pathname;
  } catch {
    // keep the raw value as a best-effort path fragment
  }

  const rawTail = path.split('/').filter(Boolean).pop() ?? path;
  let tail = rawTail;
  try {
    tail = decodeURIComponent(rawTail);
  } catch {
    // A malformed percent escape (e.g. "100%.jpg") makes decodeURIComponent
    // throw URIError. Fall back to the undecoded segment so one bad poster
    // URL cannot abort the whole match.
  }

  const normalized = tail
    .toLowerCase()
    .replace(IMAGE_EXTENSION_RE, '')
    .replace(/[._-]+/g, ' ')
    .replace(/\b\d{2,4}x\d{2,4}\b/g, ' ')
    .replace(IMAGE_NOISE_TOKEN_RE, ' ')
    .replace(/\s+/g, ' ')
    .trim();

  return normalized || null;
}

/**
 * Extracts the release year from a date string that starts with a four-digit year.
 *
 * The value is trimmed and must begin with exactly four digits. Partial or
 * non-year prefixes are rejected rather than half-parsed: "24/01/2024" must
 * not become year 24, and " 2024-05-01" must not become year 202.
 *
 * @param value Date string such as "2024-05-01"; may be null/undefined/empty.
 * @returns The year, or null when no leading four-digit year is present.
 */
function extractReleaseYear(value: string | null | undefined): number | null {
  if (!value) return null;
  const match = /^\d{4}/.exec(value.trim());
  return match ? Number(match[0]) : null;
}

/** Clamps a score to the inclusive range [0, 1]; in-range values pass through unchanged. */
function clampScore(value: number): number {
  return value < 0 ? 0 : value > 1 ? 1 : value;
}

/**
 * Collects the usable title variants: trimmed, non-empty, and de-duplicated
 * case-insensitively. The first spelling seen wins and input order is kept.
 */
function collectTitleVariants(...values: Array<string | null | undefined>): string[] {
  // Keyed by lower-cased text; a Map iterates in insertion order.
  const firstSpellingByKey = new Map<string, string>();
  for (const candidate of values) {
    const text = candidate?.trim();
    if (!text) continue;
    const key = text.toLowerCase();
    if (!firstSpellingByKey.has(key)) {
      firstSpellingByKey.set(key, text);
    }
  }
  return Array.from(firstSpellingByKey.values());
}

/**
 * Builds the two search fragments used to pre-filter candidate rows.
 *
 * @returns `phrase`: the first 18 characters of the normalized title.
 *   `token`: the first token of at least four characters, else the first
 *   token, else an empty string.
 */
function buildTitleProbe(title: string): { phrase: string; token: string } {
  const phrase = normalizeTitle(title).slice(0, 18);
  const words = tokenizeTitle(title);

  let token = '';
  const firstLongWord = words.find((word) => word.length >= 4);
  if (firstLongWord !== undefined) {
    token = firstLongWord;
  } else if (words[0] !== undefined) {
    token = words[0];
  }

  return { phrase, token };
}

/**
 * Scores how well one stored title row matches a scraped title.
 *
 * The best title similarity across all title-variant pairs sets a base score.
 * Release year, runtime, genre, cast, poster file name and an existing
 * confirmed chain reference then raise or lower it. Several checks are hard
 * vetoes that return null outright.
 *
 * @param source Scraped title fields to match.
 * @param candidate Stored title row to compare against.
 * @param threshold Minimum acceptable score; raised to at least 0.88 when the
 *   source carries fewer than two metadata fields.
 * @returns The scored candidate, or null when it is rejected.
 */
function scoreCandidateMatch(
  source: MatchMovieInput,
  candidate: TitleCandidateRow,
  threshold: number,
): ScoredMovieCandidate | null {
  const sourceVariants = collectTitleVariants(source.title_en, source.title_ar, source.title_original);
  const candidateVariants = collectTitleVariants(
    candidate.title_en,
    candidate.title_ar ?? null,
    candidate.title_original ?? null,
  );

  let bestSimilarity = 0;
  let bestPhonetic = 0;
  let bestTokenOverlap = 0;

  // Each metric keeps its own maximum over every source/candidate variant pair;
  // the three maxima may come from different pairs.
  for (const sourceTitle of sourceVariants) {
    for (const candidateTitle of candidateVariants) {
      bestSimilarity = Math.max(bestSimilarity, titleSimilarity(sourceTitle, candidateTitle));
      bestPhonetic = Math.max(bestPhonetic, titlePhoneticSimilarity(sourceTitle, candidateTitle));
      bestTokenOverlap = Math.max(bestTokenOverlap, titleTokenOverlap(sourceTitle, candidateTitle));
    }
  }

  // Title gate: reject unless the titles are close textually or phonetically.
  // NOTE(review): this gate runs before the IMDb check below, so a candidate with
  // the same IMDb id but a dissimilar title is rejected — confirm this is intended.
  if (bestSimilarity < 0.76 && bestPhonetic < 0.84) return null;

  // Base score: weighted blend of the three title metrics (weights sum to 1).
  let score = bestSimilarity * 0.76 + bestPhonetic * 0.18 + bestTokenOverlap * 0.06;
  let signalCount = 0;

  // IMDb ids are decisive when both sides have a valid one: equal ids confirm the
  // match at full confidence, different ids veto it.
  const sourceImdbId = normalizeImdbId(source.imdb_id);
  const candidateImdbId = normalizeImdbId(candidate.imdb_id);
  if (sourceImdbId && candidateImdbId) {
    if (sourceImdbId === candidateImdbId) {
      return {
        id: candidate.id,
        title_en: candidate.title_en,
        confidence: 1,
        confirmed: true,
        signalCount: 4,
        titleScore: Math.max(bestSimilarity, bestPhonetic),
      };
    }
    return null;
  }

  // Release year: more than one year apart is a veto. A one-year gap is accepted
  // only when the titles are near-identical.
  const sourceYear = extractReleaseYear(source.release_date);
  const candidateYear = extractReleaseYear(candidate.release_date ?? null);
  if (sourceYear && candidateYear) {
    const diff = Math.abs(sourceYear - candidateYear);
    if (diff > 1) return null;
    if (diff === 0) {
      score += 0.08;
      signalCount += 1;
    } else if (bestSimilarity >= 0.94 || bestPhonetic >= 0.95) {
      score += 0.02;
      signalCount += 1;
    } else {
      return null;
    }
  }

  // Runtime: compared only when both values are finite and non-zero.
  if (
    Number.isFinite(source.duration_min) &&
    Number.isFinite(candidate.duration_min) &&
    source.duration_min &&
    candidate.duration_min
  ) {
    const runtimeDiff = Math.abs(source.duration_min - candidate.duration_min);
    // A gap over 35 is a veto; smaller gaps give a graded bonus or penalty.
    if (runtimeDiff > 35) return null;
    if (runtimeDiff <= 5) {
      score += 0.08;
      signalCount += 1;
    } else if (runtimeDiff <= 12) {
      score += 0.04;
      signalCount += 1;
    } else if (runtimeDiff <= 20) {
      // Small bonus that does not count as a corroborating signal.
      score += 0.01;
    } else {
      score -= 0.08;
    }
  }

  // Genre: judged only when both sides list at least one genre.
  const genreOverlap = genreOverlapCount(source.genre ?? null, candidate.genre_raw ?? null);
  const sourceGenres = normalizeGenreList(source.genre ?? null);
  const candidateGenres = normalizeGenreList(candidate.genre_raw ?? null);
  if (sourceGenres.length && candidateGenres.length) {
    if (genreOverlap > 0) {
      score += 0.04;
      signalCount += 1;
    } else {
      score -= 0.08;
    }
  }

  // Cast: each shared name adds 0.04, capped at 0.08. Zero overlap is penalized
  // only when both sides list at least two names.
  const castOverlap = sharedNameOverlapCount(source.cast ?? null, candidate.cast_list ?? null);
  const sourceCast = normalizeNameList(source.cast ?? null);
  const candidateCast = normalizeNameList(candidate.cast_list ?? null);
  if (castOverlap > 0) {
    score += Math.min(0.08, castOverlap * 0.04);
    signalCount += 1;
  } else if (sourceCast.length >= 2 && candidateCast.length >= 2) {
    score -= 0.04;
  }

  // Poster: near-identical file names corroborate; very different ones count
  // slightly against. Skipped when either fingerprint is unavailable.
  const posterSimilarity = posterFingerprintSimilarity(source.poster_url ?? null, candidate.poster_url ?? null);
  if (posterSimilarity !== null) {
    if (posterSimilarity >= 0.9) {
      score += 0.03;
      signalCount += 1;
    } else if (posterSimilarity <= 0.32) {
      score -= 0.03;
    }
  }

  // Small bonus for candidates that already have a confirmed chain reference.
  if (candidate.confirmed_ref) {
    score += 0.02;
  }

  score = clampScore(score);

  // Number of metadata fields the source supplies (truthy values only).
  const sourceSignalCount = [
    source.release_date,
    source.duration_min,
    source.genre,
    source.poster_url,
    source.imdb_id,
    source.cast?.length ? 'cast' : null,
  ].filter(Boolean).length;
  // With little metadata to corroborate, demand a score of at least 0.88.
  const minimumScore = sourceSignalCount >= 2 ? threshold : Math.max(threshold, 0.88);

  if (score < minimumScore) return null;
  // The source had metadata but none of it corroborated: accept only a very close title.
  if (sourceSignalCount >= 2 && signalCount === 0 && bestSimilarity < 0.92) return null;

  // Confirmed: high score plus either two corroborating signals or a near-exact title.
  const confirmed = score >= 0.93 && (signalCount >= 2 || bestSimilarity >= 0.97);

  return {
    id: candidate.id,
    title_en: candidate.title_en,
    confidence: score,
    confirmed,
    signalCount,
    titleScore: bestSimilarity,
  };
}

/**
 * Finds the stored title that best matches a scraped title.
 *
 * 1. When the scraped title has a valid IMDb id, rows with that id are scored
 *    first. The best surviving row is returned as a confirmed match.
 * 2. Otherwise, or when step 1 finds nothing, up to 25 'now_playing' /
 *    'coming_soon' rows are pre-filtered with LIKE probes, scored and ranked.
 *    The winner is rejected when it is too close to the runner-up to be trusted.
 *
 * @param db D1 database handle.
 * @param title Scraped title fields to match.
 * @param threshold Minimum score passed to the candidate scorer (default 0.82).
 * @returns The best match, or null when nothing matches or the result is ambiguous.
 */
export async function matchMovie(
  db: D1Database,
  title: MatchMovieInput,
  threshold = 0.82,
): Promise<MatchMovieResult | null> {
  const baseTitle = stripSuffix(title.title_en);
  const imdbId = normalizeImdbId(title.imdb_id);

  if (imdbId) {
    const imdbRows = await db
      .prepare(
        `SELECT id, title_en, title_ar, title_original, release_date, imdb_id, duration_min, genre_raw, poster_url, cast_list,
                CASE
                  WHEN EXISTS(
                    SELECT 1 FROM title_chain_refs refs
                    WHERE refs.title_id = titles.id AND refs.confirmed = 1
                  ) THEN 1
                  ELSE 0
                END AS confirmed_ref
           FROM titles
          WHERE imdb_id = ?
          LIMIT 5`,
      )
      .bind(imdbId)
      .all<TitleCandidateRow>();

    const imdbHits: ScoredMovieCandidate[] = [];
    for (const row of imdbRows.results ?? []) {
      const hit = scoreCandidateMatch(title, row, threshold);
      if (hit !== null) imdbHits.push(hit);
    }
    imdbHits.sort((a, b) => b.confidence - a.confidence || b.signalCount - a.signalCount);

    const topHit = imdbHits[0];
    if (topHit) {
      // A row found through the IMDb id lookup is always reported as confirmed.
      return {
        id: topHit.id,
        title_en: topHit.title_en,
        confidence: topHit.confidence,
        confirmed: true,
        signalCount: topHit.signalCount,
      };
    }
  }

  const { phrase, token } = buildTitleProbe(baseTitle);
  if (!phrase && !token) return null;

  const phrasePattern = `%${phrase || baseTitle}%`;
  const tokenPattern = token ? `%${token}%` : '';
  const fuzzyRows = await db
    .prepare(
      `SELECT id, title_en, title_ar, title_original, release_date, imdb_id, duration_min, genre_raw, poster_url, cast_list,
              CASE
                WHEN EXISTS(
                  SELECT 1 FROM title_chain_refs refs
                  WHERE refs.title_id = titles.id AND refs.confirmed = 1
                ) THEN 1
                ELSE 0
              END AS confirmed_ref
         FROM titles
        WHERE status IN ('now_playing', 'coming_soon')
          AND (
            title_en LIKE ? OR title_ar LIKE ? OR title_original LIKE ?
            OR (? <> '' AND (title_en LIKE ? OR title_ar LIKE ? OR title_original LIKE ?))
          )
          AND (
            release_date IS NULL OR
            CAST(substr(release_date, 1, 4) AS INTEGER) >= CAST(strftime('%Y', 'now') AS INTEGER) - 3
          )
        LIMIT 25`,
    )
    .bind(phrasePattern, phrasePattern, phrasePattern, token, tokenPattern, tokenPattern, tokenPattern)
    .all<TitleCandidateRow>();

  const ranked: ScoredMovieCandidate[] = [];
  for (const row of fuzzyRows.results ?? []) {
    const hit = scoreCandidateMatch(title, row, threshold);
    if (hit !== null) ranked.push(hit);
  }
  // Rank by confidence, then corroborating signals, then confirmed flag, then title score.
  ranked.sort((a, b) =>
    b.confidence - a.confidence ||
    b.signalCount - a.signalCount ||
    Number(b.confirmed) - Number(a.confirmed) ||
    b.titleScore - a.titleScore,
  );

  const [best, runnerUp] = ranked;
  if (!best) return null;

  if (runnerUp) {
    const lead = best.confidence - runnerUp.confidence;

    // Number of metadata fields the scraped title supplies (truthy values only).
    const sourceSignalCount = [
      title.release_date,
      title.duration_min,
      title.genre,
      title.poster_url,
      title.imdb_id,
      title.cast?.length ? 'cast' : null,
    ].filter(Boolean).length;

    // Title-only input with a narrow lead is too ambiguous to trust.
    if (sourceSignalCount === 0 && lead < 0.05) {
      return null;
    }
    // Near tie where the winner is neither better corroborated nor confirmed.
    if (lead < 0.03 && best.signalCount <= runnerUp.signalCount && !best.confirmed) {
      return null;
    }
  }

  return {
    id: best.id,
    title_en: best.title_en,
    confidence: best.confidence,
    confirmed: best.confirmed,
    signalCount: best.signalCount,
  };
}

/**
 * Merge titles sharing the same tmdb_id after enrichment.
 *
 * For each tmdb_id held by more than one title, the row with the most
 * non-null fields among title_ar, synopsis_en, poster_url and director is
 * kept; ties go to the lowest id. `title_chain_refs` and `showtimes` rows of
 * the other titles are re-pointed to the kept title, then those titles are
 * deleted.
 *
 * The re-pointing and the delete for one group are sent as a single
 * `db.batch`, which D1 runs as a transaction. A failure therefore cannot
 * leave a group half-merged.
 *
 * @param db D1 database handle.
 * @returns Count of duplicate title rows removed.
 */
export async function deduplicateByTmdbId(db: D1Database): Promise<number> {
  const groups = await db.prepare(
    `SELECT tmdb_id, COUNT(*) as cnt, MIN(rowid) as canonical_rowid
     FROM titles WHERE tmdb_id IS NOT NULL
     GROUP BY tmdb_id HAVING cnt > 1`,
  ).all<{ tmdb_id: string; cnt: number; canonical_rowid: number }>();

  // Guard a missing result set the same way the matcher queries do.
  const duplicateGroups = groups.results ?? [];
  if (!duplicateGroups.length) return 0;

  let removed = 0;
  for (const group of duplicateGroups) {
    const candidates = await db.prepare(
      `SELECT id,
        (CASE WHEN title_ar IS NOT NULL THEN 1 ELSE 0 END +
         CASE WHEN synopsis_en IS NOT NULL THEN 1 ELSE 0 END +
         CASE WHEN poster_url IS NOT NULL THEN 1 ELSE 0 END +
         CASE WHEN director IS NOT NULL THEN 1 ELSE 0 END) as completeness
       FROM titles WHERE tmdb_id = ?
       ORDER BY completeness DESC, id ASC`,
    ).bind(group.tmdb_id).all<{ id: string; completeness: number }>();

    const ranked = candidates.results ?? [];
    // The group may have shrunk since the grouping query ran.
    if (ranked.length < 2) continue;
    const [canonical, ...duplicates] = ranked;
    const dupIds = duplicates.map((duplicate) => duplicate.id);

    // Re-point dependents first so the delete never orphans them.
    const repoint = dupIds.flatMap((dupId) => [
      db.prepare(`UPDATE title_chain_refs SET title_id = ? WHERE title_id = ?`).bind(canonical.id, dupId),
      db.prepare(`UPDATE showtimes SET title_id = ? WHERE title_id = ?`).bind(canonical.id, dupId),
    ]);

    const placeholders = dupIds.map(() => '?').join(',');
    const purge = db.prepare(`DELETE FROM titles WHERE id IN (${placeholders})`).bind(...dupIds);

    await db.batch([...repoint, purge]);
    removed += dupIds.length;
    console.log(`dedup: merged ${dupIds.join(', ')} → ${canonical.id} (tmdb_id=${group.tmdb_id})`);
  }
  return removed;
}

/** Derives a title id by passing the title through `slugify`. */
export function generateMovieId(title: string): string {
  const id = slugify(title);
  return id;
}
