/**
 * VOX Cinemas scraper (Lebanon)
 *
 * Strategy:
 *  - Parse /showtimes pages (today + date filter lookahead) for movie cards and booking links.
 *  - Map cinema headings to configured canonical cinema IDs.
 *
 * Notes:
 *  - VOX renders showtimes server-side in <article class="movie-compare"> blocks.
 *  - The selected date is encoded as d=YYYYMMDD.
 */

import type { ScraperEnv, ScraperResult, ScrapedTitle, ScrapedShowtime } from '../types';
import { fetchWithTimeout } from '../utils/http';
import { parseDuration, stripHtml } from '../utils/parser';

/** Default site origin, used when the caller does not supply `opts.baseUrl`. */
const BASE = 'https://lbn.voxcinemas.com';
/** Request headers passed to every page fetch performed by `fetchText`. */
const HEADERS = {
  'User-Agent': 'Mozilla/5.0 (compatible; CultRoll/1.0)',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
};

/**
 * Maps one cinema, as it appears on the VOX site, to a canonical cinema ID.
 *
 * `displayName`, `display_name` and `externalId` are all tried as candidates
 * when matching a cinema heading found in the HTML.
 */
export interface VoxCinemaConfig {
  /** Site-side identifier; also used as a name-matching candidate. */
  externalId: string;
  /** Canonical ID written to `cinema_id` on every emitted showtime. */
  cinemaId: string;
  /** Human-readable cinema name used for heading matching. */
  displayName?: string;
  /** Snake-case alias of `displayName` (presumably for configs loaded from storage; verify against callers). */
  display_name?: string;
}

/** Cinema mapping used when `opts.cinemas` is missing or empty. */
export const DEFAULT_CINEMAS: VoxCinemaConfig[] = [
  { externalId: 'city-centre-beirut', cinemaId: 'vox-hazmieh', displayName: 'City Centre Beirut' },
];

/** Optional overrides accepted by `scrapeVoxCinemas`. */
interface VoxOpts {
  /** Site origin to scrape; trailing slashes are stripped. Defaults to `BASE`. */
  baseUrl?: string;
  /** Cinema mappings; when absent or empty, `DEFAULT_CINEMAS` is used. */
  cinemas?: VoxCinemaConfig[];
  /** Maximum number of dates to scrape; clamped to 1..7, defaulting to 3 when not a finite number. */
  lookaheadDays?: number;
}

/** One raw showtime link extracted from a movie card, before cinema/time normalization. */
interface VoxShowtimeMatch {
  /** Text of the cinema heading the link was found under. */
  cinemaName: string;
  /** Text of the format label for the row, or '' when the row has none. */
  format: string;
  /** Booking identifier for the session; may be '' when none could be derived. */
  bookingId: string;
  /** Absolute booking URL. */
  bookingUrl: string;
  /** Link text with markup stripped, e.g. the time as displayed on the site. */
  displayTime: string;
}

/**
 * Scrapes the VOX showtimes listing for the selected date plus any further
 * dates advertised by the page's date filter, up to the lookahead limit.
 *
 * The landing page is fetched once and reused as the page for the first
 * discovered date; every later date is fetched via `?d=YYYYMMDD`, one request
 * at a time.
 *
 * @param _env Scraper environment (unused by this scraper).
 * @param opts Optional base URL, cinema mapping and lookahead overrides.
 * @returns Titles and showtimes collected across all scraped dates.
 * @throws Error when any page fetch returns a non-OK status.
 */
export async function scrapeVoxCinemas(_env: ScraperEnv, opts?: VoxOpts): Promise<ScraperResult> {
  const rootUrl = (opts?.baseUrl ?? BASE).replace(/\/+$/, '');
  const configured = opts?.cinemas;
  const cinemaList = configured && configured.length > 0 ? configured : DEFAULT_CINEMAS;
  const maxDays = clampLookahead(opts?.lookaheadDays);

  const titlesBySlug = new Map<string, ScrapedTitle>();
  const showtimesById = new Map<string, ScrapedShowtime>();

  const landingHtml = await fetchText(`${rootUrl}/showtimes`);
  const dateKeys = discoverDateKeys(landingHtml, maxDays);

  let pageIndex = 0;
  for (const dateKey of dateKeys) {
    // The landing page already holds the first date's listing; only later dates need a request.
    const pageHtml =
      pageIndex === 0 ? landingHtml : await fetchText(`${rootUrl}/showtimes?d=${dateKey}`);
    parseShowtimesPage(pageHtml, dateKey, rootUrl, cinemaList, titlesBySlug, showtimesById);
    pageIndex += 1;
  }

  return {
    chainId: 'vox-cinemas',
    titles: [...titlesBySlug.values()],
    showtimes: [...showtimesById.values()],
  };
}

/**
 * Parses one rendered /showtimes page and merges its movies and showtimes into
 * the shared accumulators.
 *
 * Each `<article class="movie-compare" data-slug="…">` block yields one title
 * (keyed by slug) and zero or more showtimes. A title seen on several pages is
 * merged: it stops being "coming soon" as soon as any page lists showtimes for
 * it, and missing poster, rating, duration and language are backfilled from
 * later pages.
 *
 * @param html Full HTML of the showtimes page.
 * @param dateKey Date the page represents, as YYYYMMDD.
 * @param baseUrl Site origin used to absolutize relative URLs.
 * @param cinemas Cinema mappings used to resolve cinema headings.
 * @param titles Accumulator of titles keyed by slug; mutated in place.
 * @param showtimeMap Accumulator of showtimes keyed by showtime ID; mutated in place.
 */
function parseShowtimesPage(
  html: string,
  dateKey: string,
  baseUrl: string,
  cinemas: VoxCinemaConfig[],
  titles: Map<string, ScrapedTitle>,
  showtimeMap: Map<string, ScrapedShowtime>
): void {
  const articleRe = /<article[^>]*class="movie-compare"[^>]*data-slug="([^"]+)"[^>]*>([\s\S]*?)<\/article>/gi;
  let articleMatch: RegExpExecArray | null;
  while ((articleMatch = articleRe.exec(html)) !== null) {
    const slug = articleMatch[1].trim();
    const block = articleMatch[2];
    if (!slug) continue;

    // Fall back to a title derived from the slug when the card has no usable <h2>.
    const titleEn =
      extractText(block, /<h2[^>]*>([\s\S]*?)<\/h2>/i) ??
      humanizeSlug(slug);
    const rating = extractText(block, /<span[^>]*class="classification[^"]*"[^>]*>([\s\S]*?)<\/span>/i) ?? undefined;
    const heroImg = absolutizeUrl(
      extractAttr(block, /<img[^>]*class="hero"[^>]*src="([^"]+)"/i),
      baseUrl
    );
    const tagTexts = extractTagTexts(block);
    const duration = tagTexts
      .map((tag) => parseDuration(tag))
      .find((value): value is number => typeof value === 'number');
    // The first tag that does not look like "<n> min" is taken as the language.
    const language = tagTexts.find((tag) => !/\d+\s*min/i.test(tag))?.toLowerCase();

    const extractedShowtimes = extractMovieShowtimes(block, baseUrl);
    const hasShowtimes = extractedShowtimes.length > 0;

    const existing = titles.get(slug);
    if (!existing) {
      titles.set(slug, {
        chainMovieId: slug,
        title_en: titleEn,
        duration_min: duration,
        exhibitor_rating: rating,
        poster_url: heroImg,
        language,
        is_coming_soon: !hasShowtimes,
      });
    } else {
      // A title is only "coming soon" if no scraped page lists a showtime for it.
      existing.is_coming_soon = existing.is_coming_soon && !hasShowtimes;
      if (!existing.poster_url && heroImg) existing.poster_url = heroImg;
      if (!existing.exhibitor_rating && rating) existing.exhibitor_rating = rating;
      if (!existing.duration_min && duration) existing.duration_min = duration;
      // Backfill language the same way as the other optional fields.
      if (!existing.language && language) existing.language = language;
    }

    for (const raw of extractedShowtimes) {
      // Showtimes under headings that match no configured cinema are dropped.
      const cinemaId = resolveCinemaId(raw.cinemaName, cinemas);
      if (!cinemaId) continue;

      const showtime = toIsoShowtime(dateKey, raw.displayTime);
      if (!showtime) continue;

      const bookingUrl = absolutizeUrl(raw.bookingUrl, baseUrl);
      if (!bookingUrl) continue;

      // Prefer the site's booking ID; fall back to the digits of the timestamp so the ID stays unique per slot.
      const stId = `${cinemaId}-${slug}-${raw.bookingId || compactTime(showtime)}`;
      showtimeMap.set(stId, {
        id: stId,
        cinema_id: cinemaId,
        title_id: slug,
        showtime,
        screen_type: normalizeScreenType(raw.format),
        booking_url: bookingUrl,
      });
    }
  }
}

/**
 * Extracts all showtime links from a single movie card.
 *
 * The card is split into cinema sections at each `<h3 class="highlight">`
 * heading. Within a section, every `<li>` row that carries a nested `<ol>` is
 * treated as one format row (its optional `<strong>` label is the format).
 * When a section has no such rows, the whole section is scanned for links
 * with an empty format.
 *
 * @param block Inner HTML of one movie `<article>`.
 * @param baseUrl Site origin used to absolutize booking links.
 * @returns Raw showtime matches in document order.
 */
function extractMovieShowtimes(block: string, baseUrl: string): VoxShowtimeMatch[] {
  const collected: VoxShowtimeMatch[] = [];
  const cinemaSectionRe = /<h3[^>]*class="highlight"[^>]*>([\s\S]*?)<\/h3>([\s\S]*?)(?=<h3[^>]*class="highlight"|$)/gi;

  for (const section of block.matchAll(cinemaSectionRe)) {
    const cinemaName = stripHtml(section[1]);
    const sectionBody = section[2];
    const formatRowRe = /<li>\s*(?:<strong>([\s\S]*?)<\/strong>)?\s*(?:<ol>([\s\S]*?)<\/ol>)?\s*<\/li>/gi;

    let sawFormatRow = false;
    for (const row of sectionBody.matchAll(formatRowRe)) {
      const timesList = row[2];
      // Rows without a nested list carry no showtimes.
      if (!timesList) continue;
      sawFormatRow = true;
      const formatLabel = stripHtml(row[1] ?? '');
      extractShowtimeLinks(timesList, cinemaName, formatLabel, baseUrl, collected);
    }

    if (!sawFormatRow) {
      extractShowtimeLinks(sectionBody, cinemaName, '', baseUrl, collected);
    }
  }

  return collected;
}

/**
 * Appends one `VoxShowtimeMatch` to `out` for every `<li …><a href="…">time</a></li>`
 * found in `html`.
 *
 * The booking ID is taken from the `<li>`'s `data-id` attribute when present,
 * otherwise from the `/booking/<id>` segment of the link. Links whose URL
 * cannot be resolved or whose text is empty are skipped.
 *
 * @param html HTML fragment to scan.
 * @param cinemaName Cinema heading text to attach to every match.
 * @param format Format label to attach to every match ('' when unknown).
 * @param baseUrl Site origin used to absolutize booking links.
 * @param out Destination array; mutated in place.
 */
function extractShowtimeLinks(
  html: string,
  cinemaName: string,
  format: string,
  baseUrl: string,
  out: VoxShowtimeMatch[]
): void {
  // Capture the whole <li> attribute string and read data-id from it afterwards.
  // An inline optional group after a lazy `[^>]*?` never participates: the engine
  // accepts the empty alternative first and the rest of the pattern still matches,
  // so data-id would never be captured.
  const timeRe = /<li([^>]*)>\s*<a[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>\s*<\/li>/gi;
  const dataIdRe = /(?:^|\s)data-id="([^"]+)"/i;

  for (const timeMatch of html.matchAll(timeRe)) {
    const bookingUrl = absolutizeUrl(timeMatch[2], baseUrl);
    if (!bookingUrl) continue;

    const dataId = (dataIdRe.exec(timeMatch[1] ?? '')?.[1] ?? '').trim();
    const bookingId = dataId || extractBookingId(bookingUrl);

    const displayTime = stripHtml(timeMatch[3]);
    if (!displayTime) continue;

    out.push({
      cinemaName,
      format,
      bookingId,
      bookingUrl,
      displayTime,
    });
  }
}

/**
 * Determines which YYYYMMDD date keys to scrape, based on the landing page.
 *
 * The currently selected date (the `name="d"` form value) comes first,
 * followed by dates referenced by `/showtimes?d=…` links in document order,
 * without duplicates. If the page exposes no dates at all, today's key is
 * used.
 *
 * @param html HTML of the landing showtimes page.
 * @param lookaheadDays Maximum number of keys to return.
 * @returns At most `lookaheadDays` distinct date keys.
 */
function discoverDateKeys(html: string, lookaheadDays: number): string[] {
  // A Set keeps insertion order, so it deduplicates and orders in one structure.
  const ordered = new Set<string>();

  const selectedMatch = /name="d"\s+value="(\d{8})"/i.exec(html);
  if (selectedMatch) {
    ordered.add(selectedMatch[1]);
  }

  // The site may HTML-escape '=' as &#x3D; inside link hrefs.
  const linkRe = /\/showtimes\?d(?:=|&#x3D;)(\d{8})/gi;
  for (const link of html.matchAll(linkRe)) {
    ordered.add(link[1]);
    if (ordered.size >= lookaheadDays) break;
  }

  if (ordered.size === 0) {
    ordered.add(formatDateKey(new Date()));
  }

  return Array.from(ordered).slice(0, lookaheadDays);
}

/**
 * Fetches a page with the scraper's standard headers and returns its body text.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body as text.
 * @throws Error when the response status is not OK.
 */
async function fetchText(url: string): Promise<string> {
  const response = await fetchWithTimeout(url, { headers: HEADERS }, {
    resource: `VOX ${url}`,
  });
  if (response.ok) {
    return response.text();
  }
  throw new Error(`VOX fetch failed (${response.status}) for ${url}`);
}

/**
 * Returns the non-empty text of every `<span class="tag">` in the movie card,
 * in document order.
 *
 * @param block Inner HTML of one movie `<article>`.
 */
function extractTagTexts(block: string): string[] {
  const tagRe = /<span[^>]*class="tag"[^>]*>([\s\S]*?)<\/span>/gi;
  return Array.from(block.matchAll(tagRe), (hit) => stripHtml(hit[1]))
    .filter((label) => Boolean(label));
}

/**
 * Returns the markup-stripped text of the first capture group of `re` in
 * `html`, or `null` when there is no match or the stripped text is empty.
 *
 * @param html HTML fragment to search.
 * @param re Pattern whose first capture group holds the wanted content.
 */
function extractText(html: string, re: RegExp): string | null {
  const found = html.match(re);
  return found ? stripHtml(found[1]) || null : null;
}

/**
 * Returns the raw first capture group of `re` in `html`, or `null` when the
 * pattern does not match or the group did not participate.
 *
 * @param html HTML fragment to search.
 * @param re Pattern whose first capture group holds the attribute value.
 */
function extractAttr(html: string, re: RegExp): string | null {
  const found = html.match(re);
  if (found === null) return null;
  const value = found[1];
  return value === undefined ? null : value;
}

/**
 * Resolves an href/src attribute value against `baseUrl`.
 *
 * Attribute values arrive HTML-escaped, so character references are decoded
 * before resolution: the basic named entities plus numeric references such as
 * `&#x3D;` (an escaped '='). Left undecoded, the '#' of a numeric reference
 * would be parsed as the start of the URL fragment and truncate the query.
 *
 * @param url Raw attribute value; may be null/undefined/empty.
 * @param baseUrl Origin to resolve relative URLs against.
 * @returns The absolute URL, or `undefined` when the input is empty or cannot be parsed.
 */
function absolutizeUrl(url: string | null | undefined, baseUrl: string): string | undefined {
  if (!url) return undefined;

  const namedEntities: Record<string, string> = {
    amp: '&',
    quot: '"',
    apos: "'",
    lt: '<',
    gt: '>',
  };
  // Single pass, so "&amp;#x3D;" decodes to the literal text "&#x3D;" rather than being decoded twice.
  const normalized = url
    .replace(/&(#x[0-9a-f]+|#\d+|amp|quot|apos|lt|gt);/gi, (entity: string, body: string) => {
      if (!body.startsWith('#')) {
        return namedEntities[body.toLowerCase()] ?? entity;
      }
      const isHex = body.charAt(1).toLowerCase() === 'x';
      const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
      // String.fromCodePoint throws for values beyond the Unicode range; leave those untouched.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    })
    .trim();
  if (!normalized) return undefined;

  try {
    return new URL(normalized, baseUrl).toString();
  } catch {
    // Unparseable URL or base: treat as "no URL" rather than failing the scrape.
    return undefined;
  }
}

/**
 * Maps a cinema heading from the page to a configured canonical cinema ID.
 *
 * With exactly one configured cinema, every heading resolves to it. Otherwise
 * the heading and each cinema's `displayName`, `display_name` and `externalId`
 * are normalized and compared by substring containment in either direction;
 * the first cinema with a matching candidate wins.
 *
 * @param cinemaName Heading text as scraped from the page.
 * @param cinemas Configured cinema mappings, in priority order.
 * @returns The matching `cinemaId`, or `undefined` when nothing matches.
 */
function resolveCinemaId(cinemaName: string, cinemas: VoxCinemaConfig[]): string | undefined {
  if (cinemas.length === 1) return cinemas[0].cinemaId;

  const normalizedName = normalizeToken(cinemaName);
  // An empty needle makes `includes` true for every candidate, which would map
  // a blank or symbol-only heading to the first configured cinema.
  if (!normalizedName) return undefined;

  for (const cinema of cinemas) {
    const candidates = [cinema.displayName, cinema.display_name, cinema.externalId]
      .filter((value): value is string => Boolean(value));
    for (const candidate of candidates) {
      const normalizedCandidate = normalizeToken(candidate);
      if (!normalizedCandidate) continue;
      if (
        normalizedName.includes(normalizedCandidate) ||
        normalizedCandidate.includes(normalizedName)
      ) {
        return cinema.cinemaId;
      }
    }
  }
  return undefined;
}

/**
 * Lower-cases `value` and reduces it to its ASCII alphanumeric words joined
 * by single spaces; every other character acts as a separator.
 *
 * @param value Text to normalize.
 * @returns The normalized token, or '' when no alphanumeric characters remain.
 */
function normalizeToken(value: string): string {
  const words = value
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((word) => word.length > 0);
  return words.join(' ');
}

/**
 * Combines a YYYYMMDD date key and a 12-hour display time (e.g. "7:30 pm")
 * into a zone-less `YYYY-MM-DDTHH:mm:00` timestamp.
 *
 * @param dateKey Date of the listing as YYYYMMDD.
 * @param displayTime Time text as shown on the site; must include am/pm.
 * @returns The timestamp, or `null` when the date key is malformed, the text
 *          holds no `h:mm am|pm` time, or the time is out of range.
 */
function toIsoShowtime(dateKey: string, displayTime: string): string | null {
  const dateIso = dateKeyToIso(dateKey);
  if (!dateIso) return null;

  const m = displayTime.trim().match(/(\d{1,2}):(\d{2})\s*(am|pm)/i);
  if (!m) return null;

  let hh = parseInt(m[1], 10);
  const mm = m[2];
  const meridiem = m[3].toLowerCase();
  // 12-hour to 24-hour conversion: 12am is midnight, 12pm stays noon.
  if (meridiem === 'pm' && hh < 12) hh += 12;
  if (meridiem === 'am' && hh === 12) hh = 0;

  // Reject impossible clock values instead of emitting an invalid timestamp such as "T25:99:00".
  if (hh > 23 || parseInt(mm, 10) > 59) return null;

  return `${dateIso}T${String(hh).padStart(2, '0')}:${mm}:00`;
}

/**
 * Maps a free-form format label to a canonical screen type.
 *
 * Rules are checked in priority order and the first hit wins, so e.g.
 * "IMAX 3D" is reported as `imax` and "VIP 3D" as `vip`. Labels matching no
 * rule are `standard`.
 *
 * @param raw Format label as scraped (any casing).
 */
function normalizeScreenType(raw: string): string {
  const lowered = raw.toLowerCase();
  // [keywords, canonical type], most specific experience first.
  const rules: ReadonlyArray<readonly [readonly string[], string]> = [
    [['imax'], 'imax'],
    [['4dx'], '4dx'],
    [['mx4d'], 'mx4d'],
    [['vip', 'gold'], 'vip'],
    [['3d'], '3d'],
    [['dolby', 'atmos'], 'dolby-atmos'],
  ];
  for (const [keywords, screenType] of rules) {
    if (keywords.some((keyword) => lowered.includes(keyword))) {
      return screenType;
    }
  }
  return 'standard';
}

/**
 * Converts a YYYYMMDD key to `YYYY-MM-DD`.
 *
 * Only the shape (exactly eight digits) is checked; month and day ranges are
 * not validated.
 *
 * @returns The dashed date, or `null` when `dateKey` is not eight digits.
 */
function dateKeyToIso(dateKey: string): string | null {
  const parts = /^(\d{4})(\d{2})(\d{2})$/.exec(dateKey);
  if (parts === null) return null;
  const [, year, month, day] = parts;
  return [year, month, day].join('-');
}

/**
 * Returns the path segment that follows `/booking/` in a booking URL
 * (case-insensitive), or '' when the URL has no such segment.
 *
 * @param bookingUrl Booking URL to inspect.
 */
function extractBookingId(bookingUrl: string): string {
  const found = /\/booking\/([^/?#]+)/i.exec(bookingUrl);
  return found ? found[1] : '';
}

/**
 * Keeps only the ASCII digits of a timestamp, e.g.
 * `2024-01-31T19:30:00` becomes `20240131193000`.
 *
 * @param showtime Timestamp string to compact.
 */
function compactTime(showtime: string): string {
  const digits = showtime.match(/\d/g) ?? [];
  return digits.join('');
}

/**
 * Normalizes the requested lookahead to a whole number of days in 1..7.
 *
 * @param value Requested number of days; may be undefined or non-finite.
 * @returns 3 when `value` is not a finite number, otherwise `value` rounded
 *          down and clamped to the range 1..7.
 */
function clampLookahead(value: number | undefined): number {
  if (typeof value !== 'number' || !Number.isFinite(value)) return 3;
  const wholeDays = Math.floor(value);
  if (wholeDays < 1) return 1;
  if (wholeDays > 7) return 7;
  return wholeDays;
}

/**
 * Formats an instant as a YYYYMMDD date key in the cinema's local time zone.
 *
 * The key is paired with zone-less wall-clock showtimes, so it must be the
 * local calendar date rather than the UTC one: shortly after local midnight
 * the UTC date is still the previous day.
 *
 * @param date Instant to format.
 * @param timeZone IANA time zone whose calendar date is used; defaults to Beirut.
 * @returns The date key, e.g. `20240131`.
 */
function formatDateKey(date: Date, timeZone = 'Asia/Beirut'): string {
  const parts = new Intl.DateTimeFormat('en-US', {
    timeZone,
    year: 'numeric',
    month: '2-digit',
    day: '2-digit',
  }).formatToParts(date);
  // Read parts by type instead of relying on any locale's field order or separators.
  const pick = (type: string): string =>
    parts.find((part) => part.type === type)?.value ?? '';
  return `${pick('year')}${pick('month')}${pick('day')}`;
}

/**
 * Builds a display title from a URL slug: splits on '-', drops empty pieces,
 * upper-cases the first character of each piece and joins them with spaces.
 *
 * @param slug Movie slug, e.g. `the-dark-knight`.
 * @returns The humanized title, e.g. `The Dark Knight`.
 */
function humanizeSlug(slug: string): string {
  const words: string[] = [];
  for (const piece of slug.split('-')) {
    if (piece.length === 0) continue;
    words.push(piece[0].toUpperCase() + piece.substring(1));
  }
  return words.join(' ');
}
