/**
 * Evidence Collector Agent
 * 
 * Automatically ingests new content, media, and evidence into the database.
 * Handles media downloads, archiving, and metadata extraction.
 * 
 * Usage:
 *   agent-collector -- --investigation <id> --auto
 *   Optional flags: --interval <minutes>, --media true|false, --archive true|false
 */

import { randomUUID } from "node:crypto";
import { writeFile, mkdir } from "node:fs/promises";
import { join, dirname } from "node:path";
import { query } from "../db.js";
import type { Content, Media } from "../types.js";

// ─── CLI Arguments ──────────────────────────────────────────────────────────

interface CollectorArgs {
  investigation: string;   // investigation id (required)
  auto: boolean;           // keep running on an interval instead of a single pass
  checkInterval: number;   // minutes between collection cycles in --auto mode
  downloadMedia: boolean;  // download media files referenced by content
  archiveHttp: boolean;    // archive raw JSON payloads into the http_log table
}

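/**
 * Parses CLI flags into a CollectorArgs config with sensible defaults
 * (single pass, 10-minute interval, media download and HTTP archiving on).
 * Exits with an error when required flags are missing or invalid.
 */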
function parseArgs(): CollectorArgs {
  const args = process.argv.slice(2);
  const config: CollectorArgs = {
    investigation: "",
    auto: false,
    checkInterval: 10,
    downloadMedia: true,
    archiveHttp: true,
  };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case "--investigation":
      case "-i":
        config.investigation = args[++i];
        break;
      case "--auto":
        config.auto = true;
        break;
      case "--interval":
        config.checkInterval = parseInt(args[++i], 10);
        break;
      case "--media":
        config.downloadMedia = args[++i] === "true";
        break;
      case "--archive":
        config.archiveHttp = args[++i] === "true";
        break;
    }
  }

  if (!config.investigation) {
    console.error("Error: --investigation <id> is required");
    process.exit(1);
  }
  if (!Number.isFinite(config.checkInterval) || config.checkInterval <= 0) {
    console.error("Error: --interval must be a positive number of minutes");
    process.exit(1);
  }

  return config;
}

// ─── Evidence Collection ────────────────────────────────────────────────────

interface UnprocessedContent {
  id: string;
  type: Content["type"];
  entity_id: string;
  text?: string;
  url?: string;
  raw_json?: Record<string, unknown>;
  collected_at: Date;
  processed: boolean;
}

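/**
 * Loads up to `limit` content rows for the investigation that have not yet
 * been marked processed (tracked via a `processed` flag inside raw_json),
 * newest first.
 */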
async function fetchUnprocessedContent(
  investigationId: string,
  limit: number
): Promise<UnprocessedContent[]> {
  const result = await query(
    `SELECT id, type, entity_id, text, url, raw_json, collected_at
     FROM content
     WHERE investigation_id = $1
     AND (raw_json->>'processed') IS DISTINCT FROM 'true'
     ORDER BY collected_at DESC
     LIMIT $2`,
    [investigationId, limit]
  );

  return result.rows.map((row: any) => ({
    ...row,
    processed: row.raw_json?.processed === true,
  }));
}

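/**
 * Collects candidate media URLs from the known raw_json fields: post media
 * and video arrays, ad creative URLs, and profile/cover photos.
 */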
async function extractMediaUrls(content: UnprocessedContent): Promise<string[]> {
  const urls: string[] = [];

  if (content.raw_json) {
    // Extract from Facebook post data
    if (content.raw_json.media_urls && Array.isArray(content.raw_json.media_urls)) {
      urls.push(...content.raw_json.media_urls.map(u => String(u)));
    }
    if (content.raw_json.video_urls && Array.isArray(content.raw_json.video_urls)) {
      urls.push(...content.raw_json.video_urls.map(u => String(u)));
    }
    if (content.raw_json.creative_image_url) {
      urls.push(String(content.raw_json.creative_image_url));
    }
    if (content.raw_json.creative_video_url) {
      urls.push(String(content.raw_json.creative_video_url));
    }
    if (content.raw_json.profile_photo_url) {
      urls.push(String(content.raw_json.profile_photo_url));
    }
    if (content.raw_json.cover_photo_url) {
      urls.push(String(content.raw_json.cover_photo_url));
    }
  }

  return urls;
}

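/**
 * Downloads a single media file, stores it under media/auto/<investigation>/,
 * and inserts a media record with its SHA-256 hash. Returns null on any
 * failure so the caller can continue with the remaining URLs. Width and
 * height are left undefined; image metadata is not inspected here.
 */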
async function downloadMediaFile(
  url: string,
  investigationId: string,
  contentId: string,
  entityId: string
): Promise<Media | null> {
  try {
    console.log(`  📥 Downloading: ${url.slice(0, 80)}...`);

    const response = await fetch(url);
    if (!response.ok) {
      console.log(`  ⚠️  Failed: ${response.status}`);
      return null;
    }

    const contentType = response.headers.get("content-type") || "application/octet-stream";
    const buffer = Buffer.from(await response.arrayBuffer());

    // Determine media type from the Content-Type header (default: image)
    const mediaType: Media["type"] = contentType.includes("video") ? "video" : "image";

    // Generate file path
    const extension = getExtensionFromMimeType(contentType);
    const filename = `${randomUUID()}${extension}`;
    const filePath = join("media", "auto", investigationId, filename);

    // Ensure directory exists
    await mkdir(dirname(filePath), { recursive: true });
    await writeFile(filePath, buffer);

    // Create media record
    const media: Media = {
      id: `media_${randomUUID()}`,
      investigation_id: investigationId,
      entity_id: entityId,
      content_id: contentId,
      type: mediaType,
      file_path: filePath,
      url_original: url,
      sha256: await hashBuffer(buffer),
      width: undefined, // Would extract from image metadata
      height: undefined,
      collected_at: new Date(),
    };

    // Save to database
    await query(
      `INSERT INTO media (id, investigation_id, entity_id, content_id, type, file_path, url_original, sha256, collected_at)
       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, NOW())`,
      [
        media.id,
        media.investigation_id,
        media.entity_id,
        media.content_id,
        media.type,
        media.file_path,
        media.url_original,
        media.sha256,
      ]
    );

    return media;
  } catch (error) {
    console.error(`  ❌ Download failed:`, error instanceof Error ? error.message : error);
    return null;
  }
}

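/**
 * Archives the raw JSON payload of a content row into the http_log table as a
 * synthetic capture, preserving the original response body as evidence.
 */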
async function archiveHttpCapture(
  content: UnprocessedContent,
  investigationId: string
): Promise<void> {
  if (!content.raw_json || !content.url) {
    return;
  }

  const httpLog = {
    url: content.url,
    method: "GET",
    response_status: 200, // synthetic capture: the payload was already collected successfully
    response_body: JSON.stringify(content.raw_json, null, 2),
    collected_at: new Date(),
    notes: `Auto-archived from ${content.type} ${content.id}`,
  };

  // Save to http_log table
  await query(
    `INSERT INTO http_log (investigation_id, url, method, response_status, response_body, collected_at, notes)
     VALUES ($1, $2, $3, $4, $5, NOW(), $6)`,
    [
      investigationId,
      httpLog.url,
      httpLog.method,
      httpLog.response_status,
      httpLog.response_body,
      httpLog.notes,
    ]
  );

  console.log(`  📦 Archived HTTP capture for ${content.id}`);
}

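/** Sets the `processed` flag inside the content row's raw_json so the item is skipped on later cycles. */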
async function markContentProcessed(contentId: string): Promise<void> {
  await query(
    `UPDATE content 
     SET raw_json = COALESCE(raw_json, '{}'::jsonb) || jsonb_build_object('processed', true)
     WHERE id = $1`,
    [contentId]
  );
}

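/**
 * Pulls simple identifiers (phone, email, website domain) out of raw_json and
 * inserts them into the identifiers table, ignoring conflicts. Returns the
 * number of identifiers found.
 */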
async function extractIdentifiers(
  entityId: string,
  investigationId: string,
  rawJson?: Record<string, unknown>
): Promise<number> {
  if (!rawJson) return 0;

  const identifiers: Array<{ type: string; value: string }> = [];

  // Extract phone numbers
  if (typeof rawJson.phone === "string") {
    identifiers.push({ type: "phone", value: rawJson.phone });
  }

  // Extract emails
  if (typeof rawJson.email === "string") {
    identifiers.push({ type: "email", value: rawJson.email });
  }

  // Extract website/domain
  if (typeof rawJson.website === "string") {
    const domain = extractDomain(rawJson.website);
    if (domain) {
      identifiers.push({ type: "domain", value: domain });
    }
  }

  // Save identifiers
  for (const ident of identifiers) {
    await query(
      `INSERT INTO identifiers (investigation_id, entity_id, type, value, collected_at)
       VALUES ($1, $2, $3, $4, NOW())
       ON CONFLICT DO NOTHING`,
      [investigationId, entityId, ident.type, ident.value]
    );
  }

  if (identifiers.length > 0) {
    console.log(`  🏷️  Extracted ${identifiers.length} identifiers`);
  }

  return identifiers.length;
}

// ─── Collection Process ─────────────────────────────────────────────────────

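/**
 * Processes a batch of unprocessed content: downloads referenced media,
 * archives the raw payload, extracts identifiers for the linked entity, and
 * marks each item processed. Per-item failures are logged and do not abort
 * the batch.
 */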
async function processContentBatch(
  investigationId: string,
  contentBatch: UnprocessedContent[],
  downloadMedia: boolean,
  archiveHttp: boolean
): Promise<{
  processedCount: number;
  mediaDownloaded: number;
  identifiersExtracted: number;
}> {
  let processedCount = 0;
  let mediaDownloaded = 0;
  let identifiersExtracted = 0;

  for (const [index, content] of contentBatch.entries()) {
    console.log(`\n[${index + 1}/${contentBatch.length}] Processing ${content.id}...`);

    try {
      // Download media
      if (downloadMedia) {
        const mediaUrls = await extractMediaUrls(content);
        for (const url of mediaUrls) {
          const media = await downloadMediaFile(url, investigationId, content.id, content.entity_id);
          if (media) {
            mediaDownloaded++;
          }
        }
      }

      // Archive HTTP captures
      if (archiveHttp && content.raw_json) {
        await archiveHttpCapture(content, investigationId);
      }

      // Extract identifiers
      if (content.entity_id) {
        const entityResult = await query(
          "SELECT investigation_id FROM entities WHERE id = $1",
          [content.entity_id]
        );
        if (entityResult.rows.length > 0) {
          const invId = String(entityResult.rows[0].investigation_id);
          identifiersExtracted += await extractIdentifiers(
            content.entity_id,
            invId,
            content.raw_json
          );
        }
      }

      // Mark as processed
      await markContentProcessed(content.id);
      processedCount++;

      console.log(`  ✅ Processed`);
    } catch (error) {
      console.error(`  ❌ Failed:`, error instanceof Error ? error.message : error);
    }
  }

  return { processedCount, mediaDownloaded, identifiersExtracted };
}

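/** Runs one collection pass over up to 50 unprocessed items and prints a summary. */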
async function runCollectionCycle(config: CollectorArgs): Promise<void> {
  console.log(`\n[Collector] Fetching unprocessed content...`);
  
  const contentBatch = await fetchUnprocessedContent(config.investigation, 50);
  
  if (contentBatch.length === 0) {
    console.log(`[Collector] No unprocessed content found`);
    return;
  }

  console.log(`[Collector] Found ${contentBatch.length} items to process`);

  const stats = await processContentBatch(
    config.investigation,
    contentBatch,
    config.downloadMedia,
    config.archiveHttp
  );

  console.log(`\n${"=".repeat(60)}`);
  console.log("📊 Collection Summary");
  console.log(`${"=".repeat(60)}`);
  console.log(`Content processed: ${stats.processedCount}`);
  console.log(`Media downloaded: ${stats.mediaDownloaded}`);
  console.log(`Identifiers extracted: ${stats.identifiersExtracted}`);
}

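/**
 * Entry loop: prints the run configuration, then either runs a single
 * collection cycle or, in --auto mode, repeats indefinitely, sleeping
 * `checkInterval` minutes between cycles.
 */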
async function continuousCollection(config: CollectorArgs): Promise<void> {
  console.log("📥 CultGuard Evidence Collector");
  console.log(`Investigation: ${config.investigation}`);
  console.log(`Auto mode: ${config.auto ? "enabled" : "disabled"}`);
  console.log(`Download media: ${config.downloadMedia ? "yes" : "no"}`);
  console.log(`Archive HTTP: ${config.archiveHttp ? "yes" : "no"}`);
  console.log(`Check interval: ${config.checkInterval} minutes\n`);

  if (!config.auto) {
    await runCollectionCycle(config);
    return;
  }

  let cycleCount = 0;

  while (true) {
    cycleCount++;
    console.log(`\n${"=".repeat(60)}`);
    console.log(`[Cycle ${cycleCount}] ${new Date().toISOString()}`);
    console.log(`${"=".repeat(60)}`);

    try {
      await runCollectionCycle(config);
    } catch (error) {
      console.error(`[Collector] Cycle failed:`, error);
    }

    const waitMs = config.checkInterval * 60 * 1000;
    console.log(`\n[Collector] Next cycle in ${config.checkInterval} minutes`);
    await new Promise(resolve => setTimeout(resolve, waitMs));
  }
}

// ─── Helper Functions ───────────────────────────────────────────────────────

function getExtensionFromMimeType(mimeType: string): string {
  const mimeToExt: Record<string, string> = {
    "image/jpeg": ".jpg",
    "image/png": ".png",
    "image/gif": ".gif",
    "image/webp": ".webp",
    "video/mp4": ".mp4",
    "video/webm": ".webm",
    "video/quicktime": ".mov",
  };

  return mimeToExt[mimeType.split(";")[0]] || ".bin";
}

async function hashBuffer(buffer: Buffer): Promise<string> {
  const crypto = await import("node:crypto");
  return crypto.createHash("sha256").update(buffer).digest("hex");
}

function extractDomain(url: string): string | null {
  try {
    const urlObj = new URL(url);
    return urlObj.hostname;
  } catch {
    return null;
  }
}
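
// A possible way to fill in the width/height fields left undefined in
// downloadMediaFile. This is a minimal sketch, assuming the `sharp` package
// were added as a dependency (it is not imported by this agent today):
//
//   import sharp from "sharp";
//
//   async function getImageDimensions(
//     buffer: Buffer
//   ): Promise<{ width?: number; height?: number }> {
//     const { width, height } = await sharp(buffer).metadata();
//     return { width, height };
//   }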

// ─── Main Entry Point ───────────────────────────────────────────────────────

async function main() {
  try {
    const config = parseArgs();
    await continuousCollection(config);
  } catch (error) {
    console.error("\n💀 Collector crashed:", error);
    process.exit(1);
  }
}

main();
