#!/usr/bin/env node
/**
 * fb-scrape — CLI entry point for Facebook public page OSINT scraping.
 *
 * Usage:
 *   fb-scrape <pageUrl> [options]
 *
 * Connects to an existing Chromium instance via CDP, collects page metadata,
 * scrolls the feed for posts, and optionally gathers comments, reactions,
 * and ad-library data.
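 *
 * Example (placeholder URL and investigation ID):
 *   fb-scrape https://www.facebook.com/examplepage -d 100 --comments --save -i inv-001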
 */

import { program } from 'commander';
import { connect } from './lib/fb/cdp.js';
import { NetworkInterceptor } from './lib/fb/network.js';
import { collectPageMetadata, collectPosts, collectPostDetails, collectAds } from './lib/fb/flows/index.js';
import { upsertPage, upsertPost, upsertComment, storeReactions, storeAd, storeTransparency } from './lib/fb/store.js';
import { downloadMedia } from './lib/fb/media.js';
import { closePool } from './db.js';

import type { FbPageData, FbPostData, FbCommentData, FbReactionData, FbAdData } from './types.js';

// ─── Helpers ───────────────────────────────────────────────────────────────

function ts(): string {
  return new Date().toISOString();
}

function log(msg: string): void {
  console.log(`[${ts()}] ${msg}`);
}

// ─── CLI Definition ────────────────────────────────────────────────────────

program
  .name('fb-scrape')
  .description('Facebook public page OSINT scraper')
  .argument('<url>', 'Facebook page URL')
  .option('-d, --depth <n>', 'Max posts to collect', '50')
  .option('-s, --since <date>', 'Collect posts since date (YYYY-MM-DD)')
  .option('-c, --comments', 'Expand and collect all comments')
  .option('-r, --reactions', 'Collect individual reactor lists')
  .option('-a, --ads', 'Also scrape ad library')
  .option('-p, --port <n>', 'CDP port', '9222')
  .option('-i, --investigation <id>', 'Investigation ID')
  .option('--save', 'Save results to database (default: print only)')
  .action(async (url: string, opts) => {
    const port = parseInt(opts.port, 10);
    const maxPosts = parseInt(opts.depth, 10);
    const sinceDate = opts.since ? new Date(opts.since) : undefined;
    const investigationId = opts.investigation as string | undefined;
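
    // Defensive validation of raw option strings (commander passes them through unparsed)
    if (Number.isNaN(port) || Number.isNaN(maxPosts)) {
      log('❌ --port and --depth must be numeric');
      process.exitCode = 1;
      return;
    }
    if (sinceDate && Number.isNaN(sinceDate.getTime())) {
      log('❌ --since must be a valid YYYY-MM-DD date');
      process.exitCode = 1;
      return;
    }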

    // Results
    let pageData: FbPageData | null = null;
    let posts: FbPostData[] = [];
    let comments: FbCommentData[] = [];
    let reactions: FbReactionData[] = [];
    let ads: FbAdData[] = [];
    let mediaCount = 0;

    // Resources to tear down on exit; cleanup() is guarded so it only runs once
    let interceptor: NetworkInterceptor | null = null;
    let cleanedUp = false;

    async function cleanup(): Promise<void> {
      if (cleanedUp) return;
      cleanedUp = true;
      log('🧹 cleaning up…');
      try { await interceptor?.stop(); } catch { /* ignore */ }
      try { await closePool(); } catch { /* ignore */ }
    }

    // Graceful SIGINT handling — save what we have and exit
    const onSigint = async () => {
      log('⚠ SIGINT received — saving progress and cleaning up…');
      await persistResults();
      await cleanup();
      process.exit(130);
    };
    process.on('SIGINT', onSigint);

    async function persistResults(): Promise<void> {
      if (!opts.save) return;
      if (!investigationId) {
        log('⚠ --save requires --investigation <id>; skipping database save');
        return;
      }

      log('💾 saving results to database…');
      try {
        let pageEntityId: string | undefined;
        if (pageData) {
          pageEntityId = await upsertPage(pageData, investigationId);
          if (pageData.transparency) {
            await storeTransparency(pageData.transparency, pageEntityId, investigationId);
          }
        }
        for (const post of posts) {
          await upsertPost(post, pageEntityId ?? 'fb_page_unknown', investigationId);
        }
        for (const comment of comments) {
          await upsertComment(comment, investigationId);
        }
        if (reactions.length > 0) {
          // NOTE: assumes storeReactions accepts the collected batch plus the investigation ID
          await storeReactions(reactions, investigationId);
        }
        for (const ad of ads) {
          await storeAd(ad, investigationId);
        }
        log(`💾 saved: ${posts.length} posts, ${comments.length} comments, ${reactions.length} reactions, ${ads.length} ads`);
      } catch (err) {
        log(`❌ error saving results: ${err}`);
      }
    }

    try {
      // 1. Connect to Chromium via CDP
      log(`🔌 connecting to Chromium on port ${port}…`);
      const ctx = await connect(port);

      // 2. Start network interceptor
      log('📡 starting network interceptor…');
      interceptor = new NetworkInterceptor();
      await interceptor.start(ctx);

      // 3. Collect page metadata (always)
      log('📋 collecting page metadata…');
      pageData = await collectPageMetadata(ctx, url, interceptor);
      if (pageData) {
        log(`   page: ${pageData.name} (${pageData.page_id})`);
        log(`   followers: ${pageData.follower_count ?? 'n/a'} | likes: ${pageData.like_count ?? 'n/a'}`);
      } else {
        log('   ⚠ could not extract page metadata — continuing anyway');
      }

      // 4. Collect posts (scroll feed)
      log(`📰 scrolling feed (max ${maxPosts} posts)…`);
      posts = await collectPosts(ctx, interceptor, {
        maxPosts,
        sinceDate,
      });

      // 5. Collect post details (comments/reactions)
      if ((opts.comments || opts.reactions) && posts.length > 0) {
        log('💬 collecting post details…');
        const details = await collectPostDetails(ctx, interceptor, posts, {
          collectComments: opts.comments,
          collectReactions: opts.reactions,
        });
        comments = details.comments;
        reactions = details.reactions;
      }

      // 6. Collect ad library
      if (opts.ads && pageData?.page_id) {
        log('📢 scraping ad library…');
        ads = await collectAds(ctx, interceptor, pageData.page_id);
      } else if (opts.ads && !pageData?.page_id) {
        log('   ⚠ skipping ad library — no page_id available');
      }

      // 7. Persist to database
      await persistResults();

      // 8. Download media for all collected posts
      if (posts.length > 0) {
        log('📥 downloading media…');
        for (const post of posts) {
          const urls = [...(post.media_urls ?? []), ...(post.video_urls ?? [])];
          for (const mediaUrl of urls) {
            try {
              await downloadMedia({
                url: mediaUrl,
                type: mediaUrl.includes('video') ? 'video' : 'image', // crude URL-substring heuristic
                contentId: post.post_id,
                investigationId: investigationId ?? 'default',
              });
              mediaCount++;
            } catch (err) {
              log(`   ⚠ media download failed: ${mediaUrl} — ${err}`);
            }
          }
        }
      }

      // 9. Print summary
      console.log('\n' + '═'.repeat(60));
      console.log('  fb-scrape summary');
      console.log('═'.repeat(60));
      console.log(`  Page:       ${pageData?.name ?? 'unknown'} (${pageData?.page_id ?? 'n/a'})`);
      console.log(`  Posts:      ${posts.length}`);
      console.log(`  Comments:   ${comments.length}`);
      console.log(`  Reactions:  ${reactions.length}`);
      console.log(`  Ads:        ${ads.length}`);
      console.log(`  Media:      ${mediaCount} files downloaded`);
      if (opts.save && investigationId) {
        console.log(`  Saved to:   investigation ${investigationId}`);
      } else {
        console.log('  Saved to:   (none — use --save -i <id> to persist)');
      }
      console.log('═'.repeat(60) + '\n');
    } catch (err) {
      log(`❌ fatal error: ${err}`);
      await persistResults();
      process.exitCode = 1;
    } finally {
      process.off('SIGINT', onSigint);
      await cleanup();
    }
  });

// Async action handlers require parseAsync so errors propagate instead of
// becoming unhandled rejections.
await program.parseAsync();
