/**
 * Autonomous Content Analyzer Agent
 * 
 * Analyzes collected content for suspicious patterns, coordinated behavior,
 * sentiment, and policy violations. Uses LLM reasoning for deep analysis.
 * 
 * Usage:
 *   agent-analyzer -- --investigation <id> --batch-size <n> --llm
 */

import {
  createAgentSession,
  SessionManager,
  SettingsManager,
  AuthStorage,
  ModelRegistry,
  createReadTool,
  createBashTool,
} from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { query } from "../db.js";
import type { Content } from "../types.js";

// ─── CLI Arguments ──────────────────────────────────────────────────────────

/** Parsed command-line configuration for the analyzer agent. */
interface AnalyzerArgs {
  /** Investigation ID to analyze content for (required). */
  investigation: string;
  /** Maximum number of content rows fetched per run. */
  batchSize: number;
  /** Whether to run LLM-backed deep analysis on high-risk items. */
  useLlm: boolean;
  /** Which analysis passes to perform. */
  analysisTypes: Array<"risk" | "coordination" | "sentiment" | "claims">;
  /** Only analyze content collected within the last N hours. */
  sinceHours: number;
}

/** The set of values accepted by the --type flag. */
const VALID_ANALYSIS_TYPES: ReadonlyArray<AnalyzerArgs["analysisTypes"][number]> = [
  "risk",
  "coordination",
  "sentiment",
  "claims",
];

/**
 * Parses process.argv into an AnalyzerArgs config.
 *
 * Exits the process with code 1 when --investigation is missing, when a
 * numeric flag value is not a number, or when --type names an unknown
 * analysis type.
 */
function parseArgs(): AnalyzerArgs {
  const args = process.argv.slice(2);
  const config: AnalyzerArgs = {
    investigation: "",
    batchSize: 50,
    useLlm: true,
    analysisTypes: ["risk"],
    sinceHours: 24,
  };
  // The default ["risk"] is replaced (not appended to) by the first
  // explicit --type, so users can select e.g. only "sentiment".
  let typesExplicit = false;

  // Parses a flag's value as a base-10 integer, exiting on malformed input
  // so a typo cannot silently become NaN downstream.
  const nextInt = (flag: string, raw: string | undefined): number => {
    const value = parseInt(raw ?? "", 10);
    if (Number.isNaN(value)) {
      console.error(`Error: ${flag} requires a numeric value`);
      process.exit(1);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case "--investigation":
      case "-i":
        config.investigation = args[++i];
        break;
      case "--batch-size":
        config.batchSize = nextInt("--batch-size", args[++i]);
        break;
      case "--llm":
        // Only consume the next token when it is an explicit boolean;
        // a bare --llm must not swallow the flag that follows it.
        if (args[i + 1] === "true" || args[i + 1] === "false") {
          config.useLlm = args[++i] === "true";
        } else {
          config.useLlm = true;
        }
        break;
      case "--type": {
        const type = args[++i] as AnalyzerArgs["analysisTypes"][number];
        if (!VALID_ANALYSIS_TYPES.includes(type)) {
          console.error(
            `Error: unknown analysis type "${type}" (valid: ${VALID_ANALYSIS_TYPES.join(", ")})`
          );
          process.exit(1);
        }
        if (!typesExplicit) {
          config.analysisTypes = [];
          typesExplicit = true;
        }
        if (!config.analysisTypes.includes(type)) {
          config.analysisTypes.push(type);
        }
        break;
      }
      case "--since":
        config.sinceHours = nextInt("--since", args[++i]);
        break;
    }
  }

  if (!config.investigation) {
    console.error("Error: --investigation <id> is required");
    process.exit(1);
  }

  return config;
}

// ─── Analysis Engine ────────────────────────────────────────────────────────

/** Result of analyzing one content item (heuristic pass + optional LLM pass). */
interface ContentAnalysis {
  // ID of the analyzed content row.
  contentId: string;
  type: Content["type"];
  // Copy of the analyzed text (absent for text-less content).
  text?: string;
  // Cumulative risk points from every triggered heuristic; 0 = benign.
  riskScore: number;
  // Names of the heuristic rules that fired (e.g. "urgency_language").
  flags: string[];
  // Set only when the text contained sentiment-lexicon words.
  sentiment?: {
    // In [-1, 1]: (positive - negative) / total sentiment words matched.
    score: number;
    label: "positive" | "negative" | "neutral";
  };
  // Set only when detectCoordination matched this item to a duplicate group.
  coordinationSignals?: {
    score: number;
    signals: string[];
  };
  // NOTE(review): never populated anywhere in this file — presumably filled
  // by a "claims" analysis pass elsewhere; confirm before relying on it.
  claims?: Array<{
    text: string;
    category: string;
    confidence: number;
  }>;
  // Set only when the LLM pass ran and returned parseable JSON.
  llmAnalysis?: {
    summary: string;
    manipulationTactics: string[];
    recommendations: string[];
  };
}

// Regex heuristics applied to content text by performHeuristicAnalysis.
interface SuspiciousPatterns {
  urgencyLanguage: RegExp;
  callToAction: RegExp;
  conspiracyMarkers: RegExp;
  coordinatedHashtags: RegExp;
  botLikeBehavior: RegExp;
}

const PATTERNS: SuspiciousPatterns = {
  // Pressure/urgency vocabulary typical of manipulative amplification.
  urgencyLanguage: /\b(breaking|urgent|alert|must see|share now|before deleted|viral)\b/i,
  // Explicit prompts to redistribute the content.
  callToAction: /\b(share|repost|forward|tell everyone|spread the word)\b/i,
  // Phrases common in conspiratorial framing.
  conspiracyMarkers: /\b(truth|exposed|cover[- ]?up|they don't want|wake up|sheeple)\b/i,
  // Five or more consecutive hashtags — hashtag-stuffing typical of campaigns.
  coordinatedHashtags: /(#\w+\s*){5,}/,
  // Immediately repeated word, matched via backreference (the second,
  // 3+-letter alternative is a subset of the first).
  // NOTE(review): not referenced anywhere in this file — dead pattern, or
  // used by another module; confirm before removing. Also case-sensitive
  // (no `i` flag), unlike the other word patterns — verify intent.
  botLikeBehavior: /\b(\w+)\s+\1\b|\b(\w{3,})\s+\2\b/,
};

/**
 * Loads the most recently collected content rows with non-empty text for an
 * investigation, newest first, capped at `batchSize` rows.
 *
 * @param investigationId - Investigation whose content is fetched.
 * @param batchSize - Maximum number of rows returned.
 * @param sinceHours - Only rows collected within the trailing N hours.
 */
async function fetchContentBatch(
  investigationId: string,
  batchSize: number,
  sinceHours: number
): Promise<Content[]> {
  // Lower bound of the collection-time window.
  const cutoff = new Date(Date.now() - sinceHours * 60 * 60 * 1000);

  const sql = `SELECT id, entity_id, type, text, text_lang, published_at, collected_at,
            likes, shares, comments, reactions, raw_json
     FROM content
     WHERE investigation_id = $1
     AND collected_at > $2
     AND (text IS NOT NULL AND text != '')
     ORDER BY collected_at DESC
     LIMIT $3`;

  const { rows } = await query(sql, [investigationId, cutoff, batchSize]);
  return rows as unknown as Content[];
}

/**
 * Scores a single content item with cheap, regex-based heuristics.
 *
 * Accumulates risk points for each triggered text pattern and engagement
 * signal, and attaches a rough lexicon-based sentiment estimate when the
 * text contains sentiment-bearing words. Items without text come back with
 * a zero-score, flag-less analysis.
 */
async function performHeuristicAnalysis(content: Content): Promise<ContentAnalysis> {
  const result: ContentAnalysis = {
    contentId: content.id,
    type: content.type,
    text: content.text,
    riskScore: 0,
    flags: [],
  };

  const body = content.text;
  if (!body) {
    return result;
  }

  // Text-pattern rules: [regex, flag name, risk points]. Order matters —
  // flags are pushed in the order the rules are listed.
  const textRules: Array<[RegExp, string, number]> = [
    [PATTERNS.urgencyLanguage, "urgency_language", 15],
    [PATTERNS.callToAction, "call_to_action", 10],
    [PATTERNS.conspiracyMarkers, "conspiracy_markers", 20],
    [PATTERNS.coordinatedHashtags, "coordinated_hashtags", 15],
  ];
  for (const [pattern, flag, points] of textRules) {
    if (pattern.test(body)) {
      result.flags.push(flag);
      result.riskScore += points;
    }
  }

  // Engagement-volume rules; thresholds are cumulative, so a viral post
  // triggers both the 1000+ and the 5000+ rule.
  const engagement = (content.likes || 0) + (content.shares || 0) + (content.comments || 0);
  if (engagement > 1000) {
    result.flags.push("high_engagement");
    result.riskScore += 10;
  }
  if (engagement > 5000) {
    result.flags.push("viral_engagement");
    result.riskScore += 20;
  }

  // A share count close to (or above) the like count is unusual for
  // organic engagement. Truthiness guard also avoids dividing by zero.
  if (content.shares && content.likes) {
    if (content.shares / content.likes > 0.8) {
      result.flags.push("high_share_ratio");
      result.riskScore += 15;
    }
  }

  // Crude lexicon sentiment: net positive-minus-negative word count,
  // normalized by the number of sentiment-bearing words matched.
  const positives = body.match(/\b(good|great|excellent|amazing|love|happy|win|victory)\b/gi)?.length || 0;
  const negatives = body.match(/\b(bad|terrible|awful|hate|sad|lose|fail|disaster)\b/gi)?.length || 0;
  const matched = positives + negatives;
  if (matched > 0) {
    const score = (positives - negatives) / matched;
    result.sentiment = {
      score,
      label: score > 0.2 ? "positive" : score < -0.2 ? "negative" : "neutral",
    };
  }

  return result;
}

/**
 * Runs LLM-backed deep analysis on a single content item.
 *
 * Skips items with missing or very short (< 50 chars) text. Prompts the
 * agent session for a JSON verdict, extracts the first {...} span from the
 * reply, and validates the parsed fields before returning them — a
 * malformed model reply can no longer smuggle non-string values into the
 * analysis.
 *
 * @param session - Agent session; only its `prompt()` method is used.
 * @param content - Content row to analyze (text truncated to 2000 chars).
 * @param heuristicAnalysis - Kept for interface stability; not currently read.
 * @returns Partial analysis with `llmAnalysis` set, or `{}` on skip/failure.
 */
async function performLlmAnalysis(
  session: { prompt(text: string): Promise<string> },
  content: Content,
  heuristicAnalysis: ContentAnalysis
): Promise<Partial<ContentAnalysis>> {
  if (!content.text || content.text.length < 50) {
    return {};
  }

  const prompt = `Analyze this social media content for manipulation tactics and suspicious patterns:

**Content Type:** ${content.type}
**Published:** ${content.published_at}
**Engagement:** ${content.likes} likes, ${content.shares} shares, ${content.comments} comments

**Text:**
${content.text.slice(0, 2000)}

Provide analysis in JSON format:
{
  "manipulationTactics": ["tactic1", "tactic2"],
  "summary": "Brief summary",
  "recommendations": ["rec1", "rec2"]
}`;

  // Keeps only string entries; anything else from the model becomes [].
  const stringArray = (value: unknown): string[] =>
    Array.isArray(value) ? value.filter((v): v is string => typeof v === "string") : [];

  try {
    const analysisText = await session.prompt(prompt);

    // The model may wrap the JSON in prose; grab the outermost {...} span.
    const jsonMatch = analysisText.match(/\{[\s\S]*\}/);
    if (jsonMatch) {
      // Validate the parsed shape instead of trusting the model's output.
      const parsed: unknown = JSON.parse(jsonMatch[0]);
      if (parsed && typeof parsed === "object") {
        const record = parsed as Record<string, unknown>;
        return {
          llmAnalysis: {
            summary:
              typeof record.summary === "string" && record.summary
                ? record.summary
                : "No summary",
            manipulationTactics: stringArray(record.manipulationTactics),
            recommendations: stringArray(record.recommendations),
          },
        };
      }
    }
  } catch (error) {
    console.error(`[Analyzer] LLM analysis failed for ${content.id}:`, error);
  }

  return {};
}

/**
 * Detects coordinated posting across a content batch.
 *
 * Buckets posts into one-hour windows by publish time, then looks for 3+
 * posts in the same window whose normalized text prefix (lowercased,
 * whitespace-collapsed, first 100 chars) is identical. Matching posts get a
 * coordination score proportional to the duplicate count.
 *
 * Fixes vs. the previous version: the match step now normalizes each post's
 * text the same way as the grouping step (the old code compared a
 * whitespace-collapsed probe against text that was only lowercased, so
 * posts differing in whitespace were silently missed), and signals from
 * multiple duplicate groups accumulate instead of overwriting each other.
 *
 * @param investigationId - Kept for interface stability; not used by this
 *   in-memory implementation.
 * @param contentBatch - Content items to cross-compare.
 * @returns Map of content ID to coordination signals (absent = none found).
 */
async function detectCoordination(
  investigationId: string,
  contentBatch: Content[]
): Promise<Map<string, ContentAnalysis["coordinationSignals"]>> {
  const coordinationMap = new Map<string, ContentAnalysis["coordinationSignals"]>();

  // Single normalization used for both grouping and matching.
  // (Simplified similarity check — would use embeddings in production.)
  const normalize = (text: string): string =>
    text.toLowerCase().replace(/\s+/g, " ").slice(0, 100);

  // Group content by hour-precision publish windows.
  const timeWindows = new Map<string, Content[]>();
  for (const content of contentBatch) {
    if (!content.published_at) continue;

    const windowKey = new Date(content.published_at).toISOString().slice(0, 13); // Hour precision
    if (!timeWindows.has(windowKey)) {
      timeWindows.set(windowKey, []);
    }
    timeWindows.get(windowKey)!.push(content);
  }

  // Check for coordinated posting patterns within each window.
  for (const [window, contents] of timeWindows.entries()) {
    if (contents.length < 3) continue;

    // Count occurrences of each normalized text prefix.
    const textSimilarities = new Map<string, number>();
    for (const content of contents) {
      if (!content.text) continue;
      const key = normalize(content.text);
      textSimilarities.set(key, (textSimilarities.get(key) || 0) + 1);
    }

    // Flag every post whose normalized text contains the first five words
    // of a prefix shared by 3+ posts.
    for (const [text, count] of textSimilarities.entries()) {
      if (count < 3) continue;
      const probe = text.split(" ").slice(0, 5).join(" ");

      for (const content of contents) {
        if (!content.text || !normalize(content.text).includes(probe)) continue;

        const signal = `coordinated_posting:${count} similar posts in ${window}`;
        const existing = coordinationMap.get(content.id);
        if (existing) {
          // Accumulate rather than overwrite when a post matches several
          // duplicate groups.
          existing.score += count * 20;
          existing.signals.push(signal);
        } else {
          coordinationMap.set(content.id, {
            score: count * 20,
            signals: [signal],
          });
        }
      }
    }
  }

  return coordinationMap;
}

/**
 * Persists one content analysis as an 'llm' annotation row.
 *
 * The annotation's note is the LLM summary (null when no LLM pass ran) and
 * its confidence is the risk score capped at 100. The investigation ID is
 * resolved via a subquery on the content row itself.
 */
async function saveAnalysis(analysis: ContentAnalysis) {
  const note = analysis.llmAnalysis?.summary || null;
  const confidence = Math.min(100, analysis.riskScore);

  await query(
    `INSERT INTO annotations (investigation_id, ref_type, ref_id, analyst, note, confidence, created_at)
     VALUES (
       (SELECT investigation_id FROM content WHERE id = $1),
       'content',
       $1,
       'llm',
       $2,
       $3,
       NOW()
     )`,
    [analysis.contentId, note, confidence]
  );

  // Could also save to a dedicated content_analysis table if schema is extended
}

// ─── Main Analysis Loop ─────────────────────────────────────────────────────

/**
 * Runs one full analysis pass for the configured investigation:
 *  1. Optionally boots a pi-coding-agent session for LLM analysis.
 *  2. Fetches a batch of recently collected content with non-empty text.
 *  3. Scores each item with heuristics; items scoring >= 20 are escalated
 *     to the LLM and its findings merged into the analysis.
 *  4. Persists each per-item analysis as an annotation.
 *  5. Cross-checks the whole batch for coordinated posting and prints a
 *     risk-tier summary to stdout.
 *
 * NOTE(review): coordination scores are merged into the in-memory analyses
 * AFTER saveAnalysis has already run for every item, so the boosted scores
 * are never written back to the database — confirm whether that is intended.
 * NOTE(review): the agent session is never disposed/closed here — verify
 * whether pi-coding-agent sessions keep the process alive after the run.
 */
async function runAnalysis(config: AnalyzerArgs) {
  console.log("🔬 CultGuard Content Analyzer");
  console.log(`Investigation: ${config.investigation}`);
  console.log(`Batch size: ${config.batchSize}`);
  console.log(`Analysis types: ${config.analysisTypes.join(", ")}`);
  console.log(`Time window: last ${config.sinceHours} hours`);
  console.log(`LLM analysis: ${config.useLlm ? "enabled" : "disabled"}\n`);

  // Create pi-agent session for LLM analysis
  let session: any = null;
  if (config.useLlm) {
    const authStorage = AuthStorage.create();
    const modelRegistry = ModelRegistry.create(authStorage);

    const { session: agentSession } = await createAgentSession({
      cwd: process.cwd(),
      sessionManager: SessionManager.inMemory(),
      settingsManager: SettingsManager.inMemory({
        // Compaction disabled: each prompt is independent, no history needed.
        compaction: { enabled: false },
      }),
      authStorage,
      modelRegistry,
      // NOTE(review): read/bash tools give the LLM filesystem and shell
      // access — confirm the analyzer actually needs these capabilities.
      tools: [
        createReadTool(process.cwd()),
        createBashTool(process.cwd()),
      ],
    });

    session = agentSession;
    console.log("✅ LLM session initialized\n");
  }

  // Fetch content batch
  console.log(`[Analyzer] Fetching content...`);
  const contentBatch = await fetchContentBatch(
    config.investigation,
    config.batchSize,
    config.sinceHours
  );

  console.log(`[Analyzer] Found ${contentBatch.length} items to analyze\n`);

  if (contentBatch.length === 0) {
    console.log("[Analyzer] No content to analyze. Exiting.");
    return;
  }

  // Perform analysis (sequentially: one heuristic pass, optional LLM pass,
  // then persistence per item)
  const analyses: ContentAnalysis[] = [];

  for (let i = 0; i < contentBatch.length; i++) {
    const content = contentBatch[i];
    console.log(`[${i + 1}/${contentBatch.length}] Analyzing ${content.id}...`);

    // Heuristic analysis
    const heuristicAnalysis = await performHeuristicAnalysis(content);
    analyses.push(heuristicAnalysis);

    // LLM analysis (if enabled and risk score warrants it) — only items the
    // cheap heuristics already consider risky (>= 20) are escalated, which
    // limits LLM usage.
    if (config.useLlm && heuristicAnalysis.riskScore >= 20) {
      const llmResult = await performLlmAnalysis(session, content, heuristicAnalysis);
      Object.assign(heuristicAnalysis, llmResult);
    }

    // Save analysis
    await saveAnalysis(heuristicAnalysis);

    // Report findings
    if (heuristicAnalysis.riskScore >= 30) {
      console.log(`  ⚠️  High risk (${heuristicAnalysis.riskScore}): ${heuristicAnalysis.flags.join(", ")}`);
    }
  }

  // Coordination detection across batch
  console.log(`\n[Analyzer] Checking for coordination patterns...`);
  const coordinationSignals = await detectCoordination(config.investigation, contentBatch);

  // NOTE(review): this logs a "Coordination detected" line for every map
  // entry even when no matching in-memory analysis is found.
  for (const [contentId, signals] of coordinationSignals.entries()) {
    const analysis = analyses.find(a => a.contentId === contentId);
    if (analysis && signals) {
      analysis.coordinationSignals = signals;
      analysis.riskScore += signals.score;
    }
    console.log(`  🚨 Coordination detected: ${contentId} (${signals?.signals.join(", ")})`);
  }

  // Summary: bucket analyses into risk tiers by final (post-coordination) score.
  console.log(`\n${"=".repeat(60)}`);
  console.log("📊 Analysis Summary");
  console.log(`${"=".repeat(60)}`);

  const highRisk = analyses.filter(a => a.riskScore >= 30);
  const mediumRisk = analyses.filter(a => a.riskScore >= 15 && a.riskScore < 30);
  const lowRisk = analyses.filter(a => a.riskScore < 15);

  console.log(`Total analyzed: ${analyses.length}`);
  console.log(`High risk (≥30): ${highRisk.length}`);
  console.log(`Medium risk (15-29): ${mediumRisk.length}`);
  console.log(`Low risk (<15): ${lowRisk.length}`);

  if (highRisk.length > 0) {
    console.log(`\n⚠️  High-Risk Content IDs:`);
    for (const analysis of highRisk) {
      console.log(`  • ${analysis.contentId}: score=${analysis.riskScore}, flags=[${analysis.flags.join(", ")}]`);
    }
  }

  console.log(`\n✅ Analysis complete. Results saved to database.`);
}

// ─── Main Entry Point ───────────────────────────────────────────────────────

/** Entry point: parse CLI flags, run the pipeline, exit non-zero on failure. */
function main(): Promise<void> {
  return Promise.resolve()
    .then(() => runAnalysis(parseArgs()))
    .catch((error: unknown) => {
      console.error("\n💀 Analyzer crashed:", error);
      process.exit(1);
    });
}

void main();
