/**
 * Autonomous Content Analyzer Agent
 * 
 * Analyzes collected content for suspicious patterns, coordinated behavior,
 * sentiment, and policy violations. Uses LLM reasoning for deep analysis.
 * 
 * Usage:
 *   agent-analyzer -- --investigation <id> --batch-size <n> --llm
 */

import {
  createAgentSession,
  SessionManager,
  SettingsManager,
  AuthStorage,
  ModelRegistry,
  createReadTool,
  createBashTool,
} from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { query } from "../db.js";
import type { Content } from "../types.js";

// ─── CLI Arguments ──────────────────────────────────────────────────────────

/** Parsed command-line configuration for the analyzer agent. */
interface AnalyzerArgs {
  /** Investigation ID to analyze content for (required). */
  investigation: string;
  /** Maximum number of content rows fetched per run. */
  batchSize: number;
  /** Whether to run LLM-backed deep analysis on high-risk items. */
  useLlm: boolean;
  /** Which analysis passes to perform. */
  analysisTypes: Array<"risk" | "coordination" | "sentiment" | "claims">;
  /** Only analyze content collected within the last N hours. */
  sinceHours: number;
}

/** The set of values accepted by the --type flag. */
const VALID_ANALYSIS_TYPES: ReadonlyArray<AnalyzerArgs["analysisTypes"][number]> = [
  "risk",
  "coordination",
  "sentiment",
  "claims",
];

/**
 * Parses process.argv into an AnalyzerArgs config.
 *
 * Exits the process with code 1 when --investigation is missing, when a
 * numeric flag value is not a number, or when --type names an unknown
 * analysis type.
 */
function parseArgs(): AnalyzerArgs {
  const args = process.argv.slice(2);
  const config: AnalyzerArgs = {
    investigation: "",
    batchSize: 50,
    useLlm: true,
    analysisTypes: ["risk"],
    sinceHours: 24,
  };
  // The default ["risk"] is replaced (not appended to) by the first
  // explicit --type, so users can select e.g. only "sentiment".
  let typesExplicit = false;

  // Parses a flag's value as a base-10 integer, exiting on malformed input
  // so a typo cannot silently become NaN downstream.
  const nextInt = (flag: string, raw: string | undefined): number => {
    const value = parseInt(raw ?? "", 10);
    if (Number.isNaN(value)) {
      console.error(`Error: ${flag} requires a numeric value`);
      process.exit(1);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case "--investigation":
      case "-i":
        config.investigation = args[++i];
        break;
      case "--batch-size":
        config.batchSize = nextInt("--batch-size", args[++i]);
        break;
      case "--llm":
        // Only consume the next token when it is an explicit boolean;
        // a bare --llm must not swallow the flag that follows it.
        if (args[i + 1] === "true" || args[i + 1] === "false") {
          config.useLlm = args[++i] === "true";
        } else {
          config.useLlm = true;
        }
        break;
      case "--type": {
        const type = args[++i] as AnalyzerArgs["analysisTypes"][number];
        if (!VALID_ANALYSIS_TYPES.includes(type)) {
          console.error(
            `Error: unknown analysis type "${type}" (valid: ${VALID_ANALYSIS_TYPES.join(", ")})`
          );
          process.exit(1);
        }
        if (!typesExplicit) {
          config.analysisTypes = [];
          typesExplicit = true;
        }
        if (!config.analysisTypes.includes(type)) {
          config.analysisTypes.push(type);
        }
        break;
      }
      case "--since":
        config.sinceHours = nextInt("--since", args[++i]);
        break;
    }
  }

  if (!config.investigation) {
    console.error("Error: --investigation <id> is required");
    process.exit(1);
  }

  return config;
}

// ─── Analysis Engine ────────────────────────────────────────────────────────

/** Result of analyzing one content item (heuristic pass + optional LLM pass). */
interface ContentAnalysis {
  // ID of the analyzed content row.
  contentId: string;
  type: Content["type"];
  // Copy of the analyzed text (absent for text-less content).
  text?: string;
  // Cumulative risk points from every triggered heuristic; 0 = benign.
  riskScore: number;
  // Names of the heuristic rules that fired (e.g. "urgency_language").
  flags: string[];
  // Set only when the text contained sentiment-lexicon words.
  sentiment?: {
    // In [-1, 1]: (positive - negative) / total sentiment words matched.
    score: number;
    label: "positive" | "negative" | "neutral";
  };
  // Set only when detectCoordination matched this item to a duplicate group.
  coordinationSignals?: {
    score: number;
    signals: string[];
  };
  // NOTE(review): never populated anywhere in this file — presumably filled
  // by a "claims" analysis pass elsewhere; confirm before relying on it.
  claims?: Array<{
    text: string;
    category: string;
    confidence: number;
  }>;
  // Set only when the LLM pass ran and returned parseable JSON.
  llmAnalysis?: {
    summary: string;
    manipulationTactics: string[];
    recommendations: string[];
  };
}

// Regex heuristics applied to content text by performHeuristicAnalysis.
interface SuspiciousPatterns {
  urgencyLanguage: RegExp;
  callToAction: RegExp;
  conspiracyMarkers: RegExp;
  coordinatedHashtags: RegExp;
  botLikeBehavior: RegExp;
}

const PATTERNS: SuspiciousPatterns = {
  // Pressure/urgency vocabulary typical of manipulative amplification.
  urgencyLanguage: /\b(breaking|urgent|alert|must see|share now|before deleted|viral)\b/i,
  // Explicit prompts to redistribute the content.
  callToAction: /\b(share|repost|forward|tell everyone|spread the word)\b/i,
  // Phrases common in conspiratorial framing.
  conspiracyMarkers: /\b(truth|exposed|cover[- ]?up|they don't want|wake up|sheeple)\b/i,
  // Five or more consecutive hashtags — hashtag-stuffing typical of campaigns.
  coordinatedHashtags: /(#\w+\s*){5,}/,
  // Immediately repeated word, matched via backreference (the second,
  // 3+-letter alternative is a subset of the first).
  // NOTE(review): not referenced anywhere in this file — dead pattern, or
  // used by another module; confirm before removing. Also case-sensitive
  // (no `i` flag), unlike the other word patterns — verify intent.
  botLikeBehavior: /\b(\w+)\s+\1\b|\b(\w{3,})\s+\2\b/,
};

/**
 * Loads the most recently collected content rows with non-empty text for an
 * investigation, newest first, capped at `batchSize` rows.
 *
 * @param investigationId - Investigation whose content is fetched.
 * @param batchSize - Maximum number of rows returned.
 * @param sinceHours - Only rows collected within the trailing N hours.
 */
async function fetchContentBatch(
  investigationId: string,
  batchSize: number,
  sinceHours: number
): Promise<Content[]> {
  // Lower bound of the collection-time window.
  const cutoff = new Date(Date.now() - sinceHours * 60 * 60 * 1000);

  const sql = `SELECT id, entity_id, type, text, text_lang, published_at, collected_at,
            likes, shares, comments, reactions, raw_json
     FROM content
     WHERE investigation_id = $1
     AND collected_at > $2
     AND (text IS NOT NULL AND text != '')
     ORDER BY collected_at DESC
     LIMIT $3`;

  const { rows } = await query(sql, [investigationId, cutoff, batchSize]);
  return rows as unknown as Content[];
}

/**
 * Scores a single content item with cheap, regex-based heuristics.
 *
 * Accumulates risk points for each triggered text pattern and engagement
 * signal, and attaches a rough lexicon-based sentiment estimate when the
 * text contains sentiment-bearing words. Items without text come back with
 * a zero-score, flag-less analysis.
 */
async function performHeuristicAnalysis(content: Content): Promise<ContentAnalysis> {
  const result: ContentAnalysis = {
    contentId: content.id,
    type: content.type,
    text: content.text,
    riskScore: 0,
    flags: [],
  };

  const body = content.text;
  if (!body) {
    return result;
  }

  // Text-pattern rules: [regex, flag name, risk points]. Order matters —
  // flags are pushed in the order the rules are listed.
  const textRules: Array<[RegExp, string, number]> = [
    [PATTERNS.urgencyLanguage, "urgency_language", 15],
    [PATTERNS.callToAction, "call_to_action", 10],
    [PATTERNS.conspiracyMarkers, "conspiracy_markers", 20],
    [PATTERNS.coordinatedHashtags, "coordinated_hashtags", 15],
  ];
  for (const [pattern, flag, points] of textRules) {
    if (pattern.test(body)) {
      result.flags.push(flag);
      result.riskScore += points;
    }
  }

  // Engagement-volume rules; thresholds are cumulative, so a viral post
  // triggers both the 1000+ and the 5000+ rule.
  const engagement = (content.likes || 0) + (content.shares || 0) + (content.comments || 0);
  if (engagement > 1000) {
    result.flags.push("high_engagement");
    result.riskScore += 10;
  }
  if (engagement > 5000) {
    result.flags.push("viral_engagement");
    result.riskScore += 20;
  }

  // A share count close to (or above) the like count is unusual for
  // organic engagement. Truthiness guard also avoids dividing by zero.
  if (content.shares && content.likes) {
    if (content.shares / content.likes > 0.8) {
      result.flags.push("high_share_ratio");
      result.riskScore += 15;
    }
  }

  // Crude lexicon sentiment: net positive-minus-negative word count,
  // normalized by the number of sentiment-bearing words matched.
  const positives = body.match(/\b(good|great|excellent|amazing|love|happy|win|victory)\b/gi)?.length || 0;
  const negatives = body.match(/\b(bad|terrible|awful|hate|sad|lose|fail|disaster)\b/gi)?.length || 0;
  const matched = positives + negatives;
  if (matched > 0) {
    const score = (positives - negatives) / matched;
    result.sentiment = {
      score,
      label: score > 0.2 ? "positive" : score < -0.2 ? "negative" : "neutral",
    };
  }

  return result;
}

/**
 * Runs LLM-backed deep analysis on a single content item.
 *
 * Skips items with missing or very short (< 50 chars) text. Prompts the
 * agent session for a JSON verdict, extracts the first {...} span from the
 * reply, and validates the parsed fields before returning them — a
 * malformed model reply can no longer smuggle non-string values into the
 * analysis.
 *
 * @param session - Agent session; only its `prompt()` method is used.
 * @param content - Content row to analyze (text truncated to 2000 chars).
 * @param heuristicAnalysis - Kept for interface stability; not currently read.
 * @returns Partial analysis with `llmAnalysis` set, or `{}` on skip/failure.
 */
async function performLlmAnalysis(
  session: { prompt(text: string): Promise<string> },
  content: Content,
  heuristicAnalysis: ContentAnalysis
): Promise<Partial<ContentAnalysis>> {
  if (!content.text || content.text.length < 50) {
    return {};
  }

  const prompt = `Analyze this social media content for manipulation tactics and suspicious patterns:

**Content Type:** ${content.type}
**Published:** ${content.published_at}
**Engagement:** ${content.likes} likes, ${content.shares} shares, ${content.comments} comments

**Text:**
${content.text.slice(0, 2000)}

Provide analysis in JSON format:
{
  "manipulationTactics": ["tactic1", "tactic2"],
  "summary": "Brief summary",
  "recommendations": ["rec1", "rec2"]
}`;

  // Keeps only string entries; anything else from the model becomes [].
  const stringArray = (value: unknown): string[] =>
    Array.isArray(value) ? value.filter((v): v is string => typeof v === "string") : [];

  try {
    const analysisText = await session.prompt(prompt);

    // The model may wrap the JSON in prose; grab the outermost {...} span.
    const jsonMatch = analysisText.match(/\{[\s\S]*\}/);
    if (jsonMatch) {
      // Validate the parsed shape instead of trusting the model's output.
      const parsed: unknown = JSON.parse(jsonMatch[0]);
      if (parsed && typeof parsed === "object") {
        const record = parsed as Record<string, unknown>;
        return {
          llmAnalysis: {
            summary:
              typeof record.summary === "string" && record.summary
                ? record.summary
                : "No summary",
            manipulationTactics: stringArray(record.manipulationTactics),
            recommendations: stringArray(record.recommendations),
          },
        };
      }
    }
  } catch (error) {
    console.error(`[Analyzer] LLM analysis failed for ${content.id}:`, error);
  }

  return {};
}

/**
 * Detects coordinated posting across a content batch.
 *
 * Buckets posts into one-hour windows by publish time, then looks for 3+
 * posts in the same window whose normalized text prefix (lowercased,
 * whitespace-collapsed, first 100 chars) is identical. Matching posts get a
 * coordination score proportional to the duplicate count.
 *
 * Fixes vs. the previous version: the match step now normalizes each post's
 * text the same way as the grouping step (the old code compared a
 * whitespace-collapsed probe against text that was only lowercased, so
 * posts differing in whitespace were silently missed), and signals from
 * multiple duplicate groups accumulate instead of overwriting each other.
 *
 * @param investigationId - Kept for interface stability; not used by this
 *   in-memory implementation.
 * @param contentBatch - Content items to cross-compare.
 * @returns Map of content ID to coordination signals (absent = none found).
 */
async function detectCoordination(
  investigationId: string,
  contentBatch: Content[]
): Promise<Map<string, ContentAnalysis["coordinationSignals"]>> {
  const coordinationMap = new Map<string, ContentAnalysis["coordinationSignals"]>();

  // Single normalization used for both grouping and matching.
  // (Simplified similarity check — would use embeddings in production.)
  const normalize = (text: string): string =>
    text.toLowerCase().replace(/\s+/g, " ").slice(0, 100);

  // Group content by hour-precision publish windows.
  const timeWindows = new Map<string, Content[]>();
  for (const content of contentBatch) {
    if (!content.published_at) continue;

    const windowKey = new Date(content.published_at).toISOString().slice(0, 13); // Hour precision
    if (!timeWindows.has(windowKey)) {
      timeWindows.set(windowKey, []);
    }
    timeWindows.get(windowKey)!.push(content);
  }

  // Check for coordinated posting patterns within each window.
  for (const [window, contents] of timeWindows.entries()) {
    if (contents.length < 3) continue;

    // Count occurrences of each normalized text prefix.
    const textSimilarities = new Map<string, number>();
    for (const content of contents) {
      if (!content.text) continue;
      const key = normalize(content.text);
      textSimilarities.set(key, (textSimilarities.get(key) || 0) + 1);
    }

    // Flag every post whose normalized text contains the first five words
    // of a prefix shared by 3+ posts.
    for (const [text, count] of textSimilarities.entries()) {
      if (count < 3) continue;
      const probe = text.split(" ").slice(0, 5).join(" ");

      for (const content of contents) {
        if (!content.text || !normalize(content.text).includes(probe)) continue;

        const signal = `coordinated_posting:${count} similar posts in ${window}`;
        const existing = coordinationMap.get(content.id);
        if (existing) {
          // Accumulate rather than overwrite when a post matches several
          // duplicate groups.
          existing.score += count * 20;
          existing.signals.push(signal);
        } else {
          coordinationMap.set(content.id, {
            score: count * 20,
            signals: [signal],
          });
        }
      }
    }
  }

  return coordinationMap;
}

/**
 * Persists one content analysis as an 'llm' annotation row.
 *
 * The annotation's note is the LLM summary (null when no LLM pass ran) and
 * its confidence is the risk score capped at 100. The investigation ID is
 * resolved via a subquery on the content row itself.
 */
async function saveAnalysis(analysis: ContentAnalysis) {
  const note = analysis.llmAnalysis?.summary || null;
  const confidence = Math.min(100, analysis.riskScore);

  await query(
    `INSERT INTO annotations (investigation_id, ref_type, ref_id, analyst, note, confidence, created_at)
     VALUES (
       (SELECT investigation_id FROM content WHERE id = $1),
       'content',
       $1,
       'llm',
       $2,
       $3,
       NOW()
     )`,
    [analysis.contentId, note, confidence]
  );

  // Could also save to a dedicated content_analysis table if schema is extended
}

// ─── Main Analysis Loop ─────────────────────────────────────────────────────

/**
 * Runs one full analysis pass for the configured investigation:
 *  1. Optionally boots a pi-coding-agent session for LLM analysis.
 *  2. Fetches a batch of recently collected content with non-empty text.
 *  3. Scores each item with heuristics; items scoring >= 20 are escalated
 *     to the LLM and its findings merged into the analysis.
 *  4. Persists each per-item analysis as an annotation.
 *  5. Cross-checks the whole batch for coordinated posting and prints a
 *     risk-tier summary to stdout.
 *
 * NOTE(review): coordination scores are merged into the in-memory analyses
 * AFTER saveAnalysis has already run for every item, so the boosted scores
 * are never written back to the database — confirm whether that is intended.
 * NOTE(review): the agent session is never disposed/closed here — verify
 * whether pi-coding-agent sessions keep the process alive after the run.
 */
async function runAnalysis(config: AnalyzerArgs) {
  console.log("🔬 CultGuard Content Analyzer");
  console.log(`Investigation: ${config.investigation}`);
  console.log(`Batch size: ${config.batchSize}`);
  console.log(`Analysis types: ${config.analysisTypes.join(", ")}`);
  console.log(`Time window: last ${config.sinceHours} hours`);
  console.log(`LLM analysis: ${config.useLlm ? "enabled" : "disabled"}\n`);

  // Create pi-agent session for LLM analysis
  let session: any = null;
  if (config.useLlm) {
    const authStorage = AuthStorage.create();
    const modelRegistry = ModelRegistry.create(authStorage);

    const { session: agentSession } = await createAgentSession({
      cwd: process.cwd(),
      sessionManager: SessionManager.inMemory(),
      settingsManager: SettingsManager.inMemory({
        // Compaction disabled: each prompt is independent, no history needed.
        compaction: { enabled: false },
      }),
      authStorage,
      modelRegistry,
      // NOTE(review): read/bash tools give the LLM filesystem and shell
      // access — confirm the analyzer actually needs these capabilities.
      tools: [
        createReadTool(process.cwd()),
        createBashTool(process.cwd()),
      ],
    });

    session = agentSession;
    console.log("✅ LLM session initialized\n");
  }

  // Fetch content batch
  console.log(`[Analyzer] Fetching content...`);
  const contentBatch = await fetchContentBatch(
    config.investigation,
    config.batchSize,
    config.sinceHours
  );

  console.log(`[Analyzer] Found ${contentBatch.length} items to analyze\n`);

  if (contentBatch.length === 0) {
    console.log("[Analyzer] No content to analyze. Exiting.");
    return;
  }

  // Perform analysis (sequentially: one heuristic pass, optional LLM pass,
  // then persistence per item)
  const analyses: ContentAnalysis[] = [];

  for (let i = 0; i < contentBatch.length; i++) {
    const content = contentBatch[i];
    console.log(`[${i + 1}/${contentBatch.length}] Analyzing ${content.id}...`);

    // Heuristic analysis
    const heuristicAnalysis = await performHeuristicAnalysis(content);
    analyses.push(heuristicAnalysis);

    // LLM analysis (if enabled and risk score warrants it) — only items the
    // cheap heuristics already consider risky (>= 20) are escalated, which
    // limits LLM usage.
    if (config.useLlm && heuristicAnalysis.riskScore >= 20) {
      const llmResult = await performLlmAnalysis(session, content, heuristicAnalysis);
      Object.assign(heuristicAnalysis, llmResult);
    }

    // Save analysis
    await saveAnalysis(heuristicAnalysis);

    // Report findings
    if (heuristicAnalysis.riskScore >= 30) {
      console.log(`  ⚠️  High risk (${heuristicAnalysis.riskScore}): ${heuristicAnalysis.flags.join(", ")}`);
    }
  }

  // Coordination detection across batch
  console.log(`\n[Analyzer] Checking for coordination patterns...`);
  const coordinationSignals = await detectCoordination(config.investigation, contentBatch);

  // NOTE(review): this logs a "Coordination detected" line for every map
  // entry even when no matching in-memory analysis is found.
  for (const [contentId, signals] of coordinationSignals.entries()) {
    const analysis = analyses.find(a => a.contentId === contentId);
    if (analysis && signals) {
      analysis.coordinationSignals = signals;
      analysis.riskScore += signals.score;
    }
    console.log(`  🚨 Coordination detected: ${contentId} (${signals?.signals.join(", ")})`);
  }

  // Summary: bucket analyses into risk tiers by final (post-coordination) score.
  console.log(`\n${"=".repeat(60)}`);
  console.log("📊 Analysis Summary");
  console.log(`${"=".repeat(60)}`);

  const highRisk = analyses.filter(a => a.riskScore >= 30);
  const mediumRisk = analyses.filter(a => a.riskScore >= 15 && a.riskScore < 30);
  const lowRisk = analyses.filter(a => a.riskScore < 15);

  console.log(`Total analyzed: ${analyses.length}`);
  console.log(`High risk (≥30): ${highRisk.length}`);
  console.log(`Medium risk (15-29): ${mediumRisk.length}`);
  console.log(`Low risk (<15): ${lowRisk.length}`);

  if (highRisk.length > 0) {
    console.log(`\n⚠️  High-Risk Content IDs:`);
    for (const analysis of highRisk) {
      console.log(`  • ${analysis.contentId}: score=${analysis.riskScore}, flags=[${analysis.flags.join(", ")}]`);
    }
  }

  console.log(`\n✅ Analysis complete. Results saved to database.`);
}

// ─── Main Entry Point ───────────────────────────────────────────────────────

/** Entry point: parse CLI flags, run the pipeline, exit non-zero on failure. */
function main(): Promise<void> {
  return Promise.resolve()
    .then(() => runAnalysis(parseArgs()))
    .catch((error: unknown) => {
      console.error("\n💀 Analyzer crashed:", error);
      process.exit(1);
    });
}

void main();
