#!/usr/bin/env python3
"""Migrate all existing raw_data JSON files into osint.db SQLite schema."""

import json
import sqlite3
import os
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).parent.parent
DB_PATH   = REPO_ROOT / "data" / "osint.db"
RAW_DIR   = REPO_ROOT / "raw_data"

def connect():
    con = sqlite3.connect(DB_PATH)
    con.row_factory = sqlite3.Row
    return con

def init_schema(con):
    schema = (REPO_ROOT / "data" / "schema.sql").read_text()
    con.executescript(schema)
    con.commit()

def upsert_entity(con, row: dict):
    con.execute("""
        INSERT INTO entities(id, type, name, url, platform, country, created_at, confidence, is_sock, notes, raw_json)
        VALUES(:id,:type,:name,:url,:platform,:country,:created_at,:confidence,:is_sock,:notes,:raw_json)
        ON CONFLICT(id) DO UPDATE SET
            name=excluded.name, country=excluded.country,
            created_at=excluded.created_at, notes=excluded.notes,
            raw_json=excluded.raw_json
    """, row)

def upsert_relationship(con, source, target, rtype, strength, evidence):
    con.execute("""
        INSERT INTO relationships(source_id, target_id, type, strength, evidence)
        VALUES(?,?,?,?,?)
    """, (source, target, rtype, strength, evidence))

def migrate_commenter_network(con):
    path = RAW_DIR / "commenter_network.json"
    if not path.exists():
        return
    data = json.loads(path.read_text())
    commenters = data.get("commenters", [])
    for c in commenters:
        uid = str(c.get("uid") or c.get("id", ""))
        if not uid:
            continue
        upsert_entity(con, {
            "id": uid,
            "type": "profile",
            "name": c.get("name"),
            "url": f"https://www.facebook.com/profile.php?id={uid}",
            "platform": "facebook",
            "country": c.get("location"),
            "created_at": c.get("created") or c.get("account_created"),
            "confidence": 80,
            "is_sock": 1 if uid.startswith("615") else 0,
            "notes": json.dumps({k: v for k, v in c.items() if k not in ("uid","id","name")}),
            "raw_json": json.dumps(c),
        })
        # commenter → target page relationship
        upsert_relationship(con, uid, "61585153052901", "commented_on", 90,
                            f"Commented on target page post (Day-1)")
    print(f"  commenter_network: {len(commenters)} entities")

def migrate_target_page(con):
    path = RAW_DIR / "initial_findings.json"
    if not path.exists():
        return
    data = json.loads(path.read_text())
    upsert_entity(con, {
        "id": "61585153052901",
        "type": "page",
        "name": "لبنان يتحرر",
        "url": "https://www.facebook.com/people/لبنان-يتحرر/61585153052901/",
        "platform": "facebook",
        "country": None,
        "created_at": None,
        "confidence": 100,
        "is_sock": 1,
        "notes": "TARGET PAGE. Classic page ID: 913256835205251. ~2484 followers, ~55197 talking-about (22:1 anomalous). Anti-Hezbollah IO.",
        "raw_json": json.dumps(data),
    })
    print("  initial_findings: target page entity created")

def migrate_saman_wasal(con):
    path = RAW_DIR / "work_entity_saman_wasal.json"
    if not path.exists():
        return
    data = json.loads(path.read_text())
    upsert_entity(con, {
        "id": "100065678581703",
        "type": "page",
        "name": "سمن وعسل",
        "url": "https://www.facebook.com/profile.php?id=100065678581703",
        "platform": "facebook",
        "country": "Palestine",
        "created_at": "2021-03-17",
        "confidence": 95,
        "is_sock": 1,
        "notes": "Cover asset. Page Transparency: Palestine (2 managers). Phone: +972 59-747-9787 (Palestinian). Employer listed by Ghaith Ali.",
        "raw_json": json.dumps(data),
    })
    # Also log the phone identifier
    con.execute("""
        INSERT OR IGNORE INTO identifiers(entity_id, type, value, country_code, lookup_result)
        VALUES(?,?,?,?,?)
    """, ("100065678581703", "phone", "+972597479787", "PS",
          json.dumps({"carrier": "Palestinian mobile", "prefix": "+972 59", "notes": "Not Lebanese — contradicts Ghaith Ali South Lebanon claim"})))
    print("  saman_wasal: entity + phone identifier")

def migrate_watin(con):
    path = RAW_DIR / "watin_entity.json"
    if not path.exists():
        return
    data = json.loads(path.read_text())
    upsert_entity(con, {
        "id": "100044623170419",
        "type": "page",
        "name": "وتين / Wateeen7",
        "url": "https://www.facebook.com/Wateeen7",
        "platform": "facebook",
        "country": None,
        "created_at": None,
        "confidence": 70,
        "is_sock": 0,
        "notes": "1.6M followers. Managed by Malak Almughrabi (Palestinian/Gazan). سمن وعسل follows ONLY this page.",
        "raw_json": json.dumps(data),
    })
    upsert_relationship(con, "100065678581703", "100044623170419", "follows", 95,
                        "سمن وعسل follows only Wateeen7 — single follow is highly significant")
    print("  watin: entity + relationship")

def migrate_mounetbeyti(con):
    path = RAW_DIR / "mounetbeyti_entity.json"
    if not path.exists():
        return
    data = json.loads(path.read_text())
    upsert_entity(con, {
        "id": "mounetbeyti",
        "type": "page",
        "name": "mounetbeyti",
        "url": "https://www.facebook.com/mounetbeyti",
        "platform": "facebook",
        "country": "Lebanon",
        "created_at": None,
        "confidence": 30,
        "is_sock": 0,
        "notes": "Lebanese entity. Phone 71 616 729. No structural connection to network confirmed. Treat as noise pending new link.",
        "raw_json": json.dumps(data),
    })
    con.execute("""
        INSERT OR IGNORE INTO identifiers(entity_id, type, value, country_code)
        VALUES(?,?,?,?)
    """, ("mounetbeyti", "phone", "71616729", "LB"))
    print("  mounetbeyti: entity + phone")

def migrate_ad_library(con):
    path = RAW_DIR / "ad_library_data.json"
    if not path.exists():
        return
    data = json.loads(path.read_text())
    ads = data.get("ads", [])
    for i, ad in enumerate(ads):
        ad_id = ad.get("id") or f"ad_{i}"
        con.execute("""
            INSERT OR IGNORE INTO content(id, entity_id, type, text, published_at, raw_json)
            VALUES(?,?,?,?,?,?)
        """, (str(ad_id), "61585153052901", "ad",
              ad.get("ad_creative_body") or ad.get("text"),
              ad.get("ad_delivery_start_time"),
              json.dumps(ad)))
    print(f"  ad_library: {len(ads)} ads")

def migrate_posts(con):
    path = RAW_DIR / "post_timeline.json"
    if not path.exists():
        return
    data = json.loads(path.read_text())
    posts = data.get("posts", [])
    for post in posts:
        pid = post.get("id") or post.get("post_id", "")
        if not pid:
            continue
        con.execute("""
            INSERT OR IGNORE INTO content(id, entity_id, type, text, published_at, url, raw_json)
            VALUES(?,?,?,?,?,?,?)
        """, (str(pid), "61585153052901", "post",
              post.get("message") or post.get("text"),
              post.get("created_time") or post.get("timestamp"),
              post.get("url"),
              json.dumps(post)))
    print(f"  post_timeline: {len(posts)} posts")

def add_known_annotations(con):
    annotations = [
        ("entity", "61562211934692",  "llm", "CRITICAL HUB — only entity bridging Palestinian infrastructure to Lebanese IO page. Created Jan 4 2026, 30 days before first post.", 85),
        ("entity", "100065678581703", "llm", "Cover asset. Page Transparency confirms Palestine (2 managers) — Meta-verified. Creation date 2021-03-17 (5 years old, repurposed ~Jan 2026).", 95),
        ("entity", "61585153052901",  "llm", "TARGET PAGE. 22:1 talking-about/followers ratio = definitively inauthentic. A/B ad testing pattern. SIEP violations.", 99),
        ("entity", "100044623170419", "llm", "Wateeen7 — Palestinian/Gazan managed mega-page. سمن وعسل single-follows this. Possible shared operator network.", 70),
        ("entity", "61577225957303",  "llm", "Hussan Chhab — Day-1 commenter, 615xx ID, About page empty. Suspected factory batch with Ghaith Ali.", 65),
        ("entity", "61579855714899",  "llm", "Zolfekar Hjazi — Day-1 commenter, 615xx ID, Shia-coded name (Dhu al-Faqar). About page empty. Suspected factory batch.", 65),
    ]
    for ref_type, ref_id, analyst, note, confidence in annotations:
        con.execute("""
            INSERT INTO annotations(ref_type, ref_id, analyst, note, confidence)
            VALUES(?,?,?,?,?)
        """, (ref_type, ref_id, analyst, note, confidence))
    print(f"  annotations: {len(annotations)} records")

def main():
    print(f"Initialising database at {DB_PATH}")
    con = connect()
    init_schema(con)

    print("Migrating existing raw_data files...")
    migrate_target_page(con)
    migrate_commenter_network(con)
    migrate_saman_wasal(con)
    migrate_watin(con)
    migrate_mounetbeyti(con)
    migrate_ad_library(con)
    migrate_posts(con)
    add_known_annotations(con)

    con.commit()
    con.close()

    size = os.path.getsize(DB_PATH)
    print(f"\nDone. Database size: {size/1024:.1f} KB")
    print(f"Path: {DB_PATH}")

if __name__ == "__main__":
    main()
