/**
 * Embedding pipeline — text (BGE-M3, 1024-dim) + images (CLIP ViT-L/14, 768-dim).
 * Uses @huggingface/transformers (ONNX Runtime) for local inference.
 *
 * NOTE: This is a stub implementation. The actual @huggingface/transformers
 * integration will be wired once we confirm ONNX model availability.
 * For now, it provides the CLI structure and DB queries.
 */
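
// Assumed schema coupling (not confirmed by this file): text_embeddings.embedding
// is a pgvector vector(1024) and image_embeddings.embedding a vector(768), so the
// model choices above and the column types have to change together.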

import { program } from "commander";
import { query, closePool } from "./db.js";

/**
 * A row to embed: (ref_type, ref_id) is a polymorphic reference into content,
 * entities, or annotations. ref_id is text, so integer annotation ids are cast
 * with ::text in the query below.
 */
interface EmbeddableRow {
  ref_type: string;
  ref_id: string;
  text: string;
}

async function getUnembeddedTexts(): Promise<EmbeddableRow[]> {
  const result = await query<EmbeddableRow>(`
    SELECT 'content' AS ref_type, id AS ref_id,
           COALESCE(text, '') AS text
    FROM content
    WHERE text IS NOT NULL AND length(text) > 0
      AND NOT EXISTS (
        SELECT 1 FROM text_embeddings te
        WHERE te.ref_type = 'content' AND te.ref_id = content.id
      )
    UNION ALL
    SELECT 'entity', id,
           trim(COALESCE(name, '') || ' ' || COALESCE(notes, ''))
    FROM entities
    WHERE length(trim(COALESCE(name, '') || ' ' || COALESCE(notes, ''))) > 0
      AND NOT EXISTS (
        SELECT 1 FROM text_embeddings te
        WHERE te.ref_type = 'entity' AND te.ref_id = entities.id
      )
    UNION ALL
    SELECT 'annotation', id::text, note
    FROM annotations
    WHERE note IS NOT NULL AND length(note) > 0
      AND NOT EXISTS (
        SELECT 1 FROM text_embeddings te
        WHERE te.ref_type = 'annotation' AND te.ref_id = annotations.id::text
      )
  `);
  return result.rows;
}
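
/**
 * Assuming the embedding columns are pgvector: pgvector accepts the textual
 * form "[v1,v2,...]", and JSON.stringify of a number[] produces exactly that.
 * Hypothetical helper for the commented pipeline sketches below; exported so
 * it is ready once they are wired up.
 */
export function toVectorLiteral(vec: number[]): string {
  return JSON.stringify(vec);
}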

/** An image-typed media row with no image_embeddings entry yet. */
interface MediaRow {
  id: string;
  file_path: string;
  entity_id: string | null;
}

async function getUnembeddedImages(): Promise<MediaRow[]> {
  const result = await query<MediaRow>(`
    SELECT m.id, m.file_path, m.entity_id
    FROM media m
    WHERE m.type IN ('image', 'screenshot', 'thumbnail')
      AND NOT EXISTS (
        SELECT 1 FROM image_embeddings ie WHERE ie.media_id = m.id
      )
  `);
  return result.rows;
}

async function embedTexts(): Promise<void> {
  const rows = await getUnembeddedTexts();
  if (rows.length === 0) {
    console.log("Text embeddings: nothing new to embed.");
    return;
  }

  console.log(`Text embeddings: ${rows.length} items to embed.`);

  // TODO: Wire the @huggingface/transformers ONNX pipeline once model
  // availability is confirmed. A sketch, assuming ONNX weights for BAAI/bge-m3
  // are on the Hub and that chunk_idx/model have column defaults matching the
  // ON CONFLICT target. (Add at top: import { pipeline } from "@huggingface/transformers";)
  //
  // // v3 takes dtype (e.g. "q8") rather than v2's `quantized: true` flag.
  // const pipe = await pipeline("feature-extraction", "BAAI/bge-m3", { dtype: "q8" });
  // const BATCH = 32;
  // for (let i = 0; i < rows.length; i += BATCH) {
  //   const batch = rows.slice(i, i + BATCH);
  //   const texts = batch.map((r) => r.text.slice(0, 2000));
  //   // CLS pooling + L2 normalization matches BGE-M3 dense retrieval usage.
  //   const embeddings = await pipe(texts, { pooling: "cls", normalize: true });
  //   // One Tensor of shape [batch, 1024] comes back; its rows are not
  //   // indexable with [], so convert to number[][] first.
  //   const vecs = embeddings.tolist() as number[][];
  //   for (let j = 0; j < batch.length; j++) {
  //     await query(
  //       `INSERT INTO text_embeddings (ref_type, ref_id, text, embedding)
  //        VALUES ($1, $2, $3, $4)
  //        ON CONFLICT (ref_type, ref_id, chunk_idx, model) DO NOTHING`,
  //       [batch[j].ref_type, batch[j].ref_id, texts[j], toVectorLiteral(vecs[j])],
  //     );
  //   }
  //   console.log(`  ${Math.min(i + BATCH, rows.length)}/${rows.length}`);
  // }

  console.log(
    "  [stub] @huggingface/transformers ONNX integration pending.\n" +
    "  Add @huggingface/transformers and onnxruntime-node to package.json/package-lock.json, rebuild the devenv,\n" +
    "  then uncomment the pipeline sketch in src/embed.ts.",
  );
}
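
/**
 * The ON CONFLICT target above includes chunk_idx, which suggests long texts
 * are meant to be split into several embedded chunks rather than truncated at
 * 2000 chars. A minimal sketch, assuming fixed-size character windows with
 * overlap (sizes are placeholders, not tuned); wire it into the batch loop
 * when the pipeline is uncommented.
 */
export function chunkText(text: string, size = 2000, overlap = 200): string[] {
  const chunks: string[] = [];
  for (let start = 0; start < text.length; start += size - overlap) {
    chunks.push(text.slice(start, start + size));
    if (start + size >= text.length) break; // final window reached the end
  }
  return chunks.length > 0 ? chunks : [text];
}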

async function embedImages(): Promise<void> {
  const rows = await getUnembeddedImages();
  if (rows.length === 0) {
    console.log("Image embeddings: nothing new to embed.");
    return;
  }

  console.log(`Image embeddings: ${rows.length} items to embed.`);

  // TODO: Wire the CLIP vision tower. The generic 'image-feature-extraction'
  // pipeline returns vision-tower hidden states (1024-dim for ViT-L/14), not
  // the 768-dim joint-space embedding, so this sketch uses
  // CLIPVisionModelWithProjection instead. Assumes ONNX weights are available
  // and that file_path is readable by this process. (Add at top:
  // import { AutoProcessor, CLIPVisionModelWithProjection, RawImage } from "@huggingface/transformers";)
  //
  // const processor = await AutoProcessor.from_pretrained("openai/clip-vit-large-patch14");
  // const vision = await CLIPVisionModelWithProjection.from_pretrained("openai/clip-vit-large-patch14");
  // for (const row of rows) {
  //   try {
  //     const image = await RawImage.read(row.file_path);
  //     const inputs = await processor(image);
  //     const { image_embeds } = await vision(inputs);
  //     // L2-normalize so cosine similarity reduces to a dot product.
  //     const vec = image_embeds.normalize().tolist()[0] as number[];
  //     await query(
  //       `INSERT INTO image_embeddings (media_id, embedding)
  //        VALUES ($1, $2)
  //        ON CONFLICT (media_id, model) DO NOTHING`,
  //       [row.id, toVectorLiteral(vec)],
  //     );
  //   } catch (err) {
  //     console.warn(`  CLIP error on ${row.file_path}: ${err}`);
  //   }
  // }

  console.log(
    "  [stub] CLIP ONNX integration pending.\n" +
    "  Add @huggingface/transformers and onnxruntime-node to package.json/package-lock.json, rebuild the devenv,\n" +
    "  then uncomment the pipeline sketch above.",
  );
}

async function main(): Promise<void> {
  program
    .name("db-embed")
    .description("Embedding pipeline — text (BGE-M3) + images (CLIP)")
    .option("--text-only", "Only embed text")
    .option("--images-only", "Only embed images")
    .parse();

  const opts = program.opts<{ textOnly?: boolean; imagesOnly?: boolean }>();

  try {
    if (!opts.imagesOnly) {
      console.log("=== Text embedding pass ===");
      await embedTexts();
    }
    if (!opts.textOnly) {
      console.log("=== Image embedding pass ===");
      await embedImages();
    }
    console.log("=== Embedding pipeline complete ===");
  } finally {
    await closePool();
  }
}

main().catch((err) => {
  console.error("Fatal:", err);
  process.exit(1);
});
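
// Example invocations (assuming the bin name registered in package.json is "db-embed"):
//   db-embed                # embed new texts, then new images
//   db-embed --text-only    # skip the image pass
//   db-embed --images-only  # skip the text pass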
