import fs from 'node:fs'
import https from 'node:https'
import zlib from 'node:zlib'
import path from 'node:path'
import readline from 'node:readline'


// Target sessions: diverse market regimes
const TARGET_DATES = [
  '2026-01-20',  // -4.5% down trend
  '2026-01-29',  // -5.2% sharp down
  '2026-01-31',  // -6.6% crash day
  '2026-02-05',  // -14.0% crash (biggest)
  '2026-02-06',  // +12.2% recovery pump
  '2026-02-25',  // +6.1% strong up
  '2026-03-04',  // +6.4% strong up
  '2026-03-12',  // +0.5% dead flat/chop
  '2026-03-23',  // +4.5% trending up
  '2026-03-26',  // -3.5% trending down
]

// Root directory where per-date session data is written.
// NOTE(review): TXOCAP_REPO_ROOT presumably anchors output to the repo root;
// when unset we fall back to a path relative to the current working directory
// — confirm callers always run this from the repo root in that case.
const OUTPUT_DIR = process.env.TXOCAP_REPO_ROOT
  ? path.join(process.env.TXOCAP_REPO_ROOT, 'data/historical')
  : 'data/historical'

/**
 * Download a URL over HTTPS and resolve with the full response body.
 *
 * Follows up to `maxRedirects` redirects (301/302/307/308). Redirect
 * Location values may be relative, so they are resolved against the
 * current URL. Non-200 responses reject with an Error naming the URL.
 *
 * @param url          absolute https URL to fetch
 * @param maxRedirects remaining redirect hops allowed (default 5)
 */
function download(url: string, maxRedirects = 5): Promise<Buffer> {
  return new Promise((resolve, reject) => {
    https.get(url, { headers: { 'User-Agent': 'txocap-research/1.0' } }, (res) => {
      const status = res.statusCode ?? 0
      if (status === 301 || status === 302 || status === 307 || status === 308) {
        res.resume() // discard body so the socket is released
        const location = res.headers.location
        if (!location) { reject(new Error(`Redirect without Location header for ${url}`)); return }
        if (maxRedirects <= 0) { reject(new Error(`Too many redirects for ${url}`)); return }
        // Location may be relative — resolve it against the current URL.
        download(new URL(location, url).toString(), maxRedirects - 1).then(resolve).catch(reject)
        return
      }
      if (status !== 200) {
        res.resume() // drain the error body; we only report the status
        reject(new Error(`HTTP ${status} for ${url}`))
        return
      }
      const chunks: Buffer[] = []
      res.on('data', (chunk: Buffer) => chunks.push(chunk))
      res.on('end', () => resolve(Buffer.concat(chunks)))
      res.on('error', reject)
    }).on('error', reject)
  })
}

/**
 * Promise wrapper around zlib.gunzip: decompress a gzip-compressed buffer.
 *
 * @param buf gzip-compressed data
 * @returns the decompressed bytes
 */
function gunzip(buf: Buffer): Promise<Buffer> {
  return new Promise<Buffer>((resolve, reject) => {
    zlib.gunzip(buf, (err, out) => {
      if (err) {
        reject(err)
      } else {
        resolve(out)
      }
    })
  })
}

// Convert Bybit CSV trades to our normalized format
// Bybit format: timestamp, symbol, side, size, price, tickDirection, trdMatchID, grossValue, homeNotional, foreignNotional, RPI
/**
 * Parse a Bybit public-data trades CSV into normalized trade records.
 *
 * Skips an optional header row and any malformed rows (too few columns,
 * unknown side, non-finite/non-positive timestamp, price, or size).
 * Returns trades sorted ascending by millisecond timestamp.
 */
function parseBybitTrades(csv: string): { ts: number; data: { price: number; side: 'buy'|'sell'; size: number; notionalUsd: number; exchange: string; instrumentType: string } }[] {
  const lines = csv.split('\n').filter(Boolean)
  const hasHeader = lines[0]?.toLowerCase().includes('timestamp') ?? false
  const rows = hasHeader ? lines.slice(1) : lines
  const result: ReturnType<typeof parseBybitTrades> = []
  for (const row of rows) {
    const parts = row.split(',')
    if (parts.length < 5) continue
    const ts = Math.round(parseFloat(parts[0]) * 1000) // Bybit timestamp is a unix float in seconds → ms
    // Validate the side instead of blindly casting — rows with an
    // unexpected value would otherwise carry a lying 'buy'|'sell' type.
    const rawSide = parts[2]?.trim().toLowerCase()
    if (rawSide !== 'buy' && rawSide !== 'sell') continue
    const size = parseFloat(parts[3])
    const price = parseFloat(parts[4])
    // Number.isFinite also rejects NaN and Infinity; require positive values.
    if (!Number.isFinite(ts) || ts <= 0 || !Number.isFinite(price) || price <= 0 || !Number.isFinite(size) || size <= 0) continue
    result.push({ ts, data: { price, side: rawSide, size, notionalUsd: price * size, exchange: 'BYBIT', instrumentType: 'linear' } })
  }
  return result.sort((a, b) => a.ts - b.ts)
}

// Convert Binance aggTrades CSV
// Format: agg_trade_id, price, qty, first_trade_id, last_trade_id, transact_time, is_buyer_maker
/**
 * Parse a Binance aggTrades CSV into normalized trade records.
 *
 * The aggressor side is derived from is_buyer_maker: buyer was the
 * maker → the seller was the taker → record it as a 'sell'.
 * Malformed rows (too few columns, non-finite/non-positive values) are
 * skipped; a header row fails the numeric checks and is skipped too.
 * Returns trades sorted ascending by millisecond timestamp.
 */
function parseBinanceTrades(csv: string): { ts: number; data: { price: number; side: 'buy'|'sell'; size: number; notionalUsd: number; exchange: string; instrumentType: string } }[] {
  const lines = csv.split('\n').filter(Boolean)
  const result: ReturnType<typeof parseBinanceTrades> = []
  for (const row of lines) {
    const parts = row.split(',')
    if (parts.length < 7) continue
    const price = parseFloat(parts[1])
    const qty = parseFloat(parts[2])
    const ts = parseInt(parts[5], 10)
    // Binance dumps vary between 'True'/'False' and 'true'/'false' across
    // markets — compare case-insensitively so maker-buys aren't misread.
    const isBuyerMaker = parts[6]?.trim().toLowerCase() === 'true'
    // Number.isFinite also rejects NaN and Infinity; require positive values.
    if (!Number.isFinite(ts) || ts <= 0 || !Number.isFinite(price) || price <= 0 || !Number.isFinite(qty) || qty <= 0) continue
    const side: 'buy'|'sell' = isBuyerMaker ? 'sell' : 'buy' // aggressor side
    result.push({ ts, data: { price, side, size: qty, notionalUsd: price * qty, exchange: 'BINANCE', instrumentType: 'linear' } })
  }
  return result.sort((a, b) => a.ts - b.ts)
}

/**
 * Download, parse, and persist one session's trades as JSONL.
 *
 * Idempotent: if trades.jsonl for the date already holds more than 100k
 * lines it is treated as a completed prior download and skipped. A Bybit
 * download failure is logged and the run continues (best-effort), which
 * may leave an empty trades file for that date.
 */
async function processDate(date: string): Promise<void> {
  const outDir = path.join(OUTPUT_DIR, date)
  const tradesFile = path.join(outDir, 'trades.jsonl')

  // Skip dates that already have a sufficiently large output file.
  if (fs.existsSync(tradesFile)) {
    const lines = fs.readFileSync(tradesFile).toString().split('\n').filter(Boolean).length
    if (lines > 100000) {
      process.stderr.write(`  ${date}: already exists (${lines} trades), skipping\n`)
      return
    }
  }

  fs.mkdirSync(outDir, { recursive: true })
  process.stderr.write(`  ${date}: downloading...\n`)

  // Download Bybit
  const bybitUrl = `https://public.bybit.com/trading/BTCUSDT/BTCUSDT${date}.csv.gz`
  let bybitTrades: ReturnType<typeof parseBybitTrades> = []
  try {
    const buf = await download(bybitUrl)
    const csv = (await gunzip(buf)).toString()
    bybitTrades = parseBybitTrades(csv)
    process.stderr.write(`    Bybit: ${bybitTrades.length} trades\n`)
  } catch (e: unknown) {
    // Deliberate best-effort: report the failure and keep going.
    const msg = e instanceof Error ? e.message : String(e)
    process.stderr.write(`    Bybit failed: ${msg}\n`)
  }

  // Download Binance (zip → need python or node to unzip)
  // Use Binance klines as a simpler alternative for signal computation
  // Actually: use the Bybit data only for now, label as BYBIT
  // The signal uses exchange breakdown but since we only have one exchange,
  // topShare will be 1.0 → most signals will be blocked
  // We need to handle this differently for backtesting

  // Write trades
  const allTrades = [...bybitTrades].sort((a, b) => a.ts - b.ts)
  const fd = fs.openSync(tradesFile, 'w')
  try {
    for (const t of allTrades) {
      fs.writeSync(fd, JSON.stringify({ ts: t.ts, data: t.data }) + '\n')
    }
  } finally {
    fs.closeSync(fd) // release the descriptor even if a write throws
  }

  process.stderr.write(`    Saved ${allTrades.length} trades to ${tradesFile}\n`)
}

/**
 * Entry point: ensure the output root exists, then fetch every target
 * session one at a time (sequential to stay gentle on the public hosts).
 */
async function main(): Promise<void> {
  fs.mkdirSync(OUTPUT_DIR, { recursive: true })

  const log = (msg: string): void => { process.stderr.write(msg) }
  log(`Downloading ${TARGET_DATES.length} historical sessions to ${OUTPUT_DIR}\n\n`)

  for (const date of TARGET_DATES) await processDate(date)

  log('\nDone!\n')
  log(`Sessions: ls ${OUTPUT_DIR}\n`)
}

main().catch(e => { process.stderr.write(`FATAL: ${e}\n`); process.exit(1) })
