#!/usr/bin/env bash
# Build the canonical GPU builder kexec tarball from the same NixOS module used
# for the disk image, then optionally copy it to the current rescue host,
# extract it, prepare the Nix store, and kexec into it.

# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Locate the repository through git, load the shared builder helpers, and
# switch to TOTALLY_SPIES_REPO_ROOT (presumably exported by builder-common.sh;
# it is not defined in this script).
REPO_ROOT="$(git rev-parse --show-toplevel)"
. "$REPO_ROOT/tools/lib/builder-common.sh"
cd "$TOTALLY_SPIES_REPO_ROOT"

# Each setting keeps a non-empty value inherited from the environment and
# otherwise falls back to the default on the right-hand side.
: "${STATE_FILE:=$TOTALLY_SPIES_BUILDER_HOST_STATE_FILE}"
: "${PLAN_FILE:=$TOTALLY_SPIES_BUILDER_ROOTFS_PLAN_FILE}"
: "${FACTS_FILE:=$TOTALLY_SPIES_BUILDER_RESCUE_FACTS_FILE}"
: "${SSH_KEY:=$HOME/.ssh/id_ed25519}"
: "${PUB_KEY:=$SSH_KEY.pub}"
: "${NIX_BIN:=nix}"

# Filled in by command-line parsing: target host and dry-run (0) / live (1).
SERVER_HOST=""
EXECUTE=0

# Print the command synopsis and a short description of dry-run versus
# --execute mode to stdout. Callers redirect to stderr for usage errors.
usage() {
  local script_name
  script_name="$(basename "$0")"
  printf '%s\n' \
    "Usage: ${script_name} [--execute] [server-host]" \
    "" \
    "Default mode is dry-run: build the repo-independent builder tarball and print" \
    "what would happen next. Pass --execute to copy it to the rescue host, extract" \
    "it, prepare /nix for the builder closure, and kexec into the builder."
}

# Parse command-line arguments into the EXECUTE and SERVER_HOST globals.
#
# Accepted arguments:
#   --execute     set EXECUTE=1 (perform the live kexec handoff)
#   -h, --help    print usage to stdout and exit 0
#   <host>        at most one positional server host
#
# Any other argument starting with "-" is rejected. Without that arm a
# mistyped flag (for example "--excute") would be taken as the server host:
# the run would silently stay in dry-run mode against a bogus host name, and
# an option-like string would later be handed to the ssh/scp helpers.
# Usage errors are reported on stderr and exit with status 1.
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --execute)
        EXECUTE=1
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      -*)
        echo "Unknown option: $1" >&2
        usage >&2
        exit 1
        ;;
      *)
        # Only one positional host is allowed.
        if [ -n "$SERVER_HOST" ]; then
          echo "Unexpected extra argument: $1" >&2
          usage >&2
          exit 1
        fi
        SERVER_HOST="$1"
        ;;
    esac
    shift
  done
}

parse_args "$@"

# Check every input file up front, in the same order as before. The
# builder_require_file helper comes from the sourced library and is presumably
# what aborts on a missing path -- confirm there.
for required_file in "$FACTS_FILE" "$PLAN_FILE" "$SSH_KEY" "$PUB_KEY"; do
  builder_require_file "$required_file"
done

# With no host on the command line, ask builder_current_host; if that yields
# nothing either, explain how to supply one and stop.
if [ -z "$SERVER_HOST" ]; then
  SERVER_HOST="$(builder_current_host)"
  if [ -z "$SERVER_HOST" ]; then
    printf '%s\n' \
      "Usage: $(basename "$0") [--execute] <server-host>" \
      "Or save server_host in $STATE_FILE first." >&2
    exit 1
  fi
fi

# Block device and mount point for the new root, read from the plan file.
md_device="$(builder_json_get "$PLAN_FILE" md_device)"
mount_point="$(builder_json_get "$PLAN_FILE" mount_point)"

# Assemble the nix invocation as an array so each argument stays one word.
# The build prints its output path(s) on stdout and creates no result symlink.
build_cmd=("$NIX_BIN" build)
build_cmd+=(-f nix/builder-image.nix kexecTarball)
build_cmd+=(--arg factsFile "$FACTS_FILE")
build_cmd+=(--arg authorizedKeyFile "$PUB_KEY")
build_cmd+=(--no-link --print-out-paths)

# Show the exact command (shell-quoted) before running it.
echo "=== Building builder kexec tarball ==="
printf 'Command:'
printf ' %q' "${build_cmd[@]}"
printf '\n'

# The build prints its output path on stdout. That path may be the tarball
# itself or a directory containing it somewhere within three levels.
tarball_out="$("${build_cmd[@]}")"
if [ -d "$tarball_out" ]; then
  # Let find stop at the first match itself (-print -quit). Piping into
  # `head -n1` can kill find with SIGPIPE once head exits, and under
  # `set -o pipefail` that failure would abort the script with no message.
  tarball_path="$(find "$tarball_out" -maxdepth 3 -type f -name '*.tar.xz' -print -quit)"
else
  tarball_path="$tarball_out"
fi

# Covers both an empty search result and an output path that is not a file.
if [ -z "${tarball_path:-}" ] || [ ! -f "$tarball_path" ]; then
  echo "Failed to resolve built tarball from: $tarball_out" >&2
  exit 1
fi

echo "Tarball output: $tarball_out"
echo "Tarball file: $tarball_path"
echo "Server host: $SERVER_HOST"
echo "MD device: $md_device"
echo "Mount point: $mount_point"

# Without --execute, describe the live steps and stop before touching the host.
if [ "$EXECUTE" -ne 1 ]; then
  cat <<EOF

Dry run only. Live action would:
  1. scp $tarball_path root@[$SERVER_HOST]:/tmp/spies-builder-kexec.tar.xz
  2. ensure $md_device is mounted at $mount_point in rescue
  3. extract the tarball at $mount_point so it becomes the future /
  4. chroot into $mount_point and run /kexec-bundle/prepare-root
  5. run TARGET_ROOT=$mount_point $mount_point/kexec-bundle/kexec-boot

Run again with --execute to perform the kexec handoff.
EOF
  exit 0
fi

# Path on the rescue host where the tarball is staged; the remote script
# receives the same path as its third argument.
remote_tarball="/tmp/spies-builder-kexec.tar.xz"
# Destination string used only for the log line below.
scp_target="$(builder_format_scp_target root "$SERVER_HOST" "$remote_tarball")"

printf 'Copying kexec tarball to %s\n' "$scp_target"
builder_scp_to "$tarball_path" root "$SERVER_HOST" "$remote_tarball"

# Run a command on SERVER_HOST as root by forwarding all arguments, unchanged,
# to the builder_ssh helper. Stdin is passed through, which the caller uses
# to feed a script to a remote `bash -s`.
run_remote() {
  local -r remote_user=root
  builder_ssh "$remote_user" "$SERVER_HOST" "$@"
}

# Script run on the rescue host as `bash -s -- <md_device> <mount_point>
# <tarball>`. The quoted 'EOS' delimiter keeps every expansion literal here so
# it is evaluated remotely. Changes relative to a naive sequence:
#   * the bundle is validated right after extraction, before any bind mount
#     alters the rescue system;
#   * /proc, /dev and /sys are bind-mounted only when the target is not already
#     a mount point, so a retry does not stack duplicate mounts;
#   * TARGET_ROOT is shell-quoted when written into the launcher script, so
#     unusual characters in the mount point cannot break or alter it.
remote_action=$(cat <<'EOS'
set -euo pipefail

MD_DEVICE="$1"
TARGET_ROOT="$2"
TARBALL_PATH="$3"

# Bind-mount $1 onto $2 unless $2 is already a mount point.
bind_mount_once() {
  local bind_source="$1"
  local bind_target="$2"
  mkdir -p "$bind_target"
  if ! mountpoint -q "$bind_target"; then
    mount --bind "$bind_source" "$bind_target"
  fi
}

mkdir -p "$TARGET_ROOT"
if ! mountpoint -q "$TARGET_ROOT"; then
  mount "$MD_DEVICE" "$TARGET_ROOT"
fi

tar -xJf "$TARBALL_PATH" -C "$TARGET_ROOT"

# Fail early on an incomplete bundle, before touching /nix or the chroot mounts.
if [ ! -x "$TARGET_ROOT/kexec-bundle/prepare-root" ]; then
  echo "Missing $TARGET_ROOT/kexec-bundle/prepare-root after extraction" >&2
  exit 1
fi
if [ ! -x "$TARGET_ROOT/kexec-bundle/kexec-boot" ]; then
  echo "Missing $TARGET_ROOT/kexec-bundle/kexec-boot after extraction" >&2
  exit 1
fi

# Expose the extracted store at /nix; drop any previous mount there first.
mkdir -p /nix
umount /nix >/dev/null 2>&1 || true
mount --bind "$TARGET_ROOT/nix" /nix

# Kernel filesystems needed inside the chroot.
bind_mount_once /proc "$TARGET_ROOT/proc"
bind_mount_once /dev "$TARGET_ROOT/dev"
bind_mount_once /sys "$TARGET_ROOT/sys"

chroot "$TARGET_ROOT" /kexec-bundle/prepare-root

# Write the launcher. TARGET_ROOT is embedded shell-quoted; the exec line is
# escaped so it expands when the launcher runs, using the exported value.
target_root_quoted="$(printf '%q' "$TARGET_ROOT")"
cat > /root/spies-kexec-now.sh <<EOF
#!/usr/bin/env bash
set -euo pipefail
systemctl stop nix-daemon.service >/dev/null 2>&1 || true
systemctl stop nix-daemon.socket >/dev/null 2>&1 || true
export TARGET_ROOT=$target_root_quoted
exec "\$TARGET_ROOT/kexec-bundle/kexec-boot"
EOF
chmod 700 /root/spies-kexec-now.sh

# Detach the launcher so this SSH session can return before the kexec happens.
nohup /root/spies-kexec-now.sh >/root/spies-kexec.log 2>&1 </dev/null &
echo "Queued kexec via /kexec-bundle/kexec-boot"
EOS
)

echo "Launching remote kexec on $SERVER_HOST"
# Shell-quote the three positional arguments so they survive being re-parsed
# by the remote shell, then feed the script itself over stdin to `bash -s`.
remote_args="$(printf '%q ' "$md_device" "$mount_point" "$remote_tarball")"
run_remote "bash -s -- $remote_args" <<< "$remote_action"

echo "Waiting for SSH to come back after kexec..."
# Pause before probing SSH (presumably so the host has time to begin the
# kexec). The meaning of the trailing "18 5" is defined by
# builder_wait_for_ssh in the shared library; check there.
sleep 10
if ! builder_wait_for_ssh "$SERVER_HOST" root 18 5; then
  echo "Kexec was launched, but SSH did not come back within the wait window." >&2
  echo "Check the host manually, then refresh local builder state with builder:local:register-remote." >&2
  exit 1
fi

echo "SSH is reachable again on $SERVER_HOST"
echo "Next: devenv tasks run --show-output builder:local:register-remote"
exit 0
