#!/usr/bin/env bash
# linkedin_check.sh — Check LinkedIn company page for domain
# Fallback chain: Brave Search API → web_search → cached result
# Note: deliberately no `set -e` — the fallback chain relies on commands
# being allowed to fail.
set -uo pipefail

# Required positional argument: the domain to look up.
DOMAIN="${1:-}"
if [[ -z "$DOMAIN" ]]; then
  printf 'Usage: %s <domain>\n' "$0" >&2
  exit 2
fi

# Strip at most ONE known TLD suffix from a domain to get the brand root.
# The previous chained `${ROOT%.tld}` strips could remove two suffixes
# (e.g. "foo.me.co" → ".co" stripped, then ".me" stripped → "foo");
# here we stop after the first matching suffix.
# Arguments: $1 - domain name
# Outputs:   root (domain minus one trailing TLD) on stdout
strip_tld() {
  local name=$1 tld
  for tld in .com .org .net .io .co .me; do
    if [[ "$name" == *"$tld" ]]; then
      printf '%s\n' "${name%"$tld"}"
      return 0
    fi
  done
  # No known suffix — return the input unchanged.
  printf '%s\n' "$name"
}

ROOT="$(strip_tld "$DOMAIN")"
ROOT_LOW="${ROOT,,}"   # bash 4+ lowercase expansion (replaces tr pipeline)

# Cache location is overridable via LINKEDIN_CACHE for tests/deployment.
CACHE_DIR="${LINKEDIN_CACHE:-data/cache/linkedin}"
mkdir -p "$CACHE_DIR"
CACHE_FILE="$CACHE_DIR/${ROOT_LOW}_linkedin.txt"

# 1. A non-empty cached result short-circuits everything else.
[[ -s "$CACHE_FILE" ]] && { cat "$CACHE_FILE"; exit 0; }

# 2. Try Brave Search (API key-dependent; often exhausted).
BRAVE_DIR="${BRAVE_SEARCH:-$HOME/.pi/agent/skills/local-pi-skills/brave-search}"
SEARCH_JS="$BRAVE_DIR/search.js"
SEARCH_RESULT=""
if [[ -x "$SEARCH_JS" ]]; then
  # Quote "$SEARCH_JS": the default path lives under $HOME, which can
  # contain spaces — the previous unquoted form word-split the path.
  # Failures are swallowed (|| true) so we fall through to step 3.
  SEARCH_RESULT="$("$SEARCH_JS" "\"$DOMAIN\" site:linkedin.com" -n 5 2>/dev/null || true)"
fi

# 3. Fallback: pi web_search tool (SearXNG).
# Slow, but works for unique brand names. Triggered when Brave returned
# nothing or explicitly said "No results found".
case "$SEARCH_RESULT" in
  ''|*"No results found"*)
    PI_RESULTS=""
    if command -v pi &>/dev/null; then
      # head -40 keeps the payload small; errors are intentionally ignored.
      PI_RESULTS="$(pi "$DOMAIN linkedin company" 2>/dev/null | head -40 || true)"
    fi
    SEARCH_RESULT="$PI_RESULTS"
    ;;
esac

# 4. Parse results
MATCH="none"
COMPANY=""
URL=""
INDUSTRY=""
SIZE=""
HQ=""
WEBSITE=""
DESC=""

# Classify how well a LinkedIn hit matches the domain root.
# Arguments: $1 - lowercase URL slug, $2 - result title (any case),
#            $3 - lowercase domain root
# Outputs:   one of: exact | strong | related
# Uses literal bash substring matching — the previous grep treated the
# root as a REGEX, so dotted roots like "sub.example" false-matched
# (`.` matched any character) and an empty root matched everything.
classify_match() {
  local slug_low=$1 title_low root_low=$3
  title_low="$(printf '%s' "$2" | tr '[:upper:]' '[:lower:]')"
  if [[ "$slug_low" == "$root_low" ]]; then
    printf 'exact\n'
  elif [[ -n "$root_low" && ( "$slug_low" == *"$root_low"* || "$title_low" == *"$root_low"* ) ]]; then
    # -n guard: an empty root must not count as a substring hit.
    printf 'strong\n'
  else
    printf 'related\n'
  fi
}

if [[ -n "$SEARCH_RESULT" ]]; then
  # Try to find linkedin.com/company/ URLs
  COMPANY_URL=$(echo "$SEARCH_RESULT" | grep -oiE 'https?://(www\.)?linkedin\.com/company/[^[:space:]"]+' | head -1)
  if [[ -n "$COMPANY_URL" ]]; then
    URL="$COMPANY_URL"
    # Extract company name from URL slug (strip anything after / or ?)
    SLUG="${COMPANY_URL##*/company/}"
    SLUG="${SLUG%%[/?]*}"
    SLUG_LOW="${SLUG,,}"

    # Extract title/description from result.
    # NOTE: grep -P is GNU-specific (as in the original) — Linux only.
    TITLE=$(echo "$SEARCH_RESULT" | grep -m1 -oP '(?i)title:\s*\K[^|]+' | head -1 | sed 's/ | LinkedIn$//i; s/\s*$//')
    SNIPPET=$(echo "$SEARCH_RESULT" | grep -m1 -oP '(?i)snippet:\s*\K.*' | head -1)

    COMPANY="${TITLE:-}"
    DESC="${SNIPPET:-}"

    # Determine match quality by comparing domain root to slug/title.
    MATCH="$(classify_match "$SLUG_LOW" "$TITLE" "$ROOT_LOW")"

    # Try to extract the company's own website from the snippet.
    WEBSITE=$(echo "$SNIPPET" | grep -oiE 'https?://[^[:space:]]+' | grep -v 'linkedin' | head -1)
  fi
fi

# 5. Clean CSV fields: commas/newlines become spaces, trailing whitespace
# is stripped.  printf (not echo) so values that start with "-n"/"-e"
# are not swallowed as echo options.
clean_field() { printf '%s' "$1" | tr ',\n' '  ' | sed 's/[[:space:]]*$//'; }
COMPANY=$(clean_field "$COMPANY")
URL=$(clean_field "$URL")
INDUSTRY=$(clean_field "$INDUSTRY")
SIZE=$(clean_field "$SIZE")
HQ=$(clean_field "$HQ")
WEBSITE=$(clean_field "$WEBSITE")
DESC=$(clean_field "$DESC")

# 6. Cache write + output: join the eight fields with commas, persist,
# then echo the cached row to stdout.
fields=("$MATCH" "$COMPANY" "$URL" "$INDUSTRY" "$SIZE" "$HQ" "$WEBSITE" "$DESC")
(IFS=','; printf '%s\n' "${fields[*]}") > "$CACHE_FILE"
cat "$CACHE_FILE"
