#!/usr/bin/env bash
#
# Find the LinkedIn company page that best matches a domain name, using the
# DuckDuckGo HTML search endpoint, and cache the answer on disk.
#
# Usage:  <this script> DOMAIN
# Env:    LINKEDIN_CACHE  cache directory (default: data/cache/linkedin)
# Output: one CSV line on stdout: "<match>,<slug>,<url>,,,,,"
#         <match> is one of:
#           exact    slug equals the domain root
#           strong   slug contains the domain root as a whole word
#           related  some other company slug was found
#           none     nothing found (slug and url are empty)
# Exit:   0 after a lookup (including "none"),
#         2 when DOMAIN is missing or reduces to an empty root.
#
# `set -e` is intentionally not used: grep exiting 1 on "no match" is a
# normal outcome here, and failures that matter are checked explicitly.
set -uo pipefail

readonly USER_AGENT="Mozilla/5.0 (X11; Linux x86_64) Chrome/120.0"
readonly SEARCH_URL="https://html.duckduckgo.com/html/"
readonly NO_MATCH_LINE="none,,,,,,,"

# Print a diagnostic on stderr; stdout is reserved for the CSV line.
warn() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

# Print $1 lowercased. tr is used instead of ${var,,} so the script keeps
# working on bash 3.x.
to_lower() {
  printf '%s' "$1" | tr '[:upper:]' '[:lower:]'
}

#######################################
# Run one DuckDuckGo HTML search and print the first LinkedIn company URL
# fragment found in the response (nothing if there is none).
# Arguments: $1 - plain-text search query (URL-encoded by curl)
# Outputs:   the matched fragment on stdout, possibly empty
# Returns:   1 if the HTTP fetch itself failed, 0 otherwise (match or not)
#######################################
find_company_url() {
  local query=$1
  local html
  # The page is captured first so a curl failure can be told apart from
  # "fetched fine but no match". -f turns HTTP >= 400 into a failure, and
  # -G/--data-urlencode keeps special characters in the query from
  # corrupting the URL.
  html=$(
    curl -sf --max-time 15 \
      -H "User-Agent: ${USER_AGENT}" \
      -G --data-urlencode "q=${query}" \
      "$SEARCH_URL" 2>/dev/null
  ) || return 1
  grep -oiE 'linkedin\.com/company/[a-z0-9-]+[^"]*' <<<"$html" | head -1
  # "No match" (grep exiting 1) is not an error for the caller.
  return 0
}

#######################################
# Extract the company slug from a fragment printed by find_company_url.
# Arguments: $1 - fragment such as "linkedin.com/company/acme?trk=x"
# Outputs:   the slug ("acme") on stdout
#######################################
extract_slug() {
  # The fragment was matched case-insensitively, so "/company/" must be
  # stripped case-insensitively too. The shortest prefix is removed so the
  # slug is the one the regex actually anchored on.
  local rest=${1#*/[Cc][Oo][Mm][Pp][Aa][Nn][Yy]/}
  # The regex tail ([^"]*) can drag along path segments, query strings,
  # HTML entities, markup or whitespace; the slug ends at the first of them.
  # A comma is also a stop character because it would break the CSV line.
  local stop='[/?#&<,[:space:]]*'
  # shellcheck disable=SC2295 # $stop is deliberately expanded as a pattern
  printf '%s' "${rest%%$stop}"
}

#######################################
# Grade how well a slug matches the domain root.
# Arguments: $1 - lowercased slug, $2 - lowercased domain root
# Outputs:   "exact", "strong" or "related" on stdout
#######################################
classify_match() {
  local slug_low=$1 root=$2
  if [[ "$slug_low" == "$root" ]]; then
    printf 'exact'
  # -F: the root is literal text, not a regex ("." must not match any char);
  # --: a root starting with "-" must not be parsed as an option.
  elif grep -qwF -- "$root" <<<"$slug_low"; then
    printf 'strong'
  else
    printf 'related'
  fi
}

main() {
  local domain=${1:-}
  if [[ -z "$domain" ]]; then
    printf 'usage: %s DOMAIN\n' "${0##*/}" >&2
    printf '%s\n' "$NO_MATCH_LINE"
    exit 2
  fi

  # Lowercase BEFORE stripping the TLD so "Example.COM" reduces to "example".
  # Suffixes are removed one after another in this fixed order, so a name
  # like "foo.co.io" reduces to "foo".
  local domain_low root tld
  domain_low=$(to_lower "$domain")
  root=$domain_low
  for tld in com org net io co me; do
    root=${root%".${tld}"}
  done
  if [[ -z "$root" ]]; then
    warn "domain '${domain}' has no name part"
    printf '%s\n' "$NO_MATCH_LINE"
    exit 2
  fi

  # Characters outside [a-z0-9._-] are replaced in the cache key so the
  # argument cannot point the cache file outside the cache directory.
  local cache_dir=${LINKEDIN_CACHE:-data/cache/linkedin}
  local cache_file="${cache_dir}/${root//[^a-z0-9._-]/_}_linkedin.txt"
  mkdir -p -- "$cache_dir" \
    || warn "cannot create cache directory '${cache_dir}'; continuing uncached"

  if [[ -s "$cache_file" ]]; then
    cat -- "$cache_file"
    exit 0
  fi

  # Query 1: quoted root restricted to LinkedIn company pages.
  # Query 2: broader free-text search, only tried when query 1 finds nothing.
  local result="" fetch_ok=1
  result=$(find_company_url "\"${root}\" site:linkedin.com/company") \
    || fetch_ok=0
  if [[ -z "$result" ]]; then
    result=$(find_company_url "${root} linkedin company") || fetch_ok=0
  fi

  local line
  if [[ -n "$result" ]]; then
    local slug slug_low match
    slug=$(extract_slug "$result")
    slug_low=$(to_lower "$slug")
    match=$(classify_match "$slug_low" "$root")
    printf -v line '%s,%s,%s,,,,,' \
      "$match" "$slug" "https://www.linkedin.com/company/${slug}"
  elif (( fetch_ok )); then
    line=$NO_MATCH_LINE
  else
    # A failed fetch is not evidence that no page exists: report "none" for
    # this run but do not cache it, so a later run can retry.
    warn "search request failed; result not cached"
    printf '%s\n' "$NO_MATCH_LINE"
    exit 0
  fi

  # Caching is best-effort; the answer is printed even if the write fails.
  printf '%s\n' "$line" >"$cache_file" \
    || warn "cannot write cache file '${cache_file}'"
  printf '%s\n' "$line"
}

main "$@"
