#!/usr/bin/env python3
"""
watcher_v4.py — Stock Bot GitHub Command Bridge + Process Manager
=================================================================
Manages trading_bot, shadow_bot, AND dashboard_server:
  - GitHub command bridge (pull cmds, push results, sync code)
  - Keepalive for trading_bot    (checks every 120s)
  - Keepalive for shadow_bot     (checks every 120s, no Alpaca WS — currently
    disabled in main(): shadow_bot was retired 2026-04-30; helpers are kept)
  - Keepalive for dashboard_server (checks every 120s, port 8080)

Key improvements over v4.0:
  - 120s keepalive (was 3600s — 60 min blind spot → now 2 min)
  - Shadow bot managed alongside trading bot
  - Dashboard server managed as a third process
  - Bot-specific pgrep (prevents false positives between the two bots)
  - Guard against double-starting (check-before-start in all start fns)
  - Shadow bot launched with ALPACA_API_KEY="" to disable WS connection
  - git reset --soft instead of merge/reset --hard (advances HEAD without writing
    non-bot project files from the repo to disk)
  - last_cmd_id persisted to /tmp file (prevents re-running on watcher restart)
"""

import os
import sys
import json
import time
import base64
import subprocess
import urllib.request
import urllib.error
from datetime import datetime, timezone

# ── Configuration ─────────────────────────────────────────────────────────────
# SECURITY: the GitHub personal access token must never live in source code.
# It is read from the GITHUB_PAT environment variable of the watcher process
# (empty string when unset, in which case GitHub API calls will be rejected).
# The token that used to be hard-coded on this line is exposed in version
# control history and must be revoked and replaced.
PAT         = os.environ.get("GITHUB_PAT", "").strip()
REPO        = "Jake-Culberson/Claud-Code"
# Base URL of the repository "contents" endpoint; file paths are appended to it.
API         = f"https://api.github.com/repos/{REPO}/contents"
VERSION     = "v4.4"

# ── Trading Bot ───────────────────────────────────────────────────────────────
# WORK_DIR is the primary bot's checkout; the script and log paths are derived
# from it.  VENV_PYTHON is the interpreter used to launch managed processes.
WORK_DIR    = "/opt/services/bots/trading_bot/Stock_Bot"
VENV_PYTHON = "/opt/services/bots/trading_bot/venv/bin/python3"
BOT_SCRIPT  = f"{WORK_DIR}/StockTrading.py"
BOT_LOG     = f"{WORK_DIR}/trading.log"
# Command-line arguments appended after BOT_SCRIPT when the bot is started.
BOT_ARGS    = [
    "--mode", "both",
    "--max-workers", "12",
]

# ── Shadow Bot ────────────────────────────────────────────────────────────────
# Same layout as the trading bot, rooted in a separate checkout.
SHADOW_WORKDIR = "/opt/services/bots/shadow_bot/Stock_Bot"
SHADOW_SCRIPT  = f"{SHADOW_WORKDIR}/StockTrading.py"
SHADOW_LOG     = f"{SHADOW_WORKDIR}/trading.log"
# Command-line arguments appended after SHADOW_SCRIPT when the bot is started.
SHADOW_ARGS    = [
    "--mode", "both",
    "--max-workers", "8",
]

# ── Dashboard Server ──────────────────────────────────────────────────────────
# Both paths live inside the trading bot checkout.
DASHBOARD_SCRIPT = (
    "/opt/services/bots/trading_bot/Stock_Bot"
    "/dashboard_server.py"
)
DASHBOARD_LOG = (
    "/opt/services/bots/trading_bot/Stock_Bot"
    "/logs/dashboard.log"
)

# ── Timing ────────────────────────────────────────────────────────────────────
# Both intervals are expressed in seconds.
POLL_SECS = 10        # pause between main-loop cycles (GitHub command check)
KEEPALIVE = 2 * 60    # minimum gap between process health checks — was 3600

# The ID of the last executed command is written to this file so that a
# restarted watcher does not execute the same command again.
LAST_CMD_ID_FILE = "/tmp/watcher_last_cmd_id"

# ── Helpers ────────────────────────────────────────────────────────────────────
# Request headers attached to every GitHub API call made by api_get()/api_put().
HEADERS = dict(
    [
        ("Authorization", f"Bearer {PAT}"),
        ("Content-Type", "application/json"),
        ("User-Agent", "StockBotWatcher/4.4"),
    ]
)

def log(msg):
    """Print *msg* to stdout prefixed with a local-time timestamp, flushing immediately."""
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{stamp}] {msg}", flush=True)

def _load_last_cmd_id() -> str:
    """Return the command ID persisted in LAST_CMD_ID_FILE, stripped of whitespace.

    Returns "" when the file is missing, unreadable or not decodable as text,
    which the caller treats as "no command executed yet".
    """
    try:
        with open(LAST_CMD_ID_FILE) as f:
            return f.read().strip()
    except (OSError, ValueError):
        # OSError: file absent/unreadable.  ValueError covers UnicodeDecodeError.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return ""

def _save_last_cmd_id(cmd_id: str):
    """Write *cmd_id* to LAST_CMD_ID_FILE so it survives watcher restarts.

    Best-effort: a failure to write is logged and otherwise ignored, so the
    caller can still go on to execute the command.
    """
    try:
        with open(LAST_CMD_ID_FILE, "w") as f:
            # str() keeps a non-string ID (e.g. a JSON number) from raising here.
            f.write(str(cmd_id))
    except (OSError, ValueError) as e:
        log(f"Could not persist last_cmd_id to {LAST_CMD_ID_FILE}: {e}")

def api_get(path):
    """GET `{API}/{path}` from GitHub and return the decoded JSON object.

    Always returns a dict: {} is returned (and the problem logged) when the
    request fails, the body is not valid JSON, or the JSON value is not an
    object (the contents endpoint answers with an array for directories).
    """
    try:
        req = urllib.request.Request(f"{API}/{path}", headers=HEADERS)
        with urllib.request.urlopen(req, timeout=15) as r:
            data = json.loads(r.read())
    except Exception as e:
        log(f"API GET error ({path}): {e}")
        return {}
    if not isinstance(data, dict):
        # Callers use .get() on the result, so never hand back a list/scalar.
        log(f"API GET error ({path}): expected a JSON object, got {type(data).__name__}")
        return {}
    return data

def api_put(path, content_bytes, msg, sha=""):
    """PUT a file to `{API}/{path}` on GitHub.

    *content_bytes* is base64-encoded into the request body together with the
    commit message *msg*; *sha* is sent only when it is non-empty.

    Returns (ok, new_sha, commit).  On success ok is True, new_sha is the
    "content.sha" field of the response and commit is the first 10 characters
    of "commit.sha".  On any failure the error is logged and (False, sha, "")
    is returned, i.e. the caller gets back the SHA it passed in.
    """
    body = {
        "message": msg,
        "content": base64.b64encode(content_bytes).decode(),
        **({"sha": sha} if sha else {}),
    }
    encoded = json.dumps(body).encode()
    failure = (False, sha, "")
    try:
        request = urllib.request.Request(
            f"{API}/{path}", data=encoded, method="PUT", headers=HEADERS
        )
        with urllib.request.urlopen(request, timeout=20) as resp:
            reply = json.loads(resp.read())
            blob_sha = reply.get("content", {}).get("sha", "")
            commit_sha = reply.get("commit", {}).get("sha", "")
            return True, blob_sha, commit_sha[:10]
    except urllib.error.HTTPError as err:
        # Include the start of the response body: it carries GitHub's reason.
        detail = err.read().decode()[:200]
        log(f"API PUT error {err.code} ({path}): {detail}")
        return failure
    except Exception as err:
        log(f"API PUT error ({path}): {err}")
        return failure

def git_fetch():
    """Run `git fetch origin main -q` in the repository root (parent of WORK_DIR).

    Only updates the remote-tracking ref; local files are not modified.
    Never raises: launch errors, timeouts and non-zero exits are logged.
    """
    try:
        r = subprocess.run(
            ["git", "fetch", "origin", "main", "-q"],
            cwd=os.path.dirname(WORK_DIR),
            timeout=30, capture_output=True
        )
    except Exception as e:
        log(f"git fetch error: {e}")
        return
    if r.returncode != 0:
        # Output is captured, so without this a failing fetch would be invisible.
        detail = r.stderr.decode(errors="replace").strip()[:200]
        log(f"git fetch failed (exit {r.returncode}): {detail}")

def git_local_sha():
    """Return the commit SHA of the local HEAD, or "" if it cannot be resolved.

    Runs `git rev-parse HEAD` in the repository root (parent of WORK_DIR).
    """
    try:
        r = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=os.path.dirname(WORK_DIR),
            capture_output=True, text=True, timeout=10
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        return ""
    if r.returncode != 0:
        # On failure rev-parse may echo its argument ("HEAD") to stdout;
        # never report that as a SHA.
        return ""
    return r.stdout.strip()

def git_remote_sha():
    """Return the commit SHA of origin/main, or "" if it cannot be resolved.

    Runs `git rev-parse origin/main` in the repository root (parent of
    WORK_DIR); the value reflects the most recent fetch.
    """
    try:
        r = subprocess.run(
            ["git", "rev-parse", "origin/main"],
            cwd=os.path.dirname(WORK_DIR),
            capture_output=True, text=True, timeout=10
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        return ""
    if r.returncode != 0:
        # On failure rev-parse may echo "origin/main" to stdout; returning that
        # would look like a new remote commit and trigger a sync every cycle.
        return ""
    return r.stdout.strip()

def sync_code():
    """Copy the bot code files from origin/main into WORK_DIR. Never commits.

    Each listed file is read with `git show origin/main:<path>` and written to
    disk only when its content differs from what is already there, so the
    "Code synced" log line names the files that actually changed.  Files that
    are missing or empty on the remote are skipped silently.

    Afterwards HEAD is moved to origin/main with `git reset --soft`.
    """
    repo_root = os.path.dirname(WORK_DIR)
    files_to_sync = [
        ("Stock_Bot/StockTrading.py",      os.path.join(WORK_DIR, "StockTrading.py")),
        ("Stock_Bot/watcher_v4.py",        os.path.join(WORK_DIR, "watcher_v4.py")),
        ("Stock_Bot/gen_dashboard.py",     os.path.join(WORK_DIR, "gen_dashboard.py")),
        ("Stock_Bot/watcher_helper.py",    os.path.join(WORK_DIR, "watcher_helper.py")),
        ("Stock_Bot/dashboard_server.py",  os.path.join(WORK_DIR, "dashboard_server.py")),
    ]
    updated = []
    for remote_path, local_path in files_to_sync:
        try:
            result = subprocess.run(
                ["git", "show", f"origin/main:{remote_path}"],
                cwd=repo_root, capture_output=True, timeout=15
            )
            if result.returncode != 0 or not result.stdout:
                continue

            # Skip files whose on-disk content already matches the remote.
            try:
                with open(local_path, "rb") as f:
                    current = f.read()
            except OSError:
                current = None  # not present / unreadable → treat as changed
            if current == result.stdout:
                continue

            with open(local_path, "wb") as f:
                f.write(result.stdout)
            updated.append(os.path.basename(local_path))
        except Exception as e:
            log(f"  sync error for {remote_path}: {e}")

    # Advance local HEAD to remote using reset --soft so HEAD tracks origin/main
    # (prevents the sync-loop "new commits detected" every cycle) WITHOUT touching
    # any files on disk.  --hard would pull ALL tracked files in the repo (including
    # non-bot project folders like reports/, CCRE-Work/, etc.) which we never want.
    # The individual file-by-file sync above already handles the bot code files.
    try:
        # Remove leftover git state files first.
        # NOTE(review): they are removed unconditionally — this assumes no other
        # git process is operating on this checkout at the same time.
        for lock in [".git/index.lock", ".git/MERGE_HEAD"]:
            lp = os.path.join(repo_root, lock)
            if os.path.exists(lp):
                os.remove(lp)
        reset = subprocess.run(
            ["git", "reset", "--soft", "origin/main"],
            cwd=repo_root, capture_output=True, timeout=20
        )
        if reset.returncode != 0:
            # If HEAD did not move, the next cycle will detect "new commits" again.
            detail = reset.stderr.decode(errors="replace").strip()[:200]
            log(f"  git reset failed (exit {reset.returncode}): {detail}")
    except Exception as e:
        log(f"  git reset error: {e}")

    if updated:
        log(f"Code synced: {', '.join(updated)}")

def ensure_dirs():
    """Create the runtime directory trees of both bots if they do not exist yet."""
    trading_subdirs = (
        ("logs", "debug"),
        ("features", "daily"),
        ("features", "weekly"),
        ("data", "db"),
        ("models",),
        ("charts",),
        ("backup", "logs"),
        ("backup", "charts"),
        ("backup", "saved_models"),
    )
    # Shadow bot dirs
    shadow_subdirs = (
        ("logs", "debug"),
        ("features", "daily"),
        ("features", "weekly"),
        ("models",),
        ("charts",),
        ("backup",),
    )
    targets = [os.path.join(WORK_DIR, *parts) for parts in trading_subdirs]
    targets += [os.path.join(SHADOW_WORKDIR, *parts) for parts in shadow_subdirs]
    for target in targets:
        os.makedirs(target, exist_ok=True)

# ── Process management ────────────────────────────────────────────────────────

def bot_is_running() -> bool:
    """Return True if a process whose command line matches the trading bot exists.

    Uses `pgrep -f` with the bot-specific path fragment
    "trading_bot/Stock_Bot/StockTrading".  Returns False when pgrep finds
    nothing, cannot be launched, or times out.
    """
    try:
        r = subprocess.run(
            ["pgrep", "-f", "trading_bot/Stock_Bot/StockTrading"],
            capture_output=True, timeout=5
        )
    except (OSError, subprocess.SubprocessError):
        # pgrep missing or timed out → report "not running"; other exceptions
        # (e.g. KeyboardInterrupt) are no longer swallowed.
        return False
    return r.returncode == 0

def shadow_bot_is_running() -> bool:
    """Return True if a process whose command line matches the shadow bot exists.

    Uses `pgrep -f` with the bot-specific path fragment
    "shadow_bot/Stock_Bot/StockTrading".  Returns False when pgrep finds
    nothing, cannot be launched, or times out.
    """
    try:
        r = subprocess.run(
            ["pgrep", "-f", "shadow_bot/Stock_Bot/StockTrading"],
            capture_output=True, timeout=5
        )
    except (OSError, subprocess.SubprocessError):
        # pgrep missing or timed out → report "not running"; other exceptions
        # (e.g. KeyboardInterrupt) are no longer swallowed.
        return False
    return r.returncode == 0

def start_bot() -> int | None:
    """Launch the primary trading bot unless it is already running.

    The process is started in its own session with stdout and stderr appended
    to BOT_LOG.  Returns the new PID, or None when the bot was already running
    or could not be started.
    """
    if bot_is_running():
        log("start_bot: already running — skip")
        return None

    log("Starting trading bot...")
    try:
        with open(BOT_LOG, "a") as sink:
            child = subprocess.Popen(
                [VENV_PYTHON, BOT_SCRIPT, *BOT_ARGS],
                cwd=WORK_DIR,
                stdout=sink,
                stderr=sink,
                start_new_session=True,
            )
        pid = child.pid
        log(f"Trading bot started PID {pid}")
        return pid
    except Exception as err:
        log(f"Trading bot start failed: {err}")
        return None

def start_shadow_bot() -> int | None:
    """Launch the shadow bot with blank Alpaca credentials unless already running.

    The process is started in its own session with stdout and stderr appended
    to SHADOW_LOG.  Returns the new PID, or None when the bot was already
    running or could not be started.
    """
    if shadow_bot_is_running():
        log("start_shadow_bot: already running — skip")
        return None

    log("Starting shadow bot...")
    # Blank Alpaca keys → disables WebSocket stream (avoids Alpaca connection limit)
    child_env = {
        **os.environ,
        "ALPACA_API_KEY": "",
        "ALPACA_SECRET_KEY": "",
    }
    try:
        with open(SHADOW_LOG, "a") as sink:
            child = subprocess.Popen(
                [VENV_PYTHON, SHADOW_SCRIPT, *SHADOW_ARGS],
                cwd=SHADOW_WORKDIR,
                env=child_env,
                stdout=sink,
                stderr=sink,
                start_new_session=True,
            )
        pid = child.pid
        log(f"Shadow bot started PID {pid}")
        return pid
    except Exception as err:
        log(f"Shadow bot start failed: {err}")
        return None

def dashboard_is_running() -> bool:
    """Return True if a process whose command line contains "dashboard_server" exists.

    Uses `pgrep -f`.  Returns False when pgrep finds nothing, cannot be
    launched, or times out.
    """
    try:
        r = subprocess.run(
            ["pgrep", "-f", "dashboard_server"],
            capture_output=True, timeout=5
        )
    except (OSError, subprocess.SubprocessError):
        # pgrep missing or timed out → report "not running"; other exceptions
        # (e.g. KeyboardInterrupt) are no longer swallowed.
        return False
    return r.returncode == 0

def start_dashboard() -> int | None:
    """Launch the dashboard server unless it is already running.

    Does nothing (apart from logging) when DASHBOARD_SCRIPT does not exist.
    The log directory is created on demand and the process is started in its
    own session with stdout and stderr appended to DASHBOARD_LOG.  Returns the
    new PID, or None when nothing was started.
    """
    if dashboard_is_running():
        log("start_dashboard: already running — skip")
        return None

    script_present = os.path.exists(DASHBOARD_SCRIPT)
    if not script_present:
        log(f"start_dashboard: script not found at {DASHBOARD_SCRIPT}")
        return None

    log("Starting dashboard server...")
    try:
        log_dir = os.path.dirname(DASHBOARD_LOG)
        os.makedirs(log_dir, exist_ok=True)
        with open(DASHBOARD_LOG, "a") as sink:
            child = subprocess.Popen(
                [VENV_PYTHON, DASHBOARD_SCRIPT],
                cwd=WORK_DIR,
                stdout=sink,
                stderr=sink,
                start_new_session=True,
            )
        pid = child.pid
        log(f"Dashboard server started PID {pid} → http://0.0.0.0:8080")
        return pid
    except Exception as err:
        log(f"Dashboard server start failed: {err}")
        return None

# ── GitHub command bridge ─────────────────────────────────────────────────────

def get_cmd():
    """
    Fetch and decode cmd.json through the API contents endpoint.

    The "content" field of the response is base64-decoded and parsed as JSON.
    Returns (parsed_cmd, sha) on success, or (None, "") when the response has
    no "content" field or the content cannot be decoded/parsed (logged).
    """
    reply = api_get("Stock_Bot/jackal/commands/cmd.json")
    if "content" in reply:
        try:
            decoded = base64.b64decode(reply["content"]).decode()
            parsed = json.loads(decoded)
            return parsed, reply.get("sha", "")
        except Exception as err:
            log(f"cmd.json parse error: {err}")
    return None, ""

def push_result(cmd_id, exit_code, output, result_sha):
    """Push result.json for *cmd_id* to GitHub and return the SHA to cache.

    *result_sha* is the caller's cached blob SHA of result.json.  If the first
    PUT fails, the current SHA is re-read from GitHub and, when it differs
    from the cached one, the PUT is retried once with it.  Without this a
    stale or empty cached SHA would make every later push fail until the
    watcher is restarted.

    Returns the new blob SHA on success; on failure, the SHA used for the last
    attempt (as returned by api_put).
    """
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    result = {
        "cmd_id":      cmd_id,
        "executed_at": now,
        "exit_code":   exit_code,
        "output":      output,
    }
    content = json.dumps(result, indent=2).encode()
    msg = f"vm: result for {cmd_id}"
    path = "Stock_Bot/jackal/feedback/result.json"

    ok, new_sha, commit = api_put(path, content, msg, result_sha)
    if not ok:
        # The contents API rejects an update whose sha does not match the
        # current blob; refresh it and try once more.
        current = api_get(path)
        fresh_sha = current.get("sha", "") if isinstance(current, dict) else ""
        if fresh_sha != result_sha:
            log("Result push failed — retrying with refreshed SHA")
            ok, new_sha, commit = api_put(path, content, msg, fresh_sha)

    if ok:
        log(f"Result pushed ({commit})")
    else:
        log("Result push FAILED")
    return new_sha

def execute_cmd(cmd):
    """Run the shell payload of *cmd* with bash and return (exit_code, output).

    The payload is written to a private temporary script, executed with
    cwd=WORK_DIR and a 300 second timeout, and the script is removed
    afterwards.  Output is stdout followed by stderr, truncated to 50000
    characters.  Never raises: a timeout yields exit code 124, any other
    failure (including a non-string payload) yields exit code 1 with an
    "ERROR: ..." message as output.

    SECURITY: the payload is executed verbatim.  It comes from cmd.json in the
    GitHub repository, so anyone able to write to that file can run arbitrary
    commands on this machine as the watcher's user.
    """
    import tempfile  # local import: only this function needs it

    payload = cmd.get("payload", "echo no_payload")
    cmd_id  = cmd.get("id", "unknown")
    log(f"Executing: {cmd_id}")

    if not isinstance(payload, str):
        output    = f"ERROR: payload must be a string, got {type(payload).__name__}"
        exit_code = 1
        log(f"  exit={exit_code}  output={len(output)} chars")
        return exit_code, output

    script_path = None
    try:
        # mkstemp creates a new, owner-only file with an unpredictable name,
        # unlike a fixed path in the world-writable /tmp directory.
        fd, script_path = tempfile.mkstemp(prefix="watcher_cmd_", suffix=".sh")
        with os.fdopen(fd, "w") as f:
            f.write("#!/bin/bash\n")
            f.write(payload)
        os.chmod(script_path, 0o700)

        result = subprocess.run(
            ["bash", script_path],
            capture_output=True, text=True,
            errors="replace",  # undecodable output bytes must not lose the result
            timeout=300,
            cwd=WORK_DIR,
        )
        output    = (result.stdout + result.stderr)[:50000]
        exit_code = result.returncode
    except subprocess.TimeoutExpired:
        output    = "ERROR: command timed out after 300 seconds"
        exit_code = 124
    except Exception as e:
        output    = f"ERROR: {e}"
        exit_code = 1
    finally:
        # Do not leave command scripts behind.
        if script_path is not None:
            try:
                os.remove(script_path)
            except OSError:
                pass

    log(f"  exit={exit_code}  output={len(output)} chars")
    return exit_code, output

# ── Main loop ──────────────────────────────────────────────────────────────────
def main():
    """Run the watcher loop forever.

    Every POLL_SECS seconds:
      1. fetch origin/main and compare it with the local HEAD;
      2. sync the bot code files when the two differ;
      3. read cmd.json and execute it when it is new and not marked executed,
         then push the result;
      4. restart the trading bot if it is down (checked every KEEPALIVE seconds);
      5. restart the dashboard server if it is down (same interval).

    The GitHub stages (1-3) and the keepalive stages (4-5) are guarded
    separately so that an error while syncing or handling a command cannot
    skip the health checks of that cycle.
    """
    log(f"Watcher {VERSION} starting — poll={POLL_SECS}s keepalive={KEEPALIVE}s dashboard=:8080 workdir={WORK_DIR}")
    os.makedirs(WORK_DIR, exist_ok=True)
    ensure_dirs()

    last_keepalive      = 0   # fires immediately on first loop
    last_dash_keepalive = 0   # fires immediately on first loop
    last_cmd_id         = _load_last_cmd_id()  # persisted — prevents re-execution on restart
    if last_cmd_id:
        log(f"Restored last_cmd_id: {last_cmd_id}")

    # Pre-fetch result SHA once at startup
    d = api_get("Stock_Bot/jackal/feedback/result.json")
    result_sha = d.get("sha", "")
    log(f"Cached result.json SHA: {result_sha[:10] or 'none'}")

    while True:
        now = time.time()

        try:
            # ── 1. Fetch remote ──────────────────────────────────────────
            git_fetch()
            local_sha  = git_local_sha()
            remote_sha = git_remote_sha()

            # ── 2. Sync code if remote has new commits ───────────────────
            if local_sha != remote_sha and remote_sha:
                log("New commits detected — syncing code")
                sync_code()
                ensure_dirs()

            # ── 3. Check for commands EVERY cycle ───────────────────────
            cmd, _ = get_cmd()
            # cmd.json may hold any JSON value; only a non-empty object is a command.
            if isinstance(cmd, dict) and cmd:
                cmd_id   = cmd.get("id", "unknown")
                executed = cmd.get("executed", False)

                if not executed and cmd_id != last_cmd_id:
                    last_cmd_id = cmd_id
                    _save_last_cmd_id(cmd_id)  # persist before executing
                    exit_code, output = execute_cmd(cmd)
                    result_sha = push_result(cmd_id, exit_code, output, result_sha)
        except Exception as e:
            log(f"Main loop error: {e}")

        try:
            # ── 4. Trading bot keepalive (every 120s) ───────────────────
            if now - last_keepalive >= KEEPALIVE:
                last_keepalive = now
                if not bot_is_running():
                    log("KEEPALIVE: trading bot down — restarting")
                    start_bot()

            # ── 5. Shadow bot keepalive — DISABLED (shadow_bot retired 2026-04-30) ─
            # shadow_bot is permanently shut down; trading_bot runs alone as 105-agent system

            # ── 6. Dashboard server keepalive (every 120s) ───────────────
            if now - last_dash_keepalive >= KEEPALIVE:
                last_dash_keepalive = now
                if not dashboard_is_running():
                    log("KEEPALIVE: dashboard server down — restarting")
                    start_dashboard()
        except Exception as e:
            log(f"Keepalive error: {e}")

        time.sleep(POLL_SECS)

# Start the watcher loop only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()
