#!/usr/bin/env python3
"""
Shadow LLM Researcher
Runs during market hours. Researches improvements to LightGBM trading bots.
Writes structured summaries to /opt/services/shadow_llm/knowledge/
"""

import json, time, requests, os
from datetime import datetime
import urllib.request

# --- Configuration -----------------------------------------------------------
OLLAMA   = "http://localhost:11434/api/chat"   # local Ollama chat endpoint
MODEL    = "deepseek-r1:14b"                   # reasoning model; emits <think> tags we strip later
BASE     = "/opt/services/shadow_llm"          # where knowledge/finding_*.json files are written
SHADOW   = "/opt/services/bots/shadow_bot/Stock_Bot"  # shadow bot install; read-only (metrics)

# System prompt given to the model on every request. Constrains the model to
# "safe zone" proposals only and forces structured JSON output (no raw code).
# NOTE: this is runtime prompt text sent to the LLM — edit with care.
SYSTEM = """You are a quantitative finance researcher specializing in LightGBM ensemble trading systems.
Your job is to research improvements to a specific stock trading bot and write structured findings.

The bot uses:
- 32 LightGBM agents + 1 MetaLearner (33 total)
- Features: price momentum, ATR, volume, slope, trend consistency, sector data
- Agents compete: best performers share parameters with weaker ones (IC-based ranking)
- Trades long and short, 5-second cooldown per agent per symbol
- SQLite databases for historical bar data

You ONLY propose changes in these safe zones:
1. New feature columns (technical indicators derivable from OHLCV data)
2. Agent hyperparameters (stop_loss_pct, take_profit_pct, min_confidence, max_positions)
3. LightGBM training parameters (n_estimators 50-200, learning_rate 0.01-0.1, num_leaves 15-63)
4. IC weighting adjustments

You NEVER touch:
- Trading pass execution logic
- WebSocket or database handling
- State save/load code
- MetaLearner architecture
- Any networking or API code

Always output JSON. Never output raw Python code. Always explain your reasoning.
"""

# Fixed question bank; research_topic() cycles through these round-robin
# (index modulo len), one topic per wakeup.
RESEARCH_TOPICS = [
    "What technical indicators beyond price momentum and ATR improve LightGBM stock prediction accuracy?",
    "What are optimal LightGBM hyperparameters for high-frequency intraday stock prediction?",
    "How does Relative Strength Index (RSI) improve ensemble trading agent IC scores?",
    "What is the optimal stop-loss percentage for LightGBM momentum agents on US equities?",
    "How does Bollinger Band width as a feature affect trading signal quality?",
    "What rolling window sizes for slope features improve predictive accuracy?",
    "How does VWAP deviation improve intraday LightGBM trading signals?",
    "What is the optimal min_confidence threshold for different agent personality types?",
]

def ask(prompt, system=SYSTEM):
    """Send one chat request to the local Ollama server and return the reply text.

    Args:
        prompt: User-message content for this turn.
        system: System prompt; defaults to the module-level SYSTEM persona.

    Returns:
        The model's reply content as a string, or a string starting with
        "ERROR: " on any failure (network, timeout, HTTP error, bad JSON).
        Never raises — callers depend on always getting a string back so the
        hourly research loop keeps running even when the backend is down.
    """
    try:
        r = requests.post(OLLAMA, json={
            "model": MODEL,
            "messages": [
                {"role": "system", "content": system},
                {"role": "user", "content": prompt}
            ],
            "stream": False,
            # Low temperature: conservative, reproducible research output.
            "options": {"temperature": 0.1}
        }, timeout=300)
        # Fix: without this, a 4xx/5xx error body was parsed as if it were a
        # model reply; now it funnels into the "ERROR: ..." path instead.
        r.raise_for_status()
        return r.json().get("message", {}).get("content", "")
    except Exception as e:
        # Deliberate best-effort catch-all: downstream treats "ERROR: ..."
        # as a failed topic and moves on.
        return f"ERROR: {e}"

def load_shadow_metrics():
    """Load current shadow bot performance for prompt context.

    Reads the shadow bot's agent-state JSON and aggregates across agents.
    Keys starting with '__' are treated as bookkeeping entries, not agents.

    Returns:
        dict with 'total_pnl', 'total_trades', 'n_agents' when the state file
        exists and parses; otherwise an empty dict (best-effort — a missing or
        corrupt file is expected, e.g. before the bot's first state save).
    """
    metrics = {}
    state_path = f"{SHADOW}/logs/agent_state.json"
    try:
        if os.path.exists(state_path):
            # Fix: was json.load(open(...)) — the file handle was never closed.
            with open(state_path) as fh:
                d = json.load(fh)
            agents = [(k, v) for k, v in d.items() if not k.startswith('__')]
            metrics['total_pnl']    = sum(v.get('total_pnl', 0) for _, v in agents)
            metrics['total_trades'] = sum(v.get('trades', 0) for _, v in agents)
            metrics['n_agents']     = len(agents)
    except (OSError, ValueError, AttributeError, TypeError):
        # Narrowed from a bare `except:` (which also swallowed KeyboardInterrupt
        # and real bugs). OSError = unreadable file; ValueError covers
        # json.JSONDecodeError; Attribute/TypeError = unexpected JSON shape.
        pass
    return metrics

def research_topic(topic_idx):
    """Research one topic via the LLM and persist the finding to disk.

    Args:
        topic_idx: Monotonically increasing counter from the main loop;
            wrapped modulo len(RESEARCH_TOPICS) so topics cycle round-robin.

    Returns:
        The parsed finding dict on success, or None when the model's reply
        could not be parsed as JSON (or the file could not be written).
    """
    topic = RESEARCH_TOPICS[topic_idx % len(RESEARCH_TOPICS)]
    metrics = load_shadow_metrics()

    prompt = f"""
Research this question for our LightGBM trading bot:

{topic}

Current shadow bot performance:
{json.dumps(metrics, indent=2)}

Provide a structured JSON response with this exact format:
{{
    "topic": "...",
    "finding": "2-3 sentence summary of what you found",
    "proposed_change": {{
        "type": "feature|hyperparameter|training_param",
        "target": "what specifically to change",
        "current_value": "what it is now if known",
        "proposed_value": "what to change it to",
        "expected_improvement": "specific metric you expect to improve and by how much"
    }},
    "confidence": "low|medium|high",
    "reasoning": "why you believe this will help",
    "risk": "what could go wrong"
}}

Only output valid JSON. No markdown, no preamble.
"""
    response = ask(prompt)

    # Parse and save
    try:
        # deepseek-r1 wraps chain-of-thought in <think>...</think>;
        # keep only the text after the closing tag.
        if "</think>" in response:
            response = response.split("</think>")[-1].strip()
        finding = json.loads(response)
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        # Fix: the knowledge dir may not exist on first run — create it
        # instead of crashing on open().
        os.makedirs(f"{BASE}/knowledge", exist_ok=True)
        path = f"{BASE}/knowledge/finding_{ts}.json"
        # Fix: was json.dump(..., open(path,'w')) — handle never closed, so
        # the write was not guaranteed to be flushed.
        with open(path, 'w') as fh:
            json.dump({"timestamp": ts, "topic_idx": topic_idx, **finding}, fh, indent=2)
        print(f"[{datetime.now().strftime('%H:%M:%S')}] Saved finding: {finding.get('topic','?')[:60]}")
        return finding
    except Exception as e:
        # Model replies are untrusted text; log the head of the raw reply for
        # diagnosis and let the hourly loop continue.
        print(f"Parse error: {e}\nRaw: {response[:200]}")
        return None

def main():
    """Run the research loop forever: one topic per hour, cycling the topic list."""
    print(f"[{datetime.now().strftime('%H:%M:%S')}] Researcher starting")
    topic_idx = 0
    while True:
        # Result is persisted to disk inside research_topic; the return value
        # was previously bound to an unused local, so discard it here.
        research_topic(topic_idx)
        topic_idx += 1
        # Research one topic per hour during market hours
        time.sleep(3600)

# Entry point: start the hourly research daemon (runs until killed).
if __name__ == "__main__":
    main()
