#!/usr/bin/env python3
"""
Market Oracle — Event Impact Analyzer
Fetches news + market data, builds a structured context for three-layer impact analysis.
This script collects all data and outputs a structured analysis prompt/context
that the AI agent uses to generate the final prediction.
"""

import argparse
import json
import os
import re
import subprocess
import sys
import urllib.request
from datetime import datetime
from html.parser import HTMLParser


class HTMLTextExtractor(HTMLParser):
    """Collect the text of an HTML document, dropping non-content elements.

    Text found anywhere inside ``<script>``, ``<style>``, ``<nav>``,
    ``<footer>`` or ``<header>`` is discarded. Every other text node is
    stripped of surrounding whitespace and, if non-empty, kept; ``get_text()``
    returns the kept fragments joined with newlines.
    """

    # Elements whose whole subtree is ignored.
    _SKIPPED_TAGS = frozenset(('script', 'style', 'nav', 'footer', 'header'))

    def __init__(self):
        super().__init__()
        self._result = []
        # Number of currently open skipped elements. A counter (rather than a
        # flag) keeps text suppressed until the *outermost* skipped element
        # closes, e.g. "<header><nav>..</nav> banner </header>".
        self._skip_depth = 0

    def handle_starttag(self, tag, attrs):
        if tag in self._SKIPPED_TAGS:
            self._skip_depth += 1

    def handle_endtag(self, tag):
        # Ignore stray closing tags so the depth never goes negative.
        if tag in self._SKIPPED_TAGS and self._skip_depth > 0:
            self._skip_depth -= 1

    def handle_data(self, data):
        if self._skip_depth:
            return
        text = data.strip()
        if text:
            self._result.append(text)

    def get_text(self):
        """Return all collected text fragments, one per line."""
        return '\n'.join(self._result)


def extract_text_from_url(url, timeout=15):
    """Fetch ``url`` and return up to 3000 characters of its text content.

    Args:
        url: Address of the page to fetch.
        timeout: Timeout in seconds passed to ``urllib.request.urlopen``.

    Returns:
        The text extracted by ``HTMLTextExtractor``, truncated to 3000
        characters. This function never raises: on any failure (malformed
        URL, network error, ...) it returns a string of the form
        ``"[无法提取URL内容: <error>]"`` instead.
    """
    try:
        # Built inside the try block: Request() raises ValueError for a URL
        # without a usable scheme, and that must follow the same in-band
        # error contract as network failures.
        req = urllib.request.Request(url, headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
                           'AppleWebKit/537.36 (KHTML, like Gecko) '
                           'Chrome/131.0.0.0 Safari/537.36'
        })
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            raw = resp.read()
            charset = resp.headers.get_content_charset()

        if not charset:
            # No charset in the Content-Type header: look for a <meta>
            # declaration near the top of the document before assuming UTF-8.
            meta = re.search(rb'<meta[^>]+charset=["\']?\s*([A-Za-z0-9_.:-]+)',
                             raw[:4096], re.IGNORECASE)
            charset = meta.group(1).decode('ascii') if meta else 'utf-8'

        try:
            html = raw.decode(charset, errors='replace')
        except LookupError:
            # Declared charset is not a codec Python knows about.
            html = raw.decode('utf-8', errors='replace')

        extractor = HTMLTextExtractor()
        extractor.feed(html)
        extractor.close()  # flush any text the parser is still buffering
        # Truncate to a reasonable length (slicing is a no-op on short text).
        return extractor.get_text()[:3000]
    except Exception as e:
        return f"[无法提取URL内容: {e}]"


def run_tool(script_name, args_list):
    """Run a sibling tool script and return its parsed JSON output.

    The script is looked up in the directory containing this file and is run
    with the current interpreter as ``<script> <args...> --json`` under a
    30-second timeout.

    Args:
        script_name: File name of the tool script.
        args_list: Command-line arguments to pass before ``--json``.

    Returns:
        The value decoded from the tool's stdout, or ``None`` (after printing
        a ``[WARN]`` line to stderr) if the tool could not be started, timed
        out, exited non-zero, printed nothing, or printed invalid JSON.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    script_path = os.path.join(base_dir, script_name)
    cmd = [sys.executable, script_path, *args_list, '--json']

    # Make the child write UTF-8 and decode it as UTF-8 here, so non-ASCII
    # JSON does not depend on the locale encoding of either process.
    child_env = dict(os.environ, PYTHONIOENCODING='utf-8')
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
            env=child_env,
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        print(f"[WARN] {script_name} timed out", file=sys.stderr)
        return None
    except OSError as e:
        # Interpreter missing, not executable, etc.
        print(f"[WARN] {script_name} error: {e}", file=sys.stderr)
        return None

    if result.returncode != 0 or not result.stdout.strip():
        print(f"[WARN] {script_name} stderr: {result.stderr[:200]}", file=sys.stderr)
        return None

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"[WARN] {script_name} error: {e}", file=sys.stderr)
        return None


# Asset correlation knowledge base.
#
# Maps an asset key to a dict with:
#   'positive'    - names the table lists as positively correlated
#   'negative'    - names the table lists as negatively correlated
#   'description' - Chinese-language summary of the relationship
# build_analysis_context() copies the entry of every affected asset into its
# output; assets without an entry here (e.g. 'silver', 'eth') are skipped.
CORRELATION_MAP = {
    'gold': {
        'positive': ['silver', 'platinum', 'usdjpy_inverse'],
        'negative': ['usdx', 'spy'],
        'description': '黄金与美元指数负相关，与白银正相关，避险情绪推动上涨'
    },
    'oil': {
        'positive': ['brent', 'natgas', 'energy_stocks'],
        'negative': ['airline_stocks', 'consumer_discretionary'],
        'description': '原油上涨推高通胀预期，利空航空和消费板块，利多能源板块'
    },
    'btc': {
        'positive': ['eth', 'sol', 'risk_appetite'],
        'negative': ['usdx', 'bonds'],
        'description': '加密货币与风险偏好正相关，美元走强时承压'
    },
    'spy': {
        'positive': ['qqq', 'dia', 'risk_appetite'],
        'negative': ['gold', 'vix', 'bonds'],
        'description': '美股上涨反映风险偏好，与避险资产负相关'
    }
}

# Event type classification keywords.
#
# Maps a category label (Chinese; printed as-is in the report) to the
# keywords that trigger it. classify_event() compares the keywords against
# the event text case-insensitively and reports categories in the order they
# are declared here, so the order of this dict is visible in the output.
EVENT_TYPES = {
    '央行/货币政策': ['降息', '加息', '美联储', 'Fed', 'ECB', '央行', '利率', 'rate', '货币政策',
                      'QE', '量化宽松', 'taper', '缩表'],
    '地缘政治': ['战争', '冲突', '制裁', '军事', '导弹', '攻击', '入侵', 'war', 'sanction',
                  '中东', '俄罗斯', '乌克兰', '台海', '朝鲜'],
    '能源/OPEC': ['OPEC', '减产', '增产', '石油', '原油', '油价', '管道', '炼油', '产油国'],
    '加密货币监管': ['SEC', '监管', '合规', 'ETF', '比特币ETF', '交易所', '稳定币', '币安',
                      'Coinbase', 'regulation', 'crypto ban'],
    '宏观经济数据': ['GDP', 'CPI', 'PPI', '非农', '就业', '失业率', 'PMI', '通胀', '零售',
                      '消费', 'inflation', 'employment', 'payroll'],
    '企业/行业': ['财报', '业绩', '营收', '利润', '裁员', '并购', '收购', 'IPO', '回购',
                    'earnings', 'revenue', 'merger', 'acquisition'],
    '自然灾害/供应链': ['地震', '飓风', '洪水', '干旱', '供应链', '芯片', '短缺', '港口',
                         'supply chain', 'shortage'],
}


def classify_event(event_text):
    """Classify an event into the categories of ``EVENT_TYPES`` by keyword.

    Matching is case-insensitive:

    * Keywords containing non-ASCII characters (the Chinese terms) match as
      plain substrings, since Chinese text has no word separators.
    * ASCII keywords must start at a word start (not directly after an ASCII
      letter), so 'war' no longer fires on "software" and 'rate' no longer
      fires on "corporate". Longer forms such as "rates" still match.
    * ASCII keywords written in ALL CAPS in the table are treated as
      acronyms and must also end the word (an optional plural "s" is
      allowed), so 'SEC' matches "SEC" and 'ETF' matches "ETFs", but 'SEC'
      does not match "sector".

    Args:
        event_text: Free-form description of the event.

    Returns:
        The matching category labels in ``EVENT_TYPES`` order, or
        ``['其他/综合']`` when nothing matches.
    """
    text_lower = event_text.lower()

    def keyword_hit(keyword):
        needle = keyword.lower()
        if not needle.isascii():
            return needle in text_lower
        pattern = r'(?<![a-z])' + re.escape(needle)
        if keyword.isupper():
            pattern += r's?(?![a-z])'
        return re.search(pattern, text_lower) is not None

    matched_types = [
        event_type
        for event_type, keywords in EVENT_TYPES.items()
        if any(keyword_hit(kw) for kw in keywords)
    ]
    return matched_types or ['其他/综合']


def identify_affected_assets(event_text):
    """Identify which assets are most likely affected by the event.

    Each asset has a list of trigger keywords, matched case-insensitively:

    * Keywords containing non-ASCII characters (the Chinese terms) match as
      plain substrings, since Chinese text has no word separators.
    * ASCII keywords must start at a word start (not directly after an ASCII
      letter), so 'oil' no longer fires on "turmoil". Longer forms such as
      "stocks" still match.
    * ASCII keywords written in ALL CAPS in the table are treated as
      tickers/acronyms and must also end the word (an optional plural "s" is
      allowed), so 'ETH' no longer fires on "whether".

    Args:
        event_text: Free-form description of the event.

    Returns:
        A list of unique asset keys in the (stable) order of the keyword
        table. If no asset is detected, the major assets
        ``['gold', 'oil', 'btc', 'spy']`` are returned.
    """
    text_lower = event_text.lower()

    # NOTE(review): the single character '币' also occurs in unrelated words
    # such as '货币政策' or '人民币', so those texts are tagged as btc. Left
    # unchanged here; confirm whether that is intended.
    asset_keywords = {
        'gold': ['黄金', 'gold', '避险', '贵金属'],
        'silver': ['白银', 'silver'],
        'oil': ['原油', '石油', 'oil', 'OPEC', '减产', '增产', '能源'],
        'btc': ['比特币', 'bitcoin', 'BTC', '加密', 'crypto', '数字货币', '币'],
        'eth': ['以太坊', 'ethereum', 'ETH'],
        'spy': ['美股', 'S&P', '标普', '股市', '股票', 'stock'],
        'qqq': ['科技股', '纳斯达克', 'nasdaq', 'tech'],
        'usdx': ['美元', 'dollar', 'USD', '汇率'],
    }

    def keyword_hit(keyword):
        needle = keyword.lower()
        if not needle.isascii():
            return needle in text_lower
        pattern = r'(?<![a-z])' + re.escape(needle)
        if keyword.isupper():
            pattern += r's?(?![a-z])'
        return re.search(pattern, text_lower) is not None

    # Each asset is appended at most once, so no de-duplication is needed and
    # the result keeps the table order instead of an arbitrary set order.
    affected = [
        asset
        for asset, keywords in asset_keywords.items()
        if any(keyword_hit(kw) for kw in keywords)
    ]

    # If no specific assets detected, include all major ones
    return affected or ['gold', 'oil', 'btc', 'spy']


def build_analysis_context(event_text, market_data, news_data, focus_assets):
    """Build a structured context for the AI to analyze.

    Args:
        event_text: Free-form description of the event.
        market_data: Parsed output of the market data tool (a dict with an
            ``'assets'`` mapping), or ``None``. Any non-dict value is treated
            as "no data".
        news_data: Parsed output of the news tool (a dict with an
            ``'articles'`` list), or ``None``. Any non-dict value is treated
            as "no data".
        focus_assets: Asset keys the user explicitly asked for; may be empty
            or ``None``.

    Returns:
        A JSON-serialisable dict with the keys ``'event'``,
        ``'affected_assets'``, ``'market_snapshot'``, ``'related_news'``,
        ``'correlations'`` and ``'analysis_framework'``.
    """
    event_types = classify_event(event_text)
    affected_assets = identify_affected_assets(event_text)

    # Merge focus with detected assets. dict.fromkeys() removes duplicates
    # while keeping first-seen order (focus assets first), so the output is
    # stable from run to run.
    all_assets = list(dict.fromkeys([*(focus_assets or []), *affected_assets]))

    market_snapshot = market_data.get('assets', {}) if isinstance(market_data, dict) else {}

    articles = news_data.get('articles') if isinstance(news_data, dict) else None
    related_news = [
        {
            # Tolerate articles that lack a field instead of raising KeyError.
            'title': article.get('title', ''),
            'source': article.get('source', ''),
            'published': article.get('published', ''),
        }
        for article in (articles or [])[:8]
        if isinstance(article, dict)
    ]

    context = {
        'event': {
            'text': event_text,
            'types': event_types,
            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        },
        'affected_assets': all_assets,
        'market_snapshot': market_snapshot,
        'related_news': related_news,
        'correlations': {
            asset: CORRELATION_MAP[asset]
            for asset in all_assets if asset in CORRELATION_MAP
        },
        'analysis_framework': {
            'short_term': '立刻 — 1小时内: 直接市场反应、情绪冲击、成交量异动',
            'medium_term': '1-12小时: 二次传导、跨市场联动、后续新闻事件',
            'long_term': '12-24小时: 新均衡价格、政策回应、衍生事件预测',
        }
    }

    return context


def _is_number(value):
    """Return True for int/float values (bool excluded)."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _format_price(price):
    """Render a numeric price as a dollar string; precision depends on magnitude."""
    magnitude = abs(price)  # so negative prices use the same tiers as positive ones
    if magnitude >= 10000:
        return f"${price:,.0f}"
    if magnitude >= 1:
        return f"${price:,.2f}"
    return f"${price:.6f}"


def _format_change(pct):
    """Render a percentage change with a coloured marker for its direction."""
    if pct > 0:
        return f"🟢 +{pct:.2f}%"
    if pct < 0:
        return f"🔴 {pct:.2f}%"
    return f"⚪ {pct:.2f}%"


def format_text_report(context):
    """Format the analysis context as a readable text report.

    Args:
        context: Dict shaped like the return value of
            ``build_analysis_context``. ``'event'`` and ``'affected_assets'``
            are required; ``'market_snapshot'``, ``'related_news'`` and
            ``'correlations'`` are optional and their sections are omitted
            when empty.

    Returns:
        The report as a single newline-joined string.

    Market rows whose ``'price'`` is missing or not a number are skipped, and
    a missing or non-numeric ``'change_pct'`` is shown as 0, so one
    incomplete quote cannot abort the whole report.
    """
    event = context['event']
    lines = [
        "═" * 70,
        f"📰 事件: {event['text']}",
        f"⏰ 分析时间: {event['timestamp']}",
        f"🏷️  事件分类: {', '.join(event['types'])}",
        f"🎯 影响资产: {', '.join(context['affected_assets'])}",
        "═" * 70,
    ]

    # Market snapshot
    market = context.get('market_snapshot', {})
    if market:
        lines.append("\n📊 当前市场快照")
        lines.append("─" * 70)
        lines.append(f"  {'资产':<20} {'价格':>12} {'涨跌':>12} {'趋势'}")
        lines.append(f"  {'─' * 64}")

        for key, data in market.items():
            if not isinstance(data, dict) or not _is_number(data.get('price')):
                continue
            name = data.get('display_name', key)
            price_str = _format_price(data['price'])
            pct = data.get('change_pct', 0)
            change_str = _format_change(pct if _is_number(pct) else 0)
            trend = data.get('trend', '')
            lines.append(f"  {name:<20} {price_str:>12} {change_str:>12} {trend}")

    # Related news
    news = context.get('related_news', [])
    if news:
        lines.append(f"\n📰 相关新闻 (最近 {len(news)} 条)")
        lines.append("─" * 70)
        for i, n in enumerate(news, 1):
            lines.append(f"  [{i}] {n['title']}")
            lines.append(f"      来源: {n['source']} | {n['published']}")

    # Correlations
    corr = context.get('correlations', {})
    if corr:
        lines.append("\n🔗 资产关联性")
        lines.append("─" * 70)
        for asset, info in corr.items():
            desc = info.get('description', '')
            if desc:
                lines.append(f"  • {asset}: {desc}")

    # Analysis framework (static text)
    lines.extend([
        "\n" + "═" * 70,
        "📋 三层影响分析框架",
        "═" * 70,
        "",
        "🔴 短期影响 (立刻 — 1小时内)",
        "   分析维度: 直接价格反应 | 情绪冲击 | 成交量异动 | 即时关联资产联动",
        "   → 基于事件类型和历史模式，预测各资产的即时方向和幅度",
        "",
        "🟡 中期影响 (1 — 12小时)",
        "   分析维度: 二次传导效应 | 跨市场蔓延 | 机构仓位调整 | 后续新闻事件",
        "   → 预测哪些看似无关的市场会被波及，以及可能出现的后续新闻",
        "",
        "🟢 长期影响 (12 — 24小时)",
        "   分析维度: 新均衡价格区间 | 政策回应 | 供应链效应 | 板块轮动 | 衍生事件",
        "   → 预测事件会催生哪些新事件，形成怎样的连锁反应链",
        "",
        "⚡ 关联链: [原始事件] → [直接冲击] → [二次传导] → [衍生事件]",
        "",
        "⚠️  风险提示: 以上分析仅供参考，不构成投资建议。市场受多重因素影响，",
        "   任何单一事件的影响都可能被其他因素对冲或放大。",
    ])

    return '\n'.join(lines)


def main():
    """Command-line entry point.

    Collects the event text (``--event`` and/or text extracted from
    ``--url``), optionally fetches market data and related news through the
    sibling tool scripts, builds the analysis context and prints it to stdout
    as a text report or as JSON. Progress and warnings go to stderr. Exits
    with status 1 when no event text is available.
    """
    parser = argparse.ArgumentParser(description='Event impact analysis for financial markets')
    parser.add_argument('--event', '-e', help='Event description text')
    parser.add_argument('--url', '-u', help='News article URL to analyze')
    parser.add_argument('--focus', '-f', help='Comma-separated asset focus (e.g., gold,oil,btc)')
    parser.add_argument('--market-data', '-m', default='auto',
                        help='"auto" to fetch live data, or path to saved JSON')
    parser.add_argument('--output', '-o', default='text', choices=['text', 'json'],
                        help='Output format (default: text)')
    parser.add_argument('--json', action='store_true', help='Alias for --output json')
    parser.add_argument('--skip-news', action='store_true', help='Skip news fetching')
    parser.add_argument('--skip-market', action='store_true', help='Skip market data fetching')

    args = parser.parse_args()

    if args.json:
        args.output = 'json'

    # Get event text
    event_text = args.event or ''
    if args.url:
        print(f"📥 提取URL内容: {args.url}", file=sys.stderr)
        url_content = extract_text_from_url(args.url)
        if url_content.startswith('[无法提取URL内容'):
            # extract_text_from_url() reports failures in-band; do not feed
            # the error message into the analysis as if it were the article.
            print(f"[WARN] {url_content}", file=sys.stderr)
        elif event_text:
            event_text = event_text + '\n\n原文摘要:\n' + url_content
        else:
            event_text = url_content

    if not event_text:
        print("ERROR: 请提供 --event 或 --url 参数", file=sys.stderr)
        sys.exit(1)

    # Parse focus assets, ignoring empty entries such as in "gold,,oil,"
    focus_assets = []
    if args.focus:
        focus_assets = [a.strip().lower() for a in args.focus.split(',') if a.strip()]

    # Fetch market data
    market_data = None
    if not args.skip_market:
        if args.market_data == 'auto':
            print("📊 获取实时市场数据...", file=sys.stderr)
            # Detected assets first, then user focus, then the core reference
            # assets that are always included; dict.fromkeys() drops
            # duplicates while keeping that order.
            detected = identify_affected_assets(event_text)
            core_assets = ['gold', 'oil', 'btc', 'spy', 'usdx']
            fetch_assets = list(dict.fromkeys([*detected, *focus_assets, *core_assets]))

            asset_str = ','.join(fetch_assets)
            market_data = run_tool('market_data.py', ['--assets', asset_str, '--period', '1d', '--interval', '15m'])
        else:
            try:
                # utf-8-sig reads UTF-8 with or without a byte-order mark.
                with open(args.market_data, 'r', encoding='utf-8-sig') as f:
                    market_data = json.load(f)
            except (OSError, ValueError) as e:
                # Missing/unreadable file or invalid JSON: continue without
                # market data, but say so instead of ignoring it silently.
                print(f"[WARN] cannot load market data from {args.market_data}: {e}", file=sys.stderr)

    # Fetch related news
    news_data = None
    if not args.skip_news:
        print("📰 获取相关新闻...", file=sys.stderr)
        # Search with the user's own description when there is one (otherwise
        # the extracted page text), whitespace collapsed, first 50 characters.
        search_terms = ' '.join((args.event or event_text).split())[:50]
        news_data = run_tool('news_fetch.py', ['--query', search_terms, '--limit', '8'])

    # Build analysis context
    context = build_analysis_context(event_text, market_data, news_data, focus_assets)

    # Output
    if args.output == 'json':
        print(json.dumps(context, ensure_ascii=False, indent=2))
    else:
        print(format_text_report(context))


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
