"""
QuantClaw Tape Analysis

Ported from BebBot V3 — battle-tested computations preserved exactly.
Adapted from streaming trades to working with a single batch from REST API.
Pure functions: list of trade dicts in, dict of features out.
"""

import bisect
import numpy as np
from typing import Dict, List, Any, Optional


def compute_tape(trades: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Calculate tape/flow metrics from a batch of recent trades.

    Expects trades sorted by time (ascending).  Each trade dict has keys:
        time  — epoch milliseconds
        price — trade price (not read by this function)
        qty   — trade size; anything ``float()`` accepts (number or numeric string)
        side  — "Buy" or "Sell" (any value other than "Buy" is counted as a sell)

    The timestamp of the last trade is used as "now"; every window and bucket
    is measured backwards from it and includes that last trade.

    Returns:
        Dict with tape metrics: CVD, slopes, buy/sell ratio, flow imbalance,
        large trade ratio, bucket-based z-scores, and CVD acceleration
        (approximated from bucket slopes since we have no prior call state).
        With fewer than 10 trades the result of ``_empty_tape_metrics()`` is
        returned.  Bucket-based metrics and the acceleration metrics are None
        when fewer than 5 of the 20 five-minute buckets contain volume.
    """
    if not trades or len(trades) < 10:
        return _empty_tape_metrics()

    features: Dict[str, Any] = {}

    # The newest trade defines "now" for every window below.
    current_time = trades[-1].get("time", 0)

    # =========================================================================
    # O(n) SINGLE PASS: Build running totals for all subsequent O(log n) lookups
    # =========================================================================
    n = len(trades)
    trade_times = [0] * n
    trade_qtys = [0.0] * n  # parsed sizes, reused for the large-trade check
    running_cvd = [0.0] * n
    running_buy_vol = [0.0] * n
    running_sell_vol = [0.0] * n

    cvd = 0.0
    buy_vol_total = 0.0
    sell_vol_total = 0.0

    for i, t in enumerate(trades):
        trade_times[i] = t.get("time", 0)
        qty = float(t.get("qty", 0))
        trade_qtys[i] = qty

        if t.get("side", "") == "Buy":
            cvd += qty
            buy_vol_total += qty
        else:
            cvd -= qty
            sell_vol_total += qty

        running_cvd[i] = cvd
        running_buy_vol[i] = buy_vol_total
        running_sell_vol[i] = sell_vol_total

    def total_before(running: List[float], idx: int) -> float:
        """Running total accumulated by all trades strictly before index idx."""
        return running[idx - 1] if idx > 0 else 0.0

    features["tape.cvd"] = cvd

    # =========================================================================
    # O(log n) PER WINDOW: CVD slopes using binary search
    # =========================================================================
    window_key_map = {
        30: "tape.cvd_30s_slope",
        120: "tape.cvd_120s_slope",
        300: "tape.cvd_5m_slope",
        1500: "tape.cvd_25m_slope",
        3600: "tape.cvd_1h_slope",
        6000: "tape.cvd_100m_slope",
        14400: "tape.cvd_4h_slope",
    }

    for window_seconds, key in window_key_map.items():
        cutoff_time = current_time - (window_seconds * 1000)
        idx = bisect.bisect_left(trade_times, cutoff_time)
        # CVD change inside the window, divided by the nominal window length
        # (not by the time span actually covered by the batch).
        features[key] = (cvd - total_before(running_cvd, idx)) / window_seconds

    # =========================================================================
    # O(log n): 60s window features using binary search
    # =========================================================================
    idx_60s = bisect.bisect_left(trade_times, current_time - 60000)

    buy_volume_60s = buy_vol_total - total_before(running_buy_vol, idx_60s)
    sell_volume_60s = sell_vol_total - total_before(running_sell_vol, idx_60s)

    if sell_volume_60s > 0:
        buy_sell_ratio = buy_volume_60s / sell_volume_60s
    else:
        # No sells: cap the ratio at 10 when there were buys, neutral 1 otherwise.
        buy_sell_ratio = 10.0 if buy_volume_60s > 0 else 1.0

    features["tape.buy_sell_ratio_60s"] = buy_sell_ratio

    # Flow imbalance 60s
    total_volume_60s = buy_volume_60s + sell_volume_60s
    if total_volume_60s > 0:
        flow_imbalance = (buy_volume_60s - sell_volume_60s) / total_volume_60s
    else:
        flow_imbalance = 0.0

    features["tape.flow_imbalance_60s"] = flow_imbalance

    # Large trade ratio 60s — share of trades larger than twice the average size.
    trades_60s_count = n - idx_60s
    if trades_60s_count > 0:
        large_threshold = (total_volume_60s / trades_60s_count) * 2
        # Use the parsed sizes: the raw "qty" may be a string, which cannot be
        # compared against a float threshold.
        large_count = sum(1 for q in trade_qtys[idx_60s:] if q > large_threshold)
        large_trade_ratio = large_count / trades_60s_count
    else:
        large_trade_ratio = 0.0

    features["tape.large_trade_ratio_60s"] = large_trade_ratio

    # =========================================================================
    # O(20 log n): Bucket features using binary search
    # =========================================================================
    bucket_size_ms = 5 * 60 * 1000  # 5 minutes
    num_buckets = 20
    buckets = []

    for i in range(num_buckets):
        bucket_end = current_time - (i * bucket_size_ms)
        bucket_start = bucket_end - bucket_size_ms

        idx_start = bisect.bisect_left(trade_times, bucket_start)
        if i == 0:
            # The newest bucket ends exactly at the last trade's timestamp, so
            # its end must be inclusive or the latest trade(s) would be lost.
            idx_end = bisect.bisect_right(trade_times, bucket_end)
        else:
            # Older buckets are half-open [start, end) so that no trade is
            # counted in two buckets.
            idx_end = bisect.bisect_left(trade_times, bucket_end)

        # Volume in bucket = running total at end - running total at start
        buy_vol = total_before(running_buy_vol, idx_end) - total_before(running_buy_vol, idx_start)
        sell_vol = total_before(running_sell_vol, idx_end) - total_before(running_sell_vol, idx_start)

        buckets.append({
            "buy_vol": buy_vol,
            "sell_vol": sell_vol,
            "total_vol": buy_vol + sell_vol,
            "trade_count": idx_end - idx_start,
            "cvd": buy_vol - sell_vol,
        })

    # Reverse so index 0 is oldest, index -1 is most recent
    buckets.reverse()

    # Only compute if we have enough data (at least 5 buckets with trades)
    buckets_with_data = sum(1 for b in buckets if b["total_vol"] > 0)

    if buckets_with_data < 5:
        # Not enough data
        features["cvd_zscore_20"] = None
        features["buy_sell_ratio_ma5"] = None
        features["volume_imbalance_ma5"] = None
        features["buy_volume_pct"] = None
        features["trade_count_zscore_20"] = None
        features["trade_count_ma_ratio"] = None
        features["tape.cvd_accel_30s"] = None
        features["tape.cvd_accel_2m"] = None
        features["tape.cvd_accel_5m"] = None
        return features

    # --- CVD z-score: cumulative CVD over the buckets that contain volume ---
    # (at least 5 values here, guaranteed by the check above)
    cumulative_cvd = np.cumsum([b["cvd"] for b in buckets if b["total_vol"] > 0])
    cvd_std = np.std(cumulative_cvd)
    if cvd_std > 0:
        features["cvd_zscore_20"] = float((cumulative_cvd[-1] - np.mean(cumulative_cvd)) / cvd_std)
    else:
        features["cvd_zscore_20"] = 0.0

    # --- Buy/sell ratio MA5 (volumes pooled over the 5 newest buckets) ---
    recent_5 = buckets[-5:]
    buy_vol_5 = sum(b["buy_vol"] for b in recent_5)
    sell_vol_5 = sum(b["sell_vol"] for b in recent_5)
    if sell_vol_5 > 0:
        features["buy_sell_ratio_ma5"] = float(buy_vol_5 / sell_vol_5)
    else:
        features["buy_sell_ratio_ma5"] = 10.0 if buy_vol_5 > 0 else 1.0

    # --- Volume imbalance MA5 and buy volume percentage ---
    total_vol_5 = buy_vol_5 + sell_vol_5
    if total_vol_5 > 0:
        features["volume_imbalance_ma5"] = float((buy_vol_5 - sell_vol_5) / total_vol_5)
        features["buy_volume_pct"] = float(buy_vol_5 / total_vol_5)
    else:
        features["volume_imbalance_ma5"] = 0.0
        features["buy_volume_pct"] = 0.5

    # --- Trade count z-score and ratio to the mean over all 20 buckets ---
    trade_counts = [b["trade_count"] for b in buckets]
    tc_mean = np.mean(trade_counts)
    tc_std = np.std(trade_counts)
    if tc_std > 0:
        features["trade_count_zscore_20"] = float((trade_counts[-1] - tc_mean) / tc_std)
    else:
        features["trade_count_zscore_20"] = 0.0

    if tc_mean > 0:
        features["trade_count_ma_ratio"] = float(trade_counts[-1] / tc_mean)
    else:
        features["trade_count_ma_ratio"] = 1.0

    # =========================================================================
    # CVD Acceleration — approximate from bucket-level slopes
    # (BebBot used cross-call slope history; here we derive from the batch)
    # =========================================================================
    cum_cvd = np.cumsum([b["cvd"] for b in buckets])
    count = len(cum_cvd)

    # Short-term slope: CVD change over the newest bucket (~5 min)
    slope_short = float(cum_cvd[-1] - cum_cvd[-2]) if count >= 2 else 0.0
    # Medium-term slope: average per-bucket change over the last 4 buckets (~20 min)
    slope_medium = float((cum_cvd[-1] - cum_cvd[-5]) / 4) if count >= 5 else 0.0
    # Long-term slope: average per-bucket change over the last 9 buckets (~45 min)
    slope_long = float((cum_cvd[-1] - cum_cvd[-10]) / 9) if count >= 10 else 0.0

    # Acceleration = shorter-horizon slope minus longer-horizon slope.
    # NOTE(review): the 30s/2m/5m suffixes do not match the bucket horizons
    # used here; presumably kept for key compatibility with BebBot — confirm.
    features["tape.cvd_accel_30s"] = slope_short - slope_medium
    features["tape.cvd_accel_2m"] = slope_medium - slope_long
    features["tape.cvd_accel_5m"] = slope_short - slope_long

    return features


def compute_bar_aligned_features(
    trades: List[Dict[str, Any]],
    klines: Dict[str, List[Dict[str, Any]]],
) -> Dict[str, float]:
    """
    Build per-timeframe CVD and volume features aligned to kline bars.

    Each kline's ``time`` is taken as the start of its bar; a bar runs until
    the next kline's ``time``, and the final (current) bar absorbs every
    remaining trade.  Trades are expected in ascending time order.

    Args:
        trades: List of trade dicts with keys: time, price, qty, side
        klines: Dict of timeframe -> list of kline dicts with keys:
                time, open, high, low, close, volume

    Returns:
        Dict of bar-aligned features with timeframe suffixes.  Empty when
        fewer than 10 trades are supplied; timeframes with fewer than 25
        klines are skipped.  Ratio / z-score keys are omitted when their
        denominator is zero.
    """
    result: Dict[str, float] = {}

    if not trades or len(trades) < 10:
        return result

    timestamps = [trade.get("time", 0) for trade in trades]

    def split_volume(lo: int, hi: Optional[int]):
        """Sum buy and sell sizes of trades[lo:hi]; non-"Buy" sides count as sells."""
        bought = 0.0
        sold = 0.0
        for trade in trades[lo:hi]:
            size = float(trade.get("qty", 0))
            if trade.get("side") == "Buy":
                bought += size
            else:
                sold += size
        return bought, sold

    for tf in ("5m", "15m", "30m", "1h", "4h", "D"):
        bars = klines.get(tf, [])
        if len(bars) < 25:
            continue

        # Index of the first trade at or after each bar's open time.
        opens = [bar["time"] for bar in bars]
        cuts = [bisect.bisect_left(timestamps, opened) for opened in opens]

        deltas = []
        buys = []
        sells = []
        # Pair each bar's start index with the next bar's start index; the
        # last bar gets None so its slice runs to the end of the trade list.
        for lo, hi in zip(cuts, cuts[1:] + [None]):
            bought, sold = split_volume(lo, hi)
            deltas.append(bought - sold)
            buys.append(bought)
            sells.append(sold)

        # Cumulative CVD across bars (at least 25 entries from here on).
        running = np.cumsum(deltas)
        latest = running[-1]

        # --- CVD slopes and acceleration (short slope minus long slope) ---
        slope_5 = float((latest - running[-6]) / 5)
        slope_20 = float((latest - running[-21]) / 20)
        result[f"cvd_slope_5_{tf}"] = slope_5
        result[f"cvd_slope_20_{tf}"] = slope_20
        result[f"cvd_acceleration_{tf}"] = slope_5 - slope_20

        # --- CVD z-score over 20 bars ---
        tail = running[-20:]
        tail_mean = np.mean(tail)
        tail_std = np.std(tail)
        if tail_std > 0:
            result[f"cvd_zscore_20_{tf}"] = float((latest - tail_mean) / tail_std)

        # --- Buy/sell volume features (current bar) ---
        cur_buy = buys[-1]
        cur_sell = sells[-1]
        cur_total = cur_buy + cur_sell
        if cur_sell > 0:
            result[f"buy_sell_ratio_{tf}"] = float(cur_buy / cur_sell)
        if cur_total > 0:
            result[f"volume_imbalance_{tf}"] = float((cur_buy - cur_sell) / cur_total)
            result[f"buy_volume_pct_{tf}"] = float(cur_buy / cur_total)

        # --- 5-bar moving averages ---
        avg_buy = np.mean(buys[-5:])
        avg_sell = np.mean(sells[-5:])
        avg_total = avg_buy + avg_sell
        if avg_sell > 0:
            result[f"buy_sell_ratio_ma5_{tf}"] = float(avg_buy / avg_sell)
        if avg_total > 0:
            result[f"volume_imbalance_ma5_{tf}"] = float((avg_buy - avg_sell) / avg_total)

    return result


def _empty_tape_metrics() -> Dict[str, Any]:
    """Build the placeholder result used when there are too few trades.

    Returns:
        A new dict containing every tape metric key, each mapped to None.
    """
    metric_keys = (
        # Whole-batch CVD and windowed slopes
        "tape.cvd",
        "tape.cvd_30s_slope",
        "tape.cvd_120s_slope",
        "tape.cvd_5m_slope",
        "tape.cvd_25m_slope",
        "tape.cvd_1h_slope",
        "tape.cvd_100m_slope",
        "tape.cvd_4h_slope",
        # 60-second window metrics
        "tape.buy_sell_ratio_60s",
        "tape.flow_imbalance_60s",
        "tape.large_trade_ratio_60s",
        # CVD acceleration
        "tape.cvd_accel_30s",
        "tape.cvd_accel_2m",
        "tape.cvd_accel_5m",
        # Bucket-based metrics
        "cvd_zscore_20",
        "buy_sell_ratio_ma5",
        "volume_imbalance_ma5",
        "buy_volume_pct",
        "trade_count_zscore_20",
        "trade_count_ma_ratio",
    )
    # dict.fromkeys defaults every value to None and keeps the key order.
    return dict.fromkeys(metric_keys)
