{# Page header: title, matrix dimensions (skill count x model count), the first 10 characters of data.generated_at (shown verbatim when it is the '未知' placeholder), and a badge for the run mode ('mock' vs. anything else). -#}
TOP1000 基准评测

{{ data.skills_count }} Skills × {{ data.models|length }} Models · {{ '未知' if data.generated_at == '未知' else data.generated_at[:10] }}

{{ '🔴 Live 模式' if data.mode != 'mock' else '🎭 Mock 模式' }} 🌐 全局总览 📥 API 数据
{# Dashboard body. When data.model_ranking is empty or missing, render the empty-state hint; otherwise render the headline cards, the top-three podium, the full ranking table, the per-category leaders and the chart/matrix section headings. -#}
{% if not data.model_ranking %}
🧪

暂无评测数据

请先运行 BatchBenchmark 生成评测矩阵。

{% else %}
{# Headline cards. The "strongest model" card reads the first entry of data.model_ranking as-is (NOTE(review): presumably the list arrives sorted best-first; confirm against the producer). -#}
📦 评测规模
{{ data.skills_count }}
Skills × {{ data.models|length }} 模型
共 {{ data.skills_count * (data.models|length) }} 评测单元
🏆 最强模型
{{ data.model_ranking[0].avg_quality_score }}
{{ data.model_ranking[0].model_name }}
成功率 {{ data.model_ranking[0].avg_success_rate }}%
最快模型
{# Fastest model = entry with the smallest avg_latency_ms. -#}
{% set fastest = data.model_ranking|sort(attribute='avg_latency_ms')|first %}
{{ fastest.avg_latency_ms|round(0)|int }}ms
{{ fastest.model_name }}
质量 {{ fastest.avg_quality_score }}
💰 性价比之王
{# Picks the entry with the smallest total_cost_usd. NOTE(review): the card title says "best value", but quality is not part of the selection; confirm this is intended. -#}
{% set cheapest = data.model_ranking|sort(attribute='total_cost_usd')|first %}
${{ cheapest.total_cost_usd }}
{{ cheapest.model_name }}
质量 {{ cheapest.avg_quality_score }}
{# Podium for the first three ranking entries. The medal lookup tables do not depend on the loop variable, so they are defined once before the loop instead of on every iteration. NOTE(review): medal_classes is not referenced anywhere in this section; confirm whether it can be dropped. -#}
{% set medals = ['🥇', '🥈', '🥉'] %}
{% set medal_classes = ['medal-gold', 'medal-silver', 'medal-bronze'] %}
{% for m in data.model_ranking[:3] %}
{{ medals[loop.index0] }}
{{ m.model_name }}
{{ m.avg_quality_score }}
综合质量评分
{{ m.avg_success_rate }}%
成功率
{{ m.avg_latency_ms|round(0)|int }}ms
延迟
${{ m.total_cost_usd }}
总费用
{% endfor %}
{# Full ranking table: header row first, then one row per model. The row has to be rendered inside the loop because it reads `m` and `loop`, which only exist within the loop body. Ranks 1-6 get an emoji; later ranks fall back to the 1-based loop index. A missing best_categories list renders as an empty cell. -#}
排名模型综合质量成功率平均延迟总费用评测数最强分类
{% for m in data.model_ranking %}
{{ ['🥇','🥈','🥉','4️⃣','5️⃣','6️⃣'][loop.index0] if loop.index0 < 6 else loop.index }} {{ m.model_name }} {{ m.avg_quality_score }} {{ m.avg_success_rate }}% {{ m.avg_latency_ms|round(0)|int }}ms ${{ m.total_cost_usd }} {{ m.skills_evaluated }} {{ (m.best_categories or [])[:2]|join(', ') }}
{% endfor %}
{# Per-category leaders: one entry per (category, winning model) pair. The skill count is shown only when a summary exists for that category. -#}
{% for cat, model in data.category_leaders.items() %}
{{ cat }}
{% if data.category_summaries and cat in data.category_summaries %} {{ data.category_summaries[cat].get('total_skills', 0) }} Skills {% endif %}
🏆 {{ model }}
{% endfor %}
{# Section headings for the quality chart, latency chart and full evaluation matrix. -#}
📊 模型质量分布
⏱️ 模型延迟分布
🧬 完整评测矩阵
{% endif %}