# Shell command (not Python): install the agentforge package with pip.
pip install agentforge
from agentforge import Agent

# Quick start: build an Agent keyed by its agent_id, hand it one task
# description, and print the summary of whatever evaluate_task returns.
task_description = "用户要求2周内上线推荐系统"

agent = Agent(agent_id="my_first_agent")
result = agent.evaluate_task(task_description)
result.print_summary()
场景2 · 持续迭代
每天评估一次,看分数涨了还是跌了
把评估接入日常开发。Agent 每次运行自动上报并存储历史分数,一个脚本即可查看所有版本的进步曲线。
✓ 历史分数全记录
✓ 一行命令查看趋势
✓ 自动生成进化报告
from agentforge import Agent

# Evaluate the agent's output for each task in turn and print the score
# and tier label of every evaluation.
agent = Agent(agent_id="pm_agent_v1")
tasks = ["上线推荐系统(v1)", "优化搜索排序(v2)", "新增画像模块(v3)"]

for task in tasks:
    # run_your_agent is not defined in this snippet; it stands for the
    # caller's own function that runs the agent on one task.
    output = run_your_agent(task)
    profile = agent.evaluate(output=output)
    print(f"得分: {profile.overall_score} | {profile.tier_label}")
from agentforge import get_evaluation_history

# Fetch the evaluation records stored under agent id "pm_agent_v1" and
# report the mean and the best overall score.
history = get_evaluation_history("pm_agent_v1")
scores = [h["overall_score"] for h in history]

if scores:
    average = sum(scores) / len(scores)
    print(f"平均: {average:.1f} | 最高: {max(scores):.1f}")
else:
    # With no records, sum()/len() would raise ZeroDivisionError and
    # max() would raise ValueError, so report the empty state instead.
    print("暂无评估记录")
# Minimum acceptable score per dimension; a score below its minimum
# fails the CI run by raising AssertionError.
THRESHOLD = {
    "overall_score": 70,
    "safety_alignment": 65,
    "hallucination_rate": 60,
}

# `r` is the evaluation result produced elsewhere (not defined in this
# snippet); only its `radar` mapping is read here.
# NOTE(review): a dimension that is absent from r.radar is skipped
# silently, so its threshold is never enforced - confirm that every key
# above (in particular "overall_score") is actually present in r.radar.
for dim, min_score in THRESHOLD.items():
    if dim not in r.radar:
        continue
    if r.radar[dim] < min_score:
        raise AssertionError(f"{dim}: {r.radar[dim]} < {min_score}")
场景4 · Python SDK
pip install,3 行代码完成接入
加一行安装命令,就能自动拦截所有工具调用,自动上报评估。零侵入,不动现有代码。
✓ pip install agentforge
✓ 自动拦截 OpenAI 调用
✓ 自动记录 latency / token
# Shell command (not Python): install the agentforge package with pip.
pip install agentforge
from agentforge import Agent

# SDK integration: construct the Agent with auto_trace_llm switched on,
# evaluate a single task description, then print the result summary.
# NOTE(review): auto_trace_llm presumably enables automatic tracing of
# LLM calls - confirm against the SDK documentation.
agent = Agent(
    agent_id="my_pm",
    auto_trace_llm=True,
)
task_description = "用户要求一个月内上线推荐系统"
result = agent.evaluate_task(task_description)
result.print_summary()