Copy
Ask AI
"""
Tool-Using Agent-as-Judge Evaluation
====================================
Demonstrates judging responses generated by an agent using tools.
"""
from typing import Optional
from agno.agent import Agent
from agno.eval.agent_as_judge import AgentAsJudgeEval, AgentAsJudgeResult
from agno.models.openai import OpenAIChat
from agno.tools.calculator import CalculatorTools
# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
# Agent whose answers are judged later in this script: an OpenAI chat model
# ("gpt-4o") equipped with the calculator toolkit.
agent = Agent(
    model=OpenAIChat(id="gpt-4o"),
    tools=[CalculatorTools()],
    # Adjacent literals are concatenated by the parser, so the agent receives
    # one single instruction string.
    instructions=(
        "Use the calculator tools to solve math problems. "
        "Explain your reasoning and show calculation steps clearly."
    ),
)
# ---------------------------------------------------------------------------
# Create Evaluation
# ---------------------------------------------------------------------------
# Judge configuration: a separate OpenAI chat model ("gpt-5.2") assesses a
# response against the free-text criteria below.
evaluation = AgentAsJudgeEval(
    name="Calculator Tool Usage Quality",
    model=OpenAIChat(id="gpt-5.2"),
    # Adjacent literals are concatenated by the parser into one criteria string.
    criteria=(
        "Response should clearly explain the calculation process, "
        "show intermediate steps, and present the final answer "
        "in a user-friendly way"
    ),
    # NOTE(review): presumably "numeric" makes the judge emit a score and
    # `threshold` is the minimum passing score -- confirm the scale and the
    # comparison against the AgentAsJudgeEval documentation.
    scoring_strategy="numeric",
    threshold=7,
)
# ---------------------------------------------------------------------------
# Run Evaluation
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Single source of truth for the prompt, so the agent and the judge are
    # guaranteed to be given the same question.
    question = "What is 15 * 23 + 47?"

    # Let the tool-equipped agent produce an answer.
    response = agent.run(question)

    # Pass the question and the agent's answer (stringified content) to the
    # judge; print_results=True asks the evaluation to print its outcome.
    result: Optional[AgentAsJudgeResult] = evaluation.run(
        input=question,
        output=str(response.content),
        print_results=True,
    )

    # The annotation allows None; fail loudly if no result came back.
    assert result is not None, "Evaluation should return a result"
Run the Example
# Clone and setup repo: fetch the agno sources and enter the directory from
# which the example script is run.
git clone https://github.com/agno-agi/agno.git
cd agno/cookbook/09_evals/agent_as_judge
# Create and activate virtual environment
# NOTE(review): both paths below are relative to the directory entered by the
# `cd` above -- confirm that scripts/demo_setup.sh and .venvs/demo exist there
# rather than at the repository root.
./scripts/demo_setup.sh
source .venvs/demo/bin/activate
# Run the example script.
python agent_as_judge_with_tools.py