Learn how to evaluate agent accuracy on comparison tasks by scoring the agent's answer against an expected output.
from typing import Optional
from agno.agent import Agent
from agno.eval.accuracy import AccuracyEval, AccuracyResult
from agno.models.openai import OpenAIChat
from agno.tools.calculator import CalculatorTools

# The judge model (o4-mini) scores the agent's answer against the expected output.
evaluation = AccuracyEval(
    name="Comparison Evaluation",
    model=OpenAIChat(id="o4-mini"),
    agent=Agent(
        model=OpenAIChat(id="gpt-5-mini"),
        tools=[CalculatorTools()],
        instructions="You must use the calculator tools for comparisons.",
    ),
    input="9.11 and 9.9 -- which is bigger?",
    expected_output="9.9",
    additional_guidelines="It's okay for the output to include additional text or information relevant to the comparison.",
)

result: Optional[AccuracyResult] = evaluation.run(print_results=True)
assert result is not None and result.avg_score >= 8
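
A single run can be noisy, since both the agent and the judge are model calls. The same evaluation can be repeated and averaged; the sketch below assumes AccuracyEval accepts a num_iterations parameter, as in other published accuracy-eval examples, and reuses the imports and agent configuration from above.

# Minimal sketch: average the accuracy score over several runs to reduce variance.
# Assumes AccuracyEval supports a num_iterations parameter.
evaluation_repeated = AccuracyEval(
    name="Comparison Evaluation (averaged)",
    model=OpenAIChat(id="o4-mini"),
    agent=Agent(
        model=OpenAIChat(id="gpt-5-mini"),
        tools=[CalculatorTools()],
        instructions="You must use the calculator tools for comparisons.",
    ),
    input="9.11 and 9.9 -- which is bigger?",
    expected_output="9.9",
    num_iterations=3,  # assumed parameter: number of scored runs to average
)

result_repeated: Optional[AccuracyResult] = evaluation_repeated.run(print_results=True)
assert result_repeated is not None and result_repeated.avg_score >= 8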