In this example, no agent is executed. Instead, a pre-supplied output is evaluated against the expected output for correctness.
Code
from typing import Optional
from agno.eval.accuracy import AccuracyEval, AccuracyResult
from agno.models.openai import OpenAIChat
# Configure the accuracy evaluation: the prompt, the answer it is expected to
# yield, and the model passed to the evaluator.
# NOTE(review): `model` is presumably the model that scores the output and
# `num_iterations` the number of scoring passes — confirm against the Agno docs.
evaluation = AccuracyEval(
    model=OpenAIChat(id="o4-mini"),
    input="What is 10*5 then to the power of 2? do it step by step",
    expected_output="2500",
    num_iterations=1,
)

# Supply the answer directly via `run_with_output` rather than running an
# agent; `print_results=True` asks the evaluation to print its results.
result_with_given_answer: Optional[AccuracyResult] = evaluation.run_with_output(
    output="2500",
    print_results=True,
)

# The run may return None (hence the Optional annotation), so check for a
# result first, then require an average score of at least 8.
assert result_with_given_answer is not None
assert result_with_given_answer.avg_score >= 8