"""Accuracy evaluation of an agent equipped with calculator tools.

Builds an ``AccuracyEval`` around an ``Agent`` that has ``CalculatorTools``,
runs it, and asserts that a result was produced and that its average score
meets the minimum threshold.
"""

from typing import Optional

from agno.agent import Agent
from agno.eval.accuracy import AccuracyEval, AccuracyResult
from agno.models.openai import OpenAIChat
from agno.tools.calculator import CalculatorTools

evaluation = AccuracyEval(
    name="Calculator Evaluation",
    # NOTE(review): presumably the model that grades the agent's output --
    # confirm against the AccuracyEval documentation.
    model=OpenAIChat(id="o4-mini"),
    # The agent under evaluation: a chat model given calculator tools.
    agent=Agent(
        model=OpenAIChat(id="gpt-5-mini"),
        tools=[CalculatorTools()],
    ),
    input="What is 10*5 then to the power of 2? do it step by step",
    expected_output="2500",
    additional_guidelines="Agent output should include the steps and the final answer.",
    # NOTE(review): presumably the number of repeated runs that feed
    # ``avg_score`` -- confirm.
    num_iterations=3,
)

# ``run`` is annotated as Optional here, so guard against ``None`` before
# reading the score.
result: Optional[AccuracyResult] = evaluation.run(print_results=True)

# Two separate assertions so a failure says whether the run produced nothing
# or merely scored too low.
assert result is not None, "Accuracy evaluation returned no result"
assert result.avg_score >= 8, (
    f"Average accuracy score {result.avg_score} is below the required minimum of 8"
)