# Code: reliability evaluation of a Team's expected tool calls

from typing import Optional

from agno.agent import Agent
from agno.eval.reliability import ReliabilityEval, ReliabilityResult
from agno.models.openai import OpenAIChat
from agno.run.team import TeamRunResponse
from agno.team.team import Team
from agno.tools.duckduckgo import DuckDuckGoTools

# The single member of the team: an agent named "Research Analyst" that uses
# the "gpt-4o" OpenAI chat model and is given DuckDuckGo tools with both the
# `search` and `news` options switched on.
team_member = Agent(
    name="Research Analyst",
    model=OpenAIChat("gpt-4o"),
    role="Searches the web for information on research topics.",
    tools=[DuckDuckGoTools(search=True, news=True)],
)

# The team under evaluation. It has its own "gpt-4o" model and `team_member`
# as its only member.
# NOTE(review): `markdown` and `show_members_responses` presumably only affect
# how responses are formatted/displayed -- confirm against the Team docs.
team = Team(
    name="Research Analysis Team",
    model=OpenAIChat("gpt-4o"),
    members=[team_member],
    markdown=True,
    show_members_responses=True,
)

# Names of the tool calls the team run is expected to make; handed to
# ReliabilityEval as `expected_tool_calls` in evaluate_team_reliability().
expected_tool_calls = [
    "transfer_task_to_member",  # Tool call used to transfer a task to a Team member
    "duckduckgo_search",  # Tool call used to search for information
]


def evaluate_team_reliability() -> None:
    """Run the team once and assert that it made the expected tool calls.

    Sends a fixed prompt to the module-level ``team``, wraps the response and
    the module-level ``expected_tool_calls`` in a ``ReliabilityEval``, runs the
    evaluation with result printing enabled, and finally calls
    ``assert_passed()`` on the result.

    Raises:
        RuntimeError: If the evaluation produced no result (``run`` returned
            ``None``), so there is nothing to assert on.
        Exception: Whatever ``ReliabilityResult.assert_passed()`` raises when
            the evaluation did not pass (presumably ``AssertionError`` --
            confirm against the agno documentation).
    """
    response: TeamRunResponse = team.run("What are the latest developments in artificial intelligence?")
    evaluation = ReliabilityEval(
        team_response=response,
        expected_tool_calls=expected_tool_calls,
    )
    result: Optional[ReliabilityResult] = evaluation.run(print_results=True)
    # The result is Optional: fail with a clear message instead of an opaque
    # AttributeError from calling a method on None.
    if result is None:
        raise RuntimeError("ReliabilityEval.run() returned no result; cannot verify the expected tool calls.")
    result.assert_passed()


# Run the evaluation only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    evaluate_team_reliability()