# agent_as_judge_binary.py
"""
Binary Agent-as-Judge Evaluation
================================
Demonstrates pass/fail response quality evaluation.
"""
from agno.agent import Agent
from agno.db.sqlite import SqliteDb
from agno.eval.agent_as_judge import AgentAsJudgeEval
from agno.models.openai import OpenAIChat
# ---------------------------------------------------------------------------
# Create Database
# ---------------------------------------------------------------------------
# Single SQLite file that is passed as `db` to both the agent and the evaluation.
db = SqliteDb(
    db_file="tmp/agent_as_judge_binary.db",
)
# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
# Agent under test: its reply is what gets handed to the judge in the run section.
agent = Agent(
    db=db,
    model=OpenAIChat(id="gpt-4o"),
    instructions="You are a customer service agent. Respond professionally.",
)
# ---------------------------------------------------------------------------
# Create Evaluation
# ---------------------------------------------------------------------------
# Judge configuration: a named check with a single criteria string that the
# agent's response is evaluated against.
evaluation = AgentAsJudgeEval(
    db=db,
    name="Professional Tone Check",
    criteria="Response must maintain professional tone without informal language or slang",
)
# ---------------------------------------------------------------------------
# Run Evaluation
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # One prompt string is reused so the judge is shown exactly the input the
    # agent answered.
    prompt = "I need help with my account"

    # Produce the response that will be judged.
    response = agent.run(prompt)

    # Evaluate the response against the configured criteria, printing both
    # the per-result output and the summary.
    result = evaluation.run(
        input=prompt,
        output=str(response.content),
        print_results=True,
        print_summary=True,
    )

    # Report the pass/fail flag of the first entry in the evaluation results.
    verdict = "PASSED" if result.results[0].passed else "FAILED"
    print(f"Result: {verdict}")
# Run the Example
# Clone and setup repo
git clone https://github.com/agno-agi/agno.git
cd agno/cookbook/09_evals/agent_as_judge
# Create and activate virtual environment
# NOTE(review): the relative paths below resolve against the cookbook
# directory entered by the `cd` above — confirm that scripts/ and .venvs/
# are reachable from there.
./scripts/demo_setup.sh
source .venvs/demo/bin/activate
# Run the binary agent-as-judge example script
python agent_as_judge_binary.py