Copy
Ask AI
"""
Custom Evaluator Agent-as-Judge Evaluation
==========================================
Demonstrates using a custom evaluator agent for judging.
"""
from agno.agent import Agent
from agno.eval.agent_as_judge import AgentAsJudgeEval
from agno.models.openai import OpenAIChat
# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
# Agent under evaluation: its response content is what gets passed to the
# judge as `output` in the run section of this script.
agent = Agent(
    instructions="Explain technical concepts simply.",
    model=OpenAIChat(id="gpt-4o"),
)
# ---------------------------------------------------------------------------
# Create Evaluator Agent
# ---------------------------------------------------------------------------
# Judge agent: supplied to AgentAsJudgeEval through `evaluator_agent`.
# Its instructions ask for strict grading.
custom_evaluator = Agent(
    description="Strict technical evaluator",
    instructions=(
        "You are a strict evaluator. Only give high scores to "
        "exceptionally clear and accurate explanations."
    ),
    model=OpenAIChat(id="gpt-4o"),
)
# ---------------------------------------------------------------------------
# Create Evaluation
# ---------------------------------------------------------------------------
# Evaluation definition: numeric scoring against the stated criteria, judged
# by `custom_evaluator`.
# NOTE(review): threshold=8 is presumably on a 1-10 scale, matching the
# "/10" printed when the script runs -- confirm against the agno docs.
evaluation = AgentAsJudgeEval(
    name="Technical Accuracy",
    criteria="Explanation must be technically accurate and comprehensive",
    scoring_strategy="numeric",
    threshold=8,
    evaluator_agent=custom_evaluator,
)
# ---------------------------------------------------------------------------
# Run Evaluation
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Use one prompt for both the agent run and the evaluation input so the
    # judge always sees the exact question the agent answered.
    question = "What is machine learning?"

    # Generate the answer to be judged.
    response = agent.run(question)

    # Have the custom evaluator score the answer; print_results=True asks the
    # evaluation to print its own report as well.
    result = evaluation.run(
        input=question,
        output=str(response.content),
        print_results=True,
    )

    # NOTE(review): guard against a missing or empty result before indexing;
    # confirm against the agno API whether run() can return None.
    if result is None or not result.results:
        raise SystemExit("Evaluation produced no results.")

    first_result = result.results[0]
    print(f"Score: {first_result.score}/10")
    print(f"Passed: {first_result.passed}")
## Run the Example
# Clone and setup repo
git clone https://github.com/agno-agi/agno.git
cd agno/cookbook/09_evals/agent_as_judge
# Create and activate virtual environment
# NOTE(review): both paths below are resolved relative to the directory
# entered above; confirm that scripts/demo_setup.sh lives there (and creates
# .venvs/demo there) rather than at the repository root.
./scripts/demo_setup.sh
source .venvs/demo/bin/activate
# Run the example script.
# NOTE(review): the script uses OpenAIChat, which presumably requires
# OPENAI_API_KEY to be set in the environment -- confirm.
python agent_as_judge_custom_evaluator.py