This example demonstrates how to create an agent that can transcribe audio conversations, identifying different speakers and providing accurate transcriptions.
import requests

from agno.agent import Agent
from agno.media import Audio
from agno.models.google import Gemini

# Agent backed by a Gemini model; markdown=True is passed so the agent's
# output is formatted as Markdown.
agent = Agent(
    model=Gemini(id="gemini-2.0-flash-exp"),
    markdown=True,
)

# Sample conversation recording used as the transcription input.
url = "https://agno-public.s3.us-east-1.amazonaws.com/demo_data/QA-01.mp3"

# Download the audio as raw bytes. The timeout keeps the script from hanging
# on a stalled connection, and raise_for_status() stops us from handing an
# HTTP error page to the model as if it were audio data.
response = requests.get(url, timeout=30)
response.raise_for_status()
audio_content = response.content

# Ask for a speaker-labelled transcript of the downloaded audio and stream
# the response as it is produced.
agent.print_response(
    "Give a transcript of this audio conversation. Use speaker A, speaker B to identify speakers.",
    audio=[Audio(content=audio_content)],
    stream=True,
)