NVIDIA provides high-performance inference for Llama, Nemotron, and other models. The examples below access these models through Agno's `Nvidia` model class.
from agno.agent import Agent
from agno.models.nvidia import Nvidia

# Pick the model by its id, then wrap it in an agent.
nvidia_model = Nvidia(id="meta/llama-3.3-70b-instruct")
agent = Agent(model=nvidia_model, markdown=True)

# Send a single prompt and print the reply; stream=True requests streamed output.
agent.print_response("Explain GPU acceleration", stream=True)

Tool Use

from agno.agent import Agent
from agno.models.nvidia import Nvidia
from agno.tools.yfinance import YFinanceTools

# Build the model and the finance toolkit separately so each piece is easy to swap.
nvidia_model = Nvidia(id="meta/llama-3.3-70b-instruct")
finance_tools = YFinanceTools(stock_price=True)

# Give the agent the toolkit alongside the model.
agent = Agent(model=nvidia_model, tools=[finance_tools], markdown=True)

# Ask a question the agent can answer with the stock-price tool.
agent.print_response("What's NVDA's stock price?", stream=True)

Structured Output

from pydantic import BaseModel, Field
from agno.agent import Agent
from agno.models.nvidia import Nvidia

# Shape of the structured result; this example passes it to the agent as
# output_schema. Both fields are required (Field(...) supplies no default).
class Summary(BaseModel):
    # Single title string for the summary.
    title: str = Field(..., description="Title")
    # List of key-point strings.
    key_points: list[str] = Field(..., description="Key points")

# Same model as the other examples, this time paired with the Summary schema.
nvidia_model = Nvidia(id="meta/llama-3.3-70b-instruct")
agent = Agent(model=nvidia_model, output_schema=Summary)

# Run one prompt; no streaming is requested here.
agent.print_response("Summarize CUDA programming benefits")

Run Examples

# Set the API key in the environment; "xxx" is a placeholder for your own key.
# NOTE(review): presumably read by the Nvidia model class for authentication - confirm.
export NVIDIA_API_KEY=xxx

# Fetch the Agno repository and move into the NVIDIA cookbook directory.
git clone https://github.com/agno-agi/agno.git
cd agno/cookbook/92_models/nvidia

# Run the example scripts from that directory.
python basic.py
python tool_use.py