Documentation Index
Fetch the complete documentation index at: https://docs.agno.com/llms.txt
Use this file to discover all available pages before exploring further.
Document readers extract text from various file formats for ingestion into your knowledge base. Agno auto-detects file types or lets you specify readers explicitly.
from agno.knowledge.knowledge import Knowledge
from agno.vectordb.pgvector import PgVector
knowledge = Knowledge(
vector_db=PgVector(table_name="docs", db_url="postgresql://..."),
)
# Auto-detect file type
knowledge.insert(path="./documents/report.pdf")
knowledge.insert(path="./data/records.csv")
knowledge.insert(url="https://example.com/page.html")
Supported Readers
| Format | Reader | Use Case |
|---|---|---|
| PDF | PDFReader | Documents, reports, papers |
| CSV | CSVReader | Tabular data, spreadsheets |
| JSON | JSONReader | API responses, configs |
| Markdown | MarkdownReader | Documentation, notes |
| PowerPoint | PPTXReader | Presentations |
| Word | DocxReader | Word documents |
| HTML | HTMLReader | Web pages |
| Arxiv | ArxivReader | Academic papers |
| YouTube | YouTubeReader | Video transcripts |
| Firecrawl | FirecrawlReader | Web scraping |
| Tavily | TavilyReader | Web research |
PDF Files
cookbook/07_knowledge/readers/pdf_reader_async.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.pdf import PDFReader
from agno.vectordb.pgvector import PgVector
knowledge = Knowledge(
vector_db=PgVector(table_name="pdfs", db_url="postgresql://..."),
)
# Add PDF from path
knowledge.insert(
path="./documents/report.pdf",
reader=PDFReader(),
)
# Add PDF from URL
knowledge.insert(
url="https://example.com/whitepaper.pdf",
)
Password-Protected PDFs
cookbook/07_knowledge/readers/pdf_reader_password.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.pdf import PDFReader
knowledge = Knowledge(vector_db=...)
knowledge.insert(
path="./secure.pdf",
reader=PDFReader(password="secret123"),
)
CSV Files
cookbook/07_knowledge/readers/csv_reader.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.csv import CSVReader
knowledge = Knowledge(vector_db=...)
# Basic CSV
knowledge.insert(
path="./data/customers.csv",
reader=CSVReader(),
)
CSV with Field Labels
Add column names as context.
cookbook/07_knowledge/readers/csv_field_labeled_reader.py
from agno.knowledge.reader.csv import CSVReader
reader = CSVReader(
include_field_labels=True,
row_template="Customer {name} from {city} purchased {product}",
)
JSON Files
cookbook/07_knowledge/readers/json_reader.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.json import JSONReader
knowledge = Knowledge(vector_db=...)
knowledge.insert(
path="./data/config.json",
reader=JSONReader(),
)
Markdown Files
cookbook/07_knowledge/readers/markdown_reader_async.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.markdown import MarkdownReader
knowledge = Knowledge(vector_db=...)
# Add all markdown files from directory
knowledge.insert(
path="./docs/",
reader=MarkdownReader(),
)
PowerPoint Presentations
cookbook/07_knowledge/readers/pptx_reader.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.pptx import PPTXReader
knowledge = Knowledge(vector_db=...)
knowledge.insert(
path="./presentations/quarterly_review.pptx",
reader=PPTXReader(),
)
Arxiv Papers
Automatically fetch and parse academic papers.
cookbook/07_knowledge/readers/arxiv_reader.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.arxiv import ArxivReader
knowledge = Knowledge(vector_db=...)
# Add paper by Arxiv ID
knowledge.insert(
topic="2301.00234", # Arxiv ID
reader=ArxivReader(),
)
# Search and add papers
knowledge.insert(
topic="transformer architecture attention",
reader=ArxivReader(max_results=5),
)
Web Content with Firecrawl
cookbook/07_knowledge/readers/firecrawl_reader.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.firecrawl import FirecrawlReader
knowledge = Knowledge(vector_db=...)
knowledge.insert(
url="https://docs.example.com/",
reader=FirecrawlReader(
api_key="your-key",
crawl_subpages=True,
),
)
Web Research with Tavily
cookbook/07_knowledge/readers/tavily_reader.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.tavily import TavilyReader
knowledge = Knowledge(vector_db=...)
knowledge.insert(
topic="Latest developments in quantum computing",
reader=TavilyReader(api_key="your-key"),
)
Run Examples
git clone https://github.com/agno-agi/agno.git
cd agno/cookbook/07_knowledge/readers
# PDF
python pdf_reader_async.py
# CSV
python csv_reader.py
# Arxiv
python arxiv_reader.py