Skip to main content
Document readers extract text from various file formats for ingestion into your knowledge base. Agno auto-detects file types or lets you specify readers explicitly.
from agno.knowledge.knowledge import Knowledge
from agno.vectordb.pgvector import PgVector

knowledge = Knowledge(
    vector_db=PgVector(table_name="docs", db_url="postgresql://..."),
)

# Auto-detect file type
knowledge.add_content(path="./documents/report.pdf")
knowledge.add_content(path="./data/records.csv")
knowledge.add_content(url="https://example.com/page.html")

Supported Readers

| Format | Reader | Use Case |
| --- | --- | --- |
| PDF | `PDFReader` | Documents, reports, papers |
| CSV | `CSVReader` | Tabular data, spreadsheets |
| JSON | `JSONReader` | API responses, configs |
| Markdown | `MarkdownReader` | Documentation, notes |
| PowerPoint | `PPTXReader` | Presentations |
| Word | `DocxReader` | Word documents |
| HTML | `HTMLReader` | Web pages |
| Arxiv | `ArxivReader` | Academic papers |
| YouTube | `YouTubeReader` | Video transcripts |
| Firecrawl | `FirecrawlReader` | Web scraping |
| Tavily | `TavilyReader` | Web research |

Examples by Format

PDF Files

cookbook/07_knowledge/readers/pdf_reader_async.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.pdf import PDFReader
from agno.vectordb.pgvector import PgVector

knowledge = Knowledge(
    vector_db=PgVector(table_name="pdfs", db_url="postgresql://..."),
)

# Add PDF from path
knowledge.add_content(
    path="./documents/report.pdf",
    reader=PDFReader(),
)

# Add PDF from URL
knowledge.add_content(
    url="https://example.com/whitepaper.pdf",
)

Password-Protected PDFs

cookbook/07_knowledge/readers/pdf_reader_password.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.pdf import PDFReader

knowledge = Knowledge(vector_db=...)

knowledge.add_content(
    path="./secure.pdf",
    reader=PDFReader(password="secret123"),
)

CSV Files

cookbook/07_knowledge/readers/csv_reader.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.csv import CSVReader

knowledge = Knowledge(vector_db=...)

# Basic CSV
knowledge.add_content(
    path="./data/customers.csv",
    reader=CSVReader(),
)

CSV with Field Labels

Add column names as context.
cookbook/07_knowledge/readers/csv_field_labeled_reader.py
from agno.knowledge.reader.csv import CSVReader

reader = CSVReader(
    include_field_labels=True,
    row_template="Customer {name} from {city} purchased {product}",
)

JSON Files

cookbook/07_knowledge/readers/json_reader.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.json import JSONReader

knowledge = Knowledge(vector_db=...)

knowledge.add_content(
    path="./data/config.json",
    reader=JSONReader(),
)

Markdown Files

cookbook/07_knowledge/readers/markdown_reader_async.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.markdown import MarkdownReader

knowledge = Knowledge(vector_db=...)

# Add all markdown files from directory
knowledge.add_content(
    path="./docs/",
    reader=MarkdownReader(),
)

PowerPoint Presentations

cookbook/07_knowledge/readers/pptx_reader.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.pptx import PPTXReader

knowledge = Knowledge(vector_db=...)

knowledge.add_content(
    path="./presentations/quarterly_review.pptx",
    reader=PPTXReader(),
)

Arxiv Papers

Automatically fetch and parse academic papers.
cookbook/07_knowledge/readers/arxiv_reader.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.arxiv import ArxivReader

knowledge = Knowledge(vector_db=...)

# Add paper by Arxiv ID
knowledge.add_content(
    topic="2301.00234",  # Arxiv ID
    reader=ArxivReader(),
)

# Search and add papers
knowledge.add_content(
    topic="transformer architecture attention",
    reader=ArxivReader(max_results=5),
)

Web Content with Firecrawl

cookbook/07_knowledge/readers/firecrawl_reader.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.firecrawl import FirecrawlReader

knowledge = Knowledge(vector_db=...)

knowledge.add_content(
    url="https://docs.example.com/",
    reader=FirecrawlReader(
        api_key="your-key",
        crawl_subpages=True,
    ),
)

Web Research with Tavily

cookbook/07_knowledge/readers/tavily_reader.py
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.tavily import TavilyReader

knowledge = Knowledge(vector_db=...)

knowledge.add_content(
    topic="Latest developments in quantum computing",
    reader=TavilyReader(api_key="your-key"),
)

Run Examples

git clone https://github.com/agno-agi/agno.git
cd agno/cookbook/07_knowledge/readers

# PDF
python pdf_reader_async.py

# CSV
python csv_reader.py

# Arxiv
python arxiv_reader.py