Skip to main content
Chunking strategies control how documents are split into smaller pieces for embedding. The right strategy improves retrieval accuracy.
from agno.knowledge.chunking.semantic import SemanticChunking
from agno.knowledge.reader.pdf_reader import PDFReader

# Build the chunking strategy first, then hand it to the PDF reader.
semantic_strategy = SemanticChunking(chunk_size=500, similarity_threshold=0.5)
reader = PDFReader(chunking_strategy=semantic_strategy)

Chunking Strategies

| Strategy | Best For | Description |
|---|---|---|
| Semantic | General text | Groups semantically similar sentences |
| Fixed Size | Simple docs | Splits by character/token count |
| Recursive | Structured docs | Hierarchical splitting |
| Document | Natural sections | Preserves document structure |
| Code | Source code | Respects code syntax |
| Markdown | MD files | Splits by headers |
| CSV Row | Tabular data | One chunk per row |
| Agentic | Complex docs | AI-driven chunking |

Examples by Strategy

Semantic Chunking

Groups semantically related sentences together.
cookbook/07_knowledge/chunking/semantic_chunking.py
from agno.agent import Agent
from agno.knowledge.chunking.semantic import SemanticChunking
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.pdf_reader import PDFReader
from agno.vectordb.pgvector import PgVector

# Knowledge base backed by a PgVector table.
vector_store = PgVector(table_name="semantic_docs", db_url="postgresql://...")
knowledge = Knowledge(vector_db=vector_store)

# Semantic chunking configuration used when reading the PDF.
semantic_strategy = SemanticChunking(
    embedder="text-embedding-3-small",
    chunk_size=500,
    similarity_threshold=0.5,
    similarity_window=3,
    min_sentences_per_chunk=1,
)
pdf_reader = PDFReader(chunking_strategy=semantic_strategy)

# Add the remote PDF to the knowledge base through the configured reader.
knowledge.add_content(
    url="https://example.com/document.pdf",
    reader=pdf_reader,
)

# Agent that searches the knowledge base when answering.
agent = Agent(knowledge=knowledge, search_knowledge=True)
agent.print_response("What is the main topic?", markdown=True)

Fixed Size Chunking

Simple chunking by character or token count.
cookbook/07_knowledge/chunking/fixed_size_chunking.py
from agno.knowledge.chunking.fixed import FixedSizeChunking
from agno.knowledge.reader.pdf_reader import PDFReader

# Fixed-size chunks of 1000 with an overlap of 200 between them.
fixed_strategy = FixedSizeChunking(chunk_size=1000, overlap=200)
reader = PDFReader(chunking_strategy=fixed_strategy)

Recursive Chunking

Hierarchical splitting that preserves structure.
cookbook/07_knowledge/chunking/recursive_chunking.py
from agno.knowledge.chunking.recursive import RecursiveChunking
from agno.knowledge.reader.pdf_reader import PDFReader

# Separators passed to the recursive splitter, listed from paragraph breaks
# down to single spaces.
split_separators = ["\n\n", "\n", ". ", " "]

# NOTE(review): the fixed-size example on this page passes `overlap=`, while
# this one passes `chunk_overlap=` — confirm the keyword against the library.
recursive_strategy = RecursiveChunking(
    chunk_size=500,
    chunk_overlap=50,
    separators=split_separators,
)
reader = PDFReader(chunking_strategy=recursive_strategy)

Document Chunking

Preserves natural document boundaries.
cookbook/07_knowledge/chunking/document_chunking.py
from agno.knowledge.chunking.document import DocumentChunking
from agno.knowledge.reader.pdf_reader import PDFReader

# Document chunking with section preservation enabled and a 2000 size cap.
document_strategy = DocumentChunking(max_chunk_size=2000, preserve_sections=True)
reader = PDFReader(chunking_strategy=document_strategy)

Code Chunking

Respects code structure and syntax.
cookbook/07_knowledge/chunking/code_chunking.py
from agno.knowledge.chunking.code import CodeChunking
from agno.knowledge.reader.text import TextReader

# Chunk Python source text by function.
# NOTE(review): TextReader is imported from `agno.knowledge.reader.text` here,
# whereas PDFReader comes from `agno.knowledge.reader.pdf_reader` elsewhere on
# this page — confirm the module path.
code_strategy = CodeChunking(language="python", chunk_by="function")
reader = TextReader(chunking_strategy=code_strategy)

Markdown Chunking

Splits markdown by headers.
cookbook/07_knowledge/chunking/markdown_chunking.py
from agno.knowledge.chunking.markdown import MarkdownChunking
from agno.knowledge.reader.markdown import MarkdownReader

# Split markdown on headers, limited to header levels 1 and 2.
markdown_strategy = MarkdownChunking(split_by_header=True, header_levels=[1, 2])
reader = MarkdownReader(chunking_strategy=markdown_strategy)

CSV Row Chunking

One chunk per row.
cookbook/07_knowledge/chunking/csv_row_chunking.py
from agno.knowledge.chunking.csv import CSVRowChunking
from agno.knowledge.reader.csv import CSVReader

# Row-based CSV chunking with `include_headers` switched on.
row_strategy = CSVRowChunking(include_headers=True)
reader = CSVReader(chunking_strategy=row_strategy)

Agentic Chunking

AI-driven chunking for complex documents.
cookbook/07_knowledge/chunking/agentic_chunking.py
from agno.knowledge.chunking.agentic import AgenticChunking
from agno.knowledge.reader.pdf_reader import PDFReader

# Agentic chunking: a model is given an instruction describing how to group text.
agentic_strategy = AgenticChunking(
    model="gpt-4o-mini",
    instructions="Group related concepts together",
)
reader = PDFReader(chunking_strategy=agentic_strategy)

Custom Chunking Strategy

Build your own chunking logic.
cookbook/07_knowledge/chunking/custom_strategy_example.py
from agno.knowledge.chunking.base import ChunkingStrategy
from agno.knowledge.document import Document
from agno.knowledge.reader.pdf_reader import PDFReader


class MyChunking(ChunkingStrategy):
    """Custom strategy that splits a document on the literal ``---`` delimiter."""

    def chunk(self, document: Document) -> list[Document]:
        """Split ``document.content`` on ``---`` and return the non-empty pieces.

        Args:
            document: The source document; its ``content`` is split and its
                ``metadata`` is passed to every resulting chunk.

        Returns:
            One ``Document`` per non-blank piece, with surrounding whitespace
            stripped. Pieces that are empty after stripping are dropped.
        """
        # Strip each piece once, then filter on the stripped value.
        pieces = (part.strip() for part in document.content.split("---"))
        return [
            # The same metadata object is handed to every chunk (not copied).
            Document(content=piece, metadata=document.metadata)
            for piece in pieces
            if piece
        ]


# PDFReader is imported above so this example runs as a standalone file.
reader = PDFReader(chunking_strategy=MyChunking())

Run Examples

# Clone the repository and change into the chunking cookbook directory
git clone https://github.com/agno-agi/agno.git
cd agno/cookbook/07_knowledge/chunking

# Semantic chunking
python semantic_chunking.py

# Fixed size
python fixed_size_chunking.py

# Custom strategy
python custom_strategy_example.py