Custom chunking allows you to implement your own chunking strategy by creating a class that inherits from ChunkingStrategy. This is useful when you need to split documents based on specific separators, apply custom logic, or handle domain-specific content formats.
from typing import List
from agno.knowledge.chunking.base import ChunkingStrategy
from agno.knowledge.content import Document

class CustomChunking(ChunkingStrategy):
    """Chunking strategy that splits a document's content on a fixed separator.

    Every segment that is still non-empty after cleaning becomes its own
    ``Document`` carrying a copy of the source document's metadata plus a
    ``"chunk"`` entry.
    """

    def __init__(self, separator: str = "---", **kwargs):
        """Store the separator used to split document content.

        Args:
            separator: String on which the content is split. Must not be
                the empty string.
            **kwargs: Accepted so extra keyword arguments do not break the
                call; they are not used by this class.

        Raises:
            ValueError: If ``separator`` is the empty string.
        """
        # str.split("") raises a bare "empty separator" ValueError. Reject it
        # here so the mistake surfaces at construction time instead of deep
        # inside an ingestion run when chunk() is first called.
        if separator == "":
            raise ValueError("separator must be a non-empty string")
        self.separator = separator

    def chunk(self, document: Document) -> List[Document]:
        """Split ``document`` into one ``Document`` per non-empty segment.

        Args:
            document: The document whose ``content`` is split on
                ``self.separator``.

        Returns:
            The chunk documents in their original order. Segments that are
            empty after cleaning are omitted.
        """
        result = []
        # Numbering is the segment's 1-based position in the raw split, so a
        # skipped (empty) segment leaves a gap in chunk numbers and ids.
        segments = document.content.split(self.separator)
        for position, segment in enumerate(segments, start=1):
            cleaned = self.clean_text(segment)  # Use inherited method
            if not cleaned:
                continue

            # Copy so the chunk's "chunk" entry does not leak back into the
            # source document's metadata dict.
            meta_data = document.meta_data.copy()
            meta_data["chunk"] = position
            result.append(
                Document(
                    # Derive a per-chunk id only when the source has an id.
                    id=f"{document.id}_{position}" if document.id else None,
                    name=document.name,
                    meta_data=meta_data,
                    content=cleaned,
                )
            )
        return result

Usage

import asyncio
from agno.agent import Agent
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.pdf_reader import PDFReader
from agno.vectordb.pgvector import PgVector

# Connection string for the Postgres instance that backs the vector store.
db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"

# Knowledge base whose contents are stored in a PgVector table.
vector_db = PgVector(table_name="recipes_custom_chunking", db_url=db_url)
knowledge = Knowledge(vector_db=vector_db)

# Add the recipes PDF, handing the reader the custom chunking strategy.
recipes_url = "https://agno-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf"
reader = PDFReader(
    name="Custom Chunking Reader",
    chunking_strategy=CustomChunking(separator="---"),
)
asyncio.run(knowledge.add_content_async(url=recipes_url, reader=reader))

# Agent configured with the knowledge base and knowledge search enabled.
agent = Agent(knowledge=knowledge, search_knowledge=True)

agent.print_response("How to make Thai curry?", markdown=True)

Custom Chunking Params

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| `separator` | `str` | `"---"` | The string used to split the document content into chunks. |