Async Filtering

import asyncio
import os

from agno.agent import Agent
from agno.db.postgres import AsyncPostgresDb
from agno.db.sqlite import AsyncSqliteDb
from agno.filters import IN
from agno.knowledge.knowledge import Knowledge
from agno.utils.media import (
    SampleDataFileExtension,
    download_knowledge_filters_sample_data,
)
from agno.vectordb.pgvector import PgVector

# Fetch five sample CVs in DOCX format; the returned collection is indexed
# by position (0-4) when the documents are inserted into the knowledge base.
downloaded_cv_paths = download_knowledge_filters_sample_data(
    file_extension=SampleDataFileExtension.DOCX,
    num_files=5,
)

# Remove any SQLite contents database left behind by a previous run
sqlite_db_file = "tmp/knowledge_contents.db"
if os.path.exists(sqlite_db_file):
    os.remove(sqlite_db_file)
db = AsyncSqliteDb(db_file=sqlite_db_file)

# NOTE(review): this assignment rebinds `db`, so the SQLite instance created
# just above is discarded and everything that follows uses Postgres.
# Confirm which backend is intended and drop the other.
db = AsyncPostgresDb(
    knowledge_table="knowledge_contents",
    db_url="postgresql+psycopg_async://ai:ai@localhost:5532/ai",
)

# Vector database: PgVector using the "CVs" table
vector_db = PgVector(
    db_url="postgresql+psycopg://ai:ai@localhost:5532/ai",
    table_name="CVs",
)

# Step 1: Initialize knowledge base with documents and metadata
# ------------------------------------------------------------------------------
# Every document added to the knowledge base carries a metadata dict
# (user_id, document_type, year); the queries in Step 2 filter on user_id.

knowledge = Knowledge(
    contents_db=db,
    vector_db=vector_db,
    name="Async Filtering",
)

# One CV per candidate: the user_id at position i is attached to
# downloaded_cv_paths[i]. All five documents share the same document_type
# and year, so only the user_id varies between entries.
cv_user_ids = [
    "jordan_mitchell",
    "taylor_brooks",
    "morgan_lee",
    "casey_jordan",
    "alex_rivera",
]

asyncio.run(
    knowledge.ainsert_many(
        [
            {
                "path": downloaded_cv_paths[position],
                "metadata": {
                    "user_id": user_id,
                    "document_type": "cv",
                    "year": 2025,
                },
            }
            for position, user_id in enumerate(cv_user_ids)
        ],
    )
)


# Step 2: Query the knowledge base with different filter combinations
# ------------------------------------------------------------------------------

# The Agent itself is created without any filters; each query in the
# __main__ block supplies its own `knowledge_filters` on the call.
agent = Agent(
    knowledge=knowledge,
    db=db,
    search_knowledge=True,
)

if __name__ == "__main__":

    async def main() -> None:
        """Run both filtered queries one after the other on a single event loop.

        Awaiting the queries inside one coroutine means a single
        ``asyncio.run`` call drives them, instead of creating and tearing
        down a separate event loop for each query.
        """
        # Query for Jordan Mitchell's experience and skills, using a plain
        # dict as the filter
        await agent.aprint_response(
            "Search the knowledge base for the candidate's experience and skills",
            knowledge_filters={"user_id": "jordan_mitchell"},
            markdown=True,
        )

        # Same candidate, this time using an IN filter expression in a list
        await agent.aprint_response(
            "Tell me about the candidate's experience and skills",
            knowledge_filters=[IN("user_id", ["jordan_mitchell"])],
            markdown=True,
        )

    asyncio.run(main())

Run the Example

# Clone and setup repo
git clone https://github.com/agno-agi/agno.git
cd agno/cookbook/07_knowledge/filters

# Create and activate virtual environment
./scripts/demo_setup.sh
source .venvs/demo/bin/activate

# Optional: Run PgVector (needs docker)
./cookbook/scripts/run_pgvector.sh

python async_filtering.py

Run the Example

Run the Example