"""
Google File Search Advanced
===========================

Cookbook example for `google/gemini/file_search_advanced.py`.

Demonstrates advanced Gemini File Search usage: creating multiple stores,
uploading documents with custom chunking and metadata, filtering queries by
metadata, searching across stores, inspecting citations, and cleaning up.
"""

from pathlib import Path

from agno.agent import Agent
from agno.models.google import Gemini

# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------

# Create Gemini model
model = Gemini(id="gemini-2.5-flash")

# Create agent
agent = Agent(model=model, markdown=True)

print("=" * 60)
print("Setting up multiple File Search stores...")
print("=" * 60)

# Create two different stores for different types of content
technical_store = model.create_file_search_store(display_name="Technical Documentation")
marketing_store = model.create_file_search_store(display_name="Marketing Content")
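# Note: stores are persistent server-side resources referenced by their resource
# name (store.name); they remain until explicitly deleted (see cleanup below).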

print(f"[OK] Created technical store: {technical_store.name}")
print(f"[OK] Created marketing store: {marketing_store.name}")

# Upload files with custom chunking and metadata
print("\n" + "=" * 60)
print("Uploading files with custom configuration...")
print("=" * 60)

# Upload technical document with custom chunking
print("\n1. Uploading technical document...")
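# chunking_config controls how the file is split for indexing (here, whitespace
# chunking with a maximum chunk size and overlap measured in tokens), and
# custom_metadata attaches string/numeric key-value pairs that can be used for
# metadata filtering at query time (see Query 1 below).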
tech_operation = model.upload_to_file_search_store(
    file_path=Path(__file__).parent / "documents" / "technical_manual.txt",
    store_name=technical_store.name,
    display_name="Technical Manual v2.0",
    chunking_config={
        "white_space_config": {
            "max_tokens_per_chunk": 300,
            "max_overlap_tokens": 50,
        }
    },
    custom_metadata=[
        {"key": "type", "string_value": "technical"},
        {"key": "version", "numeric_value": 2},
        {"key": "department", "string_value": "engineering"},
    ],
)

# Upload marketing document
print("2. Uploading marketing document...")
marketing_operation = model.upload_to_file_search_store(
    file_path=Path(__file__).parent / "documents" / "product_brochure.txt",
    store_name=marketing_store.name,
    display_name="Product Brochure Q1 2024",
    chunking_config={
        "white_space_config": {
            "max_tokens_per_chunk": 200,
            "max_overlap_tokens": 20,
        }
    },
    custom_metadata=[
        {"key": "type", "string_value": "marketing"},
        {"key": "quarter", "string_value": "Q1"},
        {"key": "year", "numeric_value": 2024},
    ],
)

# Wait for both uploads
print("\nWaiting for uploads to complete...")
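# Each upload returns a long-running operation; wait_for_operation blocks until
# the document has been processed and indexed.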
model.wait_for_operation(tech_operation)
print("[OK] Technical document uploaded")
model.wait_for_operation(marketing_operation)
print("[OK] Marketing document uploaded")

# List documents in each store
print("\n" + "=" * 60)
print("Document Management")
print("=" * 60)

print("\nTechnical Store Documents:")
tech_docs = model.list_documents(technical_store.name)
for doc in tech_docs:
    print(f"  - {doc.display_name} ({doc.name})")

print("\nMarketing Store Documents:")
marketing_docs = model.list_documents(marketing_store.name)
for doc in marketing_docs:
    print(f"  - {doc.display_name} ({doc.name})")

# Query with metadata filtering - Technical docs only
print("\n" + "=" * 60)
print("Query 1: Technical documentation with metadata filter")
print("=" * 60)

model.file_search_store_names = [technical_store.name]
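# Filter syntax: string values are quoted, numeric values are not, and
# conditions can be combined with AND (matching the custom_metadata set above).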
model.file_search_metadata_filter = 'type="technical" AND version=2'

run1 = agent.run(
    "What are the technical specifications mentioned in the documentation?"
)
print(f"\nResponse:\n{run1.content}")

if run1.citations and run1.citations.raw:
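    # The raw citation payload carries Gemini grounding metadata; each grounding
    # chunk's retrieved_context holds the source title, URI, and matched text.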
    print("\nCitations:")
    print("=" * 50)
    grounding_metadata = run1.citations.raw.get("grounding_metadata", {})
    sources = set()
    for chunk in grounding_metadata.get("grounding_chunks", []) or []:
        if isinstance(chunk, dict) and chunk.get("retrieved_context"):
            rc = chunk["retrieved_context"]
            sources.add(rc.get("title", "Unknown"))
    if sources:
        print(f"\nSources ({len(sources)}):")
        for i, source in enumerate(sorted(sources), 1):
            print(f"  [{i}] {source}")

# Query across multiple stores
print("\n" + "=" * 60)
print("Query 2: Search across both stores")
print("=" * 60)

model.file_search_store_names = [technical_store.name, marketing_store.name]
model.file_search_metadata_filter = None  # Remove filter
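# With both stores listed and no metadata filter, retrieval can draw on either corpus.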

run2 = agent.run("What are the key product features and how do they work?")
print(f"\nResponse:\n{run2.content}")

if run2.citations and run2.citations.raw:
    print("\nCitations:")
    print("=" * 50)
    grounding_metadata = run2.citations.raw.get("grounding_metadata", {})
    chunks = grounding_metadata.get("grounding_chunks", []) or []

    sources = set()
    for chunk in chunks:
        if isinstance(chunk, dict) and chunk.get("retrieved_context"):
            rc = chunk["retrieved_context"]
            sources.add(rc.get("title", "Unknown"))

    if sources:
        print(f"\nSources ({len(sources)}):")
        for i, source in enumerate(sorted(sources), 1):
            print(f"  [{i}] {source}")

        print(f"\nDetailed Citations ({len(chunks)}):")
        for i, chunk in enumerate(chunks, 1):
            if isinstance(chunk, dict) and chunk.get("retrieved_context"):
                rc = chunk["retrieved_context"]
                print(f"\n  [{i}] {rc.get('title', 'Unknown')}")
                if rc.get("uri"):
                    print(f"      URI: {rc['uri']}")
                print("      Type: file_search")
                if rc.get("text"):
                    text = rc["text"]
                    if len(text) > 200:
                        text = text[:200] + "..."
                    print(f"      Text: {text}")

# Inspect document metadata (the update API is not yet available)
print("\n" + "=" * 60)
print("Document metadata management...")
print("=" * 60)

if tech_docs:
    print(f"[OK] Document retrieved: {tech_docs[0].display_name}")
    print(f"  Document ID: {tech_docs[0].name}")
    # Note: Document update API is not yet available in the current SDK version
    print("  (Metadata update API coming soon)")

# Cleanup
print("\n" + "=" * 60)
print("Cleaning up...")
print("=" * 60)

model.delete_file_search_store(technical_store.name)
print(f"[OK] Deleted {technical_store.name}")

model.delete_file_search_store(marketing_store.name)
print(f"[OK] Deleted {marketing_store.name}")

print("\n[OK] Example completed successfully!")

# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------

if __name__ == "__main__":
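    # The example above executes at import time, so nothing further is needed here.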
    pass

Run the Example

# Clone the repo and change into the example directory
git clone https://github.com/agno-agi/agno.git
cd agno/cookbook/90_models/google/gemini

# Create and activate virtual environment
./scripts/demo_setup.sh
source .venvs/demo/bin/activate
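
# Set your Gemini API key (this example assumes the GOOGLE_API_KEY environment variable)
export GOOGLE_API_KEY=***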

python file_search_advanced.py