Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crewai_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
BrightDataSearchTool,
BrightDataWebUnlockerTool,
BrowserbaseLoadTool,
ChromaSearchTool,
CodeDocsSearchTool,
CodeInterpreterTool,
ComposioTool,
Expand Down
1 change: 1 addition & 0 deletions crewai_tools/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
BrightDataWebUnlockerTool,
)
from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
from .chroma_tool.chroma_search_tool import ChromaSearchTool
from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool
from .composio_tool.composio_tool import ComposioTool
Expand Down
61 changes: 61 additions & 0 deletions crewai_tools/tools/chroma_tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# ChromaSearchTool

## Description

Use this tool to do semantic search in a Chroma collection.

Chroma is the search and retrieval database built for AI. Chroma can search
documents using semantic search, full-text search, regex search, and metadata
tags. You can follow their docs here: <https://docs.trychroma.com/docs/overview/introduction>

## Installation

Install the crewai_tools package by executing the following command in your terminal:

```shell
uv pip install 'crewai[tools]' chromadb
```

## Basic Usage

The `ChromaSearchTool` takes a Chroma collection directly. See the Chroma
docs for how to [configure your collection](https://docs.trychroma.com/docs/collections/configure)
or [choose which embedding model is used](https://docs.trychroma.com/docs/embeddings/embedding-functions).

```python
import chromadb
from crewai import Agent
from crewai_tools import ChromaSearchTool

# 1. Create your Chroma client
client = chromadb.PersistentClient(path="./chroma_db")
# or client = chromadb.HttpClient(...)
# or client = chromadb.CloudClient(...)

# 2. Get or create your collection
collection = client.get_or_create_collection(name="my_documents")

# 3. Create the tool using the collection
tool = ChromaSearchTool(
collection=collection,
limit=5 # optional, default is 3
)

# 4. Add the tool to an agent
rag_agent = Agent(
name="rag_agent",
role="You are a helpful assistant that can answer questions with the help of the Chroma tool.",
llm="gpt-4o-mini",
tools=[tool],
)
```

## Arguments

- `collection` : A Chroma collection object to search in. (Required)
- `limit` : The number of results to return. (Optional, default: 3)

## Query Parameters

- `query` : The search query text. (Required)
- `where` : Metadata filter to apply to the search, passed as a dictionary. (Optional)
- `where_document` : Document content filter to apply to the search, passed as a dictionary. (Optional)
3 changes: 3 additions & 0 deletions crewai_tools/tools/chroma_tool/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .chroma_search_tool import ChromaSearchTool

__all__ = ["ChromaSearchTool"]
107 changes: 107 additions & 0 deletions crewai_tools/tools/chroma_tool/chroma_search_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import json
from typing import Any, Optional, Type, List, Dict

try:
import chromadb

CHROMA_AVAILABLE = True
except ImportError:
CHROMA_AVAILABLE = False
chromadb = Any # type placeholder

from crewai.tools import BaseTool
from pydantic import BaseModel, Field


class ChromaToolSchema(BaseModel):
    """Input schema for ChromaSearchTool.

    Fields:
        query: Text used to search the Chroma collection.
        where: Optional metadata filter, given as a dictionary.
        where_document: Optional document-content filter, given as a
            dictionary.
    """

    # The description strings below are surfaced to the model that calls the
    # tool, so they are written as instructions rather than as developer notes.
    query: str = Field(
        ...,
        description="The query used to search and retrieve relevant information from the Chroma database. Pass only the query, not the question.",
    )
    where: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Optional metadata filter to apply to the search. Pass as a dictionary.",
    )
    where_document: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Optional document content filter to apply to the search. Pass as a dictionary with one key $contains, $regex, $not_contains, $not_regex, $and, $or.",
    )


class ChromaSearchTool(BaseTool):
    """Tool that runs a text query against a caller-supplied Chroma collection.

    Attributes:
        collection: The collection object to search. It only needs to expose
            a ``query(**kwargs)`` method returning a mapping with
            ``documents``, ``metadatas`` and ``ids`` entries.
        limit: Number of results requested per query (default 3). When set to
            ``None`` no ``n_results`` argument is sent and the collection's
            own default applies.
    """

    package_dependencies: List[str] = ["chromadb"]
    name: str = "ChromaSearchTool"
    description: str = (
        "A tool to search a Chroma collection for relevant information on internal documents."
    )
    args_schema: Type[BaseModel] = ChromaToolSchema
    collection: Any = Field(
        ...,
        description="Chroma collection to search in.",
    )
    limit: Optional[int] = Field(default=3)

    def __init__(self, **kwargs):
        """Validate the fields and make sure the ``chromadb`` package is present.

        If ``chromadb`` could not be imported, the user is asked interactively
        whether it should be installed with ``uv add chromadb``.

        Raises:
            ImportError: If ``chromadb`` is missing and the user declines the
                installation.
            subprocess.CalledProcessError: If the installation command fails.
        """
        global CHROMA_AVAILABLE

        super().__init__(**kwargs)
        if CHROMA_AVAILABLE:
            return

        import click

        if not click.confirm(
            "You are missing the 'chromadb' package. Would you like to install it?"
        ):
            raise ImportError(
                "You are missing the 'chromadb' package. Please install it with: uv add chromadb"
            )

        import subprocess

        subprocess.run(["uv", "add", "chromadb"], check=True)
        # Record the successful install; otherwise _run would keep raising
        # ImportError for the rest of the process even though the package is
        # now present.
        CHROMA_AVAILABLE = True

    def _run(
        self,
        query: str,
        where: Optional[Dict[str, Any]] = None,
        where_document: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Query the collection and return the hits as a JSON string.

        Args:
            query: Text to search for.
            where: Optional metadata filter forwarded to the collection.
            where_document: Optional document-content filter forwarded to the
                collection.

        Returns:
            A JSON array (indented by 2) of objects with the keys
            ``document``, ``metadata`` and ``id``; ``[]`` when nothing matched.

        Raises:
            ImportError: If the ``chromadb`` package is not available.
            ValueError: If no collection is configured.
        """
        if not CHROMA_AVAILABLE:
            raise ImportError(
                "You are missing the 'chromadb' package. Please install it with: uv add chromadb"
            )

        # Compare against None explicitly so a valid collection object that
        # happens to evaluate as falsy is not rejected.
        if self.collection is None:
            raise ValueError("Collection is required.")

        search_kwargs: Dict[str, Any] = {"query_texts": [query]}
        # limit is Optional; never forward n_results=None to the collection.
        if self.limit is not None:
            search_kwargs["n_results"] = self.limit
        # Empty or missing filters are omitted rather than sent as empty dicts.
        if where:
            search_kwargs["where"] = where
        if where_document:
            search_kwargs["where_document"] = where_document

        results = self.collection.query(**search_kwargs)
        return json.dumps(self._format_results(results), indent=2)

    @staticmethod
    def _format_results(results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Flatten the first query's hits into a list of plain dictionaries."""
        # Each entry of the result holds one inner list per query text; only a
        # single query is ever sent, so only index 0 is read. Missing or empty
        # entries collapse to an empty list.
        documents = (results.get("documents") or [[]])[0] or []
        metadatas = (results.get("metadatas") or [[]])[0] or []
        ids = (results.get("ids") or [[]])[0] or []

        formatted: List[Dict[str, Any]] = []
        for index, document in enumerate(documents):
            metadata = metadatas[index] if index < len(metadatas) else None
            formatted.append(
                {
                    "document": document,
                    "metadata": metadata if metadata is not None else {},
                    "id": ids[index] if index < len(ids) else None,
                }
            )
        return formatted
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ dependencies = [
"pydantic>=2.6.1",
"lancedb>=0.5.4",
"openai>=1.12.0",
"chromadb==0.5.23",
"chromadb>=0.5.23",
"pyright>=1.1.350",
"pytube>=15.0.0",
"requests>=2.31.0",
Expand Down
119 changes: 119 additions & 0 deletions tests/tools/chroma_search_tool_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import json
from unittest.mock import Mock, patch

import pytest

from crewai_tools.tools.chroma_tool.chroma_search_tool import ChromaSearchTool


@pytest.fixture
def mock_collection():
    """Return a mocked collection whose ``query`` yields two canned hits."""
    canned_response = {
        "documents": [["Test document 1", "Test document 2"]],
        "metadatas": [[{"source": "test1.txt"}, {"source": "test2.txt"}]],
        "ids": [["doc1", "doc2"]],
    }
    fake_collection = Mock()
    fake_collection.query.return_value = canned_response
    return fake_collection


@pytest.fixture
def chroma_tool(mock_collection):
    """Return a ChromaSearchTool bound to the mocked collection, default limit."""
    tool = ChromaSearchTool(collection=mock_collection)
    return tool


def test_tool_initialization(mock_collection):
    """Constructor arguments are stored on the tool and the name is fixed."""
    configured_tool = ChromaSearchTool(collection=mock_collection, limit=5)

    assert configured_tool.collection == mock_collection
    assert configured_tool.limit == 5
    assert configured_tool.name == "ChromaSearchTool"


def test_missing_collection():
    """Constructing the tool without a collection must raise."""
    # Any exception type is accepted here; the test only checks that
    # construction does not succeed silently.
    with pytest.raises(Exception):
        ChromaSearchTool()


def test_successful_search(chroma_tool):
    """A plain query returns every canned hit as JSON, in order."""
    hits = json.loads(chroma_tool._run(query="test query"))

    assert len(hits) == 2
    first_hit = hits[0]
    assert first_hit["document"] == "Test document 1"
    assert first_hit["metadata"]["source"] == "test1.txt"
    assert first_hit["id"] == "doc1"


def test_search_with_filter(chroma_tool):
    """A metadata filter is forwarded to the collection as ``where``."""
    metadata_filter = {"source": "filtered.txt"}
    chroma_tool.collection.query.return_value = {
        "documents": [["Filtered document"]],
        "metadatas": [[{"source": "filtered.txt"}]],
        "ids": [["filtered_doc"]],
    }

    hits = json.loads(chroma_tool._run(query="test query", where=metadata_filter))

    assert len(hits) == 1
    assert hits[0]["metadata"]["source"] == "filtered.txt"

    expected_call = {
        "query_texts": ["test query"],
        "n_results": 3,
        "where": {"source": "filtered.txt"},
    }
    chroma_tool.collection.query.assert_called_with(**expected_call)


def test_search_with_document_filter(chroma_tool):
    """A content filter is forwarded to the collection as ``where_document``."""
    content_filter = {"$contains": "specific text"}
    chroma_tool.collection.query.return_value = {
        "documents": [["Document containing specific text"]],
        "metadatas": [[{"source": "doc.txt"}]],
        "ids": [["doc_with_text"]],
    }

    raw_output = chroma_tool._run(query="test query", where_document=content_filter)
    hits = json.loads(raw_output)

    assert len(hits) == 1
    assert "specific text" in hits[0]["document"]

    expected_call = {
        "query_texts": ["test query"],
        "n_results": 3,
        "where_document": {"$contains": "specific text"},
    }
    chroma_tool.collection.query.assert_called_with(**expected_call)


def test_search_with_both_filters(chroma_tool):
    """Metadata and content filters are forwarded together."""
    metadata_filter = {"category": "important"}
    content_filter = {"$contains": "specific content"}
    chroma_tool.collection.query.return_value = {
        "documents": [["Filtered document with specific content"]],
        "metadatas": [[{"source": "special.txt", "category": "important"}]],
        "ids": [["special_doc"]],
    }

    raw_output = chroma_tool._run(
        query="test query",
        where=metadata_filter,
        where_document=content_filter,
    )
    hits = json.loads(raw_output)

    assert len(hits) == 1
    only_hit = hits[0]
    assert only_hit["metadata"]["category"] == "important"
    assert "specific content" in only_hit["document"]

    expected_call = {
        "query_texts": ["test query"],
        "n_results": 3,
        "where": {"category": "important"},
        "where_document": {"$contains": "specific content"},
    }
    chroma_tool.collection.query.assert_called_with(**expected_call)
Loading
Loading