crewAIInc · kylediaz · Aug 19, 2025 · Aug 19, 2025 · Aug 19, 2025 · Aug 26, 2025
diff --git a/crewai_tools/__init__.py b/crewai_tools/__init__.py
@@ -16,6 +16,7 @@
     BrightDataSearchTool,
     BrightDataWebUnlockerTool,
     BrowserbaseLoadTool,
+    ChromaSearchTool,
     CodeDocsSearchTool,
     CodeInterpreterTool,
     ComposioTool,

diff --git a/crewai_tools/tools/__init__.py b/crewai_tools/tools/__init__.py
@@ -8,6 +8,7 @@
     BrightDataWebUnlockerTool,
 )
 from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
+from .chroma_tool.chroma_search_tool import ChromaSearchTool
 from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
 from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool
 from .composio_tool.composio_tool import ComposioTool

diff --git a/crewai_tools/tools/chroma_tool/README.md b/crewai_tools/tools/chroma_tool/README.md
@@ -0,0 +1,61 @@
+# ChromaSearchTool
+
+## Description
+
+Use this tool to do semantic search in a Chroma collection.
+
+Chroma is the search and retrieval database built for AI. Chroma can search
+documents using semantic search, full-text search, regex search, and metadata
+tags. You can follow their docs here: <https://docs.trychroma.com/docs/overview/introduction>
+
+## Installation
+
+Install `crewai` with its tools extra, together with `chromadb`, by executing the following command in your terminal:
+
+```shell
+uv pip install 'crewai[tools]' chromadb
+```
+
+## Basic Usage
+
+The `ChromaSearchTool` takes a Chroma collection directly. See the Chroma
+docs on how you can [configure your collection](https://docs.trychroma.com/docs/collections/configure)
+or [choose which embedding model is used](https://docs.trychroma.com/docs/embeddings/embedding-functions).
+
+```python
+import chromadb
+from crewai_tools import ChromaSearchTool
+
+# 1. Create your Chroma client
+client = chromadb.PersistentClient(path="./chroma_db")
+# or client = chromadb.HttpClient(...)
+# or client = chromadb.CloudClient(...)
+
+# 2. Get or create your collection
+collection = client.get_or_create_collection(name="my_documents")
+
+# 3. Create the tool using the collection
+tool = ChromaSearchTool(
+    collection=collection,
+    limit=5  # optional, default is 3
+)
+
+# 4. Add the tool to an agent
+rag_agent = Agent(
+    name="rag_agent",
+    role="You are a helpful assistant that can answer questions with the help of the Chroma tool.",
+    llm="gpt-4o-mini",
+    tools=[tool],
+)
+```
+
+## Arguments
+
+- `collection` : A Chroma collection object to search in. (Required)
+- `limit` : The number of results to return. (Optional, default: 3)
+
+## Query Parameters
+
+- `query` : The search query text. (Required)
+- `where` : Metadata filter to apply to the search, passed as a dictionary. (Optional)
+- `where_document` : Document content filter to apply to the search, passed as a dictionary. (Optional)
diff --git a/crewai_tools/tools/chroma_tool/__init__.py b/crewai_tools/tools/chroma_tool/__init__.py
@@ -0,0 +1,3 @@
+from .chroma_search_tool import ChromaSearchTool
+
+__all__ = ["ChromaSearchTool"]
diff --git a/crewai_tools/tools/chroma_tool/chroma_search_tool.py b/crewai_tools/tools/chroma_tool/chroma_search_tool.py
@@ -0,0 +1,107 @@
+import json
+from typing import Any, Optional, Type, List, Dict
+
+try:
+    import chromadb
+
+    CHROMA_AVAILABLE = True
+except ImportError:
+    CHROMA_AVAILABLE = False
+    chromadb = Any  # type placeholder
+
+from crewai.tools import BaseTool
+from pydantic import BaseModel, Field
+
+
+class ChromaToolSchema(BaseModel):
+    """Input for ChromaTool."""
+
+    # Argument schema used by ChromaSearchTool (assigned to its `args_schema`
+    # field). The three fields mirror the parameters of ChromaSearchTool._run.
+
+    # Required search text; _run sends it to the collection as the single
+    # entry of `query_texts`.
+    query: str = Field(
+        ...,
+        description="The query to search retrieve relevant information from the Chroma database. Pass only the query, not the question.",
+    )
+    # Optional metadata filter; _run forwards it as the `where` keyword only
+    # when it is truthy (None and {} are both skipped).
+    where: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="Optional metadata filter to apply to the search. Pass as a dictionary.",
+    )
+    # Optional document-content filter; _run forwards it as the
+    # `where_document` keyword only when it is truthy.
+    where_document: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="Optional document content filter to apply to the search. Pass as a dictionary with one key $contains, $regex, $not_contains, $not_regex, $and, $or.",
+    )
+
+
+class ChromaSearchTool(BaseTool):
+    """Tool to search a Chroma collection.
+
+    The tool wraps an already-created collection object and runs a text query
+    against it, optionally narrowed by a metadata filter and a document
+    content filter. Matches are returned as a JSON string.
+
+    Fields:
+        collection: Collection object to search; only its ``query`` method is
+            called.
+        limit: Number of results requested per query (default 3). When set to
+            ``None`` the ``n_results`` argument is omitted so the collection's
+            own default applies.
+    """
+
+    package_dependencies: List[str] = ["chromadb"]
+    name: str = "ChromaSearchTool"
+    description: str = (
+        "A tool to search a Chroma collection for relevant information on internal documents."
+    )
+    args_schema: Type[BaseModel] = ChromaToolSchema
+    collection: Any = Field(
+        ...,
+        description="Chroma collection to search in.",
+    )
+    limit: Optional[int] = Field(default=3)
+
+    def __init__(self, **kwargs: Any) -> None:
+        """Validate the fields and make sure ``chromadb`` is importable.
+
+        If the package is missing, the user is asked interactively whether it
+        should be installed with ``uv add chromadb``.
+
+        Raises:
+            ImportError: If the user declines the installation, or the package
+                still cannot be imported after installing it.
+            subprocess.CalledProcessError: If the ``uv add`` command fails.
+        """
+        global CHROMA_AVAILABLE, chromadb
+
+        super().__init__(**kwargs)
+        if CHROMA_AVAILABLE:
+            return
+
+        import click
+
+        if not click.confirm(
+            "You are missing the 'chromadb' package. Would you like to install it?"
+        ):
+            raise ImportError(
+                "You are missing the 'chromadb' package. Please install it with: uv add chromadb"
+            )
+
+        import importlib
+        import subprocess
+
+        subprocess.run(["uv", "add", "chromadb"], check=True)
+
+        # Without refreshing the module-level flag, _run would keep raising
+        # ImportError even though the installation just succeeded.
+        importlib.invalidate_caches()
+        chromadb = importlib.import_module("chromadb")
+        CHROMA_AVAILABLE = True
+
+    def _run(
+        self,
+        query: str,
+        where: Optional[Dict[str, Any]] = None,
+        where_document: Optional[Dict[str, Any]] = None,
+    ) -> str:
+        """Query the collection and return the matches as JSON.
+
+        Args:
+            query: Text to search for; sent as the only entry of ``query_texts``.
+            where: Metadata filter; forwarded only when non-empty.
+            where_document: Document content filter; forwarded only when
+                non-empty.
+
+        Returns:
+            A JSON array (indented by 2) with one object per match, each
+            holding ``document``, ``metadata`` (``{}`` when absent) and ``id``
+            (``null`` when absent).
+
+        Raises:
+            ImportError: If ``chromadb`` is not available.
+            ValueError: If no collection is set.
+        """
+        if not CHROMA_AVAILABLE:
+            raise ImportError(
+                "You are missing the 'chromadb' package. Please install it with: uv add chromadb"
+            )
+
+        # Compare against None explicitly: a valid collection object must not
+        # be rejected merely because it evaluates as falsy.
+        if self.collection is None:
+            raise ValueError("Collection is required.")
+
+        search_kwargs: Dict[str, Any] = {"query_texts": [query]}
+        if self.limit is not None:
+            search_kwargs["n_results"] = self.limit
+        if where:
+            search_kwargs["where"] = where
+        if where_document:
+            search_kwargs["where_document"] = where_document
+
+        results = self.collection.query(**search_kwargs)
+
+        def first_batch(key: str) -> List[Any]:
+            # Results are nested one list per query text; a single text is
+            # sent, so only the first inner list matters. Missing or empty
+            # entries collapse to an empty list.
+            return (results.get(key) or [[]])[0] or []
+
+        documents = first_batch("documents")
+        metadatas = first_batch("metadatas")
+        ids = first_batch("ids")
+
+        # Format results for output
+        formatted_results = []
+        for i, doc in enumerate(documents):
+            metadata = metadatas[i] if i < len(metadatas) else None
+            formatted_results.append(
+                {
+                    "document": doc,
+                    "metadata": metadata if metadata is not None else {},
+                    "id": ids[i] if i < len(ids) else None,
+                }
+            )
+
+        return json.dumps(formatted_results, indent=2)
diff --git a/pyproject.toml b/pyproject.toml
@@ -11,7 +11,7 @@ dependencies = [
     "pydantic>=2.6.1",
     "lancedb>=0.5.4",
     "openai>=1.12.0",
-    "chromadb==0.5.23",
+    "chromadb>=0.5.23",
     "pyright>=1.1.350",
     "pytube>=15.0.0",
     "requests>=2.31.0",

diff --git a/tests/tools/chroma_search_tool_test.py b/tests/tools/chroma_search_tool_test.py
@@ -0,0 +1,119 @@
+import json
+from unittest.mock import Mock, patch
+
+import pytest
+
+from crewai_tools.tools.chroma_tool.chroma_search_tool import ChromaSearchTool
+
+
+@pytest.fixture
+def mock_collection():
+    """Provide a mock collection whose ``query`` returns two canned matches."""
+    canned_response = {
+        "documents": [["Test document 1", "Test document 2"]],
+        "metadatas": [[{"source": "test1.txt"}, {"source": "test2.txt"}]],
+        "ids": [["doc1", "doc2"]],
+    }
+    return Mock(**{"query.return_value": canned_response})
+
+
+@pytest.fixture
+def chroma_tool(mock_collection):
+    """Provide a ChromaSearchTool bound to the mock collection (default limit)."""
+    tool = ChromaSearchTool(collection=mock_collection)
+    return tool
+
+
+def test_tool_initialization(mock_collection):
+    """Constructor arguments and the fixed tool name are exposed as attributes."""
+    configured = ChromaSearchTool(limit=5, collection=mock_collection)
+
+    observed = (configured.collection, configured.limit, configured.name)
+    assert observed == (mock_collection, 5, "ChromaSearchTool")
+
+
+def test_missing_collection():
+    """Test initialization fails without collection"""
+    # The required `collection` field is missing, so model validation fails.
+    # pydantic v2's ValidationError subclasses ValueError; expecting that
+    # instead of a bare Exception keeps unrelated errors from passing the test.
+    with pytest.raises(ValueError):
+        ChromaSearchTool()
+
+
+def test_successful_search(chroma_tool):
+    """A plain query returns every canned match with document, metadata and id."""
+    hits = json.loads(chroma_tool._run(query="test query"))
+
+    assert len(hits) == 2
+
+    top_hit = hits[0]
+    assert top_hit["document"] == "Test document 1"
+    assert top_hit["metadata"]["source"] == "test1.txt"
+    assert top_hit["id"] == "doc1"
+
+
+def test_search_with_filter(chroma_tool):
+    """A metadata filter is forwarded to the collection as ``where``."""
+    metadata_filter = {"source": "filtered.txt"}
+    query_mock = chroma_tool.collection.query
+    query_mock.return_value = {
+        "documents": [["Filtered document"]],
+        "metadatas": [[{"source": "filtered.txt"}]],
+        "ids": [["filtered_doc"]],
+    }
+
+    hits = json.loads(chroma_tool._run(query="test query", where=metadata_filter))
+
+    assert len(hits) == 1
+    assert hits[0]["metadata"]["source"] == "filtered.txt"
+
+    expected_call = {
+        "query_texts": ["test query"],
+        "n_results": 3,
+        "where": {"source": "filtered.txt"},
+    }
+    query_mock.assert_called_with(**expected_call)
+
+
+def test_search_with_document_filter(chroma_tool):
+    """A document content filter is forwarded as ``where_document``."""
+    content_filter = {"$contains": "specific text"}
+    query_mock = chroma_tool.collection.query
+    query_mock.return_value = {
+        "documents": [["Document containing specific text"]],
+        "metadatas": [[{"source": "doc.txt"}]],
+        "ids": [["doc_with_text"]],
+    }
+
+    raw_output = chroma_tool._run(query="test query", where_document=content_filter)
+    hits = json.loads(raw_output)
+
+    assert len(hits) == 1
+    assert "specific text" in hits[0]["document"]
+
+    expected_call = {
+        "query_texts": ["test query"],
+        "n_results": 3,
+        "where_document": {"$contains": "specific text"},
+    }
+    query_mock.assert_called_with(**expected_call)
+
+
+def test_search_with_both_filters(chroma_tool):
+    """Metadata and document filters are both forwarded in the same call."""
+    metadata_filter = {"category": "important"}
+    content_filter = {"$contains": "specific content"}
+    query_mock = chroma_tool.collection.query
+    query_mock.return_value = {
+        "documents": [["Filtered document with specific content"]],
+        "metadatas": [[{"source": "special.txt", "category": "important"}]],
+        "ids": [["special_doc"]],
+    }
+
+    raw_output = chroma_tool._run(
+        query="test query",
+        where=metadata_filter,
+        where_document=content_filter,
+    )
+    hits = json.loads(raw_output)
+
+    assert len(hits) == 1
+    only_hit = hits[0]
+    assert only_hit["metadata"]["category"] == "important"
+    assert "specific content" in only_hit["document"]
+
+    expected_call = {
+        "query_texts": ["test query"],
+        "n_results": 3,
+        "where": {"category": "important"},
+        "where_document": {"$contains": "specific content"},
+    }
+    query_mock.assert_called_with(**expected_call)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .chroma_search_tool import ChromaSearchTool

		__all__ = ["ChromaSearchTool"]