diff --git a/crewai_tools/__init__.py b/crewai_tools/__init__.py index 36624f35..cb4305c8 100644 --- a/crewai_tools/__init__.py +++ b/crewai_tools/__init__.py @@ -67,6 +67,7 @@ SpiderTool, StagehandTool, TXTSearchTool, + VertexCambAITool, VisionTool, WeaviateVectorSearchTool, WebsiteSearchTool, diff --git a/crewai_tools/tools/__init__.py b/crewai_tools/tools/__init__.py index 957d2f1e..5af3b869 100644 --- a/crewai_tools/tools/__init__.py +++ b/crewai_tools/tools/__init__.py @@ -81,6 +81,7 @@ from .spider_tool.spider_tool import SpiderTool from .stagehand_tool.stagehand_tool import StagehandTool from .txt_search_tool.txt_search_tool import TXTSearchTool +from .vertex_cambai_tool.vertex_cambai_tool import VertexCambAITool from .vision_tool.vision_tool import VisionTool from .weaviate_tool.vector_search import WeaviateVectorSearchTool from .website_search.website_search_tool import WebsiteSearchTool diff --git a/crewai_tools/tools/vertex_cambai_tool/README.md b/crewai_tools/tools/vertex_cambai_tool/README.md new file mode 100644 index 00000000..aa6afc75 --- /dev/null +++ b/crewai_tools/tools/vertex_cambai_tool/README.md @@ -0,0 +1,243 @@ +# Vertex CambAI Tool + +The Vertex CambAI Tool provides voice cloning capabilities using CambAI's MARS7 model deployed on Google Cloud Vertex AI. This tool requires reference audio and transcription for all voice synthesis. 
+ +## Features + +- **Voice Cloning with MARS7**: High-quality voice cloning using CambAI's advanced model +- **Multilingual Support**: English (en-us) and Spanish (es-es) +- **FLAC Audio Output**: Professional-grade audio format +- **Google Cloud Integration**: Deployed on Vertex AI infrastructure + +## Installation + +Install the required dependencies: + +```bash +pip install google-cloud-aiplatform soundfile +``` + +Or install with optional dependencies: + +```bash +pip install crewai-tools[vertex-cambai] +``` + +## Setup + +### Required Environment Variables + +Set these environment variables before using the tool: + +```bash +export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account-key.json" +export PROJECT_ID="your-google-cloud-project-id" +export LOCATION="us-central1" +export ENDPOINT_ID="your-vertex-ai-endpoint-id" +``` + +### Google Cloud Setup + +1. Create a Google Cloud service account with Vertex AI access +2. Download the service account key file +3. Deploy the MARS7 model on Vertex AI (contact CambAI for access) +4. 
Get your endpoint ID from the Vertex AI console + +## Usage + +### Basic Voice Cloning + +```python +from crewai_tools import VertexCambAITool + +# Initialize tool (requires environment variables) +tts_tool = VertexCambAITool() + +# Synthesize speech with voice cloning +result = tts_tool.run( + text="Hello, this will sound like my voice!", + audio_ref_path="./my_voice_reference.wav", + ref_text="This is the transcription of my reference audio", + language="en-us", + output_file="cloned_speech.flac" +) + +print(result) +``` + +### Multilingual Voice Cloning + +```python +from crewai_tools import VertexCambAITool + +tts_tool = VertexCambAITool() + +# English voice cloning +english_result = tts_tool.run( + text="Hello, how are you today?", + audio_ref_path="./english_ref.wav", + ref_text="Hello, this is my English voice", + language="en-us", + output_file="english_output.flac" +) + +# Spanish voice cloning +spanish_result = tts_tool.run( + text="Hola, ¿cómo estás hoy?", + audio_ref_path="./spanish_ref.wav", + ref_text="Hola, esta es mi voz en español", + language="es-es", + output_file="spanish_output.flac" +) +``` + +## Parameters + +### Required Parameters + +| Parameter | Type | Description | +|-----------|------|-------------| +| `text` | str | Text to synthesize | +| `audio_ref_path` | str | Path to reference audio file | +| `ref_text` | str | Transcription of reference audio | + +### Optional Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `language` | str | `"en-us"` | Language code (en-us, es-es) | +| `output_file` | str | `"output.flac"` | Output audio file path | + +## Response Format + +```json +{ + "status": "success", + "message": "Audio saved to output.flac", + "audio_file_path": "/absolute/path/to/output.flac", + "text": "Synthesized text", + "language": "en-us" +} +``` + +## Error Handling + +```json +{ + "status": "error", + "message": "Error description", + "audio_file_path": null +} +``` + 
+## Supported Audio Formats + +- **Input**: WAV, FLAC, MP3, M4A, OGG, WEBM +- **Output**: FLAC (default) + +## Testing + +Run the test script to verify your setup: + +```bash +python test_vertex_cambai_example.py +``` + +Make sure all environment variables are set before testing. + +## Integration with CrewAI + +```python +from crewai import Agent, Task, Crew +from crewai_tools import VertexCambAITool + +# Initialize tool +voice_tool = VertexCambAITool() + +# Create agent +voice_agent = Agent( + role="Voice Cloning Specialist", + goal="Convert text to cloned speech", + backstory="Expert in voice cloning with MARS7", + tools=[voice_tool] +) + +# Create task +task = Task( + description="Clone voice saying: 'Welcome to CrewAI voice cloning!'", + expected_output="Path to the generated FLAC file containing the cloned speech", + agent=voice_agent +) + +# Run +crew = Crew(agents=[voice_agent], tasks=[task]) +result = crew.kickoff() +``` + +## Troubleshooting + +### Common Issues + +**Missing Environment Variables** +```bash +ValueError: Missing required environment variables: PROJECT_ID, ENDPOINT_ID +``` +Solution: Set all required environment variables + +**Import Error** +```bash +ImportError: Please install google-cloud-aiplatform +``` +Solution: `pip install google-cloud-aiplatform soundfile` + +**File Not Found** +```bash +FileNotFoundError: [Errno 2] No such file or directory: './reference.wav' +``` +Solution: Provide correct path to reference audio file + +**Authentication Error** +```bash +DefaultCredentialsError: Could not automatically determine credentials +``` +Solution: Check GOOGLE_APPLICATION_CREDENTIALS path + +## Example Script + +```python +#!/usr/bin/env python3 +import os +import json +from crewai_tools import VertexCambAITool + +# Set environment variables +os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/key.json" +os.environ["PROJECT_ID"] = "your-project" +os.environ["LOCATION"] = "us-central1" +os.environ["ENDPOINT_ID"] = "your-endpoint" + +# Initialize and use tool +tool = VertexCambAITool() +result = tool.run( + 
class VertexCambAISchema(BaseModel):
    """Input schema for Vertex CambAI Tool.

    Three fields are mandatory (``text``, ``audio_ref_path``, ``ref_text``);
    ``language`` and ``output_file`` have defaults.
    """

    # Text to synthesize.
    text: str
    # Path to the reference audio file; it is read as bytes and sent
    # base64-encoded to the endpoint.
    audio_ref_path: str
    # Transcription of the reference audio, forwarded as ``ref_text``.
    ref_text: str
    # Language code forwarded to the endpoint unchanged.
    language: str = "en-us"
    # Path the decoded audio returned by the endpoint is written to.
    output_file: str = "output.flac"


class VertexCambAITool(BaseTool):
    """Tool for text-to-speech synthesis using Google Cloud Vertex AI with CambAI's MARS7 model.

    This tool provides voice cloning capabilities using the MARS7 model.
    Requires Google Cloud credentials and Vertex AI endpoint access.

    Environment Variables Required:
        GOOGLE_APPLICATION_CREDENTIALS: Path to service account key file
        PROJECT_ID: Google Cloud project ID
        LOCATION: Google Cloud location (e.g., us-central1)
        ENDPOINT_ID: Vertex AI endpoint ID for MARS7 model
    """

    name: str = "Vertex CambAI Tool"
    description: str = (
        "Converts text to speech using CambAI's MARS7 model on Google Cloud Vertex AI. "
        "Requires reference audio and transcription for voice cloning."
    )
    args_schema: Type[BaseModel] = VertexCambAISchema
    package_dependencies: list = ["google-cloud-aiplatform", "soundfile"]

    def __init__(self, **kwargs):
        """Initialize the Vertex CambAI Tool.

        Raises:
            ValueError: If any required environment variable is unset or empty.
            ImportError: If ``google-cloud-aiplatform`` is not installed.
        """
        super().__init__(**kwargs)

        # Fail fast with the complete list of missing variables rather than
        # one at a time.
        required_vars = (
            "GOOGLE_APPLICATION_CREDENTIALS",
            "PROJECT_ID",
            "LOCATION",
            "ENDPOINT_ID",
        )
        missing_vars = [var for var in required_vars if not os.getenv(var)]
        if missing_vars:
            raise ValueError(
                f"Missing required environment variables: {', '.join(missing_vars)}"
            )

        # The import itself is what can raise ImportError, so it has to live
        # inside the guarded block; ``aiplatform.init`` is kept outside it.
        # NOTE(review): this module also imports ``aiplatform`` at top level;
        # that import must be made lazy too for this guard to be reachable
        # when the optional dependency is absent.
        try:
            from google.cloud import aiplatform
        except ImportError as exc:
            raise ImportError(
                "Please install google-cloud-aiplatform: pip install google-cloud-aiplatform"
            ) from exc

        aiplatform.init(
            project=os.getenv("PROJECT_ID"),
            location=os.getenv("LOCATION"),
        )

    def _run(self, **kwargs) -> str:
        """Execute the text-to-speech synthesis.

        Args:
            **kwargs: Fields of ``VertexCambAISchema``.

        Returns:
            A JSON string. On success it has ``status`` = ``"success"`` plus
            ``message``, ``audio_file_path`` (absolute), ``text`` and
            ``language``. On any failure it has ``status`` = ``"error"``,
            the error ``message`` and ``audio_file_path`` = ``null``; no
            exception propagates to the caller.
        """
        try:
            from google.cloud import aiplatform

            # Validate inputs
            schema = VertexCambAISchema(**kwargs)

            # Encode reference audio
            with open(schema.audio_ref_path, "rb") as ref_file:
                audio_ref_base64 = base64.b64encode(ref_file.read()).decode("utf-8")

            # Build the single prediction instance sent to the endpoint
            instance = {
                "text": schema.text,
                "audio_ref": audio_ref_base64,
                "ref_text": schema.ref_text,
                "language": schema.language,
            }

            # Make prediction
            endpoint = aiplatform.Endpoint(endpoint_name=os.getenv("ENDPOINT_ID"))
            response = endpoint.raw_predict(
                body=json.dumps({"instances": [instance]}).encode("utf-8"),
                headers={"Content-Type": "application/json"},
            )

            # Surface a readable error when the endpoint answers without
            # predictions instead of failing with a bare KeyError/IndexError.
            payload = json.loads(response.content)
            predictions = payload.get("predictions") if isinstance(payload, dict) else None
            if not predictions:
                if isinstance(payload, dict) and "error" in payload:
                    detail = payload["error"]
                else:
                    detail = "response contained no predictions"
                raise ValueError(f"Vertex AI endpoint returned no audio: {detail}")

            audio_bytes = base64.b64decode(predictions[0])

            # Create the destination directory so a nested output path does
            # not fail after the prediction has already been made.
            output_path = os.path.abspath(schema.output_file)
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            with open(output_path, "wb") as out_file:
                out_file.write(audio_bytes)

            result = {
                "status": "success",
                "message": f"Audio saved to {schema.output_file}",
                "audio_file_path": output_path,
                "text": schema.text,
                "language": schema.language,
            }
            return json.dumps(result, indent=2)

        except Exception as e:
            # Deliberate catch-all at the tool boundary: every failure is
            # reported through the JSON error envelope rather than raised.
            error_result = {
                "status": "error",
                "message": str(e),
                "audio_file_path": None,
            }
            return json.dumps(error_result, indent=2)