Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,28 @@ Integrated with a Python interpreter, CodeAct can execute code actions and dynam

**Feb 2, 2024**: CodeAct is released!

## Supported LLM Providers

CodeAct supports multiple LLM providers for agent evaluation:

| Provider | Agent Class | Models |
|----------|------------|--------|
| OpenAI | `OpenAILMAgent` | GPT-3.5-Turbo, GPT-4, etc. |
| Anthropic | `ClaudeLMAgent` | Claude 2, Claude Instant |
| Google | `BardLMAgent` | PaLM |
| [MiniMax](https://platform.minimaxi.com/) | `MiniMaxLMAgent` | MiniMax-M2.7, MiniMax-M2.7-highspeed |
| vLLM (self-hosted) | `VLLMAgent` | Any OpenAI-compatible model |

### Using MiniMax

To use [MiniMax](https://platform.minimaxi.com/) models, set your API key:

```bash
export MINIMAX_API_KEY="your-api-key"
```

MiniMax models are configured in `EVALUATED_MODEL_LIST` with `agent_class: "MiniMaxLMAgent"`. The agent uses MiniMax's OpenAI-compatible API endpoint (`https://api.minimax.io/v1`) with automatic temperature clamping for API compatibility.

## Why CodeAct?

Our extensive analysis of 17 LLMs on API-Bank and a newly curated benchmark [M<sup>3</sup>ToolEval](docs/EVALUATION.md) shows that CodeAct outperforms widely used alternatives like Text and JSON (up to 20% higher success rate). Please check our paper for more detailed analysis!
Expand Down
2 changes: 2 additions & 0 deletions mint/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@
from .vllm_feedback_agent import VLLMFeedbackAgent
from .vllm_agent import VLLMAgent
from .claude_agent import ClaudeLMAgent
from .minimax_lm_agent import MiniMaxLMAgent
from .minimax_feedback_agent import MiniMaxFeedbackAgent
93 changes: 93 additions & 0 deletions mint/agents/minimax_feedback_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import re
import logging

from .minimax_lm_agent import MiniMaxLMAgent

from mint.datatypes import State, Action
from mint.prompt import FeedbackPromptTemplate
import openai
import traceback

LOGGER = logging.getLogger("MINT")


class MiniMaxFeedbackAgent(MiniMaxLMAgent):
    """Feedback agent using MiniMax's OpenAI-compatible API.

    Produces natural-language ("textual") or GOOD/BAD ("binary") feedback
    on an assistant trajectory, reusing the MiniMax chat endpoint from
    :class:`MiniMaxLMAgent`.
    """

    def __init__(self, config):
        super().__init__(config)
        # Stop sequences that end a feedback turn so the model does not
        # continue the transcript on its own.
        self.stop_words = ["\nObservation:", "\nTask:", "\nAssistant:"]
        self.feedback_prompt = FeedbackPromptTemplate()

    def lm_output_to_action(self, lm_output, form) -> Action:
        """Convert raw LM output into a feedback Action.

        Args:
            lm_output: Raw text returned by the LM.
            form: "textual" for free-form feedback, or "binary" for a
                GOOD/BAD verdict read from the first sentence.

        Raises:
            ValueError: If ``form`` is not recognized, or a binary verdict
                cannot be found in ``lm_output``.
        """
        if form == "textual":
            feedback = lm_output
        elif form == "binary":
            # Inspect only the first sentence; if the output has no
            # period-terminated sentence, fall back to the whole output
            # instead of crashing on an empty findall result.
            sentences = re.findall(r"([^.]*\.)", lm_output)
            first_sent = sentences[0] if sentences else lm_output
            if "GOOD" in first_sent:
                feedback = "This is GOOD."
            elif "BAD" in first_sent:
                feedback = "This is BAD."
            else:
                raise ValueError(f"Cannot find GOOD or BAD in feedback: {lm_output}")
        else:
            # Previously an unknown form fell through and raised a
            # confusing NameError on the unbound `feedback` variable.
            raise ValueError(f"Unknown feedback form: {form}")
        return Action(feedback, use_tool=False)

    def act(
        self,
        state: State,
        observation: str,
        form: str,
        gt,
        task_in_context_example: str,
        tool_desc: str,
    ) -> Action:
        """Generate feedback on the trajectory recorded in *state*.

        Builds a feedback prompt from the task trajectory, the latest
        observation, and (optionally) the ground-truth solution, then
        queries the LM. API failures are returned as error Actions rather
        than raised, so the evaluation loop can continue.
        """
        try:
            gt_header = (
                "Correct solution (please DO NOT disclose the correct "
                "solution to the assistant):"
            )
            if gt:
                gt_solution = f"{gt_header} {str(gt).strip()}\n"
            else:
                gt_solution = f"{gt_header} NOT GIVEN\n"

            # Rebuild the visible trajectory: drop the first two "---"
            # sections of the system turn, then append the later turns
            # and the newest observation.
            trajectory = (
                "---\n".join(state.history[0]["content"].split("---\n")[2:]) + "\n"
            )
            trajectory += "\n".join(turn["content"] for turn in state.history[1:])
            trajectory += "\n" + observation
            # Get rid of the initial instruction to avoid confusion.
            trajectory = trajectory[trajectory.find("Task:") :]

            messages = [
                {
                    "role": "user",
                    "content": self.feedback_prompt(
                        in_context_example=task_in_context_example[
                            task_in_context_example.find("Task:") :
                        ],
                        trajectory=trajectory,
                        correct_solution=gt_solution,
                        tool_desc=tool_desc,
                    ),
                }
            ]

            LOGGER.debug(
                "Feedback Agent Prompt:\n"
                + "\033[93m"
                + messages[0]["content"]
                + "\033[0m"
            )
            lm_output, token_usage = self.call_lm(messages)
            # Feedback token usage is tracked separately from the task agent's.
            for usage_type, count in token_usage.items():
                state.token_counter["feedback_" + usage_type] += count
            action = self.lm_output_to_action(lm_output, form)
            LOGGER.debug(
                "Feedback Agent Action:\n" + "\033[91m" + action.value + "\033[0m"
            )
            return action
        except openai.error.InvalidRequestError:
            tb = traceback.format_exc()
            return Action("", False, error=f"InvalidRequestError\n{tb}")
        except Exception:
            # Best-effort catch-all so one failed feedback call does not
            # abort the whole evaluation run; the traceback is preserved
            # in the returned Action.
            tb = traceback.format_exc()
            return Action("", False, error=f"Unknown error\n{tb}")
87 changes: 87 additions & 0 deletions mint/agents/minimax_lm_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from .openai_lm_agent import OpenAILMAgent
import openai
import openai.error
import logging
import os
import traceback
from mint.datatypes import Action
import backoff

LOGGER = logging.getLogger("MINT")

MINIMAX_API_BASE = "https://api.minimax.io/v1"


class MiniMaxLMAgent(OpenAILMAgent):
    """LLM agent using MiniMax's OpenAI-compatible API.

    MiniMax models (e.g. MiniMax-M2.7, MiniMax-M2.7-highspeed) are served
    through an OpenAI-compatible chat completions endpoint, so this agent
    reuses the OpenAI SDK with a custom ``api_base`` and ``api_key``.

    Config:
        model_name: MiniMax model name (e.g. "MiniMax-M2.7")
        chat_mode: Must be True (MiniMax only supports chat completions)
        max_tokens: Maximum tokens to generate (default: 512)
        temperature: Sampling temperature, clamped to [0.01, 1.0] (default: 0)
    """

    def __init__(self, config):
        super().__init__(config)
        self.api_key = os.environ.get("MINIMAX_API_KEY")
        if not self.api_key:
            raise ValueError(
                "MINIMAX_API_KEY environment variable is required. "
                "Get your API key at https://platform.minimaxi.com/"
            )
        self.api_base = MINIMAX_API_BASE

    def _clamp_temperature(self, temperature):
        """Clamp temperature to MiniMax's accepted range [0.01, 1.0].

        Greedy decoding (temperature <= 0) is mapped to the smallest
        accepted value, 0.01, for API compatibility.
        """
        if temperature <= 0:
            return 0.01
        return min(temperature, 1.0)

    # NOTE(review): no max_tries/max_time is configured, so transient API
    # errors retry indefinitely with Fibonacci backoff — confirm intended.
    @backoff.on_exception(
        backoff.fibo,
        (
            openai.error.APIError,
            openai.error.Timeout,
            openai.error.RateLimitError,
            openai.error.ServiceUnavailableError,
            openai.error.APIConnectionError,
        ),
    )
    def call_lm(self, messages):
        """Call the MiniMax chat completions endpoint.

        Args:
            messages: OpenAI-style chat messages (list of role/content dicts).

        Returns:
            Tuple of (assistant message text, plain dict of integer token
            counts from the API's ``usage`` field).
        """
        temperature = self._clamp_temperature(self.config.get("temperature", 0))
        response = openai.ChatCompletion.create(
            model=self.config["model_name"],
            messages=messages,
            max_tokens=self.config.get("max_tokens", 512),
            temperature=temperature,
            stop=self.stop_words,
            api_base=self.api_base,
            api_key=self.api_key,
        )
        # Convert the OpenAIObject usage into a plain dict of ints for
        # token counting; silently skip any non-numeric fields.
        usage_obj = response["usage"]
        usage = {}
        for key in usage_obj:
            try:
                usage[key] = int(usage_obj[key])
            except (TypeError, ValueError):
                pass
        return response.choices[0].message["content"], usage

    def act(self, state):
        """Generate the next action from the conversation history in *state*.

        Token usage is accumulated into ``state.token_counter``. An
        InvalidRequestError (typically a context-length overflow) is
        returned as an error Action instead of being raised.
        """
        try:
            lm_output, token_usage = self.call_lm(state.history)
            for usage_type, count in token_usage.items():
                state.token_counter[usage_type] += count
            return self.lm_output_to_action(lm_output)
        except openai.error.InvalidRequestError:
            tb = traceback.format_exc()
            return Action("", False, error=f"InvalidRequestError\n{tb}")
18 changes: 18 additions & 0 deletions mint/configs/config_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,24 @@
"temperature": 0.0,
},
},
{
"agent_class": "MiniMaxLMAgent",
"config": {
"model_name": "MiniMax-M2.7",
"chat_mode": True,
"max_tokens": 1024,
"temperature": 0.0,
},
},
{
"agent_class": "MiniMaxLMAgent",
"config": {
"model_name": "MiniMax-M2.7-highspeed",
"chat_mode": True,
"max_tokens": 1024,
"temperature": 0.0,
},
},
# {
# "agent_class": "VLLMAgent",
# "config": {
Expand Down
Empty file added tests/__init__.py
Empty file.
Loading