Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 72 additions & 19 deletions examples/pipelines/providers/aws_bedrock_claude_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"""
title: AWS Bedrock Claude Pipeline
author: G-mario
date: 2024-08-18
version: 1.0
author: G-mario, shadowdao
date: 2025-06-02
version: 1.4
license: MIT
description: A pipeline for generating text and processing images using the AWS Bedrock API(By Anthropic claude).
requirements: requests, boto3
environment_variables: AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_REGION_NAME
environment_variables: AWS_ACCESS_KEY (optional with instance roles), AWS_SECRET_KEY (optional with instance roles), AWS_REGION_NAME
"""
import base64
import json
Expand All @@ -31,8 +31,9 @@
"max": 32768,
}

# Maximum combined token limit for Claude 3.7
# Maximum combined token limit for Claude Sonnet 3.7 and 4.0
MAX_COMBINED_TOKENS = 64000
OPUS_MAX_COMBINED_TOKENS = 32000


class Pipeline:
Expand Down Expand Up @@ -91,21 +92,36 @@ async def on_valves_updated(self):

def update_pipelines(self) -> None:
    """(Re)create the Bedrock clients and refresh the pipeline model list.

    Uses the explicit AWS credentials from the valves when both
    ``AWS_ACCESS_KEY`` and ``AWS_SECRET_KEY`` are set; otherwise falls back
    to boto3's default credential chain (instance/task role, environment
    variables, shared config). On any failure ``self.pipelines`` is replaced
    with a single "error" placeholder entry so the UI still shows something.
    """
    try:
        # Check if we have explicit credentials or should try instance role.
        if self.valves.AWS_ACCESS_KEY and self.valves.AWS_SECRET_KEY:
            # Use explicit credentials supplied through the valves.
            self.bedrock = boto3.client(
                service_name="bedrock",
                aws_access_key_id=self.valves.AWS_ACCESS_KEY,
                aws_secret_access_key=self.valves.AWS_SECRET_KEY,
                region_name=self.valves.AWS_REGION_NAME,
            )
            self.bedrock_runtime = boto3.client(
                service_name="bedrock-runtime",
                aws_access_key_id=self.valves.AWS_ACCESS_KEY,
                aws_secret_access_key=self.valves.AWS_SECRET_KEY,
                region_name=self.valves.AWS_REGION_NAME,
            )
            print("Using provided AWS credentials")
        else:
            # No explicit credentials: let boto3 resolve them itself.
            # An empty region string is normalized to None so boto3 falls
            # back to its own region resolution (env var / config file).
            region = self.valves.AWS_REGION_NAME if self.valves.AWS_REGION_NAME else None
            self.bedrock = boto3.client(service_name="bedrock", region_name=region)
            self.bedrock_runtime = boto3.client(service_name="bedrock-runtime", region_name=region)
            print("No AWS credentials provided, using instance role or AWS credential chain")

        self.pipelines = self.get_models()
    except Exception as e:
        print(f"Error: {e}")
        # Surface the failure as a pseudo-model so the UI shows a hint.
        self.pipelines = [
            {
                "id": "error",
                "name": "Could not fetch models from Bedrock. Please check AWS credentials or instance role permissions.",
            },
        ]

Expand All @@ -124,11 +140,11 @@ def get_models(self):

return res
except Exception as e:
print(f"Error: {e}")
print(f"Error accessing Bedrock: {e}")
return [
{
"id": "error",
"name": "Could not fetch models from Bedrock, please check permissoin.",
"name": "Could not fetch models from Bedrock. Please check AWS credentials or instance role permissions.",
},
]

Expand All @@ -140,6 +156,33 @@ def getInferenceProfileId(self, modelArn: str) -> str:
return profile['inferenceProfileId']
return None

def check_supports_thinking(self, model_id: str) -> bool:
    """Return True if *model_id* names a Claude model that supports the
    extended "thinking" feature (Claude 3.7 and newer).

    Two Bedrock id styles are handled:
      * version directly after the family name, e.g.
        ``anthropic.claude-3-7-sonnet-...`` (primary pattern);
      * family word between "claude-" and the version, e.g.
        ``anthropic.claude-sonnet-4-...`` (fallback pattern).

    Non-Claude ids and Claude ids with no parsable version return False.
    """
    # Local import kept deliberately: the rest of this file may not
    # import `re` at module level.
    import re

    normalized = model_id.lower()
    if "claude" not in normalized:
        return False

    # Primary pattern: claude-<major>[.-<minor>] (claude-3-7, claude-3.5).
    match = re.search(r'claude-(\d+)(?:[.-](\d+))?', normalized)
    if match is None:
        # Fallback: claude-<family>-<major>[.-<minor>]
        # (claude-sonnet-4, claude-opus-4-1, claude-sonnet-4.2).
        match = re.search(r'claude-\w+-(\d+)(?:[.-](\d+))?', normalized)
    if match is None:
        return False

    major = int(match.group(1))
    minor = int(match.group(2)) if match.group(2) else 0
    # Thinking is available from Claude 3.7 onward.
    return major > 3 or (major == 3 and minor >= 7)

def pipe(
self, user_message: str, model_id: str, messages: List[dict], body: dict
) -> Union[str, Generator, Iterator]:
Expand All @@ -149,7 +192,6 @@ def pipe(
system_message, messages = pop_system_message(messages)

logging.info(f"pop_system_message: {json.dumps(messages)}")

try:
processed_messages = []
image_count = 0
Expand Down Expand Up @@ -183,7 +225,12 @@ def pipe(
}

if body.get("stream", False):
supports_thinking = "claude-3-7" in model_id
supports_thinking = self.check_supports_thinking(model_id)

# Debug logging to help troubleshoot version detection
print(f"Model ID: {model_id}")
print(f"Supports thinking: {supports_thinking}")

reasoning_effort = body.get("reasoning_effort", "none")
budget_tokens = REASONING_EFFORT_BUDGET_TOKEN_MAP.get(reasoning_effort)

Expand All @@ -202,11 +249,17 @@ def pipe(
# Check if the combined tokens (budget_tokens + max_tokens) exceeds the limit
max_tokens = payload.get("max_tokens", 4096)
combined_tokens = budget_tokens + max_tokens

if combined_tokens > MAX_COMBINED_TOKENS:
error_message = f"Error: Combined tokens (budget_tokens {budget_tokens} + max_tokens {max_tokens} = {combined_tokens}) exceeds the maximum limit of {MAX_COMBINED_TOKENS}"
print(error_message)
return error_message
# Opus version of the model has a lower max token threshold
if "opus" in model_id.lower():
if combined_tokens > OPUS_MAX_COMBINED_TOKENS:
error_message = f"Error: Combined tokens (budget_tokens {budget_tokens} + max_tokens {max_tokens} = {combined_tokens}) exceeds the maximum limit of {MAX_COMBINED_TOKENS}"
print(error_message)
return error_message
else:
if combined_tokens > MAX_COMBINED_TOKENS:
error_message = f"Error: Combined tokens (budget_tokens {budget_tokens} + max_tokens {max_tokens} = {combined_tokens}) exceeds the maximum limit of {MAX_COMBINED_TOKENS}"
print(error_message)
return error_message

payload["inferenceConfig"]["maxTokens"] = combined_tokens
payload["additionalModelRequestFields"]["thinking"] = {
Expand Down