
Commit 9f33fa4

Merge branch 'aws-samples:main' into deploy
2 parents: e99a78a + fa70b0f


62 files changed, +1735 -1149 lines

.github/workflows/python.yml

Lines changed: 56 additions & 0 deletions
New file contents:

```yaml
# This workflow will install Python dependencies and run linting checks
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python CI

permissions:
  contents: read

on:
  push:
    branches: ['main']
    paths: ['packages/cdk/lambda-python/**']
  pull_request:
    branches: ['main']
    paths: ['packages/cdk/lambda-python/**']

jobs:
  lint:
    name: 'Python Lint Check'
    runs-on: ubuntu-latest

    strategy:
      matrix:
        python-version: ['3.12']

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          version: 'latest'

      - name: Install dependencies
        working-directory: packages/cdk/lambda-python/generic-agent-core-runtime
        run: uv sync --group=lint

      - name: Run ruff check
        working-directory: packages/cdk/lambda-python/generic-agent-core-runtime
        run: uv run ruff check --fix .

      - name: Run ruff format
        working-directory: packages/cdk/lambda-python/generic-agent-core-runtime
        run: uv run ruff format .

      - name: Check for changes
        run: |
          if [ -n "$(git status --porcelain)" ]; then
            echo "Linting changes detected. Please run 'uv run ruff check --fix .' and 'uv run ruff format .' locally and commit the changes."
            git diff
            exit 1
          fi
```
browser-extension/package-lock.json

Lines changed: 21 additions & 21 deletions
Generated file; diff contents not rendered by default.

browser-extension/package.json

Lines changed: 2 additions & 2 deletions
```diff
@@ -32,7 +32,7 @@
     "@twind/preset-autoprefix": "^1.0.7",
     "@twind/preset-tailwind": "^1.1.4",
     "aws-amplify": "^6.0.23",
-    "axios": "^1.6.8",
+    "axios": "^1.12.0",
     "immer": "^10.0.4",
     "lodash-es": "^4.17.21",
     "react": "18.2.0",
@@ -78,7 +78,7 @@
     "tailwindcss": "^3.3.2",
     "ts-jest": "^29.1.0",
     "typescript": "4.9.5",
-    "vite": "^6.3.5",
+    "vite": "^6.3.6",
     "vite-plugin-svgr": "^4.2.0"
   }
 }
```
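The bumps above only widen version ranges; to pick them up in a local checkout, a typical refresh (assuming npm, matching the lockfile updated in this commit) looks like:

```bash
cd browser-extension
npm install         # re-resolve against the updated package.json / package-lock.json
npm ls axios vite   # confirm the installed versions satisfy ^1.12.0 and ^6.3.6
```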

docs/assets/images/arch.drawio.png

17.5 KB (binary image; preview not rendered)

docs/en/CLOSED_NETWORK.md

Lines changed: 4 additions & 0 deletions
```diff
@@ -20,6 +20,10 @@ Options related to closed network mode have the `closedNetwork` prefix. The foll
 | closedNetworkCreateTestEnvironment | Whether to create a test environment. Created by default. Specify false if not needed. The test environment is created as an EC2 Windows instance and accessed via Fleet Manager. (Detailed procedures described later.) |
 | closedNetworkCreateResolverEndpoint | Whether to generate Route53 Resolver Endpoint. Default is true. |
 
+## Architecture
+
+![arch-closed-network.drawio.png](../assets/images/arch-closed-network.drawio.png)
+
 ## Current Limitations
 
 - Deployment must be performed in an environment with internet connectivity. Also, internet connectivity is required when accessing the operation verification environment from the management console.
```

docs/en/DEPLOY_OPTION.md

Lines changed: 45 additions & 49 deletions
````diff
@@ -688,8 +688,33 @@ This is a use case for integrating with agents created in AgentCore. (Experiment
 Enabling `createGenericAgentCoreRuntime` will deploy the default AgentCore Runtime.
 By default, it is deployed to the `modelRegion`, but you can override this by specifying `agentCoreRegion`.
 
+The default agents available in AgentCore can use the MCP servers defined in [mcp.json](https://github.com/aws-samples/generative-ai-use-cases/blob/main/packages/cdk/lambda-python/generic-agent-core-runtime/mcp.json).
+The servers defined by default are AWS-related MCP servers and an MCP server for the current time.
+For details, see the documentation [here](https://awslabs.github.io/mcp/).
+To add MCP servers, add them to the aforementioned `mcp.json`.
+Note that MCP servers started by a method other than `uvx` require extra development work, such as rewriting the Dockerfile.
+
 With `agentCoreExternalRuntimes`, you can use externally created AgentCore Runtimes.
 
+To enable AgentCore use cases, the `docker` command must be executable.
+
+> [!WARNING]
+> On Linux machines with x86_64 CPUs (Intel, AMD, etc.), run the following command before cdk deployment:
+>
+> ```
+> docker run --privileged --rm tonistiigi/binfmt --install arm64
+> ```
+>
+> During deployment, the ARM-based container images used by the AgentCore Runtime are built. Building ARM container images on an x86_64 CPU fails because of the architecture difference, so skipping the command above produces an error like the following:
+>
+> ```
+> ERROR: failed to solve: process "/bin/sh -c apt-get update -y && apt-get install curl nodejs npm graphviz -y" did not complete successfully: exit code: 255
+> AgentCoreStack: fail: docker build --tag cdkasset-64ba68f71e3d29f5b84d8e8d062e841cb600c436bb68a540d6fce32fded36c08 --platform linux/arm64 . exited with error code 1: #0 building with "default" instance using docker driver
+> ```
+>
+> The command makes a temporary configuration change to the host Linux kernel: it registers QEMU emulator custom handlers in binfmt_misc (Binary Format Miscellaneous), enabling ARM container image builds. The configuration reverts after a reboot, so the command must be re-run before re-deploying.
+
 **Edit [parameter.ts](/packages/cdk/parameter.ts)**
 
 ```typescript
````
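As a quick sanity check that the binfmt registration from the warning above took effect, one can run a small ARM image under emulation. A hedged sketch; the alpine image is just an example, and any multi-arch image would do:

```bash
# Register the QEMU arm64 handler, then confirm emulated execution works.
docker run --privileged --rm tonistiigi/binfmt --install arm64
docker run --rm --platform linux/arm64 public.ecr.aws/docker/library/alpine uname -m
# Expected output: aarch64
```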
```diff
@@ -761,6 +786,7 @@ As of 2025/03, the multimodal models are:
   "anthropic.claude-3-opus-20240229-v1:0",
   "anthropic.claude-3-sonnet-20240229-v1:0",
   "anthropic.claude-3-haiku-20240307-v1:0",
+  "global.anthropic.claude-sonnet-4-20250514-v1:0",
   "us.anthropic.claude-opus-4-1-20250805-v1:0",
   "us.anthropic.claude-opus-4-20250514-v1:0",
   "us.anthropic.claude-sonnet-4-20250514-v1:0",
@@ -926,6 +952,7 @@ This solution supports the following text generation models:
   "anthropic.claude-3-opus-20240229-v1:0",
   "anthropic.claude-3-sonnet-20240229-v1:0",
   "anthropic.claude-3-haiku-20240307-v1:0",
+  "global.anthropic.claude-sonnet-4-20250514-v1:0",
   "us.anthropic.claude-opus-4-1-20250805-v1:0",
   "us.anthropic.claude-opus-4-20250514-v1:0",
   "us.anthropic.claude-sonnet-4-20250514-v1:0",
```
````diff
@@ -1423,32 +1450,21 @@ const envs: Record<string, StackInput> = {
 }
 ```
 
-## Using Custom Models with Amazon SageMaker
-
-You can use large language models deployed to Amazon SageMaker endpoints. This solution supports SageMaker endpoints using [Hugging Face's Text Generation Inference (TGI) LLM inference containers](https://aws.amazon.com/blogs/machine-learning/announcing-the-launch-of-new-hugging-face-llm-inference-containers-on-amazon-sagemaker/). Ideally, the models should support chat-formatted prompts where user and assistant take turns speaking. Currently, image generation use cases are not supported with Amazon SageMaker endpoints.
+## When you want to use Amazon SageMaker custom models
 
-There are two ways to deploy models using TGI containers to SageMaker endpoints:
+It is possible to use large language models deployed to Amazon SageMaker endpoints. The solution supports SageMaker endpoints that use the [Text Generation Inference (TGI) Hugging Face LLM inference containers](https://aws.amazon.com/blogs/machine-learning/announcing-the-launch-of-new-hugging-face-llm-inference-containers-on-amazon-sagemaker/). Since it uses TGI's [Messages API](https://huggingface.co/docs/text-generation-inference/messages_api), TGI must be version 1.4.0 or later and the model must support a chat template (`chat_template` defined in `tokenizer.config`). Currently, only text models are supported.
 
-**Deploy pre-packaged models from SageMaker JumpStart**
+There are currently two ways to deploy models using TGI containers to SageMaker endpoints.
 
-SageMaker JumpStart offers one-click deployment of packaged open-source large language models. You can deploy these models by opening them in the JumpStart screen in SageMaker Studio and clicking the "Deploy" button. Examples of Japanese models provided include:
+**Deploy models pre-prepared by AWS with SageMaker JumpStart**
 
-- [SageMaker JumpStart Elyza Japanese Llama 2 7B Instruct](https://aws.amazon.com/jp/blogs/news/sagemaker-jumpstart-elyza-7b/)
-- [SageMaker JumpStart Elyza Japanese Llama 2 13B Instruct](https://aws.amazon.com/jp/blogs/news/sagemaker-jumpstart-elyza-7b/)
-- [SageMaker JumpStart CyberAgentLM2 7B Chat](https://aws.amazon.com/jp/blogs/news/cyberagentlm2-on-sagemaker-jumpstart/)
-- [SageMaker JumpStart Stable LM Instruct Alpha 7B v2](https://aws.amazon.com/jp/blogs/news/japanese-stable-lm-instruct-alpha-7b-v2-from-stability-ai-is-now-available-in-amazon-sagemaker-jumpstart/)
-- [SageMaker JumpStart Rinna 3.6B](https://aws.amazon.com/jp/blogs/news/generative-ai-rinna-japanese-llm-on-amazon-sagemaker-jumpstart/)
-- [SageMaker JumpStart Bilingual Rinna 4B](https://aws.amazon.com/jp/blogs/news/generative-ai-rinna-japanese-llm-on-amazon-sagemaker-jumpstart/)
+SageMaker JumpStart provides OSS large language models packaged for one-click deployment. You can open a model from the JumpStart screen in SageMaker Studio and deploy it by clicking the "Deploy" button.
 
 **Deploy with a few lines of code using SageMaker SDK**
 
-Thanks to [AWS's partnership with Hugging Face](https://aws.amazon.com/jp/blogs/news/aws-and-hugging-face-collaborate-to-make-generative-ai-more-accessible-and-cost-efficient/), you can deploy models by simply specifying the model ID from Hugging Face using the SageMaker SDK.
+Through the [partnership between AWS and Hugging Face](https://aws.amazon.com/jp/blogs/news/aws-and-hugging-face-collaborate-to-make-generative-ai-more-accessible-and-cost-efficient/), you can deploy models by simply specifying the ID of a model published on Hugging Face with the SageMaker SDK.
 
-From a model's Hugging Face page, select _Deploy_ > _Amazon SageMaker_ to see the code for deploying the model. Copy and run this code to deploy the model. (You may need to adjust parameters like instance size or `SM_NUM_GPUS` depending on the model. If deployment fails, you can check the logs in CloudWatch Logs.)
-
-> [!NOTE]
-> There's one modification needed when deploying: The endpoint name will be displayed in the GenU application and is used to determine the model's prompt template (explained in the next section). Therefore, you need to specify a distinguishable endpoint name.
-> Add `endpoint_name="<distinguishable endpoint name>"` as an argument to `huggingface_model.deploy()` when deploying.
+From a published Hugging Face model page, select _Deploy_ > _Amazon SageMaker_ to display the code for deploying the model. You can deploy the model by copying and running this code. (Depending on the model, you may need to change parameters such as instance size or `SM_NUM_GPUS`. If deployment fails, you can check the logs in CloudWatch Logs.)
 
 ![Select Amazon SageMaker from Deploy on Hugging Face model page](../assets/DEPLOY_OPTION/HF_Deploy.png)
 ![Deployment script guide on Hugging Face model page](../assets/DEPLOY_OPTION/HF_Deploy2.png)
````
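When a deployment from the Hugging Face snippet fails, the endpoint's logs land in the standard SageMaker CloudWatch log group. A sketch for tailing them with the AWS CLI v2 (the endpoint name is a placeholder):

```bash
# Follow the CloudWatch logs for a SageMaker endpoint.
aws logs tail /aws/sagemaker/Endpoints/<endpoint-name> --follow
```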
````diff
@@ -1457,18 +1473,19 @@ From a model's Hugging Face page, select _Deploy_ > _Amazon SageMaker_ to see th
 
 To use deployed SageMaker endpoints with the target solution, specify them as follows:
 
-endpointNames is a list of SageMaker endpoint names. (Example: `["elyza-llama-2", "rinna"]`)
-
-To specify the prompt template used when constructing prompts in the backend, you need to include the prompt type in the endpoint name. (Example: `llama-2`, `rinna`, etc.) See `packages/cdk/lambda/utils/models.ts` for details. Add prompt templates as needed.
+`endpointNames` is a list of SageMaker endpoint names. Optionally, you can specify a region for each endpoint.
 
 ```typescript
 // parameter.ts
 const envs: Record<string, Partial<StackInput>> = {
   dev: {
     modelRegion: 'us-east-1',
     endpointNames: [
-      'jumpstart-dft-hf-llm-rinna-3-6b-instruction-ppo-bf16',
-      'jumpstart-dft-bilingual-rinna-4b-instruction-ppo-bf16',
+      '<SageMaker Endpoint Name>',
+      {
+        modelIds: '<SageMaker Endpoint Name>',
+        region: '<SageMaker Endpoint Region>',
+      },
     ],
   },
 };
@@ -1479,38 +1496,17 @@ const envs: Record<string, Partial<StackInput>> = {
 {
   "context": {
     "modelRegion": "<SageMaker Endpoint Region>",
-    "endpointNames": ["<SageMaker Endpoint Name>"]
-  }
-}
-```
-
-**Example: Using Rinna 3.6B and Bilingual Rinna 4B**
-
-```json
-// cdk.json
-{
-  "context": {
-    "modelRegion": "us-west-2",
     "endpointNames": [
-      "jumpstart-dft-hf-llm-rinna-3-6b-instruction-ppo-bf16",
-      "jumpstart-dft-bilingual-rinna-4b-instruction-ppo-bf16"
+      "<SageMaker Endpoint Name>",
+      {
+        "modelIds": "<SageMaker Endpoint Name>",
+        "region": "<SageMaker Endpoint Region>"
+      }
     ]
   }
 }
 ```
 
-**Example: Using ELYZA-japanese-Llama-2-7b-instruct**
-
-```json
-// cdk.json
-{
-  "context": {
-    "modelRegion": "us-west-2",
-    "endpointNames": ["elyza-japanese-llama-2-7b-inference"]
-  }
-}
-```
-
 ## Security-Related Settings
 
 ### Disable Self-Signup
````
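The placeholder endpoint names above must match endpoints that actually exist. A hedged way to look them up with the AWS CLI (the region value is an assumption; use the endpoint's actual region):

```bash
# List deployed SageMaker endpoints to find the exact names for endpointNames.
aws sagemaker list-endpoints --region us-east-1 \
  --query 'Endpoints[].EndpointName' --output table
```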

docs/ja/CLOSED_NETWORK.md

Lines changed: 4 additions & 0 deletions
```diff
@@ -20,6 +20,10 @@
 | closedNetworkCreateTestEnvironment | Whether to create a test environment. Created by default. Specify false if not needed. The test environment is created as an EC2 Windows instance and accessed via Fleet Manager. (Detailed procedure described later.) |
 | closedNetworkCreateResolverEndpoint | Whether to generate a Route53 Resolver Endpoint. Defaults to true. |
 
+## Architecture
+
+![arch-closed-network.drawio.png](../assets/images/arch-closed-network.drawio.png)
+
 ## Current Limitations
 
 - Deployment must be performed in an environment with internet connectivity. Also, because the verification environment is accessed from the management console, internet connectivity is required in that case as well.
```
