
Commit 9f33fa4

Merge branch 'aws-samples:main' into deploy
2 parents: e99a78a + fa70b0f


62 files changed, +1735 -1149 lines

.github/workflows/python.yml

Lines changed: 56 additions & 0 deletions
New file contents:

```yaml
# This workflow will install Python dependencies and run linting checks
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python CI

permissions:
  contents: read

on:
  push:
    branches: ['main']
    paths: ['packages/cdk/lambda-python/**']
  pull_request:
    branches: ['main']
    paths: ['packages/cdk/lambda-python/**']

jobs:
  lint:
    name: 'Python Lint Check'
    runs-on: ubuntu-latest

    strategy:
      matrix:
        python-version: ['3.12']

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          version: 'latest'

      - name: Install dependencies
        working-directory: packages/cdk/lambda-python/generic-agent-core-runtime
        run: uv sync --group=lint

      - name: Run ruff check
        working-directory: packages/cdk/lambda-python/generic-agent-core-runtime
        run: uv run ruff check --fix .

      - name: Run ruff format
        working-directory: packages/cdk/lambda-python/generic-agent-core-runtime
        run: uv run ruff format .

      - name: Check for changes
        run: |
          if [ -n "$(git status --porcelain)" ]; then
            echo "Linting changes detected. Please run 'uv run ruff check --fix .' and 'uv run ruff format .' locally and commit the changes."
            git diff
            exit 1
          fi
```
browser-extension/package-lock.json

Lines changed: 21 additions & 21 deletions
Generated file; diff contents not rendered by default.

browser-extension/package.json

Lines changed: 2 additions & 2 deletions
```diff
@@ -32,7 +32,7 @@
     "@twind/preset-autoprefix": "^1.0.7",
     "@twind/preset-tailwind": "^1.1.4",
     "aws-amplify": "^6.0.23",
-    "axios": "^1.6.8",
+    "axios": "^1.12.0",
     "immer": "^10.0.4",
     "lodash-es": "^4.17.21",
     "react": "18.2.0",
@@ -78,7 +78,7 @@
     "tailwindcss": "^3.3.2",
     "ts-jest": "^29.1.0",
     "typescript": "4.9.5",
-    "vite": "^6.3.5",
+    "vite": "^6.3.6",
     "vite-plugin-svgr": "^4.2.0"
   }
 }
```
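The bumps above only widen version ranges; to pick them up in a local checkout, a typical refresh (assuming npm, matching the lockfile updated in this commit) looks like:

```bash
cd browser-extension
npm install         # re-resolve against the updated package.json / package-lock.json
npm ls axios vite   # confirm the installed versions satisfy ^1.12.0 and ^6.3.6
```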

docs/assets/images/arch.drawio.png

17.5 KB (binary image; preview not rendered)

docs/en/CLOSED_NETWORK.md

Lines changed: 4 additions & 0 deletions
```diff
@@ -20,6 +20,10 @@ Options related to closed network mode have the `closedNetwork` prefix. The foll
 | closedNetworkCreateTestEnvironment | Whether to create a test environment. Created by default. Specify false if not needed. The test environment is created as an EC2 Windows instance and accessed via Fleet Manager. (Detailed procedures described later.) |
 | closedNetworkCreateResolverEndpoint | Whether to generate Route53 Resolver Endpoint. Default is true. |
 
+## Architecture
+
+![arch-closed-network.drawio.png](../assets/images/arch-closed-network.drawio.png)
+
 ## Current Limitations
 
 - Deployment must be performed in an environment with internet connectivity. Also, internet connectivity is required when accessing the operation verification environment from the management console.
```

docs/en/DEPLOY_OPTION.md

Lines changed: 45 additions & 49 deletions
````diff
@@ -688,8 +688,33 @@ This is a use case for integrating with agents created in AgentCore. (Experiment
 Enabling `createGenericAgentCoreRuntime` will deploy the default AgentCore Runtime.
 By default, it is deployed to the `modelRegion`, but you can override this by specifying `agentCoreRegion`.
 
+The default agents available in AgentCore can use the MCP servers defined in [mcp.json](https://github.com/aws-samples/generative-ai-use-cases/blob/main/packages/cdk/lambda-python/generic-agent-core-runtime/mcp.json).
+The servers defined by default are AWS-related MCP servers and an MCP server for the current time.
+For details, see the documentation [here](https://awslabs.github.io/mcp/).
+To add MCP servers, add them to the aforementioned `mcp.json`.
+Note that MCP servers started by a method other than `uvx` require extra development work, such as rewriting the Dockerfile.
+
 With `agentCoreExternalRuntimes`, you can use externally created AgentCore Runtimes.
 
+To enable AgentCore use cases, the `docker` command must be executable.
+
+> [!WARNING]
+> On Linux machines with x86_64 CPUs (Intel, AMD, etc.), run the following command before cdk deployment:
+>
+> ```
+> docker run --privileged --rm tonistiigi/binfmt --install arm64
+> ```
+>
+> During deployment, the ARM-based container images used by the AgentCore Runtime are built. Building ARM container images on an x86_64 CPU fails because of the architecture difference, so skipping the command above produces an error like the following:
+>
+> ```
+> ERROR: failed to solve: process "/bin/sh -c apt-get update -y && apt-get install curl nodejs npm graphviz -y" did not complete successfully: exit code: 255
+> AgentCoreStack: fail: docker build --tag cdkasset-64ba68f71e3d29f5b84d8e8d062e841cb600c436bb68a540d6fce32fded36c08 --platform linux/arm64 . exited with error code 1: #0 building with "default" instance using docker driver
+> ```
+>
+> The command makes a temporary configuration change to the host Linux kernel: it registers QEMU emulator custom handlers in binfmt_misc (Binary Format Miscellaneous), enabling ARM container image builds. The configuration reverts after a reboot, so the command must be re-run before re-deploying.
+
 **Edit [parameter.ts](/packages/cdk/parameter.ts)**
 
 ```typescript
````
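As a quick sanity check that the binfmt registration from the warning above took effect, one can run a small ARM image under emulation. A hedged sketch; the alpine image is just an example, and any multi-arch image would do:

```bash
# Register the QEMU arm64 handler, then confirm emulated execution works.
docker run --privileged --rm tonistiigi/binfmt --install arm64
docker run --rm --platform linux/arm64 public.ecr.aws/docker/library/alpine uname -m
# Expected output: aarch64
```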
```diff
@@ -761,6 +786,7 @@ As of 2025/03, the multimodal models are:
   "anthropic.claude-3-opus-20240229-v1:0",
   "anthropic.claude-3-sonnet-20240229-v1:0",
   "anthropic.claude-3-haiku-20240307-v1:0",
+  "global.anthropic.claude-sonnet-4-20250514-v1:0",
   "us.anthropic.claude-opus-4-1-20250805-v1:0",
   "us.anthropic.claude-opus-4-20250514-v1:0",
   "us.anthropic.claude-sonnet-4-20250514-v1:0",
@@ -926,6 +952,7 @@ This solution supports the following text generation models:
   "anthropic.claude-3-opus-20240229-v1:0",
   "anthropic.claude-3-sonnet-20240229-v1:0",
   "anthropic.claude-3-haiku-20240307-v1:0",
+  "global.anthropic.claude-sonnet-4-20250514-v1:0",
   "us.anthropic.claude-opus-4-1-20250805-v1:0",
   "us.anthropic.claude-opus-4-20250514-v1:0",
   "us.anthropic.claude-sonnet-4-20250514-v1:0",
```
````diff
@@ -1423,32 +1450,21 @@ const envs: Record<string, StackInput> = {
 }
 ```
 
-## Using Custom Models with Amazon SageMaker
-
-You can use large language models deployed to Amazon SageMaker endpoints. This solution supports SageMaker endpoints using [Hugging Face's Text Generation Inference (TGI) LLM inference containers](https://aws.amazon.com/blogs/machine-learning/announcing-the-launch-of-new-hugging-face-llm-inference-containers-on-amazon-sagemaker/). Ideally, the models should support chat-formatted prompts where user and assistant take turns speaking. Currently, image generation use cases are not supported with Amazon SageMaker endpoints.
+## When you want to use Amazon SageMaker custom models
 
-There are two ways to deploy models using TGI containers to SageMaker endpoints:
+It is possible to use large language models deployed to Amazon SageMaker endpoints. The solution supports SageMaker endpoints that use the [Text Generation Inference (TGI) Hugging Face LLM inference containers](https://aws.amazon.com/blogs/machine-learning/announcing-the-launch-of-new-hugging-face-llm-inference-containers-on-amazon-sagemaker/). Since it uses TGI's [Messages API](https://huggingface.co/docs/text-generation-inference/messages_api), TGI must be version 1.4.0 or later and the model must support a chat template (`chat_template` defined in `tokenizer.config`). Currently, only text models are supported.
 
-**Deploy pre-packaged models from SageMaker JumpStart**
+There are currently two ways to deploy models using TGI containers to SageMaker endpoints.
 
-SageMaker JumpStart offers one-click deployment of packaged open-source large language models. You can deploy these models by opening them in the JumpStart screen in SageMaker Studio and clicking the "Deploy" button. Examples of Japanese models provided include:
+**Deploy models pre-prepared by AWS with SageMaker JumpStart**
 
-- [SageMaker JumpStart Elyza Japanese Llama 2 7B Instruct](https://aws.amazon.com/jp/blogs/news/sagemaker-jumpstart-elyza-7b/)
-- [SageMaker JumpStart Elyza Japanese Llama 2 13B Instruct](https://aws.amazon.com/jp/blogs/news/sagemaker-jumpstart-elyza-7b/)
-- [SageMaker JumpStart CyberAgentLM2 7B Chat](https://aws.amazon.com/jp/blogs/news/cyberagentlm2-on-sagemaker-jumpstart/)
-- [SageMaker JumpStart Stable LM Instruct Alpha 7B v2](https://aws.amazon.com/jp/blogs/news/japanese-stable-lm-instruct-alpha-7b-v2-from-stability-ai-is-now-available-in-amazon-sagemaker-jumpstart/)
-- [SageMaker JumpStart Rinna 3.6B](https://aws.amazon.com/jp/blogs/news/generative-ai-rinna-japanese-llm-on-amazon-sagemaker-jumpstart/)
-- [SageMaker JumpStart Bilingual Rinna 4B](https://aws.amazon.com/jp/blogs/news/generative-ai-rinna-japanese-llm-on-amazon-sagemaker-jumpstart/)
+SageMaker JumpStart provides OSS large language models packaged for one-click deployment. You can open a model from the JumpStart screen in SageMaker Studio and deploy it by clicking the "Deploy" button.
 
 **Deploy with a few lines of code using SageMaker SDK**
 
-Thanks to [AWS's partnership with Hugging Face](https://aws.amazon.com/jp/blogs/news/aws-and-hugging-face-collaborate-to-make-generative-ai-more-accessible-and-cost-efficient/), you can deploy models by simply specifying the model ID from Hugging Face using the SageMaker SDK.
+Through the [partnership between AWS and Hugging Face](https://aws.amazon.com/jp/blogs/news/aws-and-hugging-face-collaborate-to-make-generative-ai-more-accessible-and-cost-efficient/), you can deploy models by simply specifying the ID of a model published on Hugging Face with the SageMaker SDK.
 
-From a model's Hugging Face page, select _Deploy_ > _Amazon SageMaker_ to see the code for deploying the model. Copy and run this code to deploy the model. (You may need to adjust parameters like instance size or `SM_NUM_GPUS` depending on the model. If deployment fails, you can check the logs in CloudWatch Logs.)
-
-> [!NOTE]
-> There's one modification needed when deploying: The endpoint name will be displayed in the GenU application and is used to determine the model's prompt template (explained in the next section). Therefore, you need to specify a distinguishable endpoint name.
-> Add `endpoint_name="<distinguishable endpoint name>"` as an argument to `huggingface_model.deploy()` when deploying.
+From a published Hugging Face model page, select _Deploy_ > _Amazon SageMaker_ to display the code for deploying the model. You can deploy the model by copying and running this code. (Depending on the model, you may need to change parameters such as instance size or `SM_NUM_GPUS`. If deployment fails, you can check the logs in CloudWatch Logs.)
 
 ![Select Amazon SageMaker from Deploy on Hugging Face model page](../assets/DEPLOY_OPTION/HF_Deploy.png)
 ![Deployment script guide on Hugging Face model page](../assets/DEPLOY_OPTION/HF_Deploy2.png)
````
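When a deployment from the Hugging Face snippet fails, the endpoint's logs land in the standard SageMaker CloudWatch log group. A sketch for tailing them with the AWS CLI v2 (the endpoint name is a placeholder):

```bash
# Follow the CloudWatch logs for a SageMaker endpoint.
aws logs tail /aws/sagemaker/Endpoints/<endpoint-name> --follow
```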
````diff
@@ -1457,18 +1473,19 @@ From a model's Hugging Face page, select _Deploy_ > _Amazon SageMaker_ to see th
 
 To use deployed SageMaker endpoints with the target solution, specify them as follows:
 
-endpointNames is a list of SageMaker endpoint names. (Example: `["elyza-llama-2", "rinna"]`)
-
-To specify the prompt template used when constructing prompts in the backend, you need to include the prompt type in the endpoint name. (Example: `llama-2`, `rinna`, etc.) See `packages/cdk/lambda/utils/models.ts` for details. Add prompt templates as needed.
+`endpointNames` is a list of SageMaker endpoint names. Optionally, you can specify a region for each endpoint.
 
 ```typescript
 // parameter.ts
 const envs: Record<string, Partial<StackInput>> = {
   dev: {
     modelRegion: 'us-east-1',
     endpointNames: [
-      'jumpstart-dft-hf-llm-rinna-3-6b-instruction-ppo-bf16',
-      'jumpstart-dft-bilingual-rinna-4b-instruction-ppo-bf16',
+      '<SageMaker Endpoint Name>',
+      {
+        modelIds: '<SageMaker Endpoint Name>',
+        region: '<SageMaker Endpoint Region>',
+      },
     ],
   },
 };
@@ -1479,38 +1496,17 @@ const envs: Record<string, Partial<StackInput>> = {
 {
   "context": {
     "modelRegion": "<SageMaker Endpoint Region>",
-    "endpointNames": ["<SageMaker Endpoint Name>"]
-  }
-}
-```
-
-**Example: Using Rinna 3.6B and Bilingual Rinna 4B**
-
-```json
-// cdk.json
-{
-  "context": {
-    "modelRegion": "us-west-2",
     "endpointNames": [
-      "jumpstart-dft-hf-llm-rinna-3-6b-instruction-ppo-bf16",
-      "jumpstart-dft-bilingual-rinna-4b-instruction-ppo-bf16"
+      "<SageMaker Endpoint Name>",
+      {
+        "modelIds": "<SageMaker Endpoint Name>",
+        "region": "<SageMaker Endpoint Region>"
+      }
     ]
   }
 }
 ```
 
-**Example: Using ELYZA-japanese-Llama-2-7b-instruct**
-
-```json
-// cdk.json
-{
-  "context": {
-    "modelRegion": "us-west-2",
-    "endpointNames": ["elyza-japanese-llama-2-7b-inference"]
-  }
-}
-```
-
 ## Security-Related Settings
 
 ### Disable Self-Signup
````
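The placeholder endpoint names above must match endpoints that actually exist. A hedged way to look them up with the AWS CLI (the region value is an assumption; use the endpoint's actual region):

```bash
# List deployed SageMaker endpoints to find the exact names for endpointNames.
aws sagemaker list-endpoints --region us-east-1 \
  --query 'Endpoints[].EndpointName' --output table
```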

docs/ja/CLOSED_NETWORK.md

Lines changed: 4 additions & 0 deletions
```diff
@@ -20,6 +20,10 @@
 | closedNetworkCreateTestEnvironment | Whether to create a test environment. Created by default. Specify false if not needed. The test environment is created as an EC2 Windows instance and accessed via Fleet Manager. (Detailed procedure described later.) |
 | closedNetworkCreateResolverEndpoint | Whether to generate a Route53 Resolver Endpoint. Defaults to true. |
 
+## Architecture
+
+![arch-closed-network.drawio.png](../assets/images/arch-closed-network.drawio.png)
+
 ## Current Limitations
 
 - Deployment must be performed in an environment with internet connectivity. Also, because the verification environment is accessed from the management console, internet connectivity is required in that case as well.
```
