From 049f8c44263b4ab52592d9abd25e1830c530f4ff Mon Sep 17 00:00:00 2001
From: Keisuke Minami
Date: Sun, 5 Oct 2025 18:47:29 +0700
Subject: [PATCH 1/3] Add Qwen3 models

---
 docs/en/DEPLOY_OPTION.md                 |  4 +++
 docs/ja/DEPLOY_OPTION.md                 |  4 +++
 docs/ko/DEPLOY_OPTION.md                 |  4 +++
 packages/cdk/lambda/utils/models.ts      | 40 ++++++++++++++++++++++++
 packages/common/src/application/model.ts | 17 ++++++++++
 5 files changed, 69 insertions(+)

diff --git a/docs/en/DEPLOY_OPTION.md b/docs/en/DEPLOY_OPTION.md
index a45f5e7a6..8f8fc5463 100644
--- a/docs/en/DEPLOY_OPTION.md
+++ b/docs/en/DEPLOY_OPTION.md
@@ -980,6 +980,10 @@ This solution supports the following text generation models:
 "apac.anthropic.claude-3-5-sonnet-20240620-v1:0",
 "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
 "us.deepseek.r1-v1:0",
+"qwen.qwen3-235b-a22b-2507-v1:0",
+"qwen.qwen3-32b-v1:0",
+"qwen.qwen3-coder-480b-a35b-v1:0",
+"qwen.qwen3-coder-30b-a3b-v1:0",
 "us.writer.palmyra-x5-v1:0",
 "us.writer.palmyra-x4-v1:0",
 "amazon.titan-text-premier-v1:0",
diff --git a/docs/ja/DEPLOY_OPTION.md b/docs/ja/DEPLOY_OPTION.md
index 870044bfd..71f29faf2 100644
--- a/docs/ja/DEPLOY_OPTION.md
+++ b/docs/ja/DEPLOY_OPTION.md
@@ -995,6 +995,10 @@ const envs: Record> = {
 "apac.anthropic.claude-3-5-sonnet-20240620-v1:0",
 "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
 "us.deepseek.r1-v1:0",
+"qwen.qwen3-235b-a22b-2507-v1:0",
+"qwen.qwen3-32b-v1:0",
+"qwen.qwen3-coder-480b-a35b-v1:0",
+"qwen.qwen3-coder-30b-a3b-v1:0",
 "us.writer.palmyra-x5-v1:0",
 "us.writer.palmyra-x4-v1:0",
 "amazon.titan-text-premier-v1:0",
diff --git a/docs/ko/DEPLOY_OPTION.md b/docs/ko/DEPLOY_OPTION.md
index caf3742a2..22ce12ccf 100644
--- a/docs/ko/DEPLOY_OPTION.md
+++ b/docs/ko/DEPLOY_OPTION.md
@@ -976,6 +976,10 @@ const envs: Record> = {
 "apac.anthropic.claude-3-5-sonnet-20240620-v1:0",
 "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
 "us.deepseek.r1-v1:0",
+"qwen.qwen3-235b-a22b-2507-v1:0",
+"qwen.qwen3-32b-v1:0",
+"qwen.qwen3-coder-480b-a35b-v1:0",
+"qwen.qwen3-coder-30b-a3b-v1:0",
 "us.writer.palmyra-x5-v1:0",
 "us.writer.palmyra-x4-v1:0",
 "amazon.titan-text-premier-v1:0",
diff --git a/packages/cdk/lambda/utils/models.ts b/packages/cdk/lambda/utils/models.ts
index 2d7dda05c..59aa03a68 100644
--- a/packages/cdk/lambda/utils/models.ts
+++ b/packages/cdk/lambda/utils/models.ts
@@ -206,6 +206,14 @@ const DEEPSEEK_DEFAULT_PARAMS: ConverseInferenceParams = {
   },
 };
 
+const QWEN_DEFAULT_PARAMS: ConverseInferenceParams = {
+  inferenceConfig: {
+    maxTokens: 32768,
+    temperature: 0.7,
+    topP: 0.9,
+  },
+};
+
 const PALMYRA_DEFAULT_PARAMS: ConverseInferenceParams = {
   inferenceConfig: {
     maxTokens: 8192,
@@ -1491,6 +1499,38 @@ export const BEDROCK_TEXT_GEN_MODELS: {
     extractConverseOutput: extractConverseOutput,
     extractConverseStreamOutput: extractConverseStreamOutput,
   },
+  'qwen.qwen3-235b-a22b-2507-v1:0': {
+    defaultParams: QWEN_DEFAULT_PARAMS,
+    usecaseParams: USECASE_DEFAULT_PARAMS,
+    createConverseCommandInput: createConverseCommandInput,
+    createConverseStreamCommandInput: createConverseStreamCommandInput,
+    extractConverseOutput: extractConverseOutput,
+    extractConverseStreamOutput: extractConverseStreamOutput,
+  },
+  'qwen.qwen3-32b-v1:0': {
+    defaultParams: QWEN_DEFAULT_PARAMS,
+    usecaseParams: USECASE_DEFAULT_PARAMS,
+    createConverseCommandInput: createConverseCommandInput,
+    createConverseStreamCommandInput: createConverseStreamCommandInput,
+    extractConverseOutput: extractConverseOutput,
+    extractConverseStreamOutput: extractConverseStreamOutput,
+  },
+  'qwen.qwen3-coder-480b-a35b-v1:0': {
+    defaultParams: QWEN_DEFAULT_PARAMS,
+    usecaseParams: USECASE_DEFAULT_PARAMS,
+    createConverseCommandInput: createConverseCommandInput,
+    createConverseStreamCommandInput: createConverseStreamCommandInput,
+    extractConverseOutput: extractConverseOutput,
+    extractConverseStreamOutput: extractConverseStreamOutput,
+  },
+  'qwen.qwen3-coder-30b-a3b-v1:0': {
+    defaultParams: QWEN_DEFAULT_PARAMS,
+    usecaseParams: USECASE_DEFAULT_PARAMS,
+    createConverseCommandInput: createConverseCommandInput,
+    createConverseStreamCommandInput: createConverseStreamCommandInput,
+    extractConverseOutput: extractConverseOutput,
+    extractConverseStreamOutput: extractConverseStreamOutput,
+  },
   // Although Palmyra supports system context, the model seems work best without it.
   'us.writer.palmyra-x4-v1:0': {
     defaultParams: PALMYRA_DEFAULT_PARAMS,
diff --git a/packages/common/src/application/model.ts b/packages/common/src/application/model.ts
index 3728f757a..34238ab50 100644
--- a/packages/common/src/application/model.ts
+++ b/packages/common/src/application/model.ts
@@ -384,6 +384,23 @@ export const modelMetadata: Record = {
     flags: MODEL_FEATURE.TEXT_DOC_REASONING,
     displayName: 'DeepSeek-R1',
   },
+  // Qwen
+  'qwen.qwen3-235b-a22b-2507-v1:0': {
+    flags: MODEL_FEATURE.TEXT_ONLY,
+    displayName: 'Qwen3 235B A22B 2507',
+  },
+  'qwen.qwen3-32b-v1:0': {
+    flags: MODEL_FEATURE.TEXT_ONLY,
+    displayName: 'Qwen3 32B',
+  },
+  'qwen.qwen3-coder-480b-a35b-v1:0': {
+    flags: MODEL_FEATURE.TEXT_ONLY,
+    displayName: 'Qwen3-Coder 480B A35B Instruct',
+  },
+  'qwen.qwen3-coder-30b-a3b-v1:0': {
+    flags: MODEL_FEATURE.TEXT_ONLY,
+    displayName: 'Qwen3-Coder 30B A3B Instruct',
+  },
   // Writer
   'us.writer.palmyra-x4-v1:0': {
     flags: MODEL_FEATURE.TEXT_DOC,
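Usage sketch (not part of the patch series): listing the model IDs in DEPLOY_OPTION.md only documents them; a deployer still has to opt in. The snippet below is a minimal, illustrative sketch of that opt-in, assuming the solution reads a modelIds array from its deployment parameters as the surrounding docs suggest; the variable name and the non-Qwen entries are illustrative, and the models must be enabled in a Bedrock region that serves them.

// Illustrative deployment parameter sketch, not part of this patch:
// enable the newly added Qwen3 models alongside existing ones.
const modelIds: string[] = [
  'us.deepseek.r1-v1:0',
  'us.writer.palmyra-x4-v1:0',
  // Qwen3 models registered by PATCH 1/3:
  'qwen.qwen3-235b-a22b-2507-v1:0',
  'qwen.qwen3-32b-v1:0',
  'qwen.qwen3-coder-480b-a35b-v1:0',
  'qwen.qwen3-coder-30b-a3b-v1:0',
];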
From c15b16eaec2f60adc3b59cbbfcc6707182665b4a Mon Sep 17 00:00:00 2001
From: Keisuke Minami
Date: Sat, 11 Oct 2025 13:31:27 +0700
Subject: [PATCH 2/3] Fix Qwen3 max token limits

---
 packages/cdk/lambda/utils/models.ts | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/packages/cdk/lambda/utils/models.ts b/packages/cdk/lambda/utils/models.ts
index 59aa03a68..1d1d82fdc 100644
--- a/packages/cdk/lambda/utils/models.ts
+++ b/packages/cdk/lambda/utils/models.ts
@@ -206,9 +206,26 @@ const DEEPSEEK_DEFAULT_PARAMS: ConverseInferenceParams = {
   },
 };
 
-const QWEN_DEFAULT_PARAMS: ConverseInferenceParams = {
+// Qwen3 model parameters based on actual AWS Bedrock limits
+const QWEN_16K_DEFAULT_PARAMS: ConverseInferenceParams = {
   inferenceConfig: {
-    maxTokens: 32768,
+    maxTokens: 16384, // Actual limit for qwen3-32b in Bedrock
+    temperature: 0.7,
+    topP: 0.9,
+  },
+};
+
+const QWEN_64K_DEFAULT_PARAMS: ConverseInferenceParams = {
+  inferenceConfig: {
+    maxTokens: 65536, // Actual limit for qwen3-coder-480b in Bedrock
+    temperature: 0.7,
+    topP: 0.9,
+  },
+};
+
+const QWEN_192K_DEFAULT_PARAMS: ConverseInferenceParams = {
+  inferenceConfig: {
+    maxTokens: 196608, // Actual limit for qwen3-235b and qwen3-coder-30b in Bedrock
     temperature: 0.7,
     topP: 0.9,
   },
@@ -1500,31 +1517,31 @@ export const BEDROCK_TEXT_GEN_MODELS: {
     extractConverseStreamOutput: extractConverseStreamOutput,
   },
   'qwen.qwen3-235b-a22b-2507-v1:0': {
-    defaultParams: QWEN_DEFAULT_PARAMS,
+    defaultParams: QWEN_192K_DEFAULT_PARAMS,
     usecaseParams: USECASE_DEFAULT_PARAMS,
     createConverseCommandInput: createConverseCommandInput,
     createConverseStreamCommandInput: createConverseStreamCommandInput,
     extractConverseOutput: extractConverseOutput,
     extractConverseStreamOutput: extractConverseStreamOutput,
   },
   'qwen.qwen3-32b-v1:0': {
-    defaultParams: QWEN_DEFAULT_PARAMS,
+    defaultParams: QWEN_16K_DEFAULT_PARAMS,
     usecaseParams: USECASE_DEFAULT_PARAMS,
     createConverseCommandInput: createConverseCommandInput,
     createConverseStreamCommandInput: createConverseStreamCommandInput,
     extractConverseOutput: extractConverseOutput,
     extractConverseStreamOutput: extractConverseStreamOutput,
   },
   'qwen.qwen3-coder-480b-a35b-v1:0': {
-    defaultParams: QWEN_DEFAULT_PARAMS,
+    defaultParams: QWEN_64K_DEFAULT_PARAMS,
     usecaseParams: USECASE_DEFAULT_PARAMS,
     createConverseCommandInput: createConverseCommandInput,
     createConverseStreamCommandInput: createConverseStreamCommandInput,
     extractConverseOutput: extractConverseOutput,
     extractConverseStreamOutput: extractConverseStreamOutput,
   },
   'qwen.qwen3-coder-30b-a3b-v1:0': {
-    defaultParams: QWEN_DEFAULT_PARAMS,
+    defaultParams: QWEN_192K_DEFAULT_PARAMS,
     usecaseParams: USECASE_DEFAULT_PARAMS,
     createConverseCommandInput: createConverseCommandInput,
     createConverseStreamCommandInput: createConverseStreamCommandInput,
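For orientation (an illustrative sketch, not part of the patch series): the inferenceConfig object in each QWEN_*_DEFAULT_PARAMS constant has the same shape as the inferenceConfig field of the Bedrock Converse API (maxTokens, temperature, topP), so the corrected maxTokens values above are what ultimately cap a response. The snippet below shows an equivalent direct call for qwen.qwen3-32b-v1:0 using only the public @aws-sdk/client-bedrock-runtime API; the region and prompt are placeholders, and it bypasses the createConverseCommandInput helper that the Lambda actually uses.

import {
  BedrockRuntimeClient,
  ConverseCommand,
} from '@aws-sdk/client-bedrock-runtime';

async function main(): Promise<void> {
  // Region is a placeholder; use one where the Qwen3 models are enabled.
  const client = new BedrockRuntimeClient({ region: 'us-west-2' });

  const command = new ConverseCommand({
    modelId: 'qwen.qwen3-32b-v1:0',
    messages: [{ role: 'user', content: [{ text: 'Write a haiku about autumn.' }] }],
    // Mirrors QWEN_16K_DEFAULT_PARAMS from PATCH 2/3 above.
    inferenceConfig: { maxTokens: 16384, temperature: 0.7, topP: 0.9 },
  });

  const response = await client.send(command);
  console.log(response.output?.message?.content?.[0]?.text);
}

main().catch(console.error);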
From da068306378b77d9da33a436d18361bad94d5a3c Mon Sep 17 00:00:00 2001
From: Keisuke Minami
Date: Sat, 11 Oct 2025 13:55:29 +0700
Subject: [PATCH 3/3] Remove inline max token comments

---
 packages/cdk/lambda/utils/models.ts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/packages/cdk/lambda/utils/models.ts b/packages/cdk/lambda/utils/models.ts
index 1d1d82fdc..1194dbf7c 100644
--- a/packages/cdk/lambda/utils/models.ts
+++ b/packages/cdk/lambda/utils/models.ts
@@ -209,7 +209,7 @@ const DEEPSEEK_DEFAULT_PARAMS: ConverseInferenceParams = {
 // Qwen3 model parameters based on actual AWS Bedrock limits
 const QWEN_16K_DEFAULT_PARAMS: ConverseInferenceParams = {
   inferenceConfig: {
-    maxTokens: 16384, // Actual limit for qwen3-32b in Bedrock
+    maxTokens: 16384,
     temperature: 0.7,
     topP: 0.9,
   },
@@ -217,7 +217,7 @@ const QWEN_16K_DEFAULT_PARAMS: ConverseInferenceParams = {
 
 const QWEN_64K_DEFAULT_PARAMS: ConverseInferenceParams = {
   inferenceConfig: {
-    maxTokens: 65536, // Actual limit for qwen3-coder-480b in Bedrock
+    maxTokens: 65536,
     temperature: 0.7,
     topP: 0.9,
   },
@@ -225,7 +225,7 @@ const QWEN_64K_DEFAULT_PARAMS: ConverseInferenceParams = {
 
 const QWEN_192K_DEFAULT_PARAMS: ConverseInferenceParams = {
   inferenceConfig: {
-    maxTokens: 196608, // Actual limit for qwen3-235b and qwen3-coder-30b in Bedrock
+    maxTokens: 196608,
     temperature: 0.7,
     topP: 0.9,
   },