- +
+
-
@@ -622,81 +742,385 @@

Model Information

Extra Parameters

-
-
- - + +
+
📦 Model Parameters
+ +
+
+ + +
+
+ + +
-
- - + +
+
+ + +
+
+ + +
-
-
- - +
+
+ + +
+
🔧 Service Parameters
+
- + +
+ + + + + +
-
+ +
+
⚙️ Engine Parameters
+
- - + +
-
- - + + + + + +
-
- - + +
+
🌐 Framework Parameters
+ +
+
+ + +
+
+ + +
+
+ +
+ + +
-
- + +
+
🔧 General Options
+ +
+ +
-
- - -
- +
-
-
-
- No models configured yet. Add models from the left panel. +
+ +
+ + +
+
+ + +
+ Background
+
+
+
-
- Add models to the queue to generate deployment commands + +
+ +
+
+
+

Click any model to open CLI generator

- -
- +
+ Loading models...
+
+ + + + + + + + + + + + + + + + + +
Model IDTypeDescriptionInstancesEnginesServicesChina
+ Loading models... +
+
@@ -705,7 +1129,7 @@

Extra Parame

/**
 * Report whether `modelId` names an entry of the `supportedModels` table.
 *
 * An own-property check is used so that inherited keys such as
 * "constructor" or "toString" are never mistaken for a model.
 *
 * @param {string} modelId - Candidate model identifier.
 * @returns {boolean} True when the table has a truthy own entry for the ID.
 */
function isSupportedModelId(modelId) {
  return Boolean(modelId)
    && Object.prototype.hasOwnProperty.call(supportedModels, modelId)
    && Boolean(supportedModels[modelId]);
}

/**
 * Handle typing in the model search box.
 *
 * Shows up to ten model IDs that contain the typed text (case-insensitive
 * substring match). When the trimmed text is exactly a known model ID, that
 * model is listed first and selected immediately. An empty query or a query
 * without matches hides the suggestions and clears the current selection.
 */
function onModelSearchInput() {
  const maxSuggestions = 10;
  const typedId = modelSearch.value.trim();
  const query = typedId.toLowerCase();

  if (query.length === 0) {
    searchSuggestions.style.display = 'none';
    clearModelSelection();
    return;
  }

  const matches = Object.keys(supportedModels).filter((modelId) =>
    modelId.toLowerCase().includes(query));

  if (matches.length === 0) {
    searchSuggestions.style.display = 'none';
    clearModelSelection();
    return;
  }

  // The exact match is decided against the whole table, not the truncated
  // suggestion list; otherwise an ID that is a substring of more than ten
  // other IDs could never be selected by typing it.
  const isExactMatch = isSupportedModelId(typedId);
  const ordered = isExactMatch
    ? [typedId, ...matches.filter((modelId) => modelId !== typedId)]
    : matches;

  showSearchSuggestions(ordered.slice(0, maxSuggestions));

  if (isExactMatch) {
    onModelSearchChange();
  }
}

/**
 * Render the suggestion list below the search box.
 *
 * @param {string[]} matches - Model IDs to offer; clicking one fills the
 *   search box, hides the list and selects the model.
 */
function showSearchSuggestions(matches) {
  searchSuggestions.innerHTML = '';

  matches.forEach((modelId) => {
    const item = document.createElement('div');
    item.className = 'suggestion-item';
    item.textContent = modelId;
    item.addEventListener('click', () => {
      modelSearch.value = modelId;
      searchSuggestions.style.display = 'none';
      onModelSearchChange();
    });
    searchSuggestions.appendChild(item);
  });

  searchSuggestions.style.display = 'block';
}

/** Re-run the search when the box regains focus and already contains text. */
function onModelSearchFocus() {
  if (modelSearch.value.trim()) {
    onModelSearchInput();
  }
}

/** Hide the suggestions shortly after the search box loses focus. */
function onModelSearchBlur() {
  // Delay hiding so a click on a suggestion is still delivered.
  setTimeout(() => {
    searchSuggestions.style.display = 'none';
  }, 200);
}

/**
 * Select the model whose ID is currently typed in the search box.
 * Unknown or empty IDs clear the selection (handled by
 * updateModelConfiguration).
 */
function onModelSearchChange() {
  updateModelConfiguration(modelSearch.value.trim());
}

/**
 * Make `modelId` the current model and refresh everything derived from it:
 * the info panel, the instance/engine/service selects and, after a short
 * delay, the conditional parameter sections and the generated command.
 *
 * @param {string} modelId - ID of the model to select; an unknown or empty
 *   ID clears the current selection instead.
 */
function updateModelConfiguration(modelId) {
  if (!isSupportedModelId(modelId)) {
    clearModelSelection();
    return;
  }

  const model = supportedModels[modelId];
  currentSelectedModel = modelId;

  showModelInfo(model);

  // Populate compatible options with auto-fill
  populateInstanceSelect(model.supported_instances, true);
  populateEngineSelect(model.supported_engines, true);
  updateServiceSelect(model.supported_services, true);

  // Small delay to ensure selects are populated before dependent UI updates.
  setTimeout(() => {
    updateServiceSpecificParams();
    updateEngineSpecificParams();
    generateCommand();
  }, 100);
}

/**
 * Fill the model information panel (title, description, badges, links) and
 * make it visible.
 *
 * @param {object} model - Model definition; reads `model_id`, `description`,
 *   `model_type`, `model_series`, `allow_china_region`,
 *   `application_scenario`, `huggingface_model_id`, `modelscope_model_id`.
 */
function showModelInfo(model) {
  document.getElementById('modelInfoTitle').textContent = `${model.model_id} Information`;
  document.getElementById('modelInfoDescription').textContent = model.description;

  // Badges
  const badgesContainer = document.getElementById('modelInfoBadges');
  badgesContainer.innerHTML = '';

  const badges = [
    { label: 'Type', value: model.model_type.toUpperCase() },
    { label: 'Series', value: model.model_series?.name || 'N/A' },
    { label: 'China Region', value: model.allow_china_region ? 'Yes' : 'No' },
  ];

  if (model.application_scenario) {
    badges.push({ label: 'Use Cases', value: model.application_scenario });
  }

  badges.forEach((badge) => {
    const span = document.createElement('span');
    span.className = 'model-badge';
    span.textContent = `${badge.label}: ${badge.value}`;
    badgesContainer.appendChild(span);
  });

  // External links
  const linksContainer = document.getElementById('modelInfoLinks');
  linksContainer.innerHTML = '';

  const appendLink = (href, label) => {
    const link = document.createElement('a');
    link.href = href;
    link.target = '_blank';
    // Prevent the opened page from reaching back through window.opener.
    link.rel = 'noopener noreferrer';
    link.textContent = label;
    linksContainer.appendChild(link);
  };

  if (model.huggingface_model_id) {
    appendLink(`https://huggingface.co/${model.huggingface_model_id}`, '🤗 Hugging Face');
  }

  if (model.modelscope_model_id) {
    appendLink(`https://modelscope.cn/models/${model.modelscope_model_id}`, '🔬 ModelScope');
  }

  selectedModelInfo.style.display = 'block';
}

/**
 * Forget the current model: hide the info panel, empty the instance and
 * engine selects, un-highlight table rows and reset the command output to
 * its placeholder text.
 */
function clearModelSelection() {
  currentSelectedModel = null;
  selectedModelInfo.style.display = 'none';
  instanceSelect.innerHTML = '';
  engineSelect.innerHTML = '';

  // Clear table selection
  document.querySelectorAll('.models-table tbody tr').forEach((row) => {
    row.classList.remove('selected');
  });

  // Clear command output
  commandOutput.value = 'Select a model from the table to generate deployment command';
  commandOutput.className = 'command-output empty';
  copyButton.style.display = 'none';
}

/**
 * Rebuild the instance select from a list of instance type names.
 * Types for which `window.EMD_HELPERS.getInstance` returns nothing are skipped.
 *
 * @param {string[]} supportedInstanceTypes - Instance type names to offer.
 * @param {boolean} [autoFill=false] - Mark the first listed type as selected.
 */
function populateInstanceSelect(supportedInstanceTypes, autoFill = false) {
  instanceSelect.innerHTML = '';

  supportedInstanceTypes.forEach((instanceType, index) => {
    const instanceDef = window.EMD_HELPERS.getInstance(instanceType);
    if (!instanceDef) {
      return;
    }

    const option = document.createElement('option');
    option.value = instanceType;

    // Option text carries the instance specifications reported by the helper.
    const specs = window.EMD_HELPERS.getInstanceSpecs(instanceType);
    option.textContent = `${instanceType} (${specs.text})`;

    if (autoFill && index === 0) {
      option.selected = true;
    }

    instanceSelect.appendChild(option);
  });
}

/**
 * Rebuild the engine select from a list of engine type names.
 * Types for which `window.EMD_HELPERS.getEngine` returns nothing are skipped.
 *
 * @param {string[]} supportedEngineTypes - Engine type names to offer.
 * @param {boolean} [autoFill=false] - Mark the first listed type as selected.
 */
function populateEngineSelect(supportedEngineTypes, autoFill = false) {
  engineSelect.innerHTML = '';

  supportedEngineTypes.forEach((engineType, index) => {
    const engineDef = window.EMD_HELPERS.getEngine(engineType);
    if (!engineDef) {
      return;
    }

    const option = document.createElement('option');
    option.value = engineType;
    option.textContent = `${engineType} - ${engineDef.description}`;

    if (autoFill && index === 0) {
      option.selected = true;
    }

    engineSelect.appendChild(option);
  });
}

/**
 * Enable and show the service options the model supports; disable and hide
 * the others. The option with an empty value is left untouched.
 *
 * @param {string[]} supportedServiceTypes - Service type values to keep enabled.
 * @param {boolean} [autoFill=false] - When nothing is selected afterwards,
 *   select the first supported option.
 */
function updateServiceSelect(supportedServiceTypes, autoFill = false) {
  const options = serviceSelect.querySelectorAll('option');
  let firstAvailableOption = null;

  options.forEach((option) => {
    if (option.value === '') {
      return;
    }

    if (!supportedServiceTypes.includes(option.value)) {
      option.disabled = true;
      option.style.display = 'none';
      return;
    }

    option.disabled = false;
    option.style.display = 'block';

    // Use the display name from the service definition when one exists.
    const serviceDef = window.EMD_HELPERS.getService(option.value);
    if (serviceDef) {
      option.textContent = serviceDef.name;
    }

    if (!firstAvailableOption) {
      firstAvailableOption = option;
    }
  });

  // Reset selection if current value is not supported
  if (serviceSelect.value && !supportedServiceTypes.includes(serviceSelect.value)) {
    serviceSelect.value = '';
  }

  // Auto-select first available option if autoFill is true
  if (autoFill && firstAvailableOption && !serviceSelect.value) {
    firstAvailableOption.selected = true;
  }
}

/**
 * Build the `emd deploy` command from the current form state and write it to
 * the command output box. When model, instance, engine or service is missing,
 * a placeholder message is shown instead and the copy button is hidden.
 */
function generateCommand() {
  const modelId = currentSelectedModel;
  const instanceType = instanceSelect.value;
  const engineType = engineSelect.value;
  const serviceType = serviceSelect.value;

  if (!modelId || !instanceType || !engineType || !serviceType) {
    commandOutput.value = 'Please select all required parameters to generate deployment command';
    commandOutput.className = 'command-output empty';
    copyButton.style.display = 'none';
    return;
  }

  // POSIX single-quote quoting: close the quote, emit an escaped quote, reopen.
  const shellQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
  // Characters that are safe to pass to a shell without quoting.
  const isShellSafe = (value) => /^[\w@%+=:,.\/-]+$/.test(value);

  let command = `emd deploy --model-id ${modelId} --instance-type ${instanceType} --engine-type ${engineType} --service-type ${serviceType}`;

  // Free-text tag: quote it only when it contains shell-significant characters,
  // so ordinary tags render exactly as typed.
  const modelTag = document.getElementById('modelTag').value.trim();
  if (modelTag) {
    command += ` --model-tag ${isShellSafe(modelTag) ? modelTag : shellQuote(modelTag)}`;
  }

  if (document.getElementById('skipConfirm').checked) {
    command += ` --skip-confirm`;
  }

  // The JSON may contain user-typed single quotes (API key, env vars, ...),
  // which would otherwise terminate the quoted shell argument early.
  const extraParams = buildExtraParams();
  if (Object.keys(extraParams).length > 0) {
    command += ` --extra-params ${shellQuote(JSON.stringify(extraParams, null, 2))}`;
  }

  if (document.getElementById('runInBackground').checked) {
    command = `nohup ${command} > /dev/null 2>&1 &`;
  }

  commandOutput.value = command;
  commandOutput.className = 'command-output';
  copyButton.style.display = 'inline-block';
}

/**
 * Collect the optional form fields into the object passed as `--extra-params`.
 *
 * Only non-empty groups are included. Service fields are taken from the
 * SageMaker or ECS inputs depending on the selected service, engine CLI
 * arguments from the vLLM or TGI inputs depending on the selected engine.
 * Integer fields that do not parse as a base-10 integer are omitted, and
 * empty entries in comma-separated subnet lists are dropped.
 *
 * @returns {object} Object with any of the keys `model_params`,
 *   `service_params`, `engine_params`, `framework_params`.
 */
function buildExtraParams() {
  const textOf = (id) => document.getElementById(id).value.trim();
  const isChecked = (id) => document.getElementById(id).checked;

  const setText = (target, key, text) => {
    if (text) {
      target[key] = text;
    }
  };
  const setInt = (target, key, text) => {
    if (!text) {
      return;
    }
    // Explicit radix; skip unparsable input instead of emitting NaN (-> null in JSON).
    const parsed = Number.parseInt(text, 10);
    if (!Number.isNaN(parsed)) {
      target[key] = parsed;
    }
  };
  const setList = (target, key, text) => {
    if (!text) {
      return;
    }
    const items = text.split(',').map((item) => item.trim()).filter(Boolean);
    if (items.length > 0) {
      target[key] = items;
    }
  };

  const extraParams = {};
  const attach = (key, group) => {
    if (Object.keys(group).length > 0) {
      extraParams[key] = group;
    }
  };

  // Model parameters
  const modelParams = {};
  setText(modelParams, 'model_files_s3_path', textOf('modelFilesS3Path'));
  setText(modelParams, 'model_files_download_source', textOf('modelFilesDownloadSource'));
  setText(modelParams, 'huggingface_model_id', textOf('huggingfaceModelId'));
  setText(modelParams, 'modelscope_model_id', textOf('modelscopeModelId'));
  // Checkbox is "skip preparation", hence checked -> false.
  if (isChecked('needPrepareModel')) {
    modelParams.need_prepare_model = false;
  }
  attach('model_params', modelParams);

  // Service parameters
  const serviceParams = {};
  const serviceType = serviceSelect.value;
  setText(serviceParams, 'api_key', textOf('apiKey'));

  if (serviceType === 'sagemaker_realtime' || serviceType === 'sagemaker_async') {
    setInt(serviceParams, 'max_capacity', textOf('maxCapacity'));
    setInt(serviceParams, 'min_capacity', textOf('minCapacity'));
    setInt(serviceParams, 'auto_scaling_target_value', textOf('autoScalingTargetValue'));
    setText(serviceParams, 'sagemaker_endpoint_name', textOf('sagemakerEndpointName'));
    setText(serviceParams, 'vpc_id', textOf('sagemakerVpcId'));
    setList(serviceParams, 'subnet_ids', textOf('sagemakerSubnetIds'));
  }

  if (serviceType === 'ecs') {
    setInt(serviceParams, 'desired_capacity', textOf('desiredCapacity'));
    setInt(serviceParams, 'max_size', textOf('maxSize'));
    setText(serviceParams, 'vpc_id', textOf('vpcId'));
    setList(serviceParams, 'subnet_ids', textOf('subnetIds'));
    if (isChecked('useSpot')) {
      serviceParams.use_spot = true;
    }
  }
  attach('service_params', serviceParams);

  // Engine parameters
  const engineParams = {};
  const engineType = engineSelect.value;
  setText(engineParams, 'environment_variables', textOf('environmentVariables'));

  // [input id, CLI flag] pairs, in the order the flags are emitted.
  const valueFlagsByEngine = {
    vllm: [
      ['maxModelLen', '--max_model_len'],
      ['maxNumSeqs', '--max_num_seqs'],
      ['gpuMemoryUtilization', '--gpu_memory_utilization'],
      ['toolCallParser', '--tool_call_parser'],
      ['reasoningParser', '--reasoning_parser'],
      ['chatTemplate', '--chat_template'],
    ],
    tgi: [
      ['maxTotalTokens', '--max_total_tokens'],
      ['maxConcurrentRequests', '--max_concurrent_requests'],
      ['maxBatchSize', '--max_batch_size'],
      ['maxInputTokens', '--max_input_tokens'],
    ],
  };
  const switchFlagsByEngine = {
    vllm: [
      ['disableLogStats', '--disable_log_stats'],
      ['enableAutoToolChoice', '--enable_auto_tool_choice'],
      ['enableReasoning', '--enable_reasoning'],
    ],
    tgi: [],
  };

  const defaultCliArgs = [];
  if (engineType === 'vllm' || engineType === 'tgi') {
    valueFlagsByEngine[engineType].forEach(([id, flag]) => {
      const text = textOf(id);
      if (text) {
        defaultCliArgs.push(`${flag} ${text}`);
      }
    });
    switchFlagsByEngine[engineType].forEach(([id, flag]) => {
      if (isChecked(id)) {
        defaultCliArgs.push(flag);
      }
    });
  }

  if (defaultCliArgs.length > 0) {
    engineParams.default_cli_args = defaultCliArgs.join(' ');
  }
  attach('engine_params', engineParams);

  // Framework parameters
  const frameworkParams = {};
  setInt(frameworkParams, 'limit_concurrency', textOf('limitConcurrency'));
  setInt(frameworkParams, 'timeout_keep_alive', textOf('timeoutKeepAlive'));
  setText(frameworkParams, 'uvicorn_log_level', textOf('uvicornLogLevel'));
  attach('framework_params', frameworkParams);

  return extraParams;
}

/**
 * Copy the generated command to the clipboard.
 *
 * Uses the asynchronous Clipboard API when it is available and falls back to
 * a temporary textarea with `document.execCommand('copy')` when it is missing
 * or rejects. The success message is shown only when a copy
 * actually succeeded.
 */
function copyCommands() {
  const commands = commandOutput.value;

  const copyWithTextArea = () => {
    const textArea = document.createElement('textarea');
    textArea.value = commands;
    document.body.appendChild(textArea);
    let copied = false;
    try {
      textArea.select();
      copied = document.execCommand('copy');
    } catch (err) {
      console.error('Failed to copy commands: ', err);
    } finally {
      document.body.removeChild(textArea);
    }
    if (copied) {
      showCopySuccess();
    }
  };

  // navigator.clipboard is undefined when the API is unavailable; calling it
  // would throw before any .catch() could run the fallback.
  if (!navigator.clipboard?.writeText) {
    copyWithTextArea();
    return;
  }

  navigator.clipboard.writeText(commands).then(
    () => {
      showCopySuccess();
    },
    (err) => {
      console.error('Failed to copy commands: ', err);
      copyWithTextArea();
    },
  );
}

/** Show the "copied" message for three seconds. */
function showCopySuccess() {
  copySuccess.classList.add('show');
  setTimeout(() => {
    copySuccess.classList.remove('show');
  }, 3000);
}

/**
 * Reset the configuration form, the search box and the model selection, and
 * hide all service- and engine-specific parameter sections.
 */
function resetForm() {
  modelConfigForm.reset();
  modelSearch.value = '';
  clearModelSelection();
  searchSuggestions.style.display = 'none';

  // Reset service type to default
  serviceSelect.value = 'sagemaker_realtime';

  // Hide all conditional parameter sections
  ['sagemakerParams', 'ecsParams', 'vllmParams', 'tgiParams'].forEach((id) => {
    document.getElementById(id).style.display = 'none';
  });
}

// Initialize when page loads. The guard only matters when this script is
// loaded outside a browser (no `document`); in a page it always registers.
if (typeof document !== 'undefined') {
  document.addEventListener('DOMContentLoaded', () => {
    // Wait a bit for the config script to load
    setTimeout(initializeDynamicConfig, 100);
  });
}

// Keyboard shortcuts
+document.addEventListener('keydown', function(e) { + // ESC to close panel + if (e.key === 'Escape' && isPanelOpen) { + closePanel(); + } +}); + + diff --git a/docs/en/supported_models.md b/docs/en/supported_models.md index ed78a163..1f7c2b6c 100644 --- a/docs/en/supported_models.md +++ b/docs/en/supported_models.md @@ -1,80 +1,64 @@ -| ModeId | ModelSeries | ModelType | Supported Instances | Supported Services | Support China Region | -|:-------------------------------------|:-------------------------|:------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------|:-----------------------| -| glm-4-9b-chat | glm4 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| GLM-4-9B-0414 | glm4 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| GLM-4-32B-0414 | glm4 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| GLM-Z1-9B-0414 | glm4 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| GLM-Z1-32B-0414 | glm4 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| GLM-Z1-Rumination-32B-0414 | glm4 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| internlm2_5-20b-chat-4bit-awq | internlm2.5 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.12xlarge,g5.16xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| internlm2_5-20b-chat | internlm2.5 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| internlm2_5-7b-chat | internlm2.5 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.12xlarge,g5.16xlarge,g5.24xlarge,g5.48xlarge,g5.12xlarge,g5.24xlarge,g5.48xlarge | 
sagemaker_realtime,sagemaker_async,ecs | ✅ | -| internlm2_5-7b-chat-4bit | internlm2.5 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.12xlarge,g5.16xlarge,g5.24xlarge,g5.48xlarge,g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ❎ | -| internlm2_5-1_8b-chat | internlm2.5 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.12xlarge,g5.16xlarge,g5.24xlarge,g5.48xlarge,g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen2.5-7B-Instruct | qwen2.5 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.12xlarge,g5.16xlarge,g5.24xlarge,g5.48xlarge,inf2.8xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen2.5-72B-Instruct-AWQ | qwen2.5 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge,inf2.24xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen2.5-72B-Instruct | qwen2.5 | llm | g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen2.5-72B-Instruct-AWQ-128k | qwen2.5 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen2.5-32B-Instruct | qwen2.5 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen2.5-0.5B-Instruct | qwen2.5 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge,inf2.8xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen2.5-1.5B-Instruct | qwen2.5 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen2.5-3B-Instruct | qwen2.5 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen2.5-14B-Instruct-AWQ | qwen2.5 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge,g4dn.2xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen2.5-14B-Instruct | qwen2.5 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| QwQ-32B-Preview | qwen reasoning model | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| QwQ-32B | qwen 
reasoning model | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen3-8B | qwen3 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge,g4dn.2xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen3-0.6B | qwen3 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge,g4dn.2xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen3-1.7B | qwen3 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen3-4B | qwen3 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge,g4dn.2xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen3-14B-AWQ | qwen3 | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge,g4dn.2xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen3-14B | qwen3 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen3-32B-AWQ | qwen3 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen3-32B | qwen3 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen3-30B-A3B | qwen3 | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen3-235B-A22B | qwen3 | llm | | | ✅ | -| Qwen3-235B-A22B-FP8 | qwen3 | llm | | | ✅ | -| llama-3.3-70b-instruct-awq | llama | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ❎ | -| DeepSeek-R1-Distill-Qwen-32B | deepseek reasoning model | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| DeepSeek-R1-Distill-Qwen-14B | deepseek reasoning model | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| DeepSeek-R1-Distill-Qwen-7B | deepseek reasoning model | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| DeepSeek-R1-Distill-Qwen-1.5B | deepseek reasoning model | llm | 
g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| DeepSeek-R1-Distill-Qwen-1.5B_ollama | deepseek reasoning model | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| DeepSeek-R1-Distill-Qwen-1.5B-GGUF | deepseek reasoning model | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| DeepSeek-R1-Distill-Qwen-32B-GGUF | deepseek reasoning model | llm | g5.12xlarge,g5.24xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| DeepSeek-R1-Distill-Llama-8B | deepseek reasoning model | llm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| deepseek-r1-distill-llama-70b-awq | deepseek reasoning model | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| deepseek-r1-671b-1.58bit_gguf | deepseek reasoning model | llm | g5.8xlarge,g5.12xlarge,g5.16xlarge,g5.24xlarge,g5.48xlarge,g6.8xlarge,g6.12xlarge,g6.16xlarge,g6.24xlarge,g6.48xlarge,g6e.4xlarge,g6e.8xlarge,g6e.12xlarge,g6e.16xlarge,g6e.24xlarge,g6e.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| deepseek-r1-671b-2.51bit_gguf | deepseek reasoning model | llm | g5.12xlarge,g5.16xlarge,g5.24xlarge,g5.48xlarge,g6.12xlarge,g6.16xlarge,g6.24xlarge,g6.48xlarge,g6e.8xlarge,g6e.12xlarge,g6e.16xlarge,g6e.24xlarge,g6e.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| DeepSeek-R1 | deepseek reasoning model | llm | | | ✅ | -| deepseek-r1-671b-4bit_gguf | deepseek reasoning model | llm | g5.24xlarge,g5.48xlarge,g6.24xlarge,g6.48xlarge,g6e.16xlarge,g6e.24xlarge,g6e.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| DeepSeek-R1-0528-Qwen3-8B | deepseek reasoning model | llm | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| deepseek-v3-UD-IQ1_M_ollama | deepseek v3 | llm | g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ❎ | -| 
Baichuan-M1-14B-Instruct | baichuan | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| ReaderLM-v2 | jina | llm | g4dn.2xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge,inf2.8xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| txgemma-9b-chat | txgemma | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| txgemma-27b-chat | txgemma | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Qwen2-VL-72B-Instruct-AWQ | qwen2vl | vlm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async | ✅ | -| Qwen2.5-VL-72B-Instruct-AWQ | qwen2vl | vlm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async | ✅ | -| Qwen2.5-VL-32B-Instruct | qwen2vl | vlm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async | ✅ | -| QVQ-72B-Preview-AWQ | qwen reasoning model | vlm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async | ❎ | -| Qwen2-VL-7B-Instruct | qwen2vl | vlm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.12xlarge,g5.16xlarge,g5.24xlarge,g5.48xlarge,g6e.2xlarge | sagemaker_realtime,sagemaker_async | ✅ | -| UI-TARS-1.5-7B | agent | vlm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.12xlarge,g5.16xlarge,g5.24xlarge,g5.48xlarge,g6e.2xlarge | sagemaker_realtime,sagemaker_async | ✅ | -| InternVL2_5-78B-AWQ | internvl2.5 | vlm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async | ❎ | -| gemma-3-4b-it | gemma3 | vlm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| gemma-3-12b-it | gemma3 | vlm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| gemma-3-27b-it | gemma3 | vlm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ✅ | -| Mistral-Small-3.1-24B-Instruct-2503 | mistral | vlm | 
g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ❎ | -| txt2video-LTX | comfyui | video | g5.4xlarge,g5.8xlarge,g6e.2xlarge | sagemaker_async | ❎ | -| whisper | whisper | whisper | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_async | ❎ | -| bce-embedding-base_v1 | bce | embedding | g4dn.2xlarge,g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | -| bge-base-en-v1.5 | bge | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | -| bge-m3 | bge | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | -| jina-embeddings-v3 | jina | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | -| jina-embeddings-v4 | jina | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | -| Qwen3-Embedding-0.6B | qwen3 | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | -| Qwen3-Embedding-4B | qwen3 | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | -| Qwen3-Embedding-8B | qwen3 | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | -| bge-reranker-v2-m3 | bge | rerank | g4dn.2xlarge,g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | -| bge-reranker-large | bge | rerank | g4dn.2xlarge,g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | -| jina-reranker-v2-base-multilingual | jina | rerank | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | + + + + + diff --git a/docs/mkdocs.en.yml b/docs/mkdocs.en.yml index 5a662a48..439322cd 100644 --- a/docs/mkdocs.en.yml +++ b/docs/mkdocs.en.yml @@ -5,7 +5,7 @@ theme: language: en nav: - Quick Start: installation.md - - Supported Models: 
model-generator.html + - Supported Models: supported_models.md - Best Deployment Practices: best_deployment_practices.md - CLI Commands: commands.md - API Reference: api.md