Skip to content

Commit a96391b

Browse files
authored
Support SDAR (#3922)
* refactor SchedulerSequence * block sparse attn * support SDAR * fix max_new_tokens;update profiler * add args * fix multiround stop words * fix sampling step * optimize position_ids * fix long context * fix vlm * fix stopping * move args into logitsprocessor * rename * fix pd * rename * strategy + abstruct factory * update seqs * add moe support * bind block length * fix num loops * enum unmasking type * typo fixing * warning * fix metric * limit batch size * rename field;comment unmasking strategy * suppression warning * colored vis * fix dummy
1 parent 85f4150 commit a96391b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+3444
-665
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ LMDeploy is a toolkit for compressing, deploying, and serving LLM, developed by
150150
<li>Phi-3.5-MoE (16x3.8B)</li>
151151
<li>Phi-4-mini (3.8B)</li>
152152
<li>MiniCPM3 (4B)</li>
153+
<li>SDAR (1.7B-30B)</li>
153154
<li>gpt-oss (20B, 120B)</li>
154155
</ul>
155156
</td>

README_ja.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ LMDeploy TurboMindエンジンは卓越した推論能力を持ち、さまざ
137137
<li>Phi-3.5-MoE (16x3.8B)</li>
138138
<li>Phi-4-mini (3.8B)</li>
139139
<li>MiniCPM3 (4B)</li>
140+
<li>SDAR (1.7B-30B)</li>
140141
</ul>
141142
</td>
142143
<td>

README_zh-CN.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
151151
<li>Phi-3.5-MoE (16x3.8B)</li>
152152
<li>Phi-4-mini (3.8B)</li>
153153
<li>MiniCPM3 (4B)</li>
154+
<li>SDAR (1.7B-30B)</li>
154155
<li>gpt-oss (20B, 120B)</li>
155156
</ul>
156157
</td>

benchmark/profile_throughput.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,10 @@ def parse_args():
307307
# pytorch engine args
308308
pt_group = parser.add_argument_group('PyTorch engine arguments')
309309
ArgumentHelper.eager_mode(pt_group)
310+
ArgumentHelper.dllm_block_length(pt_group)
311+
ArgumentHelper.dllm_unmasking_strategy(pt_group)
312+
ArgumentHelper.dllm_denoising_steps(pt_group)
313+
ArgumentHelper.dllm_confidence_threshold(pt_group)
310314

311315
tp_act = ArgumentHelper.tp(pt_group)
312316
cache_count_act = ArgumentHelper.cache_max_entry_count(pt_group)
@@ -363,6 +367,10 @@ def main():
363367
quant_policy=args.quant_policy,
364368
dtype=args.dtype,
365369
distributed_executor_backend=args.distributed_executor_backend,
370+
dllm_block_length=args.dllm_block_length,
371+
dllm_unmasking_strategy=args.dllm_unmasking_strategy,
372+
dllm_denoising_steps=args.dllm_denoising_steps,
373+
dllm_confidence_threshold=args.dllm_confidence_threshold,
366374
)
367375

368376
if args.use_uvloop:

docs/en/supported_models/supported_models.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
120120
| Phi-3.5-mini | 3.8B | LLM | Yes | Yes | No | - | - |
121121
| Phi-3.5-MoE | 16x3.8B | LLM | Yes | Yes | No | - | - |
122122
| Phi-3.5-vision | 4.2B | MLLM | Yes | Yes | No | - | - |
123+
| SDAR | 1.7B-30B | LLM | Yes | Yes | No | - | - |
123124

124125
```{note}
125126
* [1] Currently Mono-InternVL does not support FP16 due to numerical instability. Please use BF16 instead.

docs/zh_cn/supported_models/supported_models.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@
120120
| Phi-3.5-mini | 3.8B | LLM | Yes | Yes | No | - | - |
121121
| Phi-3.5-MoE | 16x3.8B | LLM | Yes | Yes | No | - | - |
122122
| Phi-3.5-vision | 4.2B | MLLM | Yes | Yes | No | - | - |
123+
| SDAR | 1.7B-30B | LLM | Yes | Yes | No | - | - |
123124

124125
```{note}
125126
* [1] 目前,Mono-InternVL不支持FP16,因为数值不稳定。请改用BF16

lmdeploy/cli/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def add_parser_chat():
5555
ArgumentHelper.adapters(pt_group)
5656
ArgumentHelper.device(pt_group)
5757
ArgumentHelper.eager_mode(pt_group)
58+
ArgumentHelper.dllm_block_length(pt_group)
5859
# common engine args
5960
dtype_act = ArgumentHelper.dtype(pt_group)
6061
tp_act = ArgumentHelper.tp(pt_group)

lmdeploy/cli/serve.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ def add_parser_api_server():
9292
ArgumentHelper.eager_mode(pt_group)
9393
ArgumentHelper.disable_vision_encoder(pt_group)
9494
ArgumentHelper.logprobs_mode(pt_group)
95+
ArgumentHelper.dllm_block_length(pt_group)
96+
ArgumentHelper.dllm_unmasking_strategy(pt_group)
97+
ArgumentHelper.dllm_denoising_steps(pt_group)
98+
ArgumentHelper.dllm_confidence_threshold(pt_group)
9599

96100
# common engine args
97101
dtype_act = ArgumentHelper.dtype(pt_group)
@@ -219,6 +223,10 @@ def api_server(args):
219223
hf_overrides=args.hf_overrides,
220224
disable_vision_encoder=args.disable_vision_encoder,
221225
logprobs_mode=args.logprobs_mode,
226+
dllm_block_length=args.dllm_block_length,
227+
dllm_unmasking_strategy=args.dllm_unmasking_strategy,
228+
dllm_denoising_steps=args.dllm_denoising_steps,
229+
dllm_confidence_threshold=args.dllm_confidence_threshold,
222230
)
223231
else:
224232
from lmdeploy.messages import TurbomindEngineConfig

lmdeploy/cli/utils.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,36 @@ def logprobs_mode(parser):
624624
choices=[None, 'raw_logits', 'raw_logprobs'],
625625
help='The mode of logprobs.')
626626

627+
@staticmethod
628+
def dllm_block_length(parser):
629+
"""dllm_block_length for dllm."""
630+
return parser.add_argument('--dllm-block-length', type=int, default=None, help='Block length for dllm')
631+
632+
@staticmethod
633+
def dllm_unmasking_strategy(parser):
634+
"""Dllm unmasking strategy."""
635+
return parser.add_argument('--dllm-unmasking-strategy',
636+
type=str,
637+
default='low_confidence_dynamic',
638+
choices=['low_confidence_dynamic', 'low_confidence_static', 'sequential'],
639+
help='The unmasking strategy for dllm.')
640+
641+
@staticmethod
642+
def dllm_denoising_steps(parser):
643+
"""Dllm denoising steps."""
644+
return parser.add_argument('--dllm-denoising-steps',
645+
type=int,
646+
default=None,
647+
help='The number of denoising steps for dllm.')
648+
649+
@staticmethod
650+
def dllm_confidence_threshold(parser):
651+
"""Dllm confidence threshold."""
652+
return parser.add_argument('--dllm-confidence-threshold',
653+
type=float,
654+
default=0.85,
655+
help='The confidence threshold for dllm.')
656+
627657

628658
# adapted from https://github.com/vllm-project/vllm/blob/main/vllm/utils/__init__.py
629659
class FlexibleArgumentParser(argparse.ArgumentParser):

lmdeploy/messages.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,12 @@ class PytorchEngineConfig:
336336
disable_vision_encoder (bool): Whether to disable loading vision
337337
encoder. Default to False.
338338
logprobs_mode (str): The mode of logprob, options: ['raw_logits', 'raw_logprobs']
339+
dllm_block_length (int): Block size of block diffusion model.
340+
dllm_unmasking_strategy (str): Dllm unmasking strategy, options:
341+
['low_confidence_dynamic', 'low_confidence_static', 'sequential'].
342+
dllm_denoising_steps (int): Dllm denoising steps.
343+
dllm_confidence_threshold (float): dllm unmasking threshold for
344+
dynamic unmasking.
339345
"""
340346
dtype: str = 'auto'
341347
tp: int = 1
@@ -371,6 +377,12 @@ class PytorchEngineConfig:
371377
disable_vision_encoder: bool = False
372378
logprobs_mode: str = None
373379

380+
# dllm
381+
dllm_block_length: int = None
382+
dllm_unmasking_strategy: str = 'low_confidence_dynamic'
383+
dllm_denoising_steps: int = None
384+
dllm_confidence_threshold: float = 0.85
385+
374386
role: EngineRole = EngineRole.Hybrid
375387
migration_backend: MigrationBackend = MigrationBackend.DLSlime
376388

0 commit comments

Comments (0)