
Commit 1d6854b

fix gemma3 (#3772)

* fix gemma3
* add comment
* fix gemma3
* fix transformers>=4.54.0

1 parent 4450cd9 commit 1d6854b

4 files changed, 12 additions and 5 deletions

lmdeploy/pytorch/configurations/gemma.py (2 additions, 0 deletions)

@@ -31,5 +31,7 @@ def build(cls, hf_config, model_path: str = None, **kwargs):
         """Build gemma."""
         hf_config.text_config.architectures = ['Gemma3ForCausalLM']
         cfg = DefaultModelConfigBuilder.build(hf_config.text_config, model_path, **kwargs)
+        # gemma 3 does not enable sliding window on every layer
+        cfg.sliding_window = -1
         cfg.hf_config = hf_config
         return cfg
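
The window is disabled at the model-config level presumably because Gemma 3 applies it only to a subset of layers, and a single cache-wide window would drop tokens that the remaining full-attention layers still need. A minimal sketch of that reasoning (illustrative only; `effective_cache_window` is not an lmdeploy function):

```python
# Hypothetical helper (not lmdeploy code): decide whether the whole KV cache
# can share one sliding window.
def effective_cache_window(layer_windows: list[int]) -> int:
    """layer_windows uses -1 for full-attention layers, >0 for sliding layers."""
    if any(w == -1 for w in layer_windows):
        # A full-attention layer needs every past token, so the shared cache
        # must not evict anything: disable the cache-wide window.
        return -1
    return max(layer_windows)

# Gemma 3 text interleaves sliding and full-attention layers (example values).
pattern = [1024, 1024, 1024, 1024, 1024, -1] * 2
assert effective_cache_window(pattern) == -1  # matches cfg.sliding_window = -1
```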

lmdeploy/pytorch/engine/engine.py (1 addition, 1 deletion)

@@ -1189,7 +1189,7 @@ async def async_loop(self):
                 has_runable_event=has_runable_event,
                 inputs_maker=inputs_maker)
         except Exception as e:
-            logger.error(f'exception happened: {type(e)} {e}')
+            logger.exception(f'exception happened: {type(e)} {e}')
         finally:
             self._loop_finally()
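
`logger.exception` logs at ERROR level like `logger.error`, but it also records the traceback of the exception currently being handled, which is the useful part when the engine loop dies. A standalone snippet showing the difference (plain `logging`, nothing lmdeploy-specific):

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('demo')

try:
    1 / 0
except Exception as e:
    logger.error(f'exception happened: {type(e)} {e}')      # message only
    logger.exception(f'exception happened: {type(e)} {e}')  # message + traceback
```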

lmdeploy/pytorch/engine/executor/base.py (4 additions, 0 deletions)

@@ -27,6 +27,10 @@ def __init__(self,
                  device_type: str = 'cuda'):
         """Initialize Executor."""
         cache_config.window_size = model_config.sliding_window
+        if cache_config.window_size is not None and cache_config.window_size > 0:
+            # do not support sliding window prefix caching
+            logger.warning('Sliding window prefix caching is not supported.')
+            cache_config.enable_prefix_caching = False
         self.model_config = model_config
         self.cache_config = cache_config
         self.backend_config = backend_config
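
The new guard simply refuses the combination: whenever a positive window size reaches the executor, prefix caching is switched off with a warning. A self-contained sketch of the same check, using a stand-in `DemoCacheConfig` dataclass instead of lmdeploy's real `CacheConfig`:

```python
import logging
from dataclasses import dataclass
from typing import Optional

logger = logging.getLogger('executor_demo')


@dataclass
class DemoCacheConfig:
    window_size: Optional[int] = None
    enable_prefix_caching: bool = True


def apply_window_guard(cache_config: DemoCacheConfig, sliding_window: Optional[int]) -> DemoCacheConfig:
    """Mirror the executor logic: a positive window disables prefix caching."""
    cache_config.window_size = sliding_window
    if cache_config.window_size is not None and cache_config.window_size > 0:
        logger.warning('Sliding window prefix caching is not supported.')
        cache_config.enable_prefix_caching = False
    return cache_config


assert apply_window_guard(DemoCacheConfig(), 4096).enable_prefix_caching is False
assert apply_window_guard(DemoCacheConfig(), -1).enable_prefix_caching is True  # e.g. gemma3 after the config fix
```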

lmdeploy/pytorch/models/gemma.py (5 additions, 4 deletions)

@@ -56,7 +56,8 @@ def __init__(self,
         if hasattr(config, 'query_pre_attn_scalar'):
             self.scaling = config.query_pre_attn_scalar**-0.5
         if self.model_type == 'gemma3_text':
-            is_sliding = bool((layer_idx + 1) % config.sliding_window_pattern)
+            sliding_window_pattern = getattr(config, 'sliding_window_pattern', 6)
+            is_sliding = bool((layer_idx + 1) % sliding_window_pattern)
             self.sliding_window = (getattr(config, 'sliding_window', -1) if is_sliding else -1)
         else:
             self.sliding_window = (getattr(config, 'sliding_window', -1) if not bool(layer_idx % 2) else -1)
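
Reading `sliding_window_pattern` through `getattr` (with 6 as the default when the config no longer carries the field, which the commit message ties to transformers>=4.54.0) preserves the layer layout: every sixth layer uses full attention, the rest use the sliding window. A small illustration of that pattern (the `sliding_window` value below is made up):

```python
sliding_window_pattern = 6  # default used when the config lacks the field
sliding_window = 1024       # example value, not read from a real config

for layer_idx in range(12):
    is_sliding = bool((layer_idx + 1) % sliding_window_pattern)
    window = sliding_window if is_sliding else -1
    print(layer_idx, 'sliding' if is_sliding else 'global', window)
# layers 5 and 11 (0-based) print 'global'; all others print 'sliding'
```
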
@@ -388,7 +389,7 @@ def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None, device:
             emb_type = RopeType.DynamicNTKScaling
         else:
             raise RuntimeError(f'Unsupported rope type: {rope_type}')
-        scaling_factor = rope_scaling.get('scaling_factor', scaling_factor)
+        scaling_factor = rope_scaling.get('scaling_factor', rope_scaling.get('factor', scaling_factor))

         rope_dim = config.head_dim
         rope_max_pos_emb = config.max_position_embeddings
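
The nested `get` accepts either spelling of the scaling key: `scaling_factor` if the config provides it, otherwise `factor` (the key used in Hugging Face `rope_scaling` dicts), otherwise the previously computed default. A tiny sketch of the lookup (the helper name is illustrative):

```python
def resolve_scaling_factor(rope_scaling: dict, default: float = 1.0) -> float:
    # Try 'scaling_factor' first, then 'factor', then keep the default.
    return rope_scaling.get('scaling_factor', rope_scaling.get('factor', default))


assert resolve_scaling_factor({'scaling_factor': 8.0}) == 8.0
assert resolve_scaling_factor({'rope_type': 'linear', 'factor': 8.0}) == 8.0
assert resolve_scaling_factor({}) == 1.0
```
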
@@ -406,8 +407,8 @@ def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None, device:
             rope_dim,
             rope_max_pos_emb,
             config.rope_local_base_freq,
-            scaling_factor,
-            emb_type=emb_type,
+            1.0,
+            emb_type=RopeType.LinearScaling,
         )

    def forward(
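
For the local (sliding-window) rotary embedding the factor is pinned to 1.0 with `RopeType.LinearScaling`, so the global `rope_scaling` settings no longer affect the local layers. A minimal sketch of why that pair amounts to plain RoPE, assuming linear scaling divides position ids by the factor as position interpolation does (`scale_positions` is illustrative, not lmdeploy code):

```python
import torch


def scale_positions(position_ids: torch.Tensor, scaling_factor: float) -> torch.Tensor:
    # Linear (position-interpolation) scaling stretches the position axis.
    return position_ids.float() / scaling_factor


pos = torch.arange(8)
assert torch.equal(scale_positions(pos, 1.0), pos.float())  # factor 1.0 is the identity
print(scale_positions(pos, 8.0))  # what a larger factor would have done
```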
