Skip to content

Commit 33f5b19

Browse files
authored
fix lora name and rearrange wqkv for internlm2 (#2912)
* fix lora name and rearrange lora_b for wqkv * update for internvl * fix torchvision mismatch with torch
1 parent 0ffac7f commit 33f5b19

File tree

5 files changed

+43
-2
lines changed

5 files changed

+43
-2
lines changed

lmdeploy/pytorch/models/internlm2.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,32 @@ def prepare_inputs_for_generation(
397397
inputs_embeds=inputs_embeds,
398398
)
399399

400+
def load_lora_weights(self, weights: Iterable[Tuple[str, torch.Tensor]],
                      adapter_id: int):
    """Load LoRA weights, reordering ``wqkv`` lora_B rows first.

    The packed ``wqkv`` projection stores its output rows per KV group as
    ``[q x group_size, k, v]``; any ``lora_B`` tensor targeting it inherits
    that interleaved layout.  The rows are regrouped into contiguous
    ``[all q | all k | all v]`` order before handing everything to the
    generic adapter loader.

    Args:
        weights: iterable of ``(param_name, tensor)`` pairs for the adapter.
        adapter_id: numeric id of the adapter being loaded.
    """
    from lmdeploy.pytorch.adapter.adapter import load_lora_weights

    cfg = self.config
    head_dim = cfg.hidden_size // cfg.num_attention_heads
    # number of query heads sharing one kv head in the packed layout
    group_size = cfg.num_attention_heads // cfg.num_key_value_heads

    def _rearranged(items):
        """Yield (name, tensor), splitting wqkv lora_B rows into q/k/v order."""
        for name, tensor in items:
            if 'wqkv.lora_B' not in name:
                yield name, tensor
                continue
            # view rows as (num_kv_heads, group_size + 2, head_dim)
            grouped = tensor.unflatten(0, (-1, 2 + group_size, head_dim))
            q_part = grouped[:, :-2].flatten(0, 2)
            k_part = grouped[:, -2].flatten(0, 1)
            v_part = grouped[:, -1].flatten(0, 1)
            yield name, torch.cat([q_part, k_part, v_part], dim=0)

    load_lora_weights(self, _rearranged(weights), adapter_id)
425+
400426
def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
401427
"""load weights."""
402428
# modify from vllm

lmdeploy/pytorch/models/internvl.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,17 @@ def prepare_inputs_for_generation(
516516
inputs_embeds=inputs_embeds,
517517
)
518518

519+
def load_lora_weights(self, weights: Iterable[Tuple[str, torch.Tensor]],
                      adapter_id: int):
    """Load LoRA weights for the given adapter.

    Delegates to the wrapped language model's own ``load_lora_weights``
    when it defines one (so model-specific handling such as InternLM2's
    wqkv rearrangement is applied); otherwise falls back to the generic
    adapter loader.

    Args:
        weights: iterable of ``(param_name, tensor)`` pairs for the adapter.
        adapter_id: numeric id of the adapter being loaded.
    """
    if hasattr(self.language_model, 'load_lora_weights'):
        return self.language_model.load_lora_weights(weights, adapter_id)

    from lmdeploy.pytorch.adapter.adapter import load_lora_weights
    # BUGFIX: the generic loader takes the model as its first positional
    # argument (see the internlm2 call site:
    # ``load_lora_weights(self, weights_iter, adapter_id)``); the original
    # fallback dropped ``self`` and would raise a TypeError.
    return load_lora_weights(self, weights, adapter_id)
529+
519530
def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
520531
"""load weights."""
521532

lmdeploy/pytorch/models/patch.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,10 @@ def add_adapters(model: torch.nn.Module,
251251
ranks, scalings = get_ranks_and_scalings(target_name,
252252
adapter_cfgs,
253253
device=device)
254+
# split in case target_name has '.' like 'attention.wo'
255+
# which cannot be used as name of a module
256+
# and it's not aligned with key in model.packed_modules_mapping
257+
target_name = target_name.split('.')[-1]
254258
found_mods, pack_idx = find_all_target(model, target_name)
255259
sum_rank = ranks.sum().item()
256260

requirements/runtime_ascend.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@ shortuuid
1818
tiktoken
1919
torch<=2.4.0,>=2.3.1
2020
torch-npu==2.3.1
21-
torchvision<=0.19.0,>=0.15.0
21+
torchvision<=0.19.0,>=0.18.1
2222
transformers
2323
uvicorn

requirements/runtime_cuda.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ sentencepiece
1616
shortuuid
1717
tiktoken
1818
torch<=2.5.1,>=2.0.0
19-
torchvision<=0.19.0,>=0.15.0
19+
torchvision<=0.20.1,>=0.15.0
2020
transformers
2121
triton==3.0.0; sys_platform == "linux"
2222
uvicorn

0 commit comments

Comments
 (0)