Skip to content

Commit 7cca720

Browse files
committed
eplb quick-fix
1 parent 84d7f5a commit 7cca720

File tree

4 files changed

+22
-10
lines changed

4 files changed

+22
-10
lines changed

vllm_ascend/eplb/adaptor/vllm_adaptor.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,20 +194,34 @@ def _export_tensor_to_file(self, expert_maps, expert_map_record_path: str):
194194
json.dump(record, f, indent=4)
195195

196196
def do_update_expert_map(self, layer_id, updated_expert_map):
    """Overwrite the stored expert maps for ``layer_id`` in place.

    The device-side map is right-padded with ``-1`` sentinel entries up to
    the stored tensor's length before the in-place copy, so a shorter
    updated map never shrinks the stored buffer.  The CPU-side map receives
    the *unpadded* update.

    NOTE(review): the CPU copy being unpadded looks deliberate, but confirm
    ``expert_map_per_layer_cpu[layer_id]`` really has the unpadded shape.
    """
    target = self.expert_map_per_layer[layer_id]
    shortfall = target.shape[0] - updated_expert_map.shape[0]
    # Pad the tail with -1 (the "no expert here" sentinel) to match the
    # stored tensor's length, then copy in place to preserve any views.
    padded = torch.nn.functional.pad(updated_expert_map,
                                     pad=(0, shortfall),
                                     mode='constant',
                                     value=-1)
    target.copy_(padded)
    self.expert_map_per_layer_cpu[layer_id].copy_(updated_expert_map)
199206

200207
def do_update_expert_weight(self, layer_id, local_expert_to_replace,
                            buffer_tensor_id):
    """Copy staged weights from a buffer slot into one local expert.

    Walks the parameter tensors of ``local_expert_to_replace`` in layer
    ``layer_id`` in lockstep with the staging tensors held in buffer slot
    ``buffer_tensor_id`` and overwrites each parameter in place.
    """
    params = self.expert_param_per_layer[layer_id][local_expert_to_replace]
    staged = self.buffer_tensor_list[buffer_tensor_id]
    for expert_tensor, buffer_tensor in zip(params, staged):
        # In-place copy so existing references to the parameter storage
        # (views, optimizer state) keep seeing the updated weights.
        expert_tensor.copy_(buffer_tensor)
        logger.debug(f"Expert tensor shape is :{expert_tensor.shape}")
207214

208215
def do_update_log2phy_map(self, layer_id, updated_log2phy_map):
    """Refresh the logical-to-physical expert map for ``layer_id`` in place.

    Silently does nothing when the layer has no stored map.  The update is
    right-padded with ``-1`` sentinels up to the stored tensor's length
    before the in-place copy.

    NOTE(review): the pad length comes from ``shape[0]`` while ``F.pad``
    pads the LAST dimension — these agree only for 1-D maps; confirm the
    stored map is 1-D.
    """
    stored = self.log2phy_map_per_layer[layer_id]
    if stored is None:
        return
    shortfall = stored.shape[0] - updated_log2phy_map.shape[0]
    stored.copy_(
        torch.nn.functional.pad(updated_log2phy_map,
                                pad=(0, shortfall),
                                mode='constant',
                                value=-1))
211225

212226
def global2local(self, placement: torch.Tensor,
213227
E_local: int) -> torch.Tensor:

vllm_ascend/eplb/core/eplb_device_transfer_loader.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,6 @@ def generate_expert_d2d_transfer_task(self, expert_send_info,
5050
)
5151
return
5252

53-
# If neither send nor receive task is needed for this layer on this rank, return
54-
if not (expert_send_info or expert_recv_info):
55-
return
56-
5753
self.updated_expert_map = updated_expert_map
5854

5955
self.layer_id = layer_id

vllm_ascend/ops/fused_moe/moe_mlp.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,14 +112,16 @@ def quant_apply_mlp(hidden_states: torch.Tensor,
112112
if quantized_hidden_states is not None:
113113
dispose_tensor(quantized_hidden_states)
114114
# act_fn: swiglu
115+
group_diff = torch.diff(group_list, dim=0)
116+
new_group = torch.cat([group_list[0].unsqueeze(0), group_diff],dim=0)
115117
hidden_states, swiglu_out_scale = torch_npu.npu_dequant_swiglu_quant(
116118
x=hidden_states,
117119
weight_scale=w1_scale,
118120
activation_scale=pertoken_scale,
119121
bias=None,
120122
quant_scale=None,
121123
quant_offset=None,
122-
group_index=group_list,
124+
group_index=new_group,
123125
activate_left=True,
124126
quant_mode=1,
125127
)

vllm_ascend/quantization/w8a8_dynamic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ def apply(
238238
hidden_states=x,
239239
pertoken_scale=pertoken_scale,
240240
w1=layer.w13_weight,
241-
w1_scale=layer.w13_weight_scale_fp32,
241+
w1_scale=layer.w13_weight_scale.to(torch.float32),
242242
w2=layer.w2_weight,
243243
w2_scale=layer.w2_weight_scale,
244244
topk_weights=topk_weights,

0 commit comments

Comments
 (0)