diff --git a/vllm/distributed/kv_transfer/kv_connector/simple_connector.py b/vllm/distributed/kv_transfer/kv_connector/simple_connector.py index 7780e2dfa317..2f204e058101 100644 --- a/vllm/distributed/kv_transfer/kv_connector/simple_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/simple_connector.py @@ -156,7 +156,7 @@ def send_kv_caches_and_hidden_states( ) -> None: input_tokens_tensor = model_input.input_tokens - seq_lens = model_input.attn_metadata.seq_lens + seq_lens = (model_input.attn_metadata.seq_lens_tensor - model_input.attn_metadata.context_lens_tensor).tolist() slot_mapping_flat = model_input.attn_metadata.slot_mapping.flatten() start_layer = model_executable.model.start_layer end_layer = model_executable.model.end_layer @@ -212,7 +212,7 @@ def recv_kv_caches_and_hidden_states( bypass_model_exec = True input_tokens_tensor = model_input.input_tokens - seq_lens = model_input.attn_metadata.seq_lens + seq_lens = (model_input.attn_metadata.seq_lens_tensor - model_input.attn_metadata.context_lens_tensor).tolist() slot_mapping = model_input.attn_metadata.slot_mapping.flatten() hidden_or_intermediate_states_for_one_req = []