@@ -331,9 +331,8 @@ def build(
         padded_num_tokens = common_attn_metadata.num_input_tokens - num_actual_tokens
         seq_lens = torch.cat([
             seq_lens,
-            torch.ones(padded_num_tokens,
-                       dtype=seq_lens.dtype,
-                       device=seq_lens.device)
+            torch.tensor([padded_num_tokens
+                          ]).to(seq_lens.device).to(seq_lens.dtype)
         ])
         block_table_padding = torch.zeros(
             (padded_num_tokens, ) + block_table.shape[1:],
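For reference, a minimal sketch of the padded `seq_lens` this hunk builds; the tensor values below are hypothetical stand-ins, only the names `seq_lens` and `padded_num_tokens` come from the diff:

```python
import torch

# Hypothetical: two real sequences (lengths 5 and 7) and 3 tokens of graph padding.
seq_lens = torch.tensor([5, 7], dtype=torch.int32)
padded_num_tokens = 3

# The padding is appended as a single pseudo-sequence of length padded_num_tokens.
seq_lens = torch.cat([
    seq_lens,
    torch.tensor([padded_num_tokens]).to(seq_lens.device).to(seq_lens.dtype)
])
print(seq_lens)  # tensor([5, 7, 3], dtype=torch.int32)
```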
@@ -342,10 +341,8 @@ def build(
         block_table = torch.cat([block_table, block_table_padding], dim=0)
         query_start_loc_cpu = torch.cat([
             query_start_loc_cpu,
-            torch.arange(query_start_loc_cpu[-1] + 1,
-                         query_start_loc_cpu[-1] + padded_num_tokens,
-                         dtype=query_start_loc_cpu.dtype,
-                         device=query_start_loc_cpu.device)
+            torch.tensor([query_start_loc_cpu[-1] + padded_num_tokens]).to(
+                query_start_loc_cpu.device).to(query_start_loc_cpu.dtype)
         ])

         query_start_loc = query_start_loc_cpu.to(self.device,
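Likewise, a minimal sketch of the padded `query_start_loc_cpu`, using the same hypothetical sizes (5 + 7 real tokens, 3 padded):

```python
import torch

# Hypothetical cumulative query starts for the two real sequences above.
query_start_loc_cpu = torch.tensor([0, 5, 12], dtype=torch.int32)
padded_num_tokens = 3

# A single extra boundary at last_start + padded_num_tokens covers all the padding.
query_start_loc_cpu = torch.cat([
    query_start_loc_cpu,
    torch.tensor([query_start_loc_cpu[-1] + padded_num_tokens]).to(
        query_start_loc_cpu.device).to(query_start_loc_cpu.dtype)
])
print(query_start_loc_cpu)  # tensor([ 0,  5, 12, 15], dtype=torch.int32)
```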
@@ -621,7 +618,6 @@ def full_graph_attention(self,
         actual_seq_lengths_kv = attn_metadata.seq_lens_list

         num_tokens = attn_metadata.query_start_loc_list[-1]
-        query = query[:num_tokens]
         graph_params = get_graph_params()
         query_start_loc = attn_metadata.query_start_loc_list
         # Prepare tensors for attention output
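For reference, if the last entry of `query_start_loc` now covers the padded tokens (as the earlier hunks suggest), slicing `query` to `query_start_loc_list[-1]` would keep every row anyway; a quick check under those assumed, hypothetical shapes:

```python
import torch

# Hypothetical padded batch: 12 real tokens + 3 padding tokens = 15 rows.
query = torch.randn(15, 8, 64)  # (tokens, heads, head_dim), all sizes assumed

# Last boundary includes the padded pseudo-sequence, as sketched above.
query_start_loc_list = [0, 5, 12, 15]
num_tokens = query_start_loc_list[-1]

assert query[:num_tokens].shape == query.shape  # the slice keeps all 15 rows
```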