PaddlePaddle · XieYunshen · Sep 3, 2025 · Sep 2, 2025 · Sep 2, 2025 · Sep 2, 2025
diff --git a/.github/workflows/_unit_test_coverage.yml b/.github/workflows/_unit_test_coverage.yml
@@ -168,10 +168,7 @@ jobs:
           python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
           pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
 
-          python -m pip install coverage
-          python -m pip install diff-cover
-          python -m pip install pytest-cov
-          python -m pip install jsonschema aistudio_sdk==0.3.5
+          python -m pip install -r scripts/unittest_requirement.txt
           python -m pip install ${fd_wheel_url}
           rm -rf fastdeploy
           # coverage subprocess use

diff --git a/scripts/unittest_requirement.txt b/scripts/unittest_requirement.txt
@@ -5,3 +5,5 @@ anyio
 coverage
 diff-cover
 partial_json_parser
+jsonschema
+aistudio_sdk==0.3.5
diff --git a/tests/cov_pytest.ini b/tests/cov_pytest.ini
@@ -3,22 +3,5 @@
 addopts =
     --ignore=tests/ci_use
     --ignore=tests/ce
-    --ignore=tests/layers/test_append_attention.py
-    --ignore=tests/layers/test_attention.py
-    --ignore=tests/operators/test_rejection_top_p_sampling.py
-    --ignore=tests/operators/test_perchannel_gemm.py
-    --ignore=tests/operators/test_scaled_gemm_f8_i4_f16.py
-    --ignore=tests/operators/test_topp_sampling.py
-    --ignore=tests/operators/test_stop_generation.py
-    --ignore=tests/operators/test_air_topp_sampling.py
     --ignore=tests/operators/test_fused_moe.py
-    --ignore=tests/operators/test_stop_generation_multi_ends.py
-    --ignore=tests/graph_optimization/test_cuda_graph.py
-    --ignore=tests/graph_optimization/test_cuda_graph_dynamic_subgraph.py
-    --ignore=tests/graph_optimization/test_cuda_graph_spec_decode
-    --ignore=tests/layers/test_quant_layer.py
-    --ignore=tests/operators/test_token_penalty.py
-    --ignore=tests/operators/test_split_fuse.py
-    --ignore=tests/operators/test_flash_mask_attn.py
     --ignore=tests/operators/test_w4afp8_gemm.py
-    --ignore=tests/operators/test_tree_mask.py
diff --git a/tests/operators/test_tree_mask.py b/tests/operators/test_tree_mask.py
@@ -271,7 +271,7 @@ def run_append_c16_attention(self, q_len, kv_len, prefill=False, attn_mask=None)
             paddle.device.synchronize()
         e_time = time.time()
         print(f"mean infer time: {np.mean((e_time - s_time) * 1000 / self.run_time):.2f}")
-        return out[0].reshape([token_num, self.num_q_head, self.head_dim])
+        return out.reshape([token_num, self.num_q_head, self.head_dim])
 
     def test_naive_speculative_decoding(self):
         prefill_len = 8192