diff --git a/.github/workflows/_unit_test_coverage.yml b/.github/workflows/_unit_test_coverage.yml index 132d0ab156..31ed7fa59a 100644 --- a/.github/workflows/_unit_test_coverage.yml +++ b/.github/workflows/_unit_test_coverage.yml @@ -168,10 +168,7 @@ jobs: python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/ pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple - python -m pip install coverage - python -m pip install diff-cover - python -m pip install pytest-cov - python -m pip install jsonschema aistudio_sdk==0.3.5 + python -m pip install -r scripts/unittest_requirement.txt python -m pip install ${fd_wheel_url} rm -rf fastdeploy # coverage subprocess use diff --git a/scripts/unittest_requirement.txt b/scripts/unittest_requirement.txt index 52b0c03f7d..43894ff72e 100644 --- a/scripts/unittest_requirement.txt +++ b/scripts/unittest_requirement.txt @@ -5,3 +5,5 @@ anyio coverage diff-cover partial_json_parser +jsonschema +aistudio_sdk==0.3.5 diff --git a/tests/cov_pytest.ini b/tests/cov_pytest.ini index 363132aa3f..4e0c31e584 100644 --- a/tests/cov_pytest.ini +++ b/tests/cov_pytest.ini @@ -3,22 +3,5 @@ addopts = --ignore=tests/ci_use --ignore=tests/ce - --ignore=tests/layers/test_append_attention.py - --ignore=tests/layers/test_attention.py - --ignore=tests/operators/test_rejection_top_p_sampling.py - --ignore=tests/operators/test_perchannel_gemm.py - --ignore=tests/operators/test_scaled_gemm_f8_i4_f16.py - --ignore=tests/operators/test_topp_sampling.py - --ignore=tests/operators/test_stop_generation.py - --ignore=tests/operators/test_air_topp_sampling.py --ignore=tests/operators/test_fused_moe.py - --ignore=tests/operators/test_stop_generation_multi_ends.py - --ignore=tests/graph_optimization/test_cuda_graph.py - --ignore=tests/graph_optimization/test_cuda_graph_dynamic_subgraph.py - --ignore=tests/graph_optimization/test_cuda_graph_spec_decode - --ignore=tests/layers/test_quant_layer.py - --ignore=tests/operators/test_token_penalty.py - --ignore=tests/operators/test_split_fuse.py - --ignore=tests/operators/test_flash_mask_attn.py --ignore=tests/operators/test_w4afp8_gemm.py - --ignore=tests/operators/test_tree_mask.py diff --git a/tests/operators/test_tree_mask.py b/tests/operators/test_tree_mask.py index 10e55a4b19..79b19c43f7 100644 --- a/tests/operators/test_tree_mask.py +++ b/tests/operators/test_tree_mask.py @@ -271,7 +271,7 @@ def run_append_c16_attention(self, q_len, kv_len, prefill=False, attn_mask=None) paddle.device.synchronize() e_time = time.time() print(f"mean infer time: {np.mean((e_time - s_time) * 1000 / self.run_time):.2f}") - return out[0].reshape([token_num, self.num_q_head, self.head_dim]) + return out.reshape([token_num, self.num_q_head, self.head_dim]) def test_naive_speculative_decoding(self): prefill_len = 8192