Skip to content

Commit 0d7269e

Browse files
authored
[Infra][Docs] - Some clean-up for the CI pipeline and docs (#4419)
* [Docs] - Some clean-up for the docs
  Signed-off-by: Yanchao Lu <[email protected]>
* [Infra] - Some clean-up for the CI pipeline
  Signed-off-by: Yanchao Lu <[email protected]>
---------
Signed-off-by: Yanchao Lu <[email protected]>
1 parent 27afcb9 commit 0d7269e

File tree

4 files changed

+24
-5
lines changed

4 files changed

+24
-5
lines changed

docker/common/install_pytorch.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ set -ex
44

55
# Use latest stable version from https://pypi.org/project/torch/#history
66
# and closest to the version specified in
7-
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-03.html#rel-25-03
7+
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-04.html#rel-25-04
88
TORCH_VERSION="2.7.0"
99
SYSTEM_ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
1010

docs/source/installation/linux.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,9 @@ There are some known limitations when you pip install pre-built TensorRT-LLM whe
5656
```bash
5757
[ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt
5858
```
59+
60+
The PyTorch NGC Container typically includes a pre-installed `tensorrt` Python package. If the version of this pre-installed package does not match the version required by the TensorRT-LLM wheel, you need to uninstall the existing `tensorrt` package before installing TensorRT-LLM.
61+
62+
```bash
63+
pip uninstall -y tensorrt
64+
```

jenkins/L0_Test.groovy

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -635,7 +635,7 @@ def launchTestListCheck(pipeline)
635635
sh "tar -zxf ${tarName}"
636636
def llmPath = sh (script: "realpath .", returnStdout: true).trim()
637637
def llmSrc = "${llmPath}/TensorRT-LLM/src"
638-
sh "NVIDIA_TRITON_SERVER_VERSION=25.03 LLM_ROOT=${llmSrc} LLM_BACKEND_ROOT=${llmSrc}/triton_backend python3 ${llmSrc}/scripts/check_test_list.py --l0 --qa"
638+
sh "NVIDIA_TRITON_SERVER_VERSION=25.04 LLM_ROOT=${llmSrc} LLM_BACKEND_ROOT=${llmSrc}/triton_backend python3 ${llmSrc}/scripts/check_test_list.py --l0 --qa"
639639
} catch (InterruptedException e) {
640640
throw e
641641
} catch (Exception e) {
@@ -1383,7 +1383,6 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
13831383
"A10-TensorRT-6": ["a10", "l0_a10", 6, 6],
13841384
"A30-PyTorch-1": ["a30", "l0_a30", 1, 2],
13851385
"A30-PyTorch-2": ["a30", "l0_a30", 2, 2],
1386-
"A30-Triton-Python-[Post-Merge]-1": ["a30", "l0_a30", 1, 1],
13871386
"A30-CPP-1": ["a30", "l0_a30", 1, 2],
13881387
"A30-CPP-2": ["a30", "l0_a30", 2, 2],
13891388
"A30-TensorRT-1": ["a30", "l0_a30", 1, 4],
@@ -1395,7 +1394,6 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
13951394
"A100X-TensorRT-2": ["a100x", "l0_a100", 2, 4],
13961395
"A100X-TensorRT-3": ["a100x", "l0_a100", 3, 4],
13971396
"A100X-TensorRT-4": ["a100x", "l0_a100", 4, 4],
1398-
"A100X-Triton-Python-[Post-Merge]-1": ["a100x", "l0_a100", 1, 1],
13991397
"L40S-PyTorch-1": ["l40s", "l0_l40s", 1, 1],
14001398
"L40S-TensorRT-1": ["l40s", "l0_l40s", 1, 3],
14011399
"L40S-TensorRT-2": ["l40s", "l0_l40s", 2, 3],
@@ -1413,7 +1411,6 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
14131411
"B200_PCIe-PyTorch-2": ["b100-ts2", "l0_b200", 2, 2],
14141412
"B200_PCIe-TensorRT-1": ["b100-ts2", "l0_b200", 1, 2],
14151413
"B200_PCIe-TensorRT-2": ["b100-ts2", "l0_b200", 2, 2],
1416-
"B200_PCIe-Triton-Python-[Post-Merge]-1": ["b100-ts2", "l0_b200", 1, 1],
14171414
"RTX5090-PyTorch-1": ["rtx-5090", "l0_gb202", 1, 1],
14181415
"RTX5080-TensorRT-1": ["rtx-5080", "l0_gb203", 1, 2],
14191416
"RTX5080-TensorRT-2": ["rtx-5080", "l0_gb203", 2, 2],
@@ -1424,14 +1421,19 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
14241421
"A30-TensorRT-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
14251422
"A30-TensorRT-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
14261423
"A30-CPP-[Post-Merge]-1": ["a30", "l0_a30", 1, 1],
1424+
"A30-Triton-Python-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
1425+
"A30-Triton-Python-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
14271426
"A100X-TensorRT-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
14281427
"A100X-TensorRT-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
1428+
"A100X-Triton-Python-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
1429+
"A100X-Triton-Python-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
14291430
"L40S-TensorRT-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 2],
14301431
"L40S-TensorRT-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 2],
14311432
"H100_PCIe-PyTorch-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 1],
14321433
"H100_PCIe-CPP-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 1],
14331434
"H100_PCIe-TensorRT-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 2],
14341435
"H100_PCIe-TensorRT-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 2],
1436+
"B200_PCIe-Triton-Python-[Post-Merge]-1": ["b100-ts2", "l0_b200", 1, 1],
14351437
"DGX_H100-4_GPUs-PyTorch-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
14361438
"DGX_H100-4_GPUs-TensorRT-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
14371439
"A100_80GB_PCIE-TensorRT-Perf": ["a100-80gb-pcie", "l0_perf", 1, 1],

tests/integration/test_lists/waives.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,3 +494,14 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp_
494494
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False] SKIP (https://nvbugs/5286795)
495495
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True] SKIP (https://nvbugs/5286795)
496496
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp_nextn=2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True] SKIP (https://nvbugs/5286795)
497+
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False] SKIP (https://nvbugs/5285965)
498+
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=True-cuda_graph=False-overlap_scheduler=False] SKIP (https://nvbugs/5285965)
499+
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=True-overlap_scheduler=False] SKIP (https://nvbugs/5285965)
500+
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=True] SKIP (https://nvbugs/5285965)
501+
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=True-cuda_graph=True-overlap_scheduler=True] SKIP (https://nvbugs/5285965)
502+
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=False-overlap_scheduler=False] SKIP (https://nvbugs/5285965)
503+
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=False-overlap_scheduler=False] SKIP (https://nvbugs/5285965)
504+
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=True-overlap_scheduler=False] SKIP (https://nvbugs/5285965)
505+
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=False-overlap_scheduler=True] SKIP (https://nvbugs/5285965)
506+
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True] SKIP (https://nvbugs/5285965)
507+
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[latency] SKIP (https://nvbugs/5285965)

0 commit comments

Comments
 (0)