@@ -1246,10 +1246,10 @@ def create_dockerfile_linux(
12461246
12471247WORKDIR /opt/tritonserver
12481248COPY --chown=1000:1000 NVIDIA_Deep_Learning_Container_License.pdf .
1249- RUN find /opt/tritonserver/python -maxdepth 1 -type f -name \\
1250- "tritonserver-*.whl" | xargs -I {} pip install --upgrade {}[all ] && \ \
1251- find /opt/tritonserver/python -maxdepth 1 -type f -name \\
1252- "tritonfrontend-*.whl" | xargs -I {} pip install --upgrade {}[all]
1249+ RUN find /opt/tritonserver/python -maxdepth 1 -type f -name \
1250+ "tritonserver-*.whl" | xargs -I {} pip install --upgrade {}[$VARIANT ] && \
1251+ find /opt/tritonserver/python -maxdepth 1 -type f -name \
1252+ "tritonfrontend-*.whl" | xargs -I {} pip install --upgrade {}[$VARIANT];
12531253
12541254RUN pip3 install -r python/openai/requirements.txt
12551255
@@ -1297,6 +1297,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
12971297 df = """
12981298ARG TRITON_VERSION
12991299ARG TRITON_CONTAINER_VERSION
1300+ ARG VARIANT=all
13001301
13011302ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
13021303ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
@@ -1512,8 +1513,9 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
15121513 target_platform () not in ["igpu" , "windows" , "rhel" ]
15131514 and "tensorrtllm" not in backends
15141515 ):
1515- repo_arch = "sbsa" if target_machine == "aarch64" else "x86_64"
1516- df += f"""
1516+ if FLAGS .build_variant != "cpu" :
1517+ repo_arch = "sbsa" if target_machine == "aarch64" else "x86_64"
1518+ df += f"""
15171519RUN curl -o /tmp/cuda-keyring.deb \\
15181520 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/{ repo_arch } /cuda-keyring_1.1-1_all.deb \\
15191521 && apt install /tmp/cuda-keyring.deb \\
@@ -1911,6 +1913,10 @@ def create_docker_build_script(script_name, container_install_dir, container_ci_
19111913 f"--secret id=NVPL_SLIM_URL" ,
19121914 f"--build-arg BUILD_PUBLIC_VLLM={ build_public_vllm } " ,
19131915 ]
1916+ if FLAGS .build_variant :
1917+ finalargs += [
1918+ "--build-arg VARIANT=" + (FLAGS .build_variant ),
1919+ ]
19141920 finalargs += [
19151921 "-t" ,
19161922 "tritonserver" ,
@@ -2776,6 +2782,13 @@ def enable_all():
27762782 default = DEFAULT_TRITON_VERSION_MAP ["rhel_py_version" ],
27772783 help = "This flag sets the Python version for RHEL platform of Triton Inference Server to be built. Default: the latest supported version." ,
27782784 )
2785+ parser .add_argument (
2786+ "--build_variant" ,
2787+ required = False ,
2788+ type = str ,
2789+ default = "all" ,
2790+ help = "Can be set to 'all' or 'cpu'. Default value is 'all'."
2791+ )
27792792 parser .add_argument (
27802793 "--build-secret" ,
27812794 action = "append" ,
@@ -2815,6 +2828,9 @@ def enable_all():
28152828 FLAGS .extra_backend_cmake_arg = []
28162829 if FLAGS .build_secret is None :
28172830 FLAGS .build_secret = []
2831+ if hasattr (FLAGS , 'build_variant' ) and FLAGS .build_variant not in ["all" , "cpu" ]:
2832+ raise ValueError (f"Invalid build_variant value: { FLAGS .build_variant } . Expected 'all' or 'cpu'." )
2833+
28182834
28192835 FLAGS .boost_url = os .getenv (
28202836 "TRITON_BOOST_URL" ,
0 commit comments