diff --git a/.gitlab/release.yml b/.gitlab/release.yml
index 4bf5480e0..fb3b2e040 100644
--- a/.gitlab/release.yml
+++ b/.gitlab/release.yml
@@ -5,10 +5,12 @@ build-and-upload-wheels:
   stage: release
   timeout: 15m
   tags: [type/docker, os/linux] # Use a runner with these tags
+  needs: []
   rules:
     - if: $JET_ONLY != null
       when: never
     - if: $CI_COMMIT_TAG =~ /^\d+\.\d+\.\d+$/
+      when: manual
       variables:
         RELEASE: "true"
         TWINE_USERNAME: svc-dl-algo-ammo
@@ -16,6 +18,7 @@ build-and-upload-wheels:
       REPO_URL: https://urm.nvidia.com/artifactory/api/pypi/sw-dl-algo-ammo-pypi-local
     - if: $CI_PIPELINE_SOURCE == "schedule"
+      when: manual
       variables:
         RELEASE: "false"
         TWINE_USERNAME: gitlab-ci-token
         TWINE_PASSWORD: $CI_JOB_TOKEN
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 8a517895a..886ab7499 100755
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,13 +1,5 @@
 Model Optimizer Changelog (Linux)
 =================================
 
-0.41 (2025-12-xx)
-^^^^^^^^^^^^^^^^^
-
-**Deprecations**
-
-**New Features**
-- Add FP8/NVFP4 KV cache quantization support for Megatron Core models.
-
 0.40 (2025-12-xx)
 ^^^^^^^^^^^^^^^^^
 
@@ -20,8 +12,9 @@ Model Optimizer Changelog (Linux)
 - Add MoE (e.g. Qwen3-30B-A3B) pruning support for ``num_moe_experts``, ``moe_ffn_hidden_size`` and ``moe_shared_expert_intermediate_size`` parameters in Minitron pruning (``mcore_minitron``).
 - Add ``specdec_bench`` example to benchmark speculative decoding performance. See `examples/specdec_bench/README.md `_ for more details.
+- Add FP8/NVFP4 KV cache quantization support for Megatron Core models.
 
-0.39 (2025-11-14)
+0.39 (2025-11-11)
 ^^^^^^^^^^^^^^^^^
 
 **Deprecations**