Commits (18)
6cfd6fd  [Rewriter] Implement zero bias removal for Conv operations and relate… (whyvineet, Sep 9, 2025)
48037a8  Merge branch 'microsoft:main' into remove-optional-bias (whyvineet, Sep 9, 2025)
fd028ee  Merge branch 'main' of github-personal:whyvineet/onnxscript into remo… (whyvineet, Sep 10, 2025)
3742e7b  [Rewriter] Enhance zero bias removal for Conv, ConvTranspose, Gemm, a… (whyvineet, Sep 10, 2025)
8bfa65f  Refactor zero bias removal tests to use helper function and improve s… (whyvineet, Sep 10, 2025)
6fc3ca7  Merge branch 'main' of github-personal:whyvineet/onnxscript into remo… (whyvineet, Sep 10, 2025)
b93a56c  Refactor test cases for zero bias removal to improve readability and … (whyvineet, Sep 11, 2025)
e322625  Remove duplicate import of _fuse_batchnorm in rewriter module (whyvineet, Sep 11, 2025)
121360e  Refactor zero bias removal logic to streamline input handling and enh… (whyvineet, Sep 11, 2025)
ee7dafa  Refactor Gemm operation pattern and check method to align with zero b… (whyvineet, Sep 14, 2025)
8ef6c41  Enhance zero bias removal logic to filter bias parameters and preserv… (whyvineet, Sep 14, 2025)
2b9dda4  Refactor bias removal logic to directly use operation inputs, improvi… (whyvineet, Sep 14, 2025)
153b4e7  Remove redundant domain attribute from operation inputs in _RemoveZer… (whyvineet, Sep 15, 2025)
ce64fb7  Merge branch 'main' into remove-optional-bias (justinchuby, Sep 16, 2025)
a94f8b9  Merge HEAD, branch 'remove-optional-bias' of github-personal:whyvinee… (whyvineet, Sep 21, 2025)
86de85f  Refactor IR value creation in tests to use `ir.Value` for consistency… (whyvineet, Sep 21, 2025)
d62eafb  Revert "Refactor IR value creation in tests to use `ir.Value` for con… (whyvineet, Sep 21, 2025)
d4f73dd  Enhance attribute comparison in optimization tests to handle list vs … (whyvineet, Sep 21, 2025)
4 changes: 3 additions & 1 deletion onnxscript/rewriter/__init__.py
@@ -19,8 +19,8 @@
]

import onnx
import onnx_ir.passes.common as common_passes

import onnxscript.ir.passes.common as common_passes
from onnxscript import ir
from onnxscript.rewriter import pattern
from onnxscript.rewriter._basics import MatchContext, MatchingTracer, MatchResult, MatchStatus
@@ -41,6 +41,7 @@
_min_max_to_clip,
_no_op,
_redundant_scatter_nd,
_remove_zero_bias,
)

_ModelProtoOrIr = TypeVar("_ModelProtoOrIr", onnx.ModelProto, ir.Model)
@@ -55,6 +56,7 @@
*_redundant_scatter_nd.rules,
*_fuse_pad_into_conv.rules,
*_fuse_batchnorm.rules,
*_remove_zero_bias.rules,
)


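Because `_remove_zero_bias.rules` is now spliced into the default rule tuple above, the top-level rewriter should pick the new rules up automatically. A minimal sketch, assuming `rewriter.rewrite` falls back to the default rule set when no rules are passed:

import onnx

from onnxscript import rewriter

# Hypothetical model path; any model with an all-zero Conv/Gemm bias qualifies.
model = onnx.load("model.onnx")

# No explicit rules: the default set, which now includes zero-bias removal, is applied.
rewritten = rewriter.rewrite(model)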
@@ -6,20 +6,23 @@
import onnx
import onnx.checker
import onnx.shape_inference
import onnx_ir as ir
import onnxruntime

from onnxscript import ir
from onnxscript.rewriter.onnxruntime.bfloat16_utils import bfloat16_converter


class Bfloat16ConversionTest(unittest.TestCase):
def setUp(self) -> None:
self.v0 = ir.val(name="v0", shape=ir.Shape([2, 3, 4]))
self.v0.dtype = ir.DataType.BFLOAT16
self.v1 = ir.val(name="v1", shape=ir.Shape([2, 3, 4]))
self.v1.dtype = ir.DataType.BFLOAT16
self.v2 = ir.val(name="v2", shape=ir.Shape([2, 3, 4]))
self.v2.dtype = ir.DataType.BFLOAT16
self.v0 = ir.Value(
Collaborator: Please revert. ir.val is a convenience function.

Author: It was failing test cases... Okay, I'll do so!

name="v0", shape=ir.Shape([2, 3, 4]), type=ir.TensorType(ir.DataType.BFLOAT16)
)
self.v1 = ir.Value(
name="v1", shape=ir.Shape([2, 3, 4]), type=ir.TensorType(ir.DataType.BFLOAT16)
)
self.v2 = ir.Value(
name="v2", shape=ir.Shape([2, 3, 4]), type=ir.TensorType(ir.DataType.BFLOAT16)
)

self.add_node = ir.Node("", "Add", inputs=(self.v0, self.v1), num_outputs=1)
self.add_node.outputs[0].dtype = ir.DataType.BFLOAT16
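An aside on the `ir.val` thread above: both spellings construct the same value. `ir.val` is the convenience wrapper (per the reviewer's note), while `ir.Value` is the underlying constructor the PR temporarily switched to. A sketch based on the signatures used elsewhere in this diff:

import onnx_ir as ir

# Convenience form: dtype and shape as plain arguments.
v0 = ir.val("v0", ir.DataType.BFLOAT16, ir.Shape([2, 3, 4]))

# Explicit form: the same value spelled out against ir.Value directly.
v0_explicit = ir.Value(
    name="v0", shape=ir.Shape([2, 3, 4]), type=ir.TensorType(ir.DataType.BFLOAT16)
)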
11 changes: 10 additions & 1 deletion onnxscript/rewriter/ort_fusions/_core.py
@@ -33,14 +33,23 @@
fuse_skip_layer_normalization,
fuse_skip_rms_normalization,
)
from onnxscript.rewriter.rules.common import _gemm_to_matmul_add
from onnxscript.rewriter.rules.common import (
_fuse_batchnorm,
_fuse_pad_into_conv,
_gemm_to_matmul_add,
_remove_zero_bias,
)

ORT_PATTERN_REWRITE_RULES = [
*softmax.rules.rules,
*instance_to_group_normalization.rules.rules,
# NOTE: group normalization merge silu should be applied after instance to group normalization
# *group_normalization_merge_silu.rules.rules,
*fused_matmul_rule_sets.fused_matmul_rule_sets(),
# Add Conv fusion rules for better ORT optimization
*_fuse_batchnorm.rules.rules,
*_fuse_pad_into_conv.rules.rules,
*_remove_zero_bias.rules.rules,
]


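For orientation, a rough sketch (not part of this PR) of exercising the extended ORT rule list directly; it assumes `RewriteRuleSet` accepts this module-level list and that `onnx_ir` exposes `ir.load`:

import onnx_ir as ir

from onnxscript.rewriter._rewrite_rule import RewriteRuleSet
from onnxscript.rewriter.ort_fusions._core import ORT_PATTERN_REWRITE_RULES

model = ir.load("model.onnx")  # hypothetical input model
count = RewriteRuleSet(ORT_PATTERN_REWRITE_RULES).apply_to_model(model)
print(f"applied {count} pattern rewrites")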
10 changes: 10 additions & 0 deletions onnxscript/rewriter/rules/common/__init__.py
@@ -34,6 +34,10 @@
"normalize_pad_format_conv_integer_rule",
"normalize_pad_format_conv_rule",
"one_reshape_matmul_reshape_rule",
"remove_zero_bias_from_conv_rule",
"remove_zero_bias_from_conv_transpose_rule",
"remove_zero_bias_from_qlinear_conv_rule",
"remove_zero_bias_from_gemm_rule",
"reshape_reshape_rule",
"slice_split_rule",
"squeeze_reshape_1d_rule",
@@ -121,3 +125,9 @@
no_op_dynamic_scatter_nd_rule,
no_op_static_scatter_nd_rule,
)
from onnxscript.rewriter.rules.common._remove_zero_bias import (
remove_zero_bias_from_conv_rule,
remove_zero_bias_from_conv_transpose_rule,
remove_zero_bias_from_gemm_rule,
remove_zero_bias_from_qlinear_conv_rule,
)
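With these re-exports, each rule is importable from the package namespace, so a caller can apply a single variant in isolation. A sketch, reusing the `RewriteRuleSet` import from the new module below:

from onnxscript.rewriter._rewrite_rule import RewriteRuleSet
from onnxscript.rewriter.rules.common import remove_zero_bias_from_conv_rule

# Only the Conv variant; Gemm/ConvTranspose/QLinearConv biases are left untouched.
conv_only = RewriteRuleSet([remove_zero_bias_from_conv_rule])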
18 changes: 12 additions & 6 deletions onnxscript/rewriter/rules/common/_basic_rules_test.py
@@ -8,11 +8,11 @@
import numpy as np
import onnx
import onnx.reference
import onnx_ir as ir
import parameterized

import onnxscript
import onnxscript.onnx_types as ot
from onnxscript import ir
from onnxscript.onnx_opset import opset18
from onnxscript.rewriter import MatchingTracer, testing
from onnxscript.rewriter import pattern as orp
@@ -421,14 +421,18 @@ def _convert_shape(shape, name):
if isinstance(shape, np.ndarray):
shape = tape.initializer(ir.Tensor(shape, name=name))
elif isinstance(shape, (list, tuple)):
shape = ir.val(name, ir.DataType.INT64, ir.Shape(shape))
shape = ir.Value(
name=name, type=ir.TensorType(ir.DataType.INT64), shape=ir.Shape(shape)
)
tape.graph_like.inputs.append(shape)
else:
raise TypeError(f"Unsupported type {type(shape)} for shape.")
return shape

x = ir.val("X", ir.DataType.FLOAT, ir.Shape(input_shape))
y = ir.val("Y", ir.DataType.FLOAT)
x = ir.Value(
name="X", type=ir.TensorType(ir.DataType.FLOAT), shape=ir.Shape(input_shape)
)
y = ir.Value(name="Y", type=ir.TensorType(ir.DataType.FLOAT))
tape = ir.tape.Tape(ir.Graph([x], [y], nodes=[], opset_imports={"": 20}))

# Build the graph.
@@ -554,8 +558,10 @@ def test_unsupported_reshape_reshape(self, shape2, error_msg):
class Flatten2ReshapeTest(unittest.TestCase):
@staticmethod
def create_model(input_shape, axis=1):
x = ir.val("X", ir.DataType.FLOAT, ir.Shape(input_shape))
y = ir.val("Y", ir.DataType.FLOAT)
x = ir.Value(
name="X", type=ir.TensorType(ir.DataType.FLOAT), shape=ir.Shape(input_shape)
)
y = ir.Value(name="Y", type=ir.TensorType(ir.DataType.FLOAT))
tape = ir.tape.Tape(ir.Graph([x], [y], nodes=[], opset_imports={"": 20}))

# Build the graph.
8 changes: 4 additions & 4 deletions onnxscript/rewriter/rules/common/_fuse_pad_into_conv_test.py
@@ -61,13 +61,13 @@ def build_model(

# Register operations in the tape
idtype = ir.DataType.UINT8 if op_type == "ConvInteger" else ir.DataType.FLOAT
x = ir.val("X", shape=input_shape, type=ir.TensorType(idtype))
x = ir.Value(name="X", shape=input_shape, type=ir.TensorType(idtype))
y = tape.op("Pad", inputs=[x, *pad_inputs], attributes=pad_attributes)
y = tape.op(
op_type,
inputs=[y, self.get_conv_weights(weight_shape, tape)],
attributes=conv_attributes,
output=ir.val("Y", shape=output_shape, type=ir.TensorType(x.dtype)),
output=ir.Value(name="Y", shape=output_shape, type=ir.TensorType(x.dtype)),
)
if op_type == "ConvInteger":
y.dtype = ir.DataType.INT32
@@ -290,12 +290,12 @@ def build_model(
raise ValueError(f"Unsupported type for pad input ({x}): {type(x)}.")

# Register operations in the tape
x = ir.val("X", shape=input_shape, type=ir.TensorType(ir.DataType.FLOAT))
x = ir.Value(name="X", shape=input_shape, type=ir.TensorType(ir.DataType.FLOAT))
y = tape.op(
"Conv",
inputs=[x, *conv_inputs],
attributes=conv_attributes,
output=ir.val("Y", shape=output_shape, type=x.type),
output=ir.Value(name="Y", shape=output_shape, type=x.type),
)

# Build the model
14 changes: 8 additions & 6 deletions onnxscript/rewriter/rules/common/_matmul_add_to_gemm_test.py
@@ -5,10 +5,10 @@

import numpy as np
import onnx
import onnx_ir as ir
from onnx_ir.passes.common import onnx_checker, shape_inference
from parameterized import parameterized

from onnxscript import ir
from onnxscript.rewriter import MatchingTracer, MatchStatus, testing
from onnxscript.rewriter.rules.common import _matmul_add_to_gemm

@@ -46,10 +46,10 @@ def get_test_model(
bias_shape = weight_shape[0] if transB else weight_shape[-1]
output_shape = ir.Shape(("?",) * input_shape.rank())

x = ir.val("X", shape=input_shape, type=ir.TensorType(ir.DataType.FLOAT))
x = ir.Value(name="X", shape=input_shape, type=ir.TensorType(ir.DataType.FLOAT))

if weight_as_inputs:
w = ir.val("W", shape=weight_shape, type=ir.TensorType(ir.DataType.FLOAT))
w = ir.Value(name="W", shape=weight_shape, type=ir.TensorType(ir.DataType.FLOAT))
inputs.append(w)
else:
w = ir.tensor(
@@ -58,8 +58,8 @@
w = tape.initializer(w)

if bias_as_inputs:
b = ir.val(
"B", shape=ir.Shape([bias_shape]), type=ir.TensorType(ir.DataType.FLOAT)
b = ir.Value(
name="B", shape=ir.Shape([bias_shape]), type=ir.TensorType(ir.DataType.FLOAT)
)
inputs.append(b)
else:
@@ -77,7 +77,9 @@
y = tape.op(
"Add",
inputs=[y, b],
output=ir.val("Y", shape=output_shape, type=ir.TensorType(ir.DataType.FLOAT)),
output=ir.Value(
name="Y", shape=output_shape, type=ir.TensorType(ir.DataType.FLOAT)
),
)

# Build the model
124 changes: 124 additions & 0 deletions onnxscript/rewriter/rules/common/_remove_zero_bias.py
@@ -0,0 +1,124 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Remove optional bias when it is all zero from Conv and related operations."""

from __future__ import annotations

from typing import ClassVar

import numpy as np

from onnxscript import ir
from onnxscript.ir import convenience
from onnxscript.rewriter._basics import MatchResult
from onnxscript.rewriter._rewrite_rule import RewriteRuleClassBase, RewriteRuleSet


class _RemoveZeroBiasBase(RewriteRuleClassBase):
"""Base class for removing zero bias from operations."""

def rewrite(self, op: ir.tape.Tape, out: ir.Value, **_) -> ir.Value:
"""Remove the bias input from the operation."""
node = out.producer()

return op.op(
self.op_type,
inputs=node.inputs[:-1],
attributes=node.attributes,
)

def _check_bias_is_zero(self, bias_value: ir.Value) -> MatchResult:
"""Check if the bias value is present and is all zeros."""
check_result = MatchResult()

# Check if bias is a constant/initializer
bias_tensor = convenience.get_const_tensor(bias_value)
if bias_tensor is None:
return check_result.fail("Bias is not a constant/initializer.")

# Check if bias is all zeros
bias_array = bias_tensor.numpy()
if not np.allclose(bias_array, 0.0, atol=1e-8):
return check_result.fail("Bias is not all zeros.")

return check_result

def check(self, context, x: ir.Value, w: ir.Value, b: ir.Value, **_) -> MatchResult:
"""Check if the bias is present and is all zeros."""
del context # Unused
return self._check_bias_is_zero(b)


class RemoveZeroBiasFromConv(_RemoveZeroBiasBase):
"""Remove zero bias from Conv operations."""

op_type: ClassVar = "Conv"

def pattern(self, op: ir.tape.Tape, x: ir.Value, w: ir.Value, b: ir.Value) -> ir.Value:
return op.Conv(x, w, b, _outputs=["out"])


class RemoveZeroBiasFromConvTranspose(_RemoveZeroBiasBase):
"""Remove zero bias from ConvTranspose operations."""

op_type: ClassVar = "ConvTranspose"

def pattern(self, op: ir.tape.Tape, x: ir.Value, w: ir.Value, b: ir.Value) -> ir.Value:
return op.ConvTranspose(x, w, b, _outputs=["out"])


class RemoveZeroBiasFromQLinearConv(_RemoveZeroBiasBase):
"""Remove zero bias from QLinearConv operations."""

op_type: ClassVar = "QLinearConv"

def pattern(
self,
op: ir.tape.Tape,
x,
x_scale,
x_zero_point,
w,
w_scale,
w_zero_point,
y_scale,
y_zero_point,
b: ir.Value,
) -> ir.Value:
return op.QLinearConv(
x,
x_scale,
x_zero_point,
w,
w_scale,
w_zero_point,
y_scale,
y_zero_point,
b,
_outputs=["out"],
)


class RemoveZeroBiasFromGemm(_RemoveZeroBiasBase):
"""Remove zero bias from Gemm operations."""

op_type: ClassVar = "Gemm"

def pattern(self, op: ir.tape.Tape, x: ir.Value, w: ir.Value, b: ir.Value) -> ir.Value:
return op.Gemm(x, w, b, _outputs=["out"])


# Create rule instances
remove_zero_bias_from_conv_rule = RemoveZeroBiasFromConv().rule()
remove_zero_bias_from_conv_transpose_rule = RemoveZeroBiasFromConvTranspose().rule()
remove_zero_bias_from_qlinear_conv_rule = RemoveZeroBiasFromQLinearConv().rule()
remove_zero_bias_from_gemm_rule = RemoveZeroBiasFromGemm().rule()

rules = RewriteRuleSet(
[
remove_zero_bias_from_conv_rule,
remove_zero_bias_from_conv_transpose_rule,
remove_zero_bias_from_qlinear_conv_rule,
remove_zero_bias_from_gemm_rule,
]
)
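To close, a minimal end-to-end sketch (not part of the PR) of the new rule set against a Conv with an all-zero bias, built with the same `onnx_ir` tape API the tests in this diff use; `ir.Model(graph, ir_version=...)` is assumed:

import numpy as np
import onnx_ir as ir

from onnxscript.rewriter.rules.common import _remove_zero_bias

x = ir.Value(name="X", shape=ir.Shape([1, 3, 8, 8]), type=ir.TensorType(ir.DataType.FLOAT))
y = ir.Value(name="Y", type=ir.TensorType(ir.DataType.FLOAT))
tape = ir.tape.Tape(ir.Graph([x], [y], nodes=[], opset_imports={"": 20}))

# Constant weight and an all-zero bias initializer: the rule's check should pass.
w = tape.initializer(ir.tensor(np.ones((4, 3, 3, 3), dtype=np.float32), name="W"))
b = tape.initializer(ir.tensor(np.zeros((4,), dtype=np.float32), name="B"))
tape.op("Conv", inputs=[x, w, b], output=y)

model = ir.Model(tape.graph_like, ir_version=10)
count = _remove_zero_bias.rules.apply_to_model(model)
# Expected: count == 1, and the rewritten Conv keeps only two inputs (X, W).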