Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
6cfd6fd
[Rewriter] Implement zero bias removal for Conv operations and relate…
whyvineet Sep 9, 2025
48037a8
Merge branch 'microsoft:main' into remove-optional-bias
whyvineet Sep 9, 2025
fd028ee
Merge branch 'main' of github-personal:whyvineet/onnxscript into remo…
whyvineet Sep 10, 2025
3742e7b
[Rewriter] Enhance zero bias removal for Conv, ConvTranspose, Gemm, a…
whyvineet Sep 10, 2025
8bfa65f
Refactor zero bias removal tests to use helper function and improve s…
whyvineet Sep 10, 2025
6fc3ca7
Merge branch 'main' of github-personal:whyvineet/onnxscript into remo…
whyvineet Sep 10, 2025
b93a56c
Refactor test cases for zero bias removal to improve readability and …
whyvineet Sep 11, 2025
e322625
Remove duplicate import of _fuse_batchnorm in rewriter module
whyvineet Sep 11, 2025
121360e
Refactor zero bias removal logic to streamline input handling and enh…
whyvineet Sep 11, 2025
ee7dafa
Refactor Gemm operation pattern and check method to align with zero b…
whyvineet Sep 14, 2025
8ef6c41
Enhance zero bias removal logic to filter bias parameters and preserv…
whyvineet Sep 14, 2025
2b9dda4
Refactor bias removal logic to directly use operation inputs, improvi…
whyvineet Sep 14, 2025
153b4e7
Remove redundant domain attribute from operation inputs in _RemoveZer…
whyvineet Sep 15, 2025
ce64fb7
Merge branch 'main' into remove-optional-bias
justinchuby Sep 16, 2025
a94f8b9
Merge HEAD, branch 'remove-optional-bias' of github-personal:whyvinee…
whyvineet Sep 21, 2025
86de85f
Refactor IR value creation in tests to use `ir.Value` for consistency…
whyvineet Sep 21, 2025
d62eafb
Revert "Refactor IR value creation in tests to use `ir.Value` for con…
whyvineet Sep 21, 2025
d4f73dd
Enhance attribute comparison in optimization tests to handle list vs …
whyvineet Sep 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion onnxscript/rewriter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
]

import onnx
import onnx_ir.passes.common as common_passes

import onnxscript.ir.passes.common as common_passes
from onnxscript import ir
from onnxscript.rewriter import pattern
from onnxscript.rewriter._basics import MatchContext, MatchingTracer, MatchResult, MatchStatus
Expand All @@ -35,11 +35,13 @@
_broadcast_to_matmul,
_cast_constant_of_shape,
_collapse_slices,
_fuse_batchnorm,
_fuse_pad_into_conv,
_fuse_relus_clips,
_min_max_to_clip,
_no_op,
_redundant_scatter_nd,
_remove_zero_bias,
)

_ModelProtoOrIr = TypeVar("_ModelProtoOrIr", onnx.ModelProto, ir.Model)
Expand All @@ -53,6 +55,8 @@
*_basic_rules.basic_optimization_rules(),
*_redundant_scatter_nd.rules,
*_fuse_pad_into_conv.rules,
*_fuse_batchnorm.rules,
*_remove_zero_bias.rules,
)


Expand Down
11 changes: 10 additions & 1 deletion onnxscript/rewriter/ort_fusions/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,23 @@
fuse_skip_layer_normalization,
fuse_skip_rms_normalization,
)
from onnxscript.rewriter.rules.common import _gemm_to_matmul_add
from onnxscript.rewriter.rules.common import (
_fuse_batchnorm,
_fuse_pad_into_conv,
_gemm_to_matmul_add,
_remove_zero_bias,
)

ORT_PATTERN_REWRITE_RULES = [
*softmax.rules.rules,
*instance_to_group_normalization.rules.rules,
# NOTE: group normalization merge silu should be applied after instance to group normalization
# *group_normalization_merge_silu.rules.rules,
*fused_matmul_rule_sets.fused_matmul_rule_sets(),
# Add Conv fusion rules for better ORT optimization
*_fuse_batchnorm.rules.rules,
*_fuse_pad_into_conv.rules.rules,
*_remove_zero_bias.rules.rules,
]


Expand Down
10 changes: 10 additions & 0 deletions onnxscript/rewriter/rules/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
"normalize_pad_format_conv_integer_rule",
"normalize_pad_format_conv_rule",
"one_reshape_matmul_reshape_rule",
"remove_zero_bias_from_conv_rule",
"remove_zero_bias_from_conv_transpose_rule",
"remove_zero_bias_from_qlinear_conv_rule",
"remove_zero_bias_from_gemm_rule",
"reshape_reshape_rule",
"slice_split_rule",
"squeeze_reshape_1d_rule",
Expand Down Expand Up @@ -113,3 +117,9 @@
no_op_dynamic_scatter_nd_rule,
no_op_static_scatter_nd_rule,
)
from onnxscript.rewriter.rules.common._remove_zero_bias import (
remove_zero_bias_from_conv_rule,
remove_zero_bias_from_conv_transpose_rule,
remove_zero_bias_from_gemm_rule,
remove_zero_bias_from_qlinear_conv_rule,
)
203 changes: 203 additions & 0 deletions onnxscript/rewriter/rules/common/_remove_zero_bias.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Remove optional bias when it is all zero from Conv and related operations."""

from __future__ import annotations

import numpy as np

from onnxscript import ir
from onnxscript.rewriter._basics import MatchResult
from onnxscript.rewriter._rewrite_rule import RewriteRuleClassBase, RewriteRuleSet


class _RemoveZeroBiasBase(RewriteRuleClassBase):
    """Shared machinery for rules that drop an all-zero optional bias input.

    Subclasses supply the match pattern for a specific op type; the check
    below succeeds only when the bias binding is a constant tensor that is
    entirely zero, in which case the op's output is unchanged by removing it.
    """

    def __init__(self, op_type: str):
        # Keep matched nodes in place; the rewrite emits the replacement node.
        super().__init__(remove_nodes=False)
        self.op_type = op_type

    def rewrite(self, op: ir.tape.Tape, x: ir.Value, w: ir.Value, b: ir.Value) -> ir.Value:
        """Re-emit the operation with the bias input dropped.

        NOTE(review): this generic version does not carry over the matched
        node's attributes or domain; every current subclass overrides it with
        an attribute-preserving variant.
        """
        del b  # the all-zero bias is intentionally discarded
        return op.op(self.op_type, inputs=[x, w])

    def check(self, context, x: ir.Value, w: ir.Value, b: ir.Value, **_) -> MatchResult:
        """Succeed only when *b* is a constant/initializer that is all zeros."""
        del context, x, w  # unused; only the bias value matters here
        result = MatchResult()

        if b.const_value is None:
            return result.fail("Bias is not a constant/initializer.")

        if not np.allclose(b.const_value.numpy(), 0.0, atol=1e-8):
            return result.fail("Bias is not all zeros.")

        return result


class RemoveZeroBiasFromConv(_RemoveZeroBiasBase):
    """Remove an all-zero bias input from Conv operations."""

    def __init__(self):
        super().__init__("Conv")

    def pattern(self, op: ir.tape.Tape, x: ir.Value, w: ir.Value, b: ir.Value) -> ir.Value:
        # _allow_other_inputs=False for consistency with the sibling rules
        # (Conv accepts at most x, w, b, so matching is unaffected).
        return op.Conv(x, w, b, _allow_other_inputs=False, _outputs=["conv_out"])

    def check(
        self, context, x: ir.Value, w: ir.Value, b: ir.Value, conv_out: ir.Value, **_
    ) -> MatchResult:
        """Delegate the all-zero-bias check to the shared base implementation."""
        del conv_out  # only needed by rewrite()
        return super().check(context, x, w, b)

    def rewrite(
        self, op: ir.tape.Tape, x: ir.Value, w: ir.Value, b: ir.Value, conv_out: ir.Value
    ) -> ir.Value:
        """Re-emit the Conv without its bias, preserving attributes and domain."""
        conv_node = conv_out.producer()  # matched node, for attribute transfer
        return op.op(
            "Conv",
            inputs=[x, w],  # bias dropped
            attributes=conv_node.attributes,
            domain=conv_node.domain,
        )


class RemoveZeroBiasFromConvTranspose(_RemoveZeroBiasBase):
    """Remove an all-zero bias input from ConvTranspose operations.

    The zero-bias check is inherited from ``_RemoveZeroBiasBase``.
    """

    def __init__(self):
        super().__init__("ConvTranspose")

    def pattern(self, op: ir.tape.Tape, x: ir.Value, w: ir.Value, b: ir.Value) -> ir.Value:
        return op.ConvTranspose(x, w, b, _allow_other_inputs=False, _outputs=["conv_out"])

    def rewrite(
        self, op: ir.tape.Tape, x: ir.Value, w: ir.Value, b: ir.Value, conv_out: ir.Value
    ) -> ir.Value:
        """Emit a bias-free ConvTranspose, keeping attributes and domain intact."""
        source_node = conv_out.producer()  # matched node, for attribute transfer
        return op.op(
            "ConvTranspose",
            inputs=[x, w],  # bias dropped
            attributes=source_node.attributes,
            domain=source_node.domain,
        )


class RemoveZeroBiasFromQLinearConv(_RemoveZeroBiasBase):
    """Remove an all-zero bias input (input 9, ``B``) from QLinearConv operations."""

    def __init__(self):
        super().__init__("QLinearConv")

    def pattern(
        self, op: ir.tape.Tape, x, x_scale, x_zero_point, w, w_scale, w_zero_point,
        y_scale, y_zero_point, b: ir.Value,
    ) -> ir.Value:
        return op.QLinearConv(
            x, x_scale, x_zero_point, w, w_scale, w_zero_point,
            y_scale, y_zero_point, b, _allow_other_inputs=False, _outputs=["conv_out"],
        )

    def check(
        self, context, x, x_scale, x_zero_point, w, w_scale, w_zero_point,
        y_scale, y_zero_point, b: ir.Value, conv_out: ir.Value, **_,
    ) -> MatchResult:
        """Delegate the all-zero-bias check (on *b*) to the shared base implementation."""
        del conv_out  # only needed by rewrite()
        return super().check(context, x, w, b)

    def rewrite(
        self, op: ir.tape.Tape, x, x_scale, x_zero_point, w, w_scale, w_zero_point,
        y_scale, y_zero_point, b: ir.Value, conv_out: ir.Value,
    ) -> ir.Value:
        """Re-emit the QLinearConv without its bias, preserving attributes and domain."""
        conv_node = conv_out.producer()  # matched node, for attribute transfer
        return op.op(
            "QLinearConv",
            inputs=[
                x, x_scale, x_zero_point, w, w_scale, w_zero_point,
                y_scale, y_zero_point,
            ],  # bias dropped
            attributes=conv_node.attributes,
            domain=conv_node.domain,
        )


class RemoveZeroBiasFromGemm(_RemoveZeroBiasBase):
    """Remove an all-zero bias (input ``C``) from Gemm operations.

    Gemm computes ``alpha * A' * B' + beta * C``; when C is all zeros the
    ``beta * C`` term vanishes for any beta, so C can be safely dropped.
    """

    def __init__(self):
        super().__init__("Gemm")

    def pattern(self, op: ir.tape.Tape, a: ir.Value, b: ir.Value, c: ir.Value) -> ir.Value:
        return op.Gemm(a, b, c, _allow_other_inputs=False, _outputs=["gemm_out"])

    def check(
        self, context, a: ir.Value, b: ir.Value, c: ir.Value, gemm_out: ir.Value, **_
    ) -> MatchResult:
        """Delegate to the shared zero-bias check; Gemm's bias is input C."""
        del gemm_out  # only needed by rewrite()
        # The base check only inspects its *b* parameter, so bind C to it.
        return super().check(context, x=a, w=b, b=c)

    def rewrite(
        self, op: ir.tape.Tape, a: ir.Value, b: ir.Value, c: ir.Value, gemm_out: ir.Value
    ) -> ir.Value:
        """Re-emit the Gemm without C, preserving attributes and domain."""
        gemm_node = gemm_out.producer()  # matched node, for attribute transfer
        return op.op(
            "Gemm",
            inputs=[a, b],  # bias (C) dropped
            attributes=gemm_node.attributes,
            domain=gemm_node.domain,
        )


# Instantiate one rewrite rule per supported operator type.
remove_zero_bias_from_conv_rule = RemoveZeroBiasFromConv().rule()
remove_zero_bias_from_conv_transpose_rule = RemoveZeroBiasFromConvTranspose().rule()
remove_zero_bias_from_qlinear_conv_rule = RemoveZeroBiasFromQLinearConv().rule()
remove_zero_bias_from_gemm_rule = RemoveZeroBiasFromGemm().rule()

# Bundle the individual rules so callers can apply them as one set.
rules = RewriteRuleSet(
    [
        remove_zero_bias_from_conv_rule,
        remove_zero_bias_from_conv_transpose_rule,
        remove_zero_bias_from_qlinear_conv_rule,
        remove_zero_bias_from_gemm_rule,
    ]
)
87 changes: 87 additions & 0 deletions onnxscript/rewriter/rules/common/_remove_zero_bias_test.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe you can pass some attributes when testing to check that every info is correctly transferred (e.g. stride, transA...)

Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for removing zero bias from Conv and related operations."""

import onnx
import onnx.parser
import onnx_ir as ir

from onnxscript.rewriter.rules.common._remove_zero_bias import (
remove_zero_bias_from_conv_rule,
)


def test_remove_zero_bias_from_conv():
    """A Conv whose bias initializer is all zeros should lose its bias input."""
    model_proto = onnx.parser.parse_model(
        """
        <ir_version: 7, opset_import: [ "" : 17]>
        agraph (float[1, 2, 4, 4] x) => (float[1, 2, 2, 2] y)
        {
            weight = Constant <value = float[2, 2, 3, 3] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36}>()
            bias = Constant <value = float[2] {0, 0}>()
            y = Conv(x, weight, bias)
        }
        """
    )
    model = ir.serde.deserialize_model(model_proto)

    applied = remove_zero_bias_from_conv_rule.apply_to_model(model)
    assert applied == 1, f"Expected 1 application, got {applied}"

    # The rewritten Conv must now carry only (x, weight).
    conv = next((node for node in model.graph if node.op_type == "Conv"), None)
    assert conv is not None, "Conv node not found"
    assert len(conv.inputs) == 2, f"Expected 2 inputs after optimization, got {len(conv.inputs)}"


def test_conv_with_non_zero_bias_unchanged():
    """A Conv with a non-zero bias must be left untouched by the rule."""
    model_proto = onnx.parser.parse_model(
        """
        <ir_version: 7, opset_import: [ "" : 17]>
        agraph (float[1, 2, 4, 4] x) => (float[1, 2, 2, 2] y)
        {
            weight = Constant <value = float[2, 2, 3, 3] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36}>()
            bias = Constant <value = float[2] {1, 1}>()
            y = Conv(x, weight, bias)
        }
        """
    )
    model = ir.serde.deserialize_model(model_proto)

    applied = remove_zero_bias_from_conv_rule.apply_to_model(model)
    assert applied == 0, f"Expected 0 applications, got {applied}"

    # The Conv must still carry all three inputs (x, weight, bias).
    conv = next((node for node in model.graph if node.op_type == "Conv"), None)
    assert conv is not None, "Conv node not found"
    assert len(conv.inputs) == 3, f"Expected 3 inputs, got {len(conv.inputs)}"


if __name__ == "__main__":
    # Minimal runner so the file can be executed directly (outside pytest).
    for _case in (test_remove_zero_bias_from_conv, test_conv_with_non_zero_bias_unchanged):
        _case()
    print("All tests passed!")
4 changes: 2 additions & 2 deletions tools/ir/model_zoo_test/model_zoo_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from onnxscript import ir


def test_model(model_info: hub.ModelInfo) -> float:
def validate_model(model_info: hub.ModelInfo) -> float:
model_name = model_info.model
with tempfile.TemporaryDirectory() as temp_dir, contextlib.redirect_stdout(None):
# For parallel testing, this must be in a separate process because hub.set_dir
Expand Down Expand Up @@ -58,7 +58,7 @@ def run_one_test(model_info: hub.ModelInfo) -> tuple[str, str | None]:
model_path = model_info.model_path
message = f"\n----Testing: {model_name} @ {model_path}----"
try:
time_passed = test_model(model_info)
time_passed = validate_model(model_info)
message += green(f"\n[PASS]: {model_name} roundtrip test passed.")
except Exception as e: # pylint: disable=broad-exception-caught
time_passed = -1
Expand Down