
Commit 6ecebdc

Merge branch 'main' into justinchu/fix-slice-rewrite

2 parents 62de1f0 + 7227655 commit 6ecebdc

File tree: 7 files changed, +126 -81 lines changed

docs/api/optimizer.md

Lines changed: 0 additions & 1 deletion

@@ -15,5 +15,4 @@
 optimizer.inline
 optimizer.basic_constant_propagation
 optimizer.fold_constants
-optimizer.remove_unused_nodes
 ```

onnxscript/function_libs/torch_lib/ops/core.py

Lines changed: 63 additions & 52 deletions

@@ -162,9 +162,15 @@ def aten_acosh(self: TFloat) -> TFloat:


 @torch_op(("aten::add.Tensor", "aten::add.Scalar", "_operator::add"), trace_only=True)
-def aten_add(self: TReal, other: TReal, alpha: float = 1.0) -> TReal:
+def aten_add(self: TTensor, other: TTensor, alpha: float = 1.0) -> TTensor:
     """add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor"""
-    # TODO(microsoft/onnxruntime#15977): Improve fp16 precision
+
+    if self.dtype == ir.DataType.BOOL:
+        # alpha can also be bool
+        if alpha == 0:
+            return op.Identity(self)
+        return op.Or(self, other)
+
     if alpha != 1.0:
         alpha = op.CastLike(alpha, other)
         other = op.Mul(other, alpha)
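The new BOOL branch above lowers boolean addition to `Or` (and to `Identity` when `alpha == 0`). In NumPy terms, boolean "addition" is a logical OR; a minimal sketch of that semantics (the arrays are illustrative):

```python
import numpy as np

# Adding boolean arrays behaves like logical OR, which is what op.Or expresses in ONNX.
a = np.array([True, False, False])
b = np.array([True, True, False])
print(a + b)                # [ True  True False]
print(np.logical_or(a, b))  # [ True  True False], identical result
```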
@@ -1237,11 +1243,16 @@ def aten_binomial(
     ),
     trace_only=True,
 )
-def aten_bitwise_and(self: TInt, other: TInt) -> TInt:
+def aten_bitwise_and(self: TTensor, other: TTensor) -> TTensor:
     """bitwise_and.Tensor(Tensor self, Tensor other) -> Tensor"""
-    # logical_and implements the BOOL variant

-    return op.BitwiseAnd(self, other)
+    assert self.dtype == other.dtype
+
+    if self.dtype.is_integer():
+        return op.BitwiseAnd(self, other)
+    if self.dtype == ir.DataType.BOOL:
+        return op.And(self, other)
+    raise NotImplementedError(f"Not implemented for types {self.dtype} and {other.dtype}")


 @torch_op(

@@ -1329,11 +1340,14 @@ def aten_bitwise_left_shift_int8(self: INT8, other: INT8) -> INT8:


 @torch_op("aten::bitwise_not", trace_only=True)
-def aten_bitwise_not(self: TInt) -> TInt:
+def aten_bitwise_not(self: TTensor) -> TTensor:
     """bitwise_not(Tensor self) -> Tensor"""
-    # logical_not implements the BOOL variant

-    return op.BitwiseNot(self)
+    if self.dtype == ir.DataType.BOOL:
+        return op.Not(self)
+    if self.dtype.is_integer():
+        return op.BitwiseNot(self)
+    raise NotImplementedError(f"Not implemented for type {self.dtype}")


 @torch_op(

@@ -1345,11 +1359,16 @@ def aten_bitwise_not(self: TInt) -> TInt:
     ),
     trace_only=True,
 )
-def aten_bitwise_or(self: TInt, other: TInt) -> TInt:
+def aten_bitwise_or(self: TTensor, other: TTensor) -> TTensor:
     """bitwise_or.Tensor(Tensor self, Tensor other) -> Tensor"""
-    # logical_or implements the BOOL variant

-    return op.BitwiseOr(self, other)
+    assert self.dtype == other.dtype
+
+    if self.dtype.is_integer():
+        return op.BitwiseOr(self, other)
+    if self.dtype == ir.DataType.BOOL:
+        return op.Or(self, other)
+    raise NotImplementedError(f"Not implemented for types {self.dtype} and {other.dtype}")


 @torch_op(

@@ -1487,11 +1506,15 @@ def aten_bitwise_right_shift_int8(self: INT8, other: INT8) -> INT8:
     ),
     trace_only=True,
 )
-def aten_bitwise_xor(self: TInt, other: TInt) -> TInt:
+def aten_bitwise_xor(self: TTensor, other: TTensor) -> TTensor:
     """bitwise_xor.Tensor(Tensor self, Tensor other) -> Tensor"""
-    # logical_xor implements the BOOL variant
+    assert self.dtype == other.dtype

-    return op.BitwiseXor(self, other)
+    if self.dtype.is_integer():
+        return op.BitwiseXor(self, other)
+    if self.dtype == ir.DataType.BOOL:
+        return op.Xor(self, other)
+    raise NotImplementedError(f"Not implemented for types {self.dtype} and {other.dtype}")


 @torch_op("aten::blackman_window", trace_only=True)

@@ -5010,58 +5033,46 @@ def aten_logdet(self: TFloat) -> TFloat:
     return op.Log(op.Det(self))


-@torch_op(
-    (
-        "aten::logical_and",
-        "aten::bitwise_and.Tensor",
-        "aten::bitwise_and.Scalar",
-        "aten::bitwise_and.Scalar_Tensor",
-    ),
-    trace_only=True,
-)
-def aten_logical_and(self: BOOL, other: BOOL) -> BOOL:
+@torch_op("aten::logical_and", trace_only=True)
+def aten_logical_and(self: TTensor, other: TTensor) -> BOOL:
     """logical_and(Tensor self, Tensor other) -> Tensor"""

-    return op.And(self, other)
+    assert self.dtype == other.dtype
+
+    if self.dtype == ir.DataType.BOOL:
+        return op.And(self, other)
+    return op.And(op.Cast(self, to=BOOL.dtype), op.Cast(other, to=BOOL.dtype))


-@torch_op(("aten::logical_not", "aten::bitwise_not"), trace_only=True)
-def aten_logical_not(self: BOOL) -> BOOL:
+@torch_op("aten::logical_not", trace_only=True)
+def aten_logical_not(self: TTensor) -> BOOL:
     """logical_not(Tensor self) -> Tensor"""

-    return op.Not(self)
+    if self.dtype == ir.DataType.BOOL:
+        return op.Not(self)
+    return op.Not(op.Cast(self, to=BOOL.dtype))


-@torch_op(
-    (
-        "aten::logical_or",
-        "aten::bitwise_or.Tensor",
-        "aten::bitwise_or.Scalar",
-        "aten::bitwise_or.Scalar_Tensor",
-        "aten::add.Tensor",
-        "aten::add.Scalar",
-    ),
-    trace_only=True,
-)
-def aten_logical_or(self: BOOL, other: BOOL) -> BOOL:
+@torch_op("aten::logical_or", trace_only=True)
+def aten_logical_or(self: TTensor, other: TTensor) -> BOOL:
     """logical_or(Tensor self, Tensor other) -> Tensor"""

-    return op.Or(self, other)
+    assert self.dtype == other.dtype

+    if self.dtype == ir.DataType.BOOL:
+        return op.Or(self, other)
+    return op.Or(op.Cast(self, to=BOOL.dtype), op.Cast(other, to=BOOL.dtype))

-@torch_op(
-    (
-        "aten::logical_xor",
-        "aten::bitwise_xor.Tensor",
-        "aten::bitwise_xor.Scalar",
-        "aten::bitwise_xor.Scalar_Tensor",
-    ),
-    trace_only=True,
-)
-def aten_logical_xor(self: BOOL, other: BOOL) -> BOOL:
+
+@torch_op("aten::logical_xor", trace_only=True)
+def aten_logical_xor(self: TTensor, other: TTensor) -> BOOL:
     """logical_xor(Tensor self, Tensor other) -> Tensor"""

-    return op.Xor(self, other)
+    assert self.dtype == other.dtype
+
+    if self.dtype == ir.DataType.BOOL:
+        return op.Xor(self, other)
+    return op.Xor(op.Cast(self, to=BOOL.dtype), op.Cast(other, to=BOOL.dtype))


 @torch_op("aten::logit", private=True)

onnxscript/optimizer/__init__.py

Lines changed: 2 additions & 6 deletions

@@ -19,12 +19,8 @@

 import onnxscript.optimizer._constant_folding as constant_folding
 from onnxscript import ir
-from onnxscript.optimizer._constant_folding import (
-    basic_constant_propagation,
-)
-from onnxscript.optimizer._constant_folding import (
-    fold_constants as fold_constants_ir,
-)
+from onnxscript.optimizer._constant_folding import basic_constant_propagation
+from onnxscript.optimizer._constant_folding import fold_constants as fold_constants_ir
 from onnxscript.optimizer._optimizer import optimize_ir

 _ModelProtoOrIr = TypeVar("_ModelProtoOrIr", onnx.ModelProto, ir.Model)

onnxscript/optimizer/_constant_folding.py

Lines changed: 38 additions & 4 deletions

@@ -5,6 +5,13 @@

 from __future__ import annotations

+__all__ = [
+    "basic_constant_propagation",
+    "fold_constants",
+    "FoldConstantsPass",
+    "FOLDED_FROM_KEY",
+]
+
 import dataclasses
 import logging
 import math

@@ -23,6 +30,9 @@

 DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = 512 * 512

+# Key used to store the metadata
+FOLDED_FROM_KEY = "pkg.onnxscript.optimizer.folded_from"
+

 _NON_DETERMINISTIC_OPS = frozenset(
     {

@@ -491,9 +501,7 @@ def cast(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
     # should handle this. Only the optimization to eliminate redundant Cast ops
     # should be needed here.

-    input_shape = input.shape
-    if input_shape is not None:
-        output.shape = input_shape.copy()
+    output.shape = _merge_shapes(output.shape, input.shape)

     input_dtype = _get_input_element_type(node, 0)
     output_dtype = _get_int_attribute(node, "to", None)

@@ -600,6 +608,9 @@ def identity(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
     input = node.inputs[0]
     output = node.outputs[0]
     if input is not None and output is not None:
+        input.shape = _merge_shapes(input.shape, output.shape)
+        if input.type is None:
+            input.type = output.type
         state.set_sym_value(output, input)
     return None

@@ -914,6 +925,24 @@ def merge_dims(dim1, dim2):
     return ir.Shape([merge_dims(dim1, dim2) for dim1, dim2 in zip(shape1, shape2)])


+def _record_contributing_values(original_node: ir.Node, replacement: Replacement) -> None:
+    """Record the set of original input values that contributed to the constant-folded outputs."""
+    folded_from: set[str] = set()
+    for input in original_node.inputs:
+        if input is None:
+            continue
+        folded_from.update(input.meta.get(FOLDED_FROM_KEY, set()))
+        assert input.name is not None
+        folded_from.add(input.name)
+
+    for new_output in replacement.new_outputs:
+        if new_output is None:
+            continue
+        new_output.meta[FOLDED_FROM_KEY] = folded_from
+        # Store the string representation of the set to metadata_props to persist it across serialization
+        new_output.metadata_props[FOLDED_FROM_KEY] = repr(sorted(folded_from))
+
+
 class FoldConstantsPass(ir.passes.InPlacePass):
     """A pass that folds constant expressions in the model.


@@ -1203,9 +1232,14 @@ def convert(av):
             )
         return None

-    def replace_node(self, node: ir.Node, replacement, root: ir.Graph | ir.Function) -> None:
+    def replace_node(
+        self, node: ir.Node, replacement: Replacement, root: ir.Graph | ir.Function
+    ) -> None:
         logger.debug("Replacing node: %s::%s %s", node.domain, node.op_type, node.name)

+        # Record the names of the values that have contributed to the replacement
+        _record_contributing_values(node, replacement)
+
         ir.convenience.replace_nodes_and_values(
             root, node, [node], replacement.new_nodes, node.outputs, replacement.new_outputs
         )
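The new FOLDED_FROM_KEY metadata records, for every folded output, the names of the original values it was computed from, both in `meta` (in memory) and in `metadata_props` (persisted through serialization). A hedged sketch of reading it back after folding; the model path and iteration pattern are illustrative, and `optimizer.fold_constants` is assumed to accept an `ir.Model`, as the `_ModelProtoOrIr` type variable above suggests:

```python
import onnx
from onnxscript import ir, optimizer
from onnxscript.optimizer._constant_folding import FOLDED_FROM_KEY

model = ir.from_proto(onnx.load("model.onnx"))  # path is illustrative
optimizer.fold_constants(model)                 # runs the constant-folding pass in place

# Values produced by folding carry the names of the values they were folded from.
for node in model.graph:
    for value in node.outputs:
        if FOLDED_FROM_KEY in value.metadata_props:
            print(value.name, "folded from", value.metadata_props[FOLDED_FROM_KEY])
```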

onnxscript/rewriter/_ir_utils.py

Lines changed: 17 additions & 6 deletions

@@ -78,23 +78,34 @@ def get_numpy_value(val: ir.Value | None) -> np.ndarray | None:
     return None


-def get_singleton_value(val: ir.Value | None, rank: int | None = None):
+def get_singleton_value(val: ir.Value | None, rank: int | Sequence[int] | None = None):
     """Returns element of a single element tensor constant value, and None otherwise.

-    If rank is specified, it checks that the value has the given rank.
+    If an int rank is specified, it checks that the value has the given rank.
+    If the rank is a sequence of ints, it checks that the value has one of the given ranks.
+
+    Thus, `rank=0` checks for a scalar, `rank=1` checks for a 1D tensor, and
+    `rank=(0,1)` checks for either a scalar or a 1D tensor.
     """
     np_val = get_numpy_value(val)
     if np_val is not None and np_val.size == 1:
-        if rank is None or (np_val.ndim == rank):
-            return np_val.item()
+        value = np_val.item()
+        if (rank is None) or (isinstance(rank, int) and (np_val.ndim == rank)):
+            return value
+        if isinstance(rank, Sequence) and (np_val.ndim in rank):
+            return value
     return None


 def is_singleton_value(
-    val: ir.Value | None, expected: float | int | Callable, *, rtol: float | None = None
+    val: ir.Value | None,
+    expected: float | int | Callable,
+    *,
+    rtol: float | None = None,
+    rank: int | Sequence[int] | None = None,
 ) -> bool:
     """Returns True if the value is a single element tensor with given value, and False otherwise."""
-    scalar = get_singleton_value(val)
+    scalar = get_singleton_value(val, rank=rank)
     if scalar is None:
         return False
     if callable(expected):
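A hedged usage sketch for the extended `rank` parameter. The construction of the constant-backed `ir.Value` here is illustrative (any single-element constant value would do):

```python
import numpy as np
from onnxscript import ir
from onnxscript.rewriter import _ir_utils

# A 1-D constant tensor holding a single element, wrapped in an ir.Value.
val = ir.Value(name="axes", const_value=ir.tensor(np.array([1], dtype=np.int64)))

print(_ir_utils.get_singleton_value(val))                  # 1 (any rank accepted)
print(_ir_utils.get_singleton_value(val, rank=0))          # None (value is 1-D, not a scalar)
print(_ir_utils.get_singleton_value(val, rank=(0, 1)))     # 1 (scalar or 1-D accepted)
print(_ir_utils.is_singleton_value(val, 1, rank=(0, 1)))   # True
```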

onnxscript/rewriter/rules/fusion/_rotary_embedding.py

Lines changed: 2 additions & 12 deletions

@@ -43,19 +43,9 @@ def pattern(self, op, x, freqs, start1, end1, start2, end2, one1, one2):
     def check(self, op, x, start1, end1, start2, end2, one1, one2, **_) -> pattern.MatchResult:  # type: ignore[name-defined]
         check_result = pattern.MatchResult()

-        def is_one(val):
-            """Check if val is a 0/1 dimensional tensor with a single element equal to 1."""
-            np_val = _ir_utils.get_numpy_value(val)
-            return (
-                np_val is not None
-                and np_val.size == 1
-                and np_val.ndim <= 1
-                and np_val.item() == 1
-            )
-
-        if not is_one(one1):
+        if not _ir_utils.is_singleton_value(one1, 1):
             return check_result.fail("Unsqueeze axes is not [1]", one1)
-        if not is_one(one2):
+        if not _ir_utils.is_singleton_value(one2, 1):
             return check_result.fail("Unsqueeze axes is not [1]", one2)

         # x needs to be a 4D tensor with known last dimension size (== head_size) and known second dimension (num_heads)
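If the original rank restriction from the removed local helper (ndim <= 1) is needed, the shared utility's `rank` parameter can express it. A hedged sketch, with `one1` standing for the matched Unsqueeze axes value inside `check()`:

```python
# Equivalent to the removed is_one(one1), including the ndim <= 1 restriction:
strict = _ir_utils.is_singleton_value(one1, 1, rank=(0, 1))

# The form used in check() above: any single-element constant equal to 1 matches.
loose = _ir_utils.is_singleton_value(one1, 1)
```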

tests/function_libs/torch_lib/ops_test_data.py

Lines changed: 4 additions & 0 deletions

@@ -1631,6 +1631,10 @@ def _where_input_wrangler(
         dtypes=(torch.float32 if sys.platform != "linux" else torch.complex64,),
         reason="fixme: test is unstable on macosx, windows",
     ),
+    TorchLibOpInfo("logical_and", core_ops.aten_logical_and),
+    TorchLibOpInfo("logical_not", core_ops.aten_logical_not),
+    TorchLibOpInfo("logical_or", core_ops.aten_logical_or),
+    TorchLibOpInfo("logical_xor", core_ops.aten_logical_xor),
     TorchLibOpInfo("logit", core_ops.aten_logit, tolerance={torch.float16: (1e-1, 7e-4)}),
     TorchLibOpInfo("max_dim", core_ops.aten_max_dim)
     .xfail(
