
Commit 9bfcbae

add factor scalars
1 parent aded9a2 commit 9bfcbae

3 files changed: +162 −0 lines changed

src/enzyme_ad/jax/Passes/EnzymeHLOOpt.cpp

Lines changed: 89 additions & 0 deletions
@@ -21701,6 +21701,94 @@ struct GatherElementwise
   }
 };
 
+struct FactorScalarsInDotGeneral final
+    : public CheckedOpRewritePattern<stablehlo::DotGeneralOp,
+                                     FactorScalarsInDotGeneral> {
+  using CheckedOpRewritePattern<
+      stablehlo::DotGeneralOp,
+      FactorScalarsInDotGeneral>::CheckedOpRewritePattern;
+
+  LogicalResult matchAndRewriteImpl(stablehlo::DotGeneralOp op,
+                                    PatternRewriter &rewriter) const {
+    auto lhs = op.getLhs();
+    auto rhs = op.getRhs();
+
+    // Try to decompose v into (splat scalar) * tensor. Returns true on
+    // success, with `scalar` left null when there is no scalar factor;
+    // returns false when v is a multiply that is also used outside this
+    // dot_general, since factoring could then not remove it.
+    auto extractMul = [&](Value v, Value &scalar, Value &z) -> bool {
+      auto mulOp = v.getDefiningOp<stablehlo::MulOp>();
+      if (!mulOp) { // no multiply; the scalar defaults to an implicit 1
+        scalar = nullptr;
+        z = v;
+        return true;
+      }
+      if (!isOnlyUsedInOperation(mulOp, op)) {
+        return false;
+      }
+
+      Value mLhs = mulOp.getLhs();
+      Value mRhs = mulOp.getRhs();
+
+      SplatElementsAttr splatAttr;
+      auto mLhsIsSplat = matchPattern(mLhs, m_Constant(&splatAttr));
+      auto mRhsIsSplat = matchPattern(mRhs, m_Constant(&splatAttr));
+
+      if (mLhsIsSplat) {
+        scalar = mLhs;
+        z = mRhs;
+      } else if (mRhsIsSplat) {
+        scalar = mRhs;
+        z = mLhs;
+      } else {
+        // Neither operand is a splat constant: treat all of v as Z, no scalar.
+        scalar = nullptr;
+        z = v;
+      }
+      return true;
+    };
+
+    Value lhsScalar, lhsZ;
+    Value rhsScalar, rhsZ;
+
+    if (!extractMul(lhs, lhsScalar, lhsZ) || !extractMul(rhs, rhsScalar, rhsZ))
+      return failure();
+
+    if (!lhsScalar && !rhsScalar) { // nothing to factor out
+      return failure();
+    }
+
+    auto rhsZT = rhsZ.getDefiningOp<stablehlo::TransposeOp>();
+    auto lhsZT = lhsZ.getDefiningOp<stablehlo::TransposeOp>();
+    if (lhsZ == rhsZ || (rhsZT && rhsZT.getOperand() == lhsZ) ||
+        (lhsZT && lhsZT.getOperand() == rhsZ)) {
+      auto precision =
+          op.getPrecisionConfig().value_or(stablehlo::PrecisionConfigAttr());
+      auto algorithm =
+          op.getAlgorithm().value_or(stablehlo::DotAlgorithmAttr());
+
+      auto newDot = rewriter.create<stablehlo::DotGeneralOp>(
+          op.getLoc(), op.getType(), lhsZ, rhsZ, op.getDotDimensionNumbers(),
+          precision, algorithm);
+
+      Value combinedScalar;
+      if (lhsScalar && rhsScalar) {
+        combinedScalar = rewriter.create<stablehlo::MulOp>(
+            op.getLoc(), lhsScalar, rhsScalar);
+      } else {
+        combinedScalar = lhsScalar ? lhsScalar : rhsScalar;
+      }
+
+      // Rebuild as (a*b) * dot_general(Z, Z).
+      Value result = rewriter.create<stablehlo::MulOp>(
+          op.getLoc(), combinedScalar, newDot.getResult());
+      rewriter.replaceOp(op, result);
+      return success();
+    }
+
+    return failure();
+  }
+};
+
 struct ChainedMultiplyToPower final
     : public CheckedOpRewritePattern<stablehlo::MulOp, ChainedMultiplyToPower> {
   using CheckedOpRewritePattern<
@@ -26164,6 +26252,7 @@ struct EnzymeHLOOptPass
                      (no_nan || all_finite), context);
 
     patterns.add<TransposeSymmetricSimplify>(context);
+    patterns.add<FactorScalarsInDotGeneral>(context);
 
     // clang-format off
     patterns.add<
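
For reference, the algebra behind the rewrite: dot_general is bilinear in its
operands, so a splat-constant factor commutes out of either side. In the
shared-operand case the pattern matches (the two Z values equal, or one a
transpose of the other), with splat scalars a and b (notation ours, not from
the commit):

\[
  \operatorname{dot\_general}(a Z,\; b Z) \;=\; (a \cdot b)\,\operatorname{dot\_general}(Z,\; Z)
\]

A minimal standalone sketch of the splat test that extractMul relies on, using
MLIR's matcher API; the helper name isSplatConstant is ours and not part of
this commit:

#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/Value.h"

// Sketch only: mirrors the check inside extractMul. A value qualifies as a
// factorable "scalar" when its defining op is a constant whose elements are
// all identical, which MLIR exposes as a SplatElementsAttr.
static bool isSplatConstant(mlir::Value v) {
  mlir::SplatElementsAttr splat;
  // m_Constant(&splat) matches a constant-like op and binds its value
  // attribute only when that attribute is a splat.
  return mlir::matchPattern(v, mlir::m_Constant(&splat));
}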

src/enzyme_ad/jax/TransformOps/TransformOps.td

Lines changed: 5 additions & 0 deletions
@@ -604,6 +604,11 @@ def ApplyTransposeSymmetricSimplify : EnzymeHLOPatternOp<
   let patterns = ["TransposeSymmetricSimplify"];
 }
 
+def ApplyFactorScalarsInDotGeneral : EnzymeHLOPatternOp<
+    "factor_scalars_in_dot_general"> {
+  let patterns = ["FactorScalarsInDotGeneral"];
+}
+
 def ApplyTransposeElementwisePatterns : EnzymeHLOParameterizedPatternOp<
     "transpose_elementwise"> {
   let arguments = (ins OptionalAttr<I64Attr>:$benefit, BoolAttr:$parameter);
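
Note: an EnzymeHLOPatternOp def like this exposes the C++ pattern to the
transform interpreter under the mnemonic "factor_scalars_in_dot_general"; the
new test below selects it via
--enzyme-hlo-generate-td="patterns=factor_scalars_in_dot_general" in its RUN
line.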
Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
+// RUN: enzymexlamlir-opt --enzyme-hlo-generate-td="patterns=factor_scalars_in_dot_general" --transform-interpreter --enzyme-hlo-remove-transform %s | FileCheck %s
+
+func.func @pass1(%arg0: tensor<10x10xf64> {enzymexla.memory_effects = []}) -> tensor<10x10xf64> attributes {enzymexla.memory_effects = []} {
+  %0 = stablehlo.constant dense<4.0> : tensor<10x10xf64>
+  %1 = stablehlo.multiply %0, %arg0 : tensor<10x10xf64>
+  %2 = stablehlo.dot_general %1, %arg0, contracting_dims = [0] x [0], precision = [DEFAULT, DEFAULT] : (tensor<10x10xf64>, tensor<10x10xf64>) -> tensor<10x10xf64>
+  return %2 : tensor<10x10xf64>
+}
+
+// CHECK: func.func @pass1(%arg0: tensor<10x10xf64> {enzymexla.memory_effects = []}) -> tensor<10x10xf64> attributes {enzymexla.memory_effects = []} {
+// CHECK-NEXT: %cst = stablehlo.constant dense<4.000000e+00> : tensor<10x10xf64>
+// CHECK-NEXT: %0 = stablehlo.dot_general %arg0, %arg0, contracting_dims = [0] x [0], precision = [DEFAULT, DEFAULT] : (tensor<10x10xf64>, tensor<10x10xf64>) -> tensor<10x10xf64>
+// CHECK-NEXT: %1 = stablehlo.multiply %cst, %0 : tensor<10x10xf64>
+// CHECK-NEXT: return %1 : tensor<10x10xf64>
+// CHECK-NEXT: }
+
+func.func @pass2(%arg0: tensor<10x10xf64> {enzymexla.memory_effects = []}) -> tensor<10x10xf64> attributes {enzymexla.memory_effects = []} {
+  %0 = stablehlo.constant dense<4.0> : tensor<10x10xf64>
+  %1 = stablehlo.multiply %0, %arg0 : tensor<10x10xf64>
+  %2 = stablehlo.constant dense<2.0> : tensor<10x10xf64>
+  %3 = stablehlo.multiply %0, %arg0 : tensor<10x10xf64>
+  %4 = stablehlo.dot_general %1, %3, contracting_dims = [1] x [0], precision = [DEFAULT, DEFAULT] : (tensor<10x10xf64>, tensor<10x10xf64>) -> tensor<10x10xf64>
+  return %4 : tensor<10x10xf64>
+}
+
+// CHECK: func.func @pass2(%arg0: tensor<10x10xf64> {enzymexla.memory_effects = []}) -> tensor<10x10xf64> attributes {enzymexla.memory_effects = []} {
+// CHECK-NEXT: %cst = stablehlo.constant dense<4.000000e+00> : tensor<10x10xf64>
+// CHECK-NEXT: %0 = stablehlo.dot_general %arg0, %arg0, contracting_dims = [1] x [0], precision = [DEFAULT, DEFAULT] : (tensor<10x10xf64>, tensor<10x10xf64>) -> tensor<10x10xf64>
+// CHECK-NEXT: %1 = stablehlo.multiply %cst, %cst : tensor<10x10xf64>
+// CHECK-NEXT: %2 = stablehlo.multiply %1, %0 : tensor<10x10xf64>
+// CHECK-NEXT: return %2 : tensor<10x10xf64>
+// CHECK-NEXT: }
+
+func.func @pass3(%arg0: tensor<10x10xf64> {enzymexla.memory_effects = []}) -> tensor<10x10xf64> attributes {enzymexla.memory_effects = []} {
+  %0 = stablehlo.constant dense<4.0> : tensor<10x10xf64>
+  %1 = stablehlo.multiply %0, %arg0 : tensor<10x10xf64>
+  %2 = stablehlo.constant dense<2.0> : tensor<10x10xf64>
+  %3 = stablehlo.transpose %arg0, dims = [1, 0] : (tensor<10x10xf64>) -> tensor<10x10xf64>
+  %4 = stablehlo.multiply %2, %3 : tensor<10x10xf64>
+  %5 = stablehlo.dot_general %1, %4, contracting_dims = [1] x [0], precision = [DEFAULT, DEFAULT] : (tensor<10x10xf64>, tensor<10x10xf64>) -> tensor<10x10xf64>
+  return %5 : tensor<10x10xf64>
+}
+
+// CHECK: func.func @pass3(%arg0: tensor<10x10xf64> {enzymexla.memory_effects = []}) -> tensor<10x10xf64> attributes {enzymexla.memory_effects = []} {
+// CHECK-NEXT: %cst = stablehlo.constant dense<2.000000e+00> : tensor<10x10xf64>
+// CHECK-NEXT: %cst_0 = stablehlo.constant dense<4.000000e+00> : tensor<10x10xf64>
+// CHECK-NEXT: %0 = stablehlo.transpose %arg0, dims = [1, 0] : (tensor<10x10xf64>) -> tensor<10x10xf64>
+// CHECK-NEXT: %1 = stablehlo.dot_general %arg0, %0, contracting_dims = [1] x [0], precision = [DEFAULT, DEFAULT] : (tensor<10x10xf64>, tensor<10x10xf64>) -> tensor<10x10xf64>
+// CHECK-NEXT: %2 = stablehlo.multiply %cst_0, %cst : tensor<10x10xf64>
+// CHECK-NEXT: %3 = stablehlo.multiply %2, %1 : tensor<10x10xf64>
+// CHECK-NEXT: return %3 : tensor<10x10xf64>
+// CHECK-NEXT: }
+
+func.func @fail1(%arg0: tensor<10x10xf64> {enzymexla.memory_effects = []}) -> tensor<10x10xf64> attributes {enzymexla.memory_effects = []} {
+  %0 = stablehlo.constant dense<4.0> : tensor<10x10xf64>
+  %1 = stablehlo.multiply %0, %arg0 : tensor<10x10xf64>
+  %2 = stablehlo.dot_general %1, %arg0, contracting_dims = [0] x [0], precision = [DEFAULT, DEFAULT] : (tensor<10x10xf64>, tensor<10x10xf64>) -> tensor<10x10xf64>
+  %3 = stablehlo.add %2, %1 : tensor<10x10xf64>
+  return %3 : tensor<10x10xf64>
+}
+
+// CHECK: func.func @fail1(%arg0: tensor<10x10xf64> {enzymexla.memory_effects = []}) -> tensor<10x10xf64> attributes {enzymexla.memory_effects = []} {
+// CHECK-NEXT: %cst = stablehlo.constant dense<4.000000e+00> : tensor<10x10xf64>
+// CHECK-NEXT: %0 = stablehlo.multiply %cst, %arg0 : tensor<10x10xf64>
+// CHECK-NEXT: %1 = stablehlo.dot_general %0, %arg0, contracting_dims = [0] x [0], precision = [DEFAULT, DEFAULT] : (tensor<10x10xf64>, tensor<10x10xf64>) -> tensor<10x10xf64>
+// CHECK-NEXT: %2 = stablehlo.add %1, %0 : tensor<10x10xf64>
+// CHECK-NEXT: return %2 : tensor<10x10xf64>
+// CHECK-NEXT: }
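
As a worked check of @pass1 above (notation ours): with contracting_dims =
[0] x [0], dot_general computes \( C_{ij} = \sum_k \mathrm{lhs}_{ki}\,\mathrm{rhs}_{kj} \),
i.e. \( \mathrm{lhs}^{\top}\mathrm{rhs} \), so

\[
  (4A)^{\top} A \;=\; 4\,(A^{\top} A),
\]

which is exactly what the CHECK lines expect: the new dot_general consumes
%arg0 twice and the splat constant multiplies its result afterwards. In
@fail1, by contrast, the multiply %1 also feeds the stablehlo.add, so
isOnlyUsedInOperation rejects the match and the IR is left unchanged.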
