@@ -1205,51 +1205,59 @@ def aten_bilinear(
     # bias shape: (out_features) - optional
     # output shape: (..., out_features)

-    # Decompose bilinear into MatMul operations:
-    # 1. Create outer product of input1 and input2
-    # 2. Reshape to flatten feature dimensions
-    # 3. Use MatMul with reshaped weight
-
-    # Get shapes for reshaping
-    input1_shape = op.Shape(input1)
-    weight_shape = op.Shape(weight)
+    # Leveraging N-dimensional MatMul, we can compute this as:
+    # 1. weight @ input2.T -> [out_features, in1_features, ...batch_dims]
+    # 2. input1 @ result -> [...batch_dims, out_features]

     # Get dimensions
+    weight_shape = op.Shape(weight)
     out_features = op.Gather(weight_shape, 0, axis=0)
     in1_features = op.Gather(weight_shape, 1, axis=0)
     in2_features = op.Gather(weight_shape, 2, axis=0)

-    # Get batch dimensions (everything except the last dimension)
-    input1_rank = Rank(input1)
-    batch_dims = op.Slice(input1_shape, [0], [input1_rank - 1])
-    batch_size = op.ReduceProd(batch_dims, keepdims=False)
+    # Step 1: Reshape weight for matrix multiplication
+    # weight: [out_features, in1_features, in2_features] -> [out_features * in1_features, in2_features]
+    weight_2d = op.Reshape(weight, op.Concat([op.Mul(out_features, in1_features)], [in2_features], axis=0))

-    # Create outer product: input1[..., i] * input2[..., j] -> [..., i, j]
-    # Reshape inputs to [batch_size, features] for easier handling
-    input1_2d = op.Reshape(input1, op.Concat([batch_size], [in1_features], axis=0))
+    # Get input2 shape for transpose
+    input2_shape = op.Shape(input2)
+    input2_rank = Rank(input2)
+    batch_dims = op.Slice(input2_shape, [0], [input2_rank - 1])
+
+    # Reshape input2 to 2D: [...batch_dims, in2_features] -> [batch_size, in2_features]
+    batch_size = op.ReduceProd(batch_dims, keepdims=False)
     input2_2d = op.Reshape(input2, op.Concat([batch_size], [in2_features], axis=0))

-    # Create outer product using unsqueeze and broadcasting
-    input1_expanded = op.Unsqueeze(input1_2d, axes=[2])  # [batch_size, in1_features, 1]
-    input2_expanded = op.Unsqueeze(input2_2d, axes=[1])  # [batch_size, 1, in2_features]
+    # Transpose input2_2d: [batch_size, in2_features] -> [in2_features, batch_size]
+    input2_t = op.Transpose(input2_2d, perm=[1, 0])
+
+    # First MatMul: weight_2d @ input2_t
+    # [out_features * in1_features, in2_features] @ [in2_features, batch_size]
+    # -> [out_features * in1_features, batch_size]
+    temp = op.MatMul(weight_2d, input2_t)

-    # Outer product via broadcasting multiplication
-    outer_product = op.Mul(input1_expanded, input2_expanded)  # [batch_size, in1_features, in2_features]
+    # Reshape temp: [out_features * in1_features, batch_size] -> [out_features, in1_features, batch_size]
+    temp = op.Reshape(temp, op.Concat([out_features], [in1_features], [batch_size], axis=0))

-    # Flatten the feature dimensions
-    features_total = op.Mul(in1_features, in2_features)
-    outer_flat = op.Reshape(outer_product, op.Concat([batch_size], [features_total], axis=0))
+    # Transpose temp for second matmul: [out_features, in1_features, batch_size] -> [batch_size, in1_features, out_features]
+    temp_t = op.Transpose(temp, perm=[2, 1, 0])
+
+    # Step 2: Prepare input1 for second MatMul
+    # Reshape input1 to 2D: [...batch_dims, in1_features] -> [batch_size, in1_features]
+    input1_2d = op.Reshape(input1, op.Concat([batch_size], [in1_features], axis=0))

-    # Reshape weight to 2D: [out_features, in1_features * in2_features]
-    weight_2d = op.Reshape(weight, op.Concat([out_features], [features_total], axis=0))
+    # Expand input1 for batch matrix multiplication: [batch_size, in1_features] -> [batch_size, 1, in1_features]
+    input1_expanded = op.Unsqueeze(input1_2d, axes=[1])

-    # Transpose weight for MatMul: [in1_features * in2_features, out_features]
-    weight_t = op.Transpose(weight_2d, perm=[1, 0])
+    # Second MatMul: input1_expanded @ temp_t
+    # [batch_size, 1, in1_features] @ [batch_size, in1_features, out_features]
+    # -> [batch_size, 1, out_features]
+    result = op.MatMul(input1_expanded, temp_t)

-    # Matrix multiplication: [batch_size, out_features]
-    result = op.MatMul(outer_flat, weight_t)
+    # Remove singleton dimension: [batch_size, 1, out_features] -> [batch_size, out_features]
+    result = op.Squeeze(result, axes=[1])

-    # Reshape back to original batch dimensions + out_features
+    # Reshape back to original batch dimensions
     output_shape = op.Concat(batch_dims, [out_features], axis=0)
     result = op.Reshape(result, output_shape)

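To see why the two MatMul steps reproduce the bilinear form output[..., o] = sum_{i,j} input1[..., i] * weight[o, i, j] * input2[..., j], here is a minimal NumPy sketch (not part of the change; the shapes and variable names are illustrative) that mirrors the decomposition above and checks it against a direct einsum reference:

```python
import numpy as np

batch, in1, in2, out = 3, 4, 5, 6
x1 = np.random.randn(batch, in1)        # input1 flattened to [batch_size, in1_features]
x2 = np.random.randn(batch, in2)        # input2 flattened to [batch_size, in2_features]
w = np.random.randn(out, in1, in2)      # weight: [out_features, in1_features, in2_features]

# Reference: out[b, o] = sum_{i, j} x1[b, i] * w[o, i, j] * x2[b, j]
expected = np.einsum("bi,oij,bj->bo", x1, w, x2)

# Step 1: weight @ input2.T
w_2d = w.reshape(out * in1, in2)                # [out*in1, in2]
temp = w_2d @ x2.T                              # [out*in1, batch]
temp = temp.reshape(out, in1, batch)            # [out, in1, batch]
temp_t = temp.transpose(2, 1, 0)                # [batch, in1, out]

# Step 2: input1 @ temp
result = (x1[:, None, :] @ temp_t).squeeze(1)   # [batch, out]

assert np.allclose(result, expected)
```

Collapsing all leading batch dimensions into a single batch_size (via ReduceProd of the sliced shape) keeps both MatMuls low-rank regardless of how many batch dimensions the inputs carry; the final Reshape restores the original batch shape.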