@@ -1816,11 +1816,10 @@ class ConvertAtenConvolutionBackwardOp : public OpConversionPattern<AtenConvolut
       gradInput =
           tensor::CastOp::create(rewriter, loc, getTypeConverter()->convertType(op->getResult(0).getType()), gradInput);
     } else {
-      // If the backward-weight convolution is not needed, zero init the grad_input tensor.
-      SmallVector<Value> gradInputSizes =
-          getTensorSizes(rewriter, loc, input);
-      gradInput =
-          createZeroInitTensor(rewriter, loc, gradInputSizes, inputDTy);
+      // If the input gradient is not needed, replace the torch.none result
+      // with a constant zero; the constant is later eliminated by DCE.
+      gradInput = arith::ConstantOp::create(rewriter, loc,
+                                            rewriter.getI64IntegerAttr(0));
     }
 
     // Computing Backward-Weight Convolution.
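The same placeholder trick is used for the weight and bias gradients in the hunks below: `rewriter.replaceOp` needs one replacement value per result of `aten.convolution_backward`, and a result the caller did not request is `torch.none` with no real users, so a throwaway `i64` zero constant satisfies `replaceOp` and is then deleted by DCE, instead of materializing a dead zero-initialized tensor. A minimal sketch of that leg (not code from this patch; `outputMask[0]` is a hypothetical flag standing in for however the pattern tests whether the input gradient was requested):

```cpp
// Sketch only: the grad-input leg of the lowering under the assumptions above.
Value gradInput;
if (outputMask[0]) {
  // ... build the real backward-input convolution and cast it to the
  // converted result type, as in the hunk above ...
} else {
  // No users exist for this result, so any placeholder Value works; an i64
  // zero constant is cheap and is removed by DCE after the replacement.
  gradInput = arith::ConstantOp::create(rewriter, loc,
                                        rewriter.getI64IntegerAttr(0));
}
// Eventually: rewriter.replaceOp(op, {gradInput, gradWeight, gradBias});
```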
@@ -1856,6 +1855,18 @@ class ConvertAtenConvolutionBackwardOp : public OpConversionPattern<AtenConvolut
     SmallVector<AffineMap> indexingMaps;
     SmallVector<IT> iteratorTypes;
 
+    // The backward-weight convolution is computed with a linalg.generic op,
+    // which generalizes the convolution to an arbitrary number of spatial
+    // dimensions:
+    // ```
+    // dLdw[f, g, c, k] = sum(x[n, g, c, d0 * k + s0 * o] * dLdy[n, g, f, o]
+    //                        for n in range(batch_size)
+    //                        for o in range(output_spatial_dims))
+    // ```
+    // where `n` is the batch dimension, `g` the group dimension, `f` the output
+    // channel dimension, `c` the input channel dimension, `k` the kernel spatial
+    // dimension, `o` the output spatial dimension, `d0` the dilation, and `s0`
+    // the stride. `x` is the input, `dLdy` the output gradient, `dLdw` the weight gradient.
     if (!isGroupedConvBwd) {
       if (numSpatialDims == 1) {
         AffineExpr f, c, k, n, o;
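As a sanity check on the formula in the new comment, the following standalone program (not part of the patch; the sizes, initialization, and the absence of padding and groups are assumptions for illustration) compares the 1-D, non-grouped weight gradient computed by that formula against a difference quotient of the forward convolution, which is exact because the output is linear in the weights:

```cpp
// build: c++ -std=c++17 check_dldw.cpp && ./a.out
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // Small made-up sizes; s0 = stride, d0 = dilation, no padding, no groups.
  const int N = 2, C = 3, F = 4, K = 3, O = 5, s0 = 2, d0 = 2;
  const int I = s0 * (O - 1) + d0 * (K - 1) + 1; // input spatial size

  // Flat 3-D indexing helper: idx(a, b, c) into an [A][B][Cdim] array.
  auto idx = [](int a, int b, int c, int B, int Cdim) { return (a * B + b) * Cdim + c; };

  std::vector<double> x(N * C * I), w(F * C * K), dLdy(N * F * O);
  for (size_t i = 0; i < x.size(); ++i) x[i] = 0.1 * double(i) - 1.0;
  for (size_t i = 0; i < w.size(); ++i) w[i] = 0.05 * double(i) + 0.2;
  for (size_t i = 0; i < dLdy.size(); ++i) dLdy[i] = 0.03 * double(i) - 0.4;

  // Forward convolution y[n, f, o] = sum_{c, k} w[f, c, k] * x[n, c, s0*o + d0*k],
  // contracted with dLdy to give a scalar loss L = <dLdy, y>.
  auto loss = [&](const std::vector<double> &wt) {
    double L = 0.0;
    for (int n = 0; n < N; ++n)
      for (int f = 0; f < F; ++f)
        for (int o = 0; o < O; ++o) {
          double y = 0.0;
          for (int c = 0; c < C; ++c)
            for (int k = 0; k < K; ++k)
              y += wt[idx(f, c, k, C, K)] * x[idx(n, c, s0 * o + d0 * k, C, I)];
          L += dLdy[idx(n, f, o, F, O)] * y;
        }
    return L;
  };

  // Weight gradient from the comment's formula (1-D, non-grouped case):
  // dLdw[f, c, k] = sum_{n, o} x[n, c, d0*k + s0*o] * dLdy[n, f, o].
  std::vector<double> dLdw(F * C * K, 0.0);
  for (int f = 0; f < F; ++f)
    for (int c = 0; c < C; ++c)
      for (int k = 0; k < K; ++k)
        for (int n = 0; n < N; ++n)
          for (int o = 0; o < O; ++o)
            dLdw[idx(f, c, k, C, K)] +=
                x[idx(n, c, d0 * k + s0 * o, C, I)] * dLdy[idx(n, f, o, F, O)];

  // L is linear in w, so a one-sided difference quotient is exact up to rounding.
  const double eps = 1.0, L0 = loss(w);
  double maxErr = 0.0;
  for (int i = 0; i < F * C * K; ++i) {
    std::vector<double> wp = w;
    wp[i] += eps;
    maxErr = std::max(maxErr, std::fabs((loss(wp) - L0) / eps - dLdw[i]));
  }
  std::printf("max |difference-quotient - formula| = %g\n", maxErr);
  assert(maxErr < 1e-9);
  return 0;
}
```

This contraction over the reduction dimensions `n` and `o` is what the indexing maps built from the dims `(f, c, k, n, o)` presumably encode in the `linalg.generic` op for the 1-D case.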
@@ -2011,11 +2022,10 @@ class ConvertAtenConvolutionBackwardOp : public OpConversionPattern<AtenConvolut
             getTypeConverter()->convertType(op->getResult(1).getType()),
             genericRes);
     } else {
-      // If the backward-weight convolution is not needed, zero init the grad_weight tensor.
-      SmallVector<Value> gradWeightSizes =
-          getTensorSizes(rewriter, loc, weight);
-      gradWeight =
-          createZeroInitTensor(rewriter, loc, gradWeightSizes, weightDTy);
+      // If the weight gradient is not needed, replace the torch.none result
+      // with a constant zero; the constant is later eliminated by DCE.
+      gradWeight = arith::ConstantOp::create(rewriter, loc,
+                                             rewriter.getI64IntegerAttr(0));
     }
 
     // Computing Backward-Bias Convolution.
@@ -2045,10 +2055,10 @@ class ConvertAtenConvolutionBackwardOp : public OpConversionPattern<AtenConvolut
       gradBias =
           tensor::CastOp::create(rewriter, loc, getTypeConverter()->convertType(op->getResult(2).getType()), gradBias);
     } else {
-      // If the bias are not needed, zero init the grad_bias tensor.
-      // TODO FIX IT
-      SmallVector<Value> gradBiasSizes = getTensorSizes(rewriter, loc, gradOutput);
-      gradBias = createZeroInitTensor(rewriter, loc, gradBiasSizes, gradOutputDTy);
+      // If the bias gradient is not needed, replace the torch.none result
+      // with a constant zero; the constant is later eliminated by DCE.
+      gradBias = arith::ConstantOp::create(rewriter, loc,
+                                           rewriter.getI64IntegerAttr(0));
     }
 
     rewriter.replaceOp(op, {gradInput, gradWeight, gradBias});