Skip to content

Commit b5387c0

Browse files
authored
[onnx] Lowering onnx.dequantize_linear to torch (#2759)
We can lower the per-tensor version of the operation to the dequantize operation by marking the input with the make-quantized-tensor component. This introduces the `qint*` and `quint*` tensor types, which can be lowered to the appropriate dequantization behavior during the torch-to-linalg conversion.
1 parent bd11877 commit b5387c0

File tree

2 files changed

+96
-1
lines changed

2 files changed

+96
-1
lines changed

lib/Conversion/TorchOnnxToTorch/DefaultDomainAtoF.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,59 @@ void mlir::torch::onnx_c::populateDefaultDomainAtoF(
11561156
binder.op, resultType, transposedInput, reshapeSizesList);
11571157
return success();
11581158
});
1159+
// Lowers onnx.DequantizeLinear to torch ops.
//
// Only the per-tensor form (rank-0 scale) is handled. The integer input is
// re-typed as a quantized tensor via
// aten._make_per_tensor_quantized_tensor, using the scalar scale and zero
// point extracted with aten.item, and is then dequantized with
// aten.dequantize.self. Every other form is reported as a match failure.
//
// NOTE(review): exactly three tensor operands are required here, while the
// ONNX operator allows the zero point to be omitted -- confirm whether the
// two-operand form needs to be supported.
patterns.onOp(
    "DequantizeLinear", 1,
    [](OpBinder binder, ConversionPatternRewriter &rewriter) {
      Torch::ValueTensorType resultType;
      llvm::SmallVector<Value> operands;
      if (binder.tensorOperands(operands, 3) ||
          binder.tensorResultType(resultType))
        return failure();

      Value operand = operands[0];
      Value scale = operands[1];
      Value zeropoint = operands[2];

      // The input dtype selects the quantized element type below, so it has
      // to be known; otherwise getDtype() would hand back a null type.
      auto operandTy = operand.getType().dyn_cast<Torch::ValueTensorType>();
      if (!operandTy || !operandTy.hasDtype())
        return rewriter.notifyMatchFailure(binder.op,
                                           "requires known input dtype");

      auto scaleTy = scale.getType().dyn_cast<Torch::ValueTensorType>();
      if (!scaleTy || !scaleTy.hasSizes())
        return rewriter.notifyMatchFailure(binder.op, "requires known rank");
      if (!resultType.hasDtype())
        return rewriter.notifyMatchFailure(binder.op,
                                           "requires known result dtype");

      // A non-scalar scale is not lowered by this pattern.
      if (scaleTy.getSizes().size() != 0)
        return rewriter.notifyMatchFailure(
            binder.op, "unimplemented: only rank-0 (per-tensor) scale is "
                       "supported");

      // Map the plain integer dtype onto the matching torch quantized dtype.
      Type qTy = operandTy.getDtype();
      if (qTy.isUnsignedInteger(8)) {
        qTy = rewriter.getType<Torch::QUInt8Type>();
      } else if (qTy.isSignedInteger(8)) {
        qTy = rewriter.getType<Torch::QInt8Type>();
      } else if (qTy.isSignedInteger(32)) {
        qTy = rewriter.getType<Torch::QInt32Type>();
      } else {
        return rewriter.notifyMatchFailure(binder.op,
                                           "unsupported input dtype");
      }

      // The quantized view wraps the operand, so it carries the operand's
      // sizes; only the element type changes.
      auto qTensorTy = rewriter.getType<Torch::ValueTensorType>(
          operandTy.getOptionalSizes(), qTy);

      // Scale and zero point are rank-0 tensors; turn them into scalars.
      scale = rewriter.create<Torch::AtenItemOp>(
          binder.getLoc(), rewriter.getType<Torch::FloatType>(), scale);
      zeropoint = rewriter.create<Torch::AtenItemOp>(
          binder.getLoc(), rewriter.getType<Torch::IntType>(), zeropoint);

      auto quantize =
          rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
              binder.getLoc(), qTensorTy, operand, scale, zeropoint);
      rewriter.replaceOpWithNewOp<Torch::AtenDequantizeSelfOp>(
          binder.op, resultType, quantize);
      return success();
    });
11591212
patterns.onOp("Div", 14,
11601213
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
11611214
Torch::ValueTensorType resultType;

test/Conversion/TorchOnnxToTorch/simple_ops_a_to_f.mlir

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: torch-mlir-opt <%s -convert-torch-onnx-to-torch | FileCheck %s
1+
// RUN: torch-mlir-opt <%s --split-input-file -convert-torch-onnx-to-torch | FileCheck %s
22
// Generally, the test cases accumulated here come from running the importer
33
// over all included backend tests that involve simple ops with no model
44
// level constants. This is a pragmatic choice which lets us have a lot
@@ -438,6 +438,48 @@ func.func @test_cos(%arg0: !torch.vtensor<[3,4,5],f32>) -> !torch.vtensor<[3,4,5
438438
return %0 : !torch.vtensor<[3,4,5],f32>
439439
}
440440

441+
// -----
442+
443+
// Per-tensor onnx.DequantizeLinear on an si8 input with a rank-0 f32 scale and
// a rank-0 si8 zero point. The lowering is expected to extract both scalars
// with torch.aten.item, build the quantized tensor with
// torch.aten._make_per_tensor_quantized_tensor, and return the result of
// torch.aten.dequantize.self.
// CHECK-LABEL: @test_dequantizelinear_si8
444+
func.func @test_dequantizelinear_si8(%arg0: !torch.vtensor<[6],si8>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],si8>) -> !torch.vtensor<[6],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 19 : si64} {
445+
%0 = torch.operator "onnx.DequantizeLinear"(%arg0, %arg1, %arg2) : (!torch.vtensor<[6],si8>, !torch.vtensor<[],f32>, !torch.vtensor<[],si8>) -> !torch.vtensor<[6],f32>
446+
// CHECK: %[[SCALE:.+]] = torch.aten.item %arg1 : !torch.vtensor<[],f32> -> !torch.float
447+
// CHECK: %[[ZP:.+]] = torch.aten.item %arg2 : !torch.vtensor<[],si8> -> !torch.int
448+
// CHECK: %[[MAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[SCALE]], %[[ZP]]
449+
// CHECK: %[[DEQ:.+]] = torch.aten.dequantize.self %[[MAKE]]
450+
// CHECK: return %[[DEQ]]
451+
return %0 : !torch.vtensor<[6],f32>
452+
}
453+
454+
// -----
455+
456+
// Per-tensor onnx.DequantizeLinear on a ui8 input with a rank-0 f32 scale and
// a rank-0 ui8 zero point. The lowering is expected to extract both scalars
// with torch.aten.item, build the quantized tensor with
// torch.aten._make_per_tensor_quantized_tensor, and return the result of
// torch.aten.dequantize.self.
// CHECK-LABEL: @test_dequantizelinear_ui8
457+
func.func @test_dequantizelinear_ui8(%arg0: !torch.vtensor<[6],ui8>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],ui8>) -> !torch.vtensor<[6],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 19 : si64} {
458+
%0 = torch.operator "onnx.DequantizeLinear"(%arg0, %arg1, %arg2) : (!torch.vtensor<[6],ui8>, !torch.vtensor<[],f32>, !torch.vtensor<[],ui8>) -> !torch.vtensor<[6],f32>
459+
// CHECK: %[[SCALE:.+]] = torch.aten.item %arg1 : !torch.vtensor<[],f32> -> !torch.float
460+
// CHECK: %[[ZP:.+]] = torch.aten.item %arg2 : !torch.vtensor<[],ui8> -> !torch.int
461+
// CHECK: %[[MAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[SCALE]], %[[ZP]]
462+
// CHECK: %[[DEQ:.+]] = torch.aten.dequantize.self %[[MAKE]]
463+
// CHECK: return %[[DEQ]]
464+
return %0 : !torch.vtensor<[6],f32>
465+
}
466+
467+
// -----
468+
469+
// Per-tensor onnx.DequantizeLinear on an si32 input with a rank-0 f32 scale
// and a rank-0 si32 zero point. The lowering is expected to extract both
// scalars with torch.aten.item, build the quantized tensor with
// torch.aten._make_per_tensor_quantized_tensor, and return the result of
// torch.aten.dequantize.self.
// CHECK-LABEL: @test_dequantizelinear_i32
470+
func.func @test_dequantizelinear_i32(%arg0: !torch.vtensor<[6],si32>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],si32>) -> !torch.vtensor<[6],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 19 : si64} {
471+
%0 = torch.operator "onnx.DequantizeLinear"(%arg0, %arg1, %arg2) : (!torch.vtensor<[6],si32>, !torch.vtensor<[],f32>, !torch.vtensor<[],si32>) -> !torch.vtensor<[6],f32>
472+
// CHECK: %[[SCALE:.+]] = torch.aten.item %arg1 : !torch.vtensor<[],f32> -> !torch.float
473+
// CHECK: %[[ZP:.+]] = torch.aten.item %arg2 : !torch.vtensor<[],si32> -> !torch.int
474+
// CHECK: %[[MAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[SCALE]], %[[ZP]]
475+
// CHECK: %[[DEQ:.+]] = torch.aten.dequantize.self %[[MAKE]]
476+
// CHECK: return %[[DEQ]]
477+
return %0 : !torch.vtensor<[6],f32>
478+
}
479+
480+
// -----
481+
482+
441483
// CHECK-LABEL: @test_div_bcast
442484
func.func @test_div_bcast(%arg0: !torch.vtensor<[3,4,5],f32>, %arg1: !torch.vtensor<[5],f32>) -> !torch.vtensor<[3,4,5],f32> attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 14 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
443485
// CHECK: torch.aten.div.Tensor %arg0, %arg1 : !torch.vtensor<[3,4,5],f32>, !torch.vtensor<[5],f32> -> !torch.vtensor<[3,4,5],f32>

0 commit comments

Comments
 (0)