[torch] Add OnnxToTorch lowering for Onnx.ImageDecoder op #3478

Open · wants to merge 1 commit into base: main
4 changes: 4 additions & 0 deletions include/torch-mlir/Conversion/TorchOnnxToTorch/Utils.h
@@ -36,6 +36,10 @@ Value createConstantIntList(OpBinder binder,
ConversionPatternRewriter &rewriter,
ArrayRef<int64_t> cstInput);

Value createConstantFloatList(OpBinder binder,
ConversionPatternRewriter &rewriter,
ArrayRef<double> cstInput);

Torch::ValueTensorType getQTorchTypeFromTorchIntType(Type ty);

template <typename T>
77 changes: 77 additions & 0 deletions lib/Conversion/TorchOnnxToTorch/DefaultDomainGtoP.cpp
@@ -2731,4 +2731,81 @@ void mlir::torch::onnx_c::populateDefaultDomainGtoP(
binder.op, tensorListResultType, input);
return success();
});
patterns.onOp(
"ImageDecoder", 20,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Value encodedImage;
Torch::ValueTensorType resultType;
std::string pixelFormat;
if (binder.tensorOperand(encodedImage) ||
binder.tensorResultType(resultType) ||
binder.customOpNameStringAttr(pixelFormat, "pixel_format", "RGB"))
return failure();

Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(0));
Value one = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(1));
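// Torch scalar type codes: 0 selects uint8 (used for the final cast
// below), 6 selects float (used as the dtype of the scaling tensor).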
Value floatType = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(6));
Value none = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value cstFalse =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);

auto encodedImageTy =
cast<Torch::ValueTensorType>(encodedImage.getType());
auto encodedImageShape = encodedImageTy.getSizes();

Value decodedImage;
if (pixelFormat == "BGR") {
// Flip the encoded image tensor along the last dimension.
Value axisToFlip = createConstantIntList(binder, rewriter, {2});
decodedImage = rewriter.create<Torch::AtenFlipOp>(
binder.getLoc(), resultType, encodedImage, axisToFlip);
} else if (pixelFormat == "RGB") {
// Do nothing, as this is already the default mode.
decodedImage = encodedImage;
} else if (pixelFormat == "Grayscale") {
if (encodedImageShape.size() != 3 || encodedImageShape[2] != 3)
return rewriter.notifyMatchFailure(
binder.op, "An input image of shape (H,W,3) is required "
"for pixel_format='Grayscale'");

// This scaling list is created based on ITU-R Rec. 601-7.
// These scaling factors are used by torchvision as well.
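// i.e., Y = 0.2989 * R + 0.5870 * G + 0.1140 * B.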
Value scalingList = createConstantFloatList(binder, rewriter,
{0.2989, 0.5870, 0.1140});
auto scalingListTy = rewriter.getType<Torch::ValueTensorType>(
ArrayRef<int64_t>{3}, rewriter.getF64Type());
scalingList = rewriter.create<Torch::AtenTensorOp>(
binder.getLoc(), scalingListTy, scalingList, floatType, none,
cstFalse);

// Unsqueeze the list of scaling factors.
auto unsqueezeResultTy = rewriter.getType<Torch::ValueTensorType>(
ArrayRef<int64_t>{3, 1}, rewriter.getF64Type());
scalingList = rewriter.create<Torch::AtenUnsqueezeOp>(
binder.getLoc(), unsqueezeResultTy, scalingList, one);

// The input encoded image has shape (H,W,3) and the scaling list has
// shape (3,1). A matmul produces a tensor of shape (H,W,1), which,
// after squeezing at dim=2, would be equivalent to unbinding the
// channels of the image, multiplying each channel by its corresponding
// scaling factor, and adding the resulting tensors. We do not squeeze
// the tensor, in order to preserve the resultType.
decodedImage = rewriter.create<Torch::AtenMatmulOp>(
binder.getLoc(), resultType, encodedImage, scalingList);
} else {
return rewriter.notifyMatchFailure(
binder.op, "Unsupported value for pixel_format");
}

// Cast the decoded image to uint8 type
rewriter.replaceOpWithNewOp<Torch::AtenToDtypeOp>(
binder.op, resultType, decodedImage, /*uInt8Type=*/zero,
/*non_blocking=*/cstFalse, /*copy=*/cstFalse,
/*memory_format=*/none);

return success();
});
}
14 changes: 14 additions & 0 deletions lib/Conversion/TorchOnnxToTorch/Utils.cpp
@@ -28,6 +28,20 @@ Value mlir::torch::onnx_c::createConstantIntList(
cstValue);
}

Value mlir::torch::onnx_c::createConstantFloatList(
OpBinder binder, ConversionPatternRewriter &rewriter,
ArrayRef<double> cstInput) {
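// Materialize each double as a torch.constant.float, then collect the
// constants into a single !torch.list<float>.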
SmallVector<Value> cstValue;
for (double i : cstInput) {
cstValue.push_back(rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getF64FloatAttr(i)));
}
return rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
Torch::ListType::get(Torch::FloatType::get(binder.op->getContext())),
cstValue);
}

Torch::ValueTensorType
mlir::torch::onnx_c::getQTorchTypeFromTorchIntType(Type ty) {
Torch::ValueTensorType tty = dyn_cast<Torch::ValueTensorType>(ty);
53 changes: 53 additions & 0 deletions test/Conversion/TorchOnnxToTorch/simple_ops_g_to_p.mlir
@@ -1543,3 +1543,56 @@ func.func @test_optional_get_element_tensor(%arg0: !torch.vtensor<[4],f32>) -> !
%0 = torch.operator "onnx.OptionalGetElement"(%arg0) : (!torch.vtensor<[4],f32>) -> !torch.vtensor<[4],f32>
return %0 : !torch.vtensor<[4],f32>
}

// -----

// CHECK-LABEL: func.func @test_image_decoder_decode_jpeg_bgr
func.func @test_image_decoder_decode_jpeg_bgr(%arg0: !torch.vtensor<[32,32,3],ui8>) -> !torch.vtensor<[32,32,3],ui8> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 20 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
// CHECK: %[[INT0_0:.*]] = torch.constant.int 0
// CHECK: %[[FLOATTYPE:.*]] = torch.constant.int 6
// CHECK: %[[NONEVAL:.*]] = torch.constant.none
// CHECK: %[[FALSEVAL:.*]] = torch.constant.bool false
// CHECK: %[[INT2_0:.*]] = torch.constant.int 2
// CHECK: %[[PRIMLIST:.*]] = torch.prim.ListConstruct %[[INT2_0]] : (!torch.int) -> !torch.list<int>
// CHECK: %[[FLIP:.*]] = torch.aten.flip %arg0, %[[PRIMLIST]] : !torch.vtensor<[32,32,3],ui8>, !torch.list<int> -> !torch.vtensor<[32,32,3],ui8>
// CHECK: %[[CAST:.*]] = torch.aten.to.dtype %[[FLIP]], %[[INT0_0]], %[[FALSEVAL]], %[[FALSEVAL]], %[[NONEVAL]] : !torch.vtensor<[32,32,3],ui8>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[32,32,3],ui8>
// CHECK: return %[[CAST]] : !torch.vtensor<[32,32,3],ui8>
%0 = torch.operator "onnx.ImageDecoder"(%arg0) {torch.onnx.pixel_format = "BGR"} : (!torch.vtensor<[32,32,3],ui8>) -> !torch.vtensor<[32,32,3],ui8>
return %0 : !torch.vtensor<[32,32,3],ui8>
Comment on lines +1560 to +1561
Member:

The implementation assumes, for simplicity, that the image in the respective format has already been loaded and converted to an appropriate tensor representation, and therefore implements different op semantics than the original ONNX definition.

This op takes an encoded stream of bytes (e.g. !torch.vtensor<[1058],ui8>) and decodes it. This PR changes the op to take different inputs (an already decoded image, e.g. !torch.vtensor<[32,32,3],ui8>) and perform a different computation.

Here's an imported test case from the ONNX test suite using similar inputs: https://github.com/nod-ai/SHARK-TestSuite/blob/main/iree_tests/onnx/node/generated/test_image_decoder_decode_jpeg_bgr/model.mlir

module {
  func.func @test_image_decoder_decode_jpeg_bgr(%arg0: !torch.vtensor<[1058],ui8>) -> !torch.vtensor<[32,32,3],ui8> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 20 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
    %none = torch.constant.none
    %0 = torch.operator "onnx.ImageDecoder"(%arg0) {torch.onnx.pixel_format = "BGR"} : (!torch.vtensor<[1058],ui8>) -> !torch.vtensor<[32,32,3],ui8> 
    return %0 : !torch.vtensor<[32,32,3],ui8>
  }
}

Are there any other cases in torch-mlir where an op definition is changed like this? For this to work at all, input ONNX models and/or the ONNX importer would need to be changed to use this different op. I'm deeply skeptical about checking in code like this that uses the same name as the original op but with an entirely different implementation; that's a recipe for confusion and maintenance costs later on.

Contributor Author:

Hi @ScottTodd, I can totally understand your concern, but I am extremely limited in the number of ways to overcome this issue of loading the image tensor, and I am very open to any tips you might have for this too.

However, all the steps that I follow after taking the input are logically correct and the code in the PR is closely modelled after the onnx reference implementation.

Member:

> and the code in the PR is closely modelled after the onnx reference implementation.

Which reference implementation are you looking at? The one I see is https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_image_decoder.py and that is calling

img = PIL.Image.open(io.BytesIO(encoded.tobytes()))

that's not something we can hand-wave away - it's a large chunk of code bundled into a complicated library, incompatible with this style of compiler / code generator.

Changing the op definition but using the same name does not count as "supporting" an op. An incorrect implementation is worse than no implementation. We could lower via a custom op somehow to backends that want to use their own implementation, but adding this style of lowering would prevent that.
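
For concreteness, here is a minimal sketch of that decode path (simplified from the linked op_image_decoder.py; the function name and exact conversion calls are illustrative, not the verbatim upstream code):

import io

import numpy as np
import PIL.Image

def reference_image_decoder(encoded: np.ndarray, pixel_format: str = "RGB") -> np.ndarray:
    # Decode the raw compressed byte stream (JPEG, PNG, ...) with PIL.
    img = PIL.Image.open(io.BytesIO(encoded.tobytes()))
    if pixel_format == "Grayscale":
        # PIL's "L" mode applies the Rec. 601 luma weights during conversion.
        decoded = np.array(img.convert("L"))
        return decoded[:, :, np.newaxis]  # (H, W) -> (H, W, 1)
    decoded = np.array(img.convert("RGB"))
    if pixel_format == "BGR":
        decoded = decoded[:, :, ::-1]  # reverse the channel axis
    return decoded

The actual decompression happens inside PIL, before any tensor math; that is exactly the part a lowering of this style cannot express.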

Contributor Author:

> that's not something we can hand-wave away - it's a large chunk of code bundled into a complicated library, incompatible with this style of compiler / code generator.

Exactly! But claiming support for the op appears to be a priority, and the approach in this PR is the only way I can see at the moment that gets anywhere close to that. I have no issue if we decide to close this one as not feasible, as I agree with your points. But as I said, the use of PIL (and hence the large amount of bundled code) is an extremely limiting factor for replicating this through compiler codegen.

So the decision on whether this PR is reasonable is yours.

}

// -----

// CHECK-LABEL: func.func @test_image_decoder_decode_rgb
func.func @test_image_decoder_decode_rgb(%arg0: !torch.vtensor<[32,32,3],ui8>) -> !torch.vtensor<[32,32,3],ui8> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 20 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
// CHECK: %[[INT0_0:.*]] = torch.constant.int 0
// CHECK: %[[FLOATTYPE:.*]] = torch.constant.int 6
// CHECK: %[[NONEVAL:.*]] = torch.constant.none
// CHECK: %[[FALSEVAL:.*]] = torch.constant.bool false
// CHECK: %[[CAST:.*]] = torch.aten.to.dtype %arg0, %[[INT0_0]], %[[FALSEVAL]], %[[FALSEVAL]], %[[NONEVAL]] : !torch.vtensor<[32,32,3],ui8>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[32,32,3],ui8>
// CHECK: return %[[CAST]] : !torch.vtensor<[32,32,3],ui8>
%0 = torch.operator "onnx.ImageDecoder"(%arg0) {torch.onnx.pixel_format = "RGB"} : (!torch.vtensor<[32,32,3],ui8>) -> !torch.vtensor<[32,32,3],ui8>
return %0 : !torch.vtensor<[32,32,3],ui8>
}

// -----

// CHECK-LABEL: func.func @test_image_decoder_decode_grayscale
func.func @test_image_decoder_decode_grayscale(%arg0: !torch.vtensor<[32,32,3],ui8>) -> !torch.vtensor<[32,32,1],ui8> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 20 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
// CHECK: %[[INT0_0:.*]] = torch.constant.int 0
// CHECK: %[[INT1_0:.*]] = torch.constant.int 1
// CHECK: %[[FLOATTYPE:.*]] = torch.constant.int 6
// CHECK: %[[NONEVAL:.*]] = torch.constant.none
// CHECK: %[[FALSEVAL:.*]] = torch.constant.bool false
// CHECK: %[[RSCALE:.*]] = torch.constant.float 2.989000e-01
// CHECK: %[[GSCALE:.*]] = torch.constant.float 5.870000e-01
// CHECK: %[[BSCALE:.*]] = torch.constant.float 1.140000e-01
// CHECK: %[[SCALELIST:.*]] = torch.prim.ListConstruct %[[RSCALE]], %[[GSCALE]], %[[BSCALE]] : (!torch.float, !torch.float, !torch.float) -> !torch.list<float>
// CHECK: %[[SCALETENSOR:.*]] = torch.aten.tensor %[[SCALELIST]], %[[FLOATTYPE]], %[[NONEVAL]], %[[FALSEVAL]] : !torch.list<float>, !torch.int, !torch.none, !torch.bool -> !torch.vtensor<[3],f64>
// CHECK: %[[UNSQUEEZE:.*]] = torch.aten.unsqueeze %[[SCALETENSOR]], %[[INT1_0]] : !torch.vtensor<[3],f64>, !torch.int -> !torch.vtensor<[3,1],f64>
// CHECK: %[[MATMUL:.*]] = torch.aten.matmul %arg0, %[[UNSQUEEZE]] : !torch.vtensor<[32,32,3],ui8>, !torch.vtensor<[3,1],f64> -> !torch.vtensor<[32,32,1],ui8>
// CHECK: %[[CAST:.*]] = torch.aten.to.dtype %[[MATMUL]], %[[INT0_0]], %[[FALSEVAL]], %[[FALSEVAL]], %[[NONEVAL]] : !torch.vtensor<[32,32,1],ui8>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[32,32,1],ui8>
// CHECK: return %[[CAST]] : !torch.vtensor<[32,32,1],ui8>
%0 = torch.operator "onnx.ImageDecoder"(%arg0) {torch.onnx.pixel_format = "Grayscale"} : (!torch.vtensor<[32,32,3],ui8>) -> !torch.vtensor<[32,32,1],ui8>
return %0 : !torch.vtensor<[32,32,1],ui8>
}