• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/kernels/image/image_utils.h"
17 #include <opencv2/imgproc/types_c.h>
18 #include <algorithm>
19 #include <limits>
20 #include <vector>
21 #include <stdexcept>
22 #include <opencv2/imgcodecs.hpp>
23 #include "utils/ms_utils.h"
24 #include "minddata/dataset/core/cv_tensor.h"
25 #include "minddata/dataset/core/tensor.h"
26 #include "minddata/dataset/core/tensor_shape.h"
27 #include "minddata/dataset/include/dataset/constants.h"
28 #include "minddata/dataset/kernels/image/math_utils.h"
29 #include "minddata/dataset/kernels/image/resize_cubic_op.h"
30 
// Largest integer exactly representable in a 32-bit float (2^24); values beyond
// this lose precision when round-tripped through float.
constexpr int32_t MAX_INT_PRECISION = 16777216;  // float int precision is 16777216
// Default grid dimensions (1 x 1). NOTE(review): presumably consumed by
// slice/patch helpers later in the file -- confirm against the full source.
constexpr int32_t DEFAULT_NUM_HEIGHT = 1;
constexpr int32_t DEFAULT_NUM_WIDTH = 1;
34 
35 namespace mindspore {
36 namespace dataset {
GetCVInterpolationMode(InterpolationMode mode)37 int GetCVInterpolationMode(InterpolationMode mode) {
38   switch (mode) {
39     case InterpolationMode::kLinear:
40       return static_cast<int>(cv::InterpolationFlags::INTER_LINEAR);
41     case InterpolationMode::kCubic:
42       return static_cast<int>(cv::InterpolationFlags::INTER_CUBIC);
43     case InterpolationMode::kArea:
44       return static_cast<int>(cv::InterpolationFlags::INTER_AREA);
45     case InterpolationMode::kNearestNeighbour:
46       return static_cast<int>(cv::InterpolationFlags::INTER_NEAREST);
47     default:
48       return static_cast<int>(cv::InterpolationFlags::INTER_LINEAR);
49   }
50 }
51 
GetCVBorderType(BorderType type)52 int GetCVBorderType(BorderType type) {
53   switch (type) {
54     case BorderType::kConstant:
55       return static_cast<int>(cv::BorderTypes::BORDER_CONSTANT);
56     case BorderType::kEdge:
57       return static_cast<int>(cv::BorderTypes::BORDER_REPLICATE);
58     case BorderType::kReflect:
59       return static_cast<int>(cv::BorderTypes::BORDER_REFLECT101);
60     case BorderType::kSymmetric:
61       return static_cast<int>(cv::BorderTypes::BORDER_REFLECT);
62     default:
63       return static_cast<int>(cv::BorderTypes::BORDER_CONSTANT);
64   }
65 }
66 
GetConvertShape(ConvertMode convert_mode,const std::shared_ptr<CVTensor> & input_cv,std::vector<dsize_t> * node)67 Status GetConvertShape(ConvertMode convert_mode, const std::shared_ptr<CVTensor> &input_cv,
68                        std::vector<dsize_t> *node) {
69   std::vector<ConvertMode> one_channels = {ConvertMode::COLOR_BGR2GRAY, ConvertMode::COLOR_RGB2GRAY,
70                                            ConvertMode::COLOR_BGRA2GRAY, ConvertMode::COLOR_RGBA2GRAY};
71   std::vector<ConvertMode> three_channels = {
72     ConvertMode::COLOR_BGRA2BGR, ConvertMode::COLOR_RGBA2RGB, ConvertMode::COLOR_RGBA2BGR, ConvertMode::COLOR_BGRA2RGB,
73     ConvertMode::COLOR_BGR2RGB,  ConvertMode::COLOR_RGB2BGR,  ConvertMode::COLOR_GRAY2BGR, ConvertMode::COLOR_GRAY2RGB};
74   std::vector<ConvertMode> four_channels = {ConvertMode::COLOR_BGR2BGRA,  ConvertMode::COLOR_RGB2RGBA,
75                                             ConvertMode::COLOR_BGR2RGBA,  ConvertMode::COLOR_RGB2BGRA,
76                                             ConvertMode::COLOR_BGRA2RGBA, ConvertMode::COLOR_RGBA2BGRA,
77                                             ConvertMode::COLOR_GRAY2BGRA, ConvertMode::COLOR_GRAY2RGBA};
78   if (std::find(three_channels.begin(), three_channels.end(), convert_mode) != three_channels.end()) {
79     *node = {input_cv->shape()[0], input_cv->shape()[1], 3};
80   } else if (std::find(four_channels.begin(), four_channels.end(), convert_mode) != four_channels.end()) {
81     *node = {input_cv->shape()[0], input_cv->shape()[1], 4};
82   } else if (std::find(one_channels.begin(), one_channels.end(), convert_mode) != one_channels.end()) {
83     *node = {input_cv->shape()[0], input_cv->shape()[1]};
84   } else {
85     RETURN_STATUS_UNEXPECTED(
86       "The mode of image channel conversion must be in ConvertMode, which mainly includes "
87       "conversion between RGB, BGR, GRAY, RGBA etc.");
88   }
89   return Status::OK();
90 }
91 
CheckTensorShape(const std::shared_ptr<Tensor> & tensor,const int & channel)92 bool CheckTensorShape(const std::shared_ptr<Tensor> &tensor, const int &channel) {
93   if (tensor == nullptr) {
94     return false;
95   }
96   bool rc = false;
97   if (tensor->shape().Size() <= channel) {
98     return false;
99   }
100   if (tensor->Rank() != DEFAULT_IMAGE_RANK ||
101       (tensor->shape()[channel] != 1 && tensor->shape()[channel] != DEFAULT_IMAGE_CHANNELS)) {
102     rc = true;
103   }
104   return rc;
105 }
106 
Flip(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output,int flip_code)107 Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int flip_code) {
108   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
109 
110   if (input_cv->Rank() == 1 || input_cv->mat().dims > 2) {
111     RETURN_STATUS_UNEXPECTED("Flip: shape of input is not <H,W,C> or <H,W>, but got rank:" +
112                              std::to_string(input_cv->Rank()));
113   }
114 
115   std::shared_ptr<CVTensor> output_cv;
116   RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
117 
118   if (input_cv->mat().data) {
119     try {
120       cv::flip(input_cv->mat(), output_cv->mat(), flip_code);
121       *output = std::static_pointer_cast<Tensor>(output_cv);
122       return Status::OK();
123     } catch (const cv::Exception &e) {
124       RETURN_STATUS_UNEXPECTED("Flip: " + std::string(e.what()));
125     }
126   } else {
127     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Flip: allocate memory failed.");
128   }
129 }
130 
HorizontalFlip(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)131 Status HorizontalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
132   return Flip(std::move(input), output, 1);
133 }
134 
VerticalFlip(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)135 Status VerticalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
136   return Flip(std::move(input), output, 0);
137 }
138 
Resize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t output_height,int32_t output_width,double fx,double fy,InterpolationMode mode)139 Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height,
140               int32_t output_width, double fx, double fy, InterpolationMode mode) {
141   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
142   if (!input_cv->mat().data) {
143     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Resize: load image failed.");
144   }
145   RETURN_IF_NOT_OK(ValidateImageRank("Resize", input_cv->Rank()));
146 
147   cv::Mat in_image = input_cv->mat();
148   const uint32_t kResizeShapeLimits = 1000;
149   // resize image too large or too small, 1000 is arbitrarily chosen here to prevent open cv from segmentation fault
150   if (output_height > in_image.rows * kResizeShapeLimits || output_width > in_image.cols * kResizeShapeLimits) {
151     std::string err_msg =
152       "Resize: the resizing width or height is too big, it's 1000 times bigger than the original image, got output "
153       "height: " +
154       std::to_string(output_height) + ", width: " + std::to_string(output_width) +
155       ", and original image size:" + std::to_string(in_image.rows) + ", " + std::to_string(in_image.cols);
156     return Status(StatusCode::kMDShapeMisMatch, err_msg);
157   }
158   if (output_height == 0 || output_width == 0) {
159     std::string err_msg = "Resize: the resizing width or height is invalid, width or height is zero.";
160     return Status(StatusCode::kMDShapeMisMatch, err_msg);
161   }
162 
163   if (mode == InterpolationMode::kCubicPil) {
164     LiteMat imIn, imOut;
165     std::shared_ptr<Tensor> output_tensor;
166     TensorShape new_shape = TensorShape({output_height, output_width, 3});
167     RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input_cv->type(), &output_tensor));
168     uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
169     imOut.Init(output_width, output_height, input_cv->shape()[2], reinterpret_cast<void *>(buffer), LDataType::UINT8);
170     imIn.Init(input_cv->shape()[1], input_cv->shape()[0], input_cv->shape()[2], input_cv->mat().data, LDataType::UINT8);
171     if (ResizeCubic(imIn, imOut, output_width, output_height) == false) {
172       RETURN_STATUS_UNEXPECTED("Resize: failed to do resize, please check the error msg.");
173     }
174     *output = output_tensor;
175     return Status::OK();
176   }
177   try {
178     TensorShape shape{output_height, output_width};
179     int num_channels = input_cv->shape()[CHANNEL_INDEX];
180     if (input_cv->Rank() == DEFAULT_IMAGE_RANK) shape = shape.AppendDim(num_channels);
181     std::shared_ptr<CVTensor> output_cv;
182     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));
183 
184     auto cv_mode = GetCVInterpolationMode(mode);
185     cv::resize(in_image, output_cv->mat(), cv::Size(output_width, output_height), fx, fy, cv_mode);
186     *output = std::static_pointer_cast<Tensor>(output_cv);
187     return Status::OK();
188   } catch (const cv::Exception &e) {
189     RETURN_STATUS_UNEXPECTED("Resize: " + std::string(e.what()));
190   }
191 }
192 
IsNonEmptyJPEG(const std::shared_ptr<Tensor> & input)193 bool IsNonEmptyJPEG(const std::shared_ptr<Tensor> &input) {
194   const unsigned char *kJpegMagic = (unsigned char *)"\xFF\xD8\xFF";
195   constexpr dsize_t kJpegMagicLen = 3;
196   return input->SizeInBytes() > kJpegMagicLen && memcmp(input->GetBuffer(), kJpegMagic, kJpegMagicLen) == 0;
197 }
198 
IsNonEmptyPNG(const std::shared_ptr<Tensor> & input)199 bool IsNonEmptyPNG(const std::shared_ptr<Tensor> &input) {
200   const unsigned char *kPngMagic = (unsigned char *)"\x89\x50\x4E\x47";
201   constexpr dsize_t kPngMagicLen = 4;
202   return input->SizeInBytes() > kPngMagicLen && memcmp(input->GetBuffer(), kPngMagic, kPngMagicLen) == 0;
203 }
204 
Decode(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)205 Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
206   if (IsNonEmptyJPEG(input)) {
207     return JpegCropAndDecode(input, output);
208   } else {
209     return DecodeCv(input, output);
210   }
211 }
212 
DecodeCv(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)213 Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
214   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
215   if (!input_cv->mat().data) {
216     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: load image failed.");
217   }
218   try {
219     cv::Mat img_mat = cv::imdecode(input_cv->mat(), cv::IMREAD_COLOR | cv::IMREAD_IGNORE_ORIENTATION);
220     if (img_mat.data == nullptr) {
221       std::string err = "Decode: image decode failed.";
222       RETURN_STATUS_UNEXPECTED(err);
223     }
224     cv::cvtColor(img_mat, img_mat, static_cast<int>(cv::COLOR_BGR2RGB));
225     std::shared_ptr<CVTensor> output_cv;
226     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, 3, &output_cv));
227     *output = std::static_pointer_cast<Tensor>(output_cv);
228     return Status::OK();
229   } catch (const cv::Exception &e) {
230     RETURN_STATUS_UNEXPECTED("Decode: " + std::string(e.what()));
231   }
232 }
233 
// libjpeg source-manager init hook; a memory-backed source (see JpegSetSource)
// needs no per-decode initialization, so this is intentionally a no-op.
static void JpegInitSource(j_decompress_ptr cinfo) {}
235 
// libjpeg source-manager callback invoked when the decoder wants more data.
// The entire compressed stream is handed over up front by JpegSetSource, so an
// empty buffer here means the stream is truncated: signal JERR_INPUT_EMPTY
// (which raises via JpegErrorExitCustom) and report no data with FALSE.
static boolean JpegFillInputBuffer(j_decompress_ptr cinfo) {
  if (cinfo->src->bytes_in_buffer == 0) {
    // Under ARM platform raise runtime_error may cause core problem,
    // so we catch runtime_error and just return FALSE.
    try {
      ERREXIT(cinfo, JERR_INPUT_EMPTY);
    } catch (std::runtime_error &e) {
      return FALSE;
    }
    return FALSE;
  }
  return TRUE;
}
249 
// libjpeg source-manager termination hook; the memory buffer is owned by the
// caller, so there is nothing to release here.
static void JpegTermSource(j_decompress_ptr cinfo) {}
251 
JpegSkipInputData(j_decompress_ptr cinfo,int64_t jump)252 static void JpegSkipInputData(j_decompress_ptr cinfo, int64_t jump) {
253   if (jump < 0) {
254     return;
255   }
256   if (static_cast<size_t>(jump) > cinfo->src->bytes_in_buffer) {
257     cinfo->src->bytes_in_buffer = 0;
258     return;
259   } else {
260     cinfo->src->bytes_in_buffer -= jump;
261     cinfo->src->next_input_byte += jump;
262   }
263 }
264 
// Install a raw in-memory buffer as the libjpeg input source for `cinfo`.
// `data` must remain valid for the whole decode; the source-manager struct
// itself is allocated from libjpeg's JPOOL_PERMANENT pool and freed by
// jpeg_destroy_decompress.
void JpegSetSource(j_decompress_ptr cinfo, const void *data, int64_t datasize) {
  cinfo->src = static_cast<struct jpeg_source_mgr *>(
    (*cinfo->mem->alloc_small)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_PERMANENT, sizeof(struct jpeg_source_mgr)));
  cinfo->src->init_source = JpegInitSource;
  cinfo->src->fill_input_buffer = JpegFillInputBuffer;
#if defined(_WIN32) || defined(_WIN64) || defined(ENABLE_ARM32) || defined(__APPLE__)
  // On these targets skip_input_data is declared taking `long`; bridge our
  // int64_t-taking implementation with a function-pointer cast.
  cinfo->src->skip_input_data = reinterpret_cast<void (*)(j_decompress_ptr, long)>(JpegSkipInputData);
#else
  cinfo->src->skip_input_data = JpegSkipInputData;
#endif
  // Use libjpeg's stock restart-marker resync behavior.
  cinfo->src->resync_to_restart = jpeg_resync_to_restart;
  cinfo->src->term_source = JpegTermSource;
  cinfo->src->bytes_in_buffer = datasize;
  cinfo->src->next_input_byte = static_cast<const JOCTET *>(data);
}
280 
// Read scanlines from `cinfo` until output_scanline reaches max_scanlines_to_read,
// writing 3-component RGB rows into `buffer` (stride bytes per row, buffer_size
// bytes total). crop_w / offset select the window of interest inside the
// MCU-aligned strip of width crop_w_aligned that libjpeg actually decodes.
// CMYK sources are converted to RGB per pixel; other sources are already RGB
// (see JpegSetColorSpace) and are block-copied. On failure the decompressor is
// destroyed before returning the error status.
static Status JpegReadScanlines(jpeg_decompress_struct *const cinfo, int max_scanlines_to_read, JSAMPLE *buffer,
                                int buffer_size, int crop_w, int crop_w_aligned, int offset, int stride) {
  // scanlines will be read to this buffer first, must have the number
  // of components equal to the number of components in the image
  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int64_t>::max() / cinfo->output_components) > crop_w_aligned,
                               "JpegReadScanlines: multiplication out of bounds.");
  int64_t scanline_size = crop_w_aligned * cinfo->output_components;
  std::vector<JSAMPLE> scanline(scanline_size);
  JSAMPLE *scanline_ptr = &scanline[0];
  while (cinfo->output_scanline < static_cast<unsigned int>(max_scanlines_to_read)) {
    int num_lines_read = 0;
    try {
      // libjpeg errors surface as std::runtime_error via JpegErrorExitCustom.
      num_lines_read = jpeg_read_scanlines(cinfo, &scanline_ptr, 1);
    } catch (std::runtime_error &e) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: image decode failed.");
    }
    if (cinfo->out_color_space == JCS_CMYK && num_lines_read > 0) {
      // Convert 4-component CMYK pixels to RGB one at a time.
      for (int i = 0; i < crop_w; ++i) {
        const int cmyk_pixel = 4 * i + offset;
        const int c = scanline_ptr[cmyk_pixel];
        const int m = scanline_ptr[cmyk_pixel + 1];
        const int y = scanline_ptr[cmyk_pixel + 2];
        const int k = scanline_ptr[cmyk_pixel + 3];
        int r, g, b;
        if (cinfo->saw_Adobe_marker) {
          // Adobe-marked JPEGs store inverted CMYK, hence the different formula.
          r = (k * c) / 255;
          g = (k * m) / 255;
          b = (k * y) / 255;
        } else {
          r = (255 - c) * (255 - k) / 255;
          g = (255 - m) * (255 - k) / 255;
          b = (255 - y) * (255 - k) / 255;
        }
        buffer[3 * i + 0] = r;
        buffer[3 * i + 1] = g;
        buffer[3 * i + 2] = b;
      }
    } else if (num_lines_read > 0) {
      // Already RGB: copy only the [offset, offset + stride) window of the scanline.
      int copy_status = memcpy_s(buffer, buffer_size, scanline_ptr + offset, stride);
      if (copy_status != 0) {
        jpeg_destroy_decompress(cinfo);
        RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: memcpy failed.");
      }
    } else {
      // Zero lines read: the stream is corrupt or truncated.
      jpeg_destroy_decompress(cinfo);
      std::string err_msg = "[Internal ERROR] Decode: image decode failed.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
    // Advance the output cursor by one decoded row.
    buffer += stride;
    buffer_size = buffer_size - stride;
  }
  return Status::OK();
}
334 
JpegSetColorSpace(jpeg_decompress_struct * cinfo)335 static Status JpegSetColorSpace(jpeg_decompress_struct *cinfo) {
336   switch (cinfo->num_components) {
337     case 1:
338       // we want to output 3 components if it's grayscale
339       cinfo->out_color_space = JCS_RGB;
340       return Status::OK();
341     case 3:
342       cinfo->out_color_space = JCS_RGB;
343       return Status::OK();
344     case 4:
345       // Need to manually convert to RGB
346       cinfo->out_color_space = JCS_CMYK;
347       return Status::OK();
348     default:
349       jpeg_destroy_decompress(cinfo);
350       std::string err_msg = "[Internal ERROR] Decode: image decode failed.";
351       RETURN_STATUS_UNEXPECTED(err_msg);
352   }
353 }
354 
// Custom libjpeg error handler: the libjpeg default calls exit(); instead,
// render the library's message and raise a C++ exception so callers can
// unwind and release the decompressor cleanly.
void JpegErrorExitCustom(j_common_ptr cinfo) {
  char jpeg_last_error_msg[JMSG_LENGTH_MAX];
  // Let libjpeg format its own diagnostic into the local buffer.
  (*(cinfo->err->format_message))(cinfo, jpeg_last_error_msg);
  throw std::runtime_error(jpeg_last_error_msg);
}
360 
JpegCropAndDecode(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int crop_x,int crop_y,int crop_w,int crop_h)361 Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int crop_x, int crop_y,
362                          int crop_w, int crop_h) {
363   struct jpeg_decompress_struct cinfo;
364   auto DestroyDecompressAndReturnError = [&cinfo](const std::string &err) {
365     jpeg_destroy_decompress(&cinfo);
366     RETURN_STATUS_UNEXPECTED(err);
367   };
368   struct JpegErrorManagerCustom jerr;
369   cinfo.err = jpeg_std_error(&jerr.pub);
370   jerr.pub.error_exit = JpegErrorExitCustom;
371   try {
372     jpeg_create_decompress(&cinfo);
373     JpegSetSource(&cinfo, input->GetBuffer(), input->SizeInBytes());
374     (void)jpeg_read_header(&cinfo, TRUE);
375     RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo));
376     jpeg_calc_output_dimensions(&cinfo);
377   } catch (std::runtime_error &e) {
378     return DestroyDecompressAndReturnError(e.what());
379   }
380   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_w) > crop_x,
381                                "JpegCropAndDecode: addition(crop x and crop width) out of bounds.");
382   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_h) > crop_y,
383                                "JpegCropAndDecode: addition(crop y and crop height) out of bounds.");
384   if (crop_x == 0 && crop_y == 0 && crop_w == 0 && crop_h == 0) {
385     crop_w = cinfo.output_width;
386     crop_h = cinfo.output_height;
387   } else if (crop_w == 0 || static_cast<unsigned int>(crop_w + crop_x) > cinfo.output_width || crop_h == 0 ||
388              static_cast<unsigned int>(crop_h + crop_y) > cinfo.output_height) {
389     return DestroyDecompressAndReturnError("Crop: invalid crop size.");
390   }
391   const int mcu_size = cinfo.min_DCT_scaled_size;
392   CHECK_FAIL_RETURN_UNEXPECTED(mcu_size != 0, "JpegCropAndDecode: divisor mcu_size is zero.");
393   unsigned int crop_x_aligned = (crop_x / mcu_size) * mcu_size;
394   unsigned int crop_w_aligned = crop_w + crop_x - crop_x_aligned;
395   try {
396     (void)jpeg_start_decompress(&cinfo);
397     jpeg_crop_scanline(&cinfo, &crop_x_aligned, &crop_w_aligned);
398   } catch (std::runtime_error &e) {
399     return DestroyDecompressAndReturnError(e.what());
400   }
401   JDIMENSION skipped_scanlines = jpeg_skip_scanlines(&cinfo, crop_y);
402   // three number of output components, always convert to RGB and output
403   constexpr int kOutNumComponents = 3;
404   TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents});
405   std::shared_ptr<Tensor> output_tensor;
406   RETURN_IF_NOT_OK(Tensor::CreateEmpty(ts, DataType(DataType::DE_UINT8), &output_tensor));
407   const int buffer_size = output_tensor->SizeInBytes();
408   JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>()));
409   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<float_t>::max() - skipped_scanlines) > crop_h,
410                                "JpegCropAndDecode: addition out of bounds.");
411   const int max_scanlines_to_read = skipped_scanlines + crop_h;
412   // stride refers to output tensor, which has 3 components at most
413   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() / crop_w) > kOutNumComponents,
414                                "JpegCropAndDecode: multiplication out of bounds.");
415   const int stride = crop_w * kOutNumComponents;
416   // offset is calculated for scanlines read from the image, therefore
417   // has the same number of components as the image
418   int minius_value = crop_x - crop_x_aligned;
419   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<float_t>::max() / minius_value) > cinfo.output_components,
420                                "JpegCropAndDecode: multiplication out of bounds.");
421   const int offset = minius_value * cinfo.output_components;
422   RETURN_IF_NOT_OK(
423     JpegReadScanlines(&cinfo, max_scanlines_to_read, buffer, buffer_size, crop_w, crop_w_aligned, offset, stride));
424   *output = output_tensor;
425   jpeg_destroy_decompress(&cinfo);
426   return Status::OK();
427 }
428 
Rescale(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float rescale,float shift)429 Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rescale, float shift) {
430   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
431   if (!input_cv->mat().data) {
432     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Rescale: load image failed.");
433   }
434   cv::Mat input_image = input_cv->mat();
435   std::shared_ptr<CVTensor> output_cv;
436   RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), DataType(DataType::DE_FLOAT32), &output_cv));
437   try {
438     input_image.convertTo(output_cv->mat(), CV_32F, rescale, shift);
439     *output = std::static_pointer_cast<Tensor>(output_cv);
440   } catch (const cv::Exception &e) {
441     RETURN_STATUS_UNEXPECTED("Rescale: " + std::string(e.what()));
442   }
443   return Status::OK();
444 }
445 
Crop(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int x,int y,int w,int h)446 Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h) {
447   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
448   if (!input_cv->mat().data) {
449     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Crop: load image failed.");
450   }
451   RETURN_IF_NOT_OK(ValidateImageRank("Crop", input_cv->Rank()));
452   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - y) > h,
453                                "Crop: addition(x and height) out of bounds.");
454   // account for integer overflow
455   if (y < 0 || (y + h) > input_cv->shape()[0] || (y + h) < 0) {
456     RETURN_STATUS_UNEXPECTED(
457       "Crop: invalid y coordinate value for crop, y coordinate value exceeds the boundary of the image, got y: " +
458       std::to_string(y));
459   }
460   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - x) > w, "Crop: addition out of bounds.");
461   // account for integer overflow
462   if (x < 0 || (x + w) > input_cv->shape()[1] || (x + w) < 0) {
463     RETURN_STATUS_UNEXPECTED(
464       "Crop: invalid x coordinate value for crop, "
465       "x coordinate value exceeds the boundary of the image, got x: " +
466       std::to_string(x));
467   }
468   try {
469     TensorShape shape{h, w};
470     if (input_cv->Rank() == DEFAULT_IMAGE_RANK) {
471       int num_channels = input_cv->shape()[CHANNEL_INDEX];
472       shape = shape.AppendDim(num_channels);
473     }
474     std::shared_ptr<CVTensor> output_cv;
475     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));
476     cv::Rect roi(x, y, w, h);
477     (input_cv->mat())(roi).copyTo(output_cv->mat());
478     *output = std::static_pointer_cast<Tensor>(output_cv);
479     return Status::OK();
480   } catch (const cv::Exception &e) {
481     RETURN_STATUS_UNEXPECTED("Crop: " + std::string(e.what()));
482   }
483 }
484 
ConvertColor(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,ConvertMode convert_mode)485 Status ConvertColor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, ConvertMode convert_mode) {
486   try {
487     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
488     RETURN_IF_NOT_OK(ValidateImageRank("ConvertColor", input_cv->Rank()));
489     if (!input_cv->mat().data) {
490       RETURN_STATUS_UNEXPECTED("[Internal ERROR] ConvertColor: load image failed.");
491     }
492     if (input_cv->Rank() == DEFAULT_IMAGE_RANK) {
493       int num_channels = input_cv->shape()[CHANNEL_INDEX];
494       if (num_channels != DEFAULT_IMAGE_CHANNELS && num_channels != MAX_IMAGE_CHANNELS) {
495         RETURN_STATUS_UNEXPECTED("ConvertColor: number of channels of image should be 3 or 4, but got:" +
496                                  std::to_string(num_channels));
497       }
498     }
499     std::vector<dsize_t> node;
500     RETURN_IF_NOT_OK(GetConvertShape(convert_mode, input_cv, &node));
501     if (node.empty()) {
502       RETURN_STATUS_UNEXPECTED(
503         "ConvertColor: convert mode must be in ConvertMode, which mainly includes conversion "
504         "between RGB, BGR, GRAY, RGBA etc.");
505     }
506     TensorShape out_shape = TensorShape(node);
507     std::shared_ptr<CVTensor> output_cv;
508     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
509     cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(convert_mode));
510     *output = std::static_pointer_cast<Tensor>(output_cv);
511     return Status::OK();
512   } catch (const cv::Exception &e) {
513     RETURN_STATUS_UNEXPECTED("ConvertColor: " + std::string(e.what()));
514   }
515 }
516 
HwcToChw(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)517 Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
518   try {
519     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
520     if (!input_cv->mat().data) {
521       RETURN_STATUS_UNEXPECTED("[Internal ERROR] HWC2CHW: load image failed.");
522     }
523     if (input_cv->Rank() == 2) {
524       // If input tensor is 2D, we assume we have hw dimensions
525       *output = input;
526       return Status::OK();
527     }
528     CHECK_FAIL_RETURN_UNEXPECTED(input_cv->shape().Size() > CHANNEL_INDEX, "HWC2CHW: invalid shape.");
529     int num_channels = input_cv->shape()[CHANNEL_INDEX];
530     if (input_cv->shape().Size() < MIN_IMAGE_DIMENSION || input_cv->shape().Size() > DEFAULT_IMAGE_CHANNELS ||
531         (input_cv->shape().Size() == DEFAULT_IMAGE_CHANNELS && num_channels != DEFAULT_IMAGE_CHANNELS &&
532          num_channels != MIN_IMAGE_CHANNELS)) {
533       RETURN_STATUS_UNEXPECTED("HWC2CHW: image shape is not <H,W,C>, but got rank: " +
534                                std::to_string(input_cv->shape().Size()));
535     }
536     cv::Mat output_img;
537 
538     int height = input_cv->shape()[0];
539     int width = input_cv->shape()[1];
540 
541     std::shared_ptr<CVTensor> output_cv;
542     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(TensorShape{num_channels, height, width}, input_cv->type(), &output_cv));
543     for (int i = 0; i < num_channels; ++i) {
544       cv::Mat mat;
545       RETURN_IF_NOT_OK(output_cv->MatAtIndex({i}, &mat));
546       cv::extractChannel(input_cv->mat(), mat, i);
547     }
548     *output = std::move(output_cv);
549     return Status::OK();
550   } catch (const cv::Exception &e) {
551     RETURN_STATUS_UNEXPECTED("HWC2CHW: " + std::string(e.what()));
552   }
553 }
554 
MaskWithTensor(const std::shared_ptr<Tensor> & sub_mat,std::shared_ptr<Tensor> * input,int x,int y,int crop_width,int crop_height,ImageFormat image_format)555 Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Tensor> *input, int x, int y,
556                       int crop_width, int crop_height, ImageFormat image_format) {
557   if (image_format == ImageFormat::HWC) {
558     if (CheckTensorShape(*input, 2)) {
559       RETURN_STATUS_UNEXPECTED(
560         "CutMixBatch: MaskWithTensor failed: "
561         "input shape doesn't match <H,W,C> format.");
562     }
563     if (CheckTensorShape(sub_mat, 2)) {
564       RETURN_STATUS_UNEXPECTED(
565         "CutMixBatch: MaskWithTensor failed: "
566         "sub_mat shape doesn't match <H,W,C> format.");
567     }
568     int number_of_channels = (*input)->shape()[CHANNEL_INDEX];
569     for (int i = 0; i < crop_width; i++) {
570       for (int j = 0; j < crop_height; j++) {
571         for (int c = 0; c < number_of_channels; c++) {
572           RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {j, i, c}, {y + j, x + i, c}));
573         }
574       }
575     }
576   } else if (image_format == ImageFormat::CHW) {
577     if (CheckTensorShape(*input, 0)) {
578       RETURN_STATUS_UNEXPECTED(
579         "CutMixBatch: MaskWithTensor failed: "
580         "input shape doesn't match <C,H,W> format.");
581     }
582     if (CheckTensorShape(sub_mat, 0)) {
583       RETURN_STATUS_UNEXPECTED(
584         "CutMixBatch: MaskWithTensor failed: "
585         "sub_mat shape doesn't match <C,H,W> format.");
586     }
587     int number_of_channels = (*input)->shape()[0];
588     for (int i = 0; i < crop_width; i++) {
589       for (int j = 0; j < crop_height; j++) {
590         for (int c = 0; c < number_of_channels; c++) {
591           RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {c, j, i}, {c, y + j, x + i}));
592         }
593       }
594     }
595   } else if (image_format == ImageFormat::HW) {
596     if ((*input)->Rank() != MIN_IMAGE_DIMENSION) {
597       RETURN_STATUS_UNEXPECTED(
598         "CutMixBatch: MaskWithTensor failed: "
599         "input shape doesn't match <H,W> format.");
600     }
601     if (sub_mat->Rank() != MIN_IMAGE_DIMENSION) {
602       RETURN_STATUS_UNEXPECTED(
603         "CutMixBatch: MaskWithTensor failed: "
604         "sub_mat shape doesn't match <H,W> format.");
605     }
606     for (int i = 0; i < crop_width; i++) {
607       for (int j = 0; j < crop_height; j++) {
608         RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {j, i}, {y + j, x + i}));
609       }
610     }
611   } else {
612     RETURN_STATUS_UNEXPECTED(
613       "CutMixBatch: MaskWithTensor failed: "
614       "image format must be <C,H,W>, <H,W,C>, or <H,W>.");
615   }
616   return Status::OK();
617 }
618 
CopyTensorValue(const std::shared_ptr<Tensor> & source_tensor,std::shared_ptr<Tensor> * dest_tensor,const std::vector<int64_t> & source_indx,const std::vector<int64_t> & dest_indx)619 Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared_ptr<Tensor> *dest_tensor,
620                        const std::vector<int64_t> &source_indx, const std::vector<int64_t> &dest_indx) {
621   if (source_tensor->type() != (*dest_tensor)->type())
622     RETURN_STATUS_UNEXPECTED(
623       "CutMixBatch: CopyTensorValue failed: "
624       "source and destination tensor must have the same type.");
625   if (source_tensor->type() == DataType::DE_UINT8) {
626     uint8_t pixel_value = 0;
627     RETURN_IF_NOT_OK(source_tensor->GetItemAt(&pixel_value, source_indx));
628     RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, pixel_value));
629   } else if (source_tensor->type() == DataType::DE_FLOAT32) {
630     float pixel_value = 0;
631     RETURN_IF_NOT_OK(source_tensor->GetItemAt(&pixel_value, source_indx));
632     RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, pixel_value));
633   } else {
634     RETURN_STATUS_UNEXPECTED(
635       "CutMixBatch: CopyTensorValue failed: "
636       "Tensor type is not supported. Tensor type must be float32 or uint8.");
637   }
638   return Status::OK();
639 }
640 
SwapRedAndBlue(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)641 Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
642   try {
643     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
644     CHECK_FAIL_RETURN_UNEXPECTED(input_cv->shape().Size() > CHANNEL_INDEX, "SwapRedAndBlue: shape is invalid.");
645     int num_channels = input_cv->shape()[CHANNEL_INDEX];
646     if (input_cv->shape().Size() != 3 || num_channels != DEFAULT_IMAGE_CHANNELS) {
647       RETURN_STATUS_UNEXPECTED("SwapRedBlue: image shape is not <H,W,C>.");
648     }
649     std::shared_ptr<CVTensor> output_cv;
650     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
651 
652     cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_BGR2RGB));
653     *output = std::static_pointer_cast<Tensor>(output_cv);
654     return Status::OK();
655   } catch (const cv::Exception &e) {
656     RETURN_STATUS_UNEXPECTED("SwapRedBlue: " + std::string(e.what()));
657   }
658 }
659 
CropAndResize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int x,int y,int crop_height,int crop_width,int target_height,int target_width,InterpolationMode mode)660 Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y,
661                      int crop_height, int crop_width, int target_height, int target_width, InterpolationMode mode) {
662   try {
663     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
664     if (!input_cv->mat().data) {
665       RETURN_STATUS_UNEXPECTED("[Internal ERROR] CropAndResize: load image failed.");
666     }
667     RETURN_IF_NOT_OK(ValidateImageRank("CropAndResize", input_cv->Rank()));
668     // image too large or too small, 1000 is arbitrary here to prevent opencv from segmentation fault
669     const uint32_t kCropShapeLimits = 1000;
670     if (crop_height == 0 || crop_width == 0 || target_height == 0 || target_height > crop_height * kCropShapeLimits ||
671         target_width == 0 || target_width > crop_width * kCropShapeLimits) {
672       std::string err_msg =
673         "CropAndResize: the resizing width or height 1) is too big, it's up to " + std::to_string(kCropShapeLimits) +
674         " times the original image; 2) can not be 0. Detail info is: crop_height: " + std::to_string(crop_height) +
675         ", crop_width: " + std::to_string(crop_width) + ", target_height: " + std::to_string(target_height) +
676         ", target_width: " + std::to_string(target_width);
677       RETURN_STATUS_UNEXPECTED(err_msg);
678     }
679     cv::Rect roi(x, y, crop_width, crop_height);
680     auto cv_mode = GetCVInterpolationMode(mode);
681     cv::Mat cv_in = input_cv->mat();
682 
683     if (mode == InterpolationMode::kCubicPil) {
684       cv::Mat input_roi = cv_in(roi);
685       std::shared_ptr<CVTensor> input_image;
686       RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_roi, input_cv->Rank(), &input_image));
687       LiteMat imIn, imOut;
688       std::shared_ptr<Tensor> output_tensor;
689       TensorShape new_shape = TensorShape({target_height, target_width, 3});
690       RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input_cv->type(), &output_tensor));
691       uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
692       imOut.Init(target_width, target_height, input_cv->shape()[2], reinterpret_cast<void *>(buffer), LDataType::UINT8);
693       imIn.Init(input_image->shape()[1], input_image->shape()[0], input_image->shape()[2], input_image->mat().data,
694                 LDataType::UINT8);
695       if (ResizeCubic(imIn, imOut, target_width, target_height) == false) {
696         RETURN_STATUS_UNEXPECTED("Resize: failed to do resize, please check the error msg.");
697       }
698       *output = output_tensor;
699       return Status::OK();
700     }
701 
702     TensorShape shape{target_height, target_width};
703     int num_channels = input_cv->shape()[CHANNEL_INDEX];
704     if (input_cv->Rank() == DEFAULT_IMAGE_RANK) shape = shape.AppendDim(num_channels);
705     std::shared_ptr<CVTensor> cvt_out;
706     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &cvt_out));
707     cv::resize(cv_in(roi), cvt_out->mat(), cv::Size(target_width, target_height), 0, 0, cv_mode);
708     *output = std::static_pointer_cast<Tensor>(cvt_out);
709     return Status::OK();
710   } catch (const cv::Exception &e) {
711     RETURN_STATUS_UNEXPECTED("CropAndResize: " + std::string(e.what()));
712   }
713 }
714 
// Rotate the input image by `degree` degrees around `center` (defaults to the
// image center when `center` is empty). If `expand` is true, the output canvas
// is enlarged to fit the whole rotated image; otherwise the original size is
// kept and the corners are clipped. Regions uncovered by the rotated image are
// filled with (fill_r, fill_g, fill_b).
Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> center,
              float degree, InterpolationMode interpolation, bool expand, uint8_t fill_r, uint8_t fill_g,
              uint8_t fill_b) {
  try {
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] Rotate: load image failed.");
    }
    RETURN_IF_NOT_OK(ValidateImageRank("Rotate", input_cv->Rank()));

    cv::Mat input_img = input_cv->mat();
    // Beyond 2 * MAX_INT_PRECISION a float can no longer represent the pixel
    // coordinate exactly, so the computed rotation center would be inaccurate.
    if (input_img.cols > (MAX_INT_PRECISION * 2) || input_img.rows > (MAX_INT_PRECISION * 2)) {
      RETURN_STATUS_UNEXPECTED("Rotate: image is too large and center is not precise.");
    }
    float fx = 0, fy = 0;
    if (center.empty()) {
      // default to center of image
      fx = (input_img.cols - 1) / 2.0;
      fy = (input_img.rows - 1) / 2.0;
    } else {
      fx = center[0];
      fy = center[1];
    }
    cv::Mat output_img;
    // NOTE(review): fill components are passed as (b, g, r), which assumes the
    // underlying Mat uses BGR channel order — confirm against the pipeline's
    // channel convention.
    cv::Scalar fill_color = cv::Scalar(fill_b, fill_g, fill_r);
    // maybe don't use uint32 for image dimension here
    cv::Point2f pc(fx, fy);
    cv::Mat rot = cv::getRotationMatrix2D(pc, degree, 1.0);
    std::shared_ptr<CVTensor> output_cv;
    if (!expand) {
      // this case means that the shape doesn't change, size stays the same
      // We may not need this memcpy if it is in place.
      RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
      // using inter_nearest to comply with python default
      cv::warpAffine(input_img, output_cv->mat(), rot, input_img.size(), GetCVInterpolationMode(interpolation),
                     cv::BORDER_CONSTANT, fill_color);
    } else {
      // we resize here since the shape changes
      // create a new bounding box with the rotate
      cv::Rect2f bbox = cv::RotatedRect(pc, input_img.size(), degree).boundingRect2f();
      // Shift the transform so the rotated image lands centered inside the
      // enlarged bounding box.
      rot.at<double>(0, 2) += bbox.width / 2.0 - input_img.cols / 2.0;
      rot.at<double>(1, 2) += bbox.height / 2.0 - input_img.rows / 2.0;
      // use memcpy and don't compute the new shape since openCV has a rounding problem
      cv::warpAffine(input_img, output_img, rot, bbox.size(), GetCVInterpolationMode(interpolation),
                     cv::BORDER_CONSTANT, fill_color);
      RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, input_cv->Rank(), &output_cv));
      RETURN_UNEXPECTED_IF_NULL(output_cv);
    }
    *output = std::static_pointer_cast<Tensor>(output_cv);
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("Rotate: " + std::string(e.what()));
  }
  return Status::OK();
}
769 
770 template <typename T>
Normalize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,std::vector<float> mean,std::vector<float> std)771 void Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean,
772                std::vector<float> std) {
773   auto itr_out = (*output)->begin<float>();
774   auto itr = input->begin<T>();
775   auto end = input->end<T>();
776   int64_t num_channels = (*output)->shape()[CHANNEL_INDEX];
777 
778   while (itr != end) {
779     for (int64_t i = 0; i < num_channels; i++) {
780       *itr_out = static_cast<float>(*itr) / std[i] - mean[i];
781       ++itr_out;
782       ++itr;
783     }
784   }
785 }
786 
// Normalize an image tensor with per-channel mean and std, producing a float32
// tensor of the same shape. Accepts <H,W> (treated as a single channel) or
// <H,W,C> input of any numeric element type. If a single mean/std value is
// supplied for a multi-channel image, it is broadcast to every channel.
Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean,
                 std::vector<float> std) {
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), DataType(DataType::DE_FLOAT32), output));
  // A rank-2 <H,W> image is temporarily viewed as <H,W,1> so the per-channel
  // kernel works uniformly; it is squeezed back to rank 2 at the end.
  if (input->Rank() == MIN_IMAGE_DIMENSION) {
    RETURN_IF_NOT_OK((*output)->ExpandDim(MIN_IMAGE_DIMENSION));
  }

  CHECK_FAIL_RETURN_UNEXPECTED((*output)->Rank() == DEFAULT_IMAGE_RANK, "Normalize: image shape is not <H,W,C>.");
  CHECK_FAIL_RETURN_UNEXPECTED(std.size() == mean.size(),
                               "Normalize: mean and std vectors are not of same size, got size of std:" +
                                 std::to_string(std.size()) + ", and mean size:" + std::to_string(mean.size()));

  // caller provided 1 mean/std value and there are more than one channel --> duplicate mean/std value
  if (mean.size() == 1 && (*output)->shape()[CHANNEL_INDEX] != 1) {
    for (int64_t i = 0; i < (*output)->shape()[CHANNEL_INDEX] - 1; i++) {
      mean.push_back(mean[0]);
      std.push_back(std[0]);
    }
  }
  CHECK_FAIL_RETURN_UNEXPECTED((*output)->shape()[CHANNEL_INDEX] == mean.size(),
                               "Normalize: number of channels does not match the size of mean and std vectors, got "
                               "channels: " +
                                 std::to_string((*output)->shape()[CHANNEL_INDEX]) +
                                 ", size of mean:" + std::to_string(mean.size()));

  // Dispatch to the typed kernel based on the input element type.
  switch (input->type().value()) {
    case DataType::DE_BOOL:
      Normalize<bool>(input, output, mean, std);
      break;
    case DataType::DE_INT8:
      Normalize<int8_t>(input, output, mean, std);
      break;
    case DataType::DE_UINT8:
      Normalize<uint8_t>(input, output, mean, std);
      break;
    case DataType::DE_INT16:
      Normalize<int16_t>(input, output, mean, std);
      break;
    case DataType::DE_UINT16:
      Normalize<uint16_t>(input, output, mean, std);
      break;
    case DataType::DE_INT32:
      Normalize<int32_t>(input, output, mean, std);
      break;
    case DataType::DE_UINT32:
      Normalize<uint32_t>(input, output, mean, std);
      break;
    case DataType::DE_INT64:
      Normalize<int64_t>(input, output, mean, std);
      break;
    case DataType::DE_UINT64:
      Normalize<uint64_t>(input, output, mean, std);
      break;
    case DataType::DE_FLOAT16:
      Normalize<float16>(input, output, mean, std);
      break;
    case DataType::DE_FLOAT32:
      Normalize<float>(input, output, mean, std);
      break;
    case DataType::DE_FLOAT64:
      Normalize<double>(input, output, mean, std);
      break;
    default:
      RETURN_STATUS_UNEXPECTED(
        "Normalize: unsupported type, currently supported types include "
        "[bool,int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,int64_t,uint64_t,float16,float,double].");
  }

  // Undo the temporary channel dimension added for rank-2 input.
  if (input->Rank() == MIN_IMAGE_DIMENSION) {
    (*output)->Squeeze();
  }
  return Status::OK();
}
860 
NormalizePad(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const std::shared_ptr<Tensor> & mean,const std::shared_ptr<Tensor> & std,const std::string & dtype)861 Status NormalizePad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
862                     const std::shared_ptr<Tensor> &mean, const std::shared_ptr<Tensor> &std, const std::string &dtype) {
863   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
864   if (!(input_cv->mat().data && input_cv->Rank() == DEFAULT_IMAGE_CHANNELS)) {
865     RETURN_STATUS_UNEXPECTED("[Internal ERROR] NormalizePad: load image failed.");
866   }
867   DataType tensor_type = DataType(DataType::DE_FLOAT32);
868   int compute_type = CV_32F;
869   int channel_type = CV_32FC1;
870   if (dtype == "float16") {
871     compute_type = CV_16F;
872     channel_type = CV_16FC1;
873     tensor_type = DataType(DataType::DE_FLOAT16);
874   }
875   cv::Mat in_image = input_cv->mat();
876   std::shared_ptr<CVTensor> output_cv;
877   TensorShape new_shape({input_cv->shape()[0], input_cv->shape()[1], input_cv->shape()[2] + 1});
878   RETURN_IF_NOT_OK(CVTensor::CreateEmpty(new_shape, tensor_type, &output_cv));
879   mean->Squeeze();
880   if (mean->type() != DataType::DE_FLOAT32 || mean->Rank() != 1 || mean->shape()[0] != DEFAULT_IMAGE_CHANNELS) {
881     std::string err_msg =
882       "NormalizePad: mean tensor should be of size 3 and type float, but got rank: " + std::to_string(mean->Rank()) +
883       ", and type: " + mean->type().ToString();
884     return Status(StatusCode::kMDShapeMisMatch, err_msg);
885   }
886   std->Squeeze();
887   if (std->type() != DataType::DE_FLOAT32 || std->Rank() != 1 || std->shape()[0] != DEFAULT_IMAGE_CHANNELS) {
888     std::string err_msg =
889       "NormalizePad: std tensor should be of size 3 and type float, but got rank: " + std::to_string(std->Rank()) +
890       ", and type: " + std->type().ToString();
891     return Status(StatusCode::kMDShapeMisMatch, err_msg);
892   }
893   try {
894     // NOTE: We are assuming the input image is in RGB and the mean
895     // and std are in RGB
896     std::vector<cv::Mat> rgb;
897     cv::split(in_image, rgb);
898     if (rgb.size() != DEFAULT_IMAGE_CHANNELS) {
899       RETURN_STATUS_UNEXPECTED("NormalizePad: input image is not in RGB, got rank: " + std::to_string(in_image.dims));
900     }
901     for (int8_t i = 0; i < DEFAULT_IMAGE_CHANNELS; i++) {
902       float mean_c, std_c;
903       RETURN_IF_NOT_OK(mean->GetItemAt<float>(&mean_c, {i}));
904       RETURN_IF_NOT_OK(std->GetItemAt<float>(&std_c, {i}));
905       rgb[i].convertTo(rgb[i], compute_type, 1.0 / std_c, (-mean_c / std_c));
906     }
907     rgb.push_back(cv::Mat::zeros(in_image.rows, in_image.cols, channel_type));
908     cv::merge(rgb, output_cv->mat());
909     *output = std::static_pointer_cast<Tensor>(output_cv);
910     return Status::OK();
911   } catch (const cv::Exception &e) {
912     RETURN_STATUS_UNEXPECTED("NormalizePad: " + std::string(e.what()));
913   }
914 }
915 
AdjustBrightness(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const float & alpha)916 Status AdjustBrightness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha) {
917   try {
918     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
919     cv::Mat input_img = input_cv->mat();
920     if (!input_cv->mat().data) {
921       RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustBrightness: load image failed.");
922     }
923     CHECK_FAIL_RETURN_UNEXPECTED(
924       input_cv->shape().Size() > CHANNEL_INDEX,
925       "AdjustBrightness: image rank should not bigger than:" + std::to_string(CHANNEL_INDEX) +
926         ", but got: " + std::to_string(input_cv->shape().Size()));
927     int num_channels = input_cv->shape()[CHANNEL_INDEX];
928     // Rank of the image represents how many dimensions, image is expected to be HWC
929     if (input_cv->Rank() != DEFAULT_IMAGE_RANK || num_channels != DEFAULT_IMAGE_CHANNELS) {
930       RETURN_STATUS_UNEXPECTED("AdjustBrightness: image shape is not <H,W,C> or channel is not 3, got image rank: " +
931                                std::to_string(input_cv->Rank()) + ", and channel:" + std::to_string(num_channels));
932     }
933     std::shared_ptr<CVTensor> output_cv;
934     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
935     output_cv->mat() = input_img * alpha;
936     *output = std::static_pointer_cast<Tensor>(output_cv);
937   } catch (const cv::Exception &e) {
938     RETURN_STATUS_UNEXPECTED("AdjustBrightness: " + std::string(e.what()));
939   }
940   return Status::OK();
941 }
942 
AdjustContrast(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const float & alpha)943 Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha) {
944   try {
945     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
946     cv::Mat input_img = input_cv->mat();
947     if (!input_cv->mat().data) {
948       RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustContrast: load image failed.");
949     }
950     CHECK_FAIL_RETURN_UNEXPECTED(input_cv->shape().Size() > CHANNEL_INDEX,
951                                  "AdjustContrast: image rank should not bigger than:" + std::to_string(CHANNEL_INDEX) +
952                                    ", but got: " + std::to_string(input_cv->shape().Size()));
953     int num_channels = input_cv->shape()[CHANNEL_INDEX];
954     if (input_cv->Rank() != DEFAULT_IMAGE_CHANNELS || num_channels != DEFAULT_IMAGE_CHANNELS) {
955       RETURN_STATUS_UNEXPECTED("AdjustContrast: image shape is not <H,W,C> or channel is not 3, got image rank: " +
956                                std::to_string(input_cv->Rank()) + ", and channel:" + std::to_string(num_channels));
957     }
958     cv::Mat gray, output_img;
959     cv::cvtColor(input_img, gray, CV_RGB2GRAY);
960     int mean_img = static_cast<int>(cv::mean(gray).val[0] + 0.5);
961     std::shared_ptr<CVTensor> output_cv;
962     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
963     output_img = cv::Mat::zeros(input_img.rows, input_img.cols, CV_8UC1);
964     output_img = output_img + mean_img;
965     cv::cvtColor(output_img, output_img, CV_GRAY2RGB);
966     output_cv->mat() = output_img * (1.0 - alpha) + input_img * alpha;
967     *output = std::static_pointer_cast<Tensor>(output_cv);
968   } catch (const cv::Exception &e) {
969     RETURN_STATUS_UNEXPECTED("AdjustContrast: " + std::string(e.what()));
970   }
971   return Status::OK();
972 }
973 
// Apply gamma correction to an image. Float input is transformed element-wise
// and clamped to [0, 1]; integer (uint8) input goes through a precomputed
// 256-entry lookup table. Supports 1- or 3-channel images.
Status AdjustGamma(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &gamma,
                   const float &gain) {
  try {
    int num_channels = 1;
    if (input->Rank() < 2) {
      RETURN_STATUS_UNEXPECTED("AdjustGamma: input tensor is not in shape of <...,H,W,C> or <H,W>.");
    }
    if (input->Rank() > 2) {
      // Channels are the last axis for rank > 2 input.
      num_channels = input->shape()[-1];
    }
    if (num_channels != 1 && num_channels != 3) {
      RETURN_STATUS_UNEXPECTED("AdjustGamma: channel of input image should be 1 or 3, but got: " +
                               std::to_string(num_channels));
    }
    if (input->type().IsFloat()) {
      // NOTE(review): this path mutates the INPUT tensor in place and returns
      // it as the output (they alias) — confirm callers do not rely on the
      // input staying unchanged.
      // NOTE(review): the float path computes pow(x * gain, gamma) while the
      // LUT path below computes gain * pow(x, gamma); verify which formula is
      // intended — they differ whenever gain != 1.
      for (auto itr = input->begin<float>(); itr != input->end<float>(); itr++) {
        *itr = pow((*itr) * gain, gamma);
        // Clamp to the valid [0, 1] float-image range.
        *itr = std::min(std::max((*itr), 0.0f), 1.0f);
      }
      *output = input;
    } else {
      std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
      if (!input_cv->mat().data) {
        RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustGamma: load image failed.");
      }
      cv::Mat input_img = input_cv->mat();
      std::shared_ptr<CVTensor> output_cv;
      RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
      // Precompute the gamma curve for all 256 uint8 levels. The (255 + 1 - 1e-3)
      // factor maps [0,1] onto [0,255] while keeping floor() from producing 256.
      uchar LUT[256] = {};
      for (int i = 0; i < 256; i++) {
        float f = i / 255.0;
        f = pow(f, gamma);
        LUT[i] = static_cast<uchar>(floor(std::min(f * (255.0 + 1 - 1e-3) * gain, 255.0)));
      }
      // Apply the LUT in place on the input Mat (single- vs three-channel walk).
      if (input_img.channels() == 1) {
        cv::MatIterator_<uchar> it = input_img.begin<uchar>();
        cv::MatIterator_<uchar> it_end = input_img.end<uchar>();
        for (; it != it_end; ++it) {
          *it = LUT[(*it)];
        }
      } else {
        cv::MatIterator_<cv::Vec3b> it = input_img.begin<cv::Vec3b>();
        cv::MatIterator_<cv::Vec3b> it_end = input_img.end<cv::Vec3b>();
        for (; it != it_end; ++it) {
          (*it)[0] = LUT[(*it)[0]];
          (*it)[1] = LUT[(*it)[1]];
          (*it)[2] = LUT[(*it)[2]];
        }
      }
      // `* 1` forces a fresh Mat expression so the assignment copies the data
      // into the output tensor rather than sharing the input buffer.
      output_cv->mat() = input_img * 1;
      *output = std::static_pointer_cast<Tensor>(output_cv);
    }
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("AdjustGamma: " + std::string(e.what()));
  }
  return Status::OK();
}
1031 
AutoContrast(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const float & cutoff,const std::vector<uint32_t> & ignore)1032 Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &cutoff,
1033                     const std::vector<uint32_t> &ignore) {
1034   try {
1035     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1036     if (!input_cv->mat().data) {
1037       RETURN_STATUS_UNEXPECTED("[Internal ERROR] AutoContrast: load image failed.");
1038     }
1039     if (input_cv->Rank() != DEFAULT_IMAGE_RANK && input_cv->Rank() != MIN_IMAGE_DIMENSION) {
1040       RETURN_STATUS_UNEXPECTED("AutoContrast: image channel should be 1 or 3,  but got: " +
1041                                std::to_string(input_cv->Rank()));
1042     }
1043     // Reshape to extend dimension if rank is 2 for algorithm to work. then reshape output to be of rank 2 like input
1044     if (input_cv->Rank() == MIN_IMAGE_DIMENSION) {
1045       RETURN_IF_NOT_OK(input_cv->ExpandDim(MIN_IMAGE_DIMENSION));
1046     }
1047     // Get number of channels and image matrix
1048     std::size_t num_of_channels = input_cv->shape()[CHANNEL_INDEX];
1049     if (num_of_channels != MIN_IMAGE_CHANNELS && num_of_channels != DEFAULT_IMAGE_CHANNELS) {
1050       RETURN_STATUS_UNEXPECTED("AutoContrast: channel of input image should be 1 or 3, but got: " +
1051                                std::to_string(num_of_channels));
1052     }
1053     cv::Mat image = input_cv->mat();
1054     // Separate the image to channels
1055     std::vector<cv::Mat> planes(num_of_channels);
1056     cv::split(image, planes);
1057     cv::Mat b_hist, g_hist, r_hist;
1058     // Establish the number of bins and set variables for histogram
1059     int32_t hist_size = 256;
1060     int32_t channels = 0;
1061     float range[] = {0, 256};
1062     const float *hist_range[] = {range};
1063     bool uniform = true, accumulate = false;
1064     // Set up lookup table for LUT(Look up table algorithm)
1065     std::vector<int32_t> table;
1066     std::vector<cv::Mat> image_result;
1067     for (std::size_t layer = 0; layer < planes.size(); layer++) {
1068       // Reset lookup table
1069       table = std::vector<int32_t>{};
1070       // Calculate Histogram for channel
1071       cv::Mat hist;
1072       cv::calcHist(&planes[layer], 1, &channels, cv::Mat(), hist, 1, &hist_size, hist_range, uniform, accumulate);
1073       hist.convertTo(hist, CV_32SC1);
1074       std::vector<int32_t> hist_vec;
1075       hist.col(0).copyTo(hist_vec);
1076       // Ignore values in ignore
1077       for (const auto &item : ignore) hist_vec[item] = 0;
1078       int32_t hi = 255;
1079       int32_t lo = 0;
1080       RETURN_IF_NOT_OK(ComputeUpperAndLowerPercentiles(&hist_vec, cutoff, cutoff, &hi, &lo));
1081       if (hi <= lo) {
1082         for (int32_t i = 0; i < 256; i++) {
1083           table.push_back(i);
1084         }
1085       } else {
1086         const float scale = 255.0 / (hi - lo);
1087         const float offset = -1 * lo * scale;
1088         for (int32_t i = 0; i < 256; i++) {
1089           int32_t ix = static_cast<int32_t>(i * scale + offset);
1090           ix = std::max(ix, 0);
1091           ix = std::min(ix, MAX_BIT_VALUE);
1092           table.push_back(ix);
1093         }
1094       }
1095       cv::Mat result_layer;
1096       cv::LUT(planes[layer], table, result_layer);
1097       image_result.push_back(result_layer);
1098     }
1099     cv::Mat result;
1100     cv::merge(image_result, result);
1101     result.convertTo(result, input_cv->mat().type());
1102     std::shared_ptr<CVTensor> output_cv;
1103     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
1104     (*output) = std::static_pointer_cast<Tensor>(output_cv);
1105     RETURN_IF_NOT_OK((*output)->Reshape(input_cv->shape()));
1106   } catch (const cv::Exception &e) {
1107     RETURN_STATUS_UNEXPECTED("AutoContrast: " + std::string(e.what()));
1108   }
1109   return Status::OK();
1110 }
1111 
AdjustSaturation(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const float & alpha)1112 Status AdjustSaturation(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha) {
1113   try {
1114     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1115     cv::Mat input_img = input_cv->mat();
1116     if (!input_cv->mat().data) {
1117       RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustSaturation: load image failed.");
1118     }
1119     CHECK_FAIL_RETURN_UNEXPECTED(
1120       input_cv->shape().Size() > CHANNEL_INDEX,
1121       "AdjustSaturation: image rank should not bigger than: " + std::to_string(CHANNEL_INDEX) +
1122         ", but got: " + std::to_string(input_cv->shape().Size()));
1123     int num_channels = input_cv->shape()[CHANNEL_INDEX];
1124     if (input_cv->Rank() != DEFAULT_IMAGE_RANK || num_channels != DEFAULT_IMAGE_CHANNELS) {
1125       RETURN_STATUS_UNEXPECTED("AdjustSaturation: image shape is not <H,W,C> or channel is not 3, but got rank: " +
1126                                std::to_string(input_cv->Rank()) + ", and channel: " + std::to_string(num_channels));
1127     }
1128     std::shared_ptr<CVTensor> output_cv;
1129     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1130     cv::Mat output_img = output_cv->mat();
1131     cv::Mat gray;
1132     cv::cvtColor(input_img, gray, CV_RGB2GRAY);
1133     cv::cvtColor(gray, output_img, CV_GRAY2RGB);
1134     output_cv->mat() = output_img * (1.0 - alpha) + input_img * alpha;
1135     *output = std::static_pointer_cast<Tensor>(output_cv);
1136   } catch (const cv::Exception &e) {
1137     RETURN_STATUS_UNEXPECTED("AdjustSaturation: " + std::string(e.what()));
1138   }
1139   return Status::OK();
1140 }
1141 
AdjustHue(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const float & hue)1142 Status AdjustHue(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &hue) {
1143   if (hue > 0.5 || hue < -0.5) {
1144     RETURN_STATUS_UNEXPECTED("AdjustHue: invalid parameter, hue should within [-0.5, 0.5], but got: " +
1145                              std::to_string(hue));
1146   }
1147   try {
1148     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1149     cv::Mat input_img = input_cv->mat();
1150     if (!input_cv->mat().data) {
1151       RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustHue: load image failed.");
1152     }
1153     CHECK_FAIL_RETURN_UNEXPECTED(input_cv->shape().Size() > 2,
1154                                  "AdjustHue: image rank should not bigger than:" + std::to_string(2) +
1155                                    ", but got: " + std::to_string(input_cv->shape().Size()));
1156     int num_channels = input_cv->shape()[2];
1157     if (input_cv->Rank() != DEFAULT_IMAGE_RANK || num_channels != DEFAULT_IMAGE_CHANNELS) {
1158       RETURN_STATUS_UNEXPECTED("AdjustHue: image shape is not <H,W,C> or channel is not 3, but got rank: " +
1159                                std::to_string(input_cv->Rank()) + ", and channel: " + std::to_string(num_channels));
1160     }
1161     std::shared_ptr<CVTensor> output_cv;
1162     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1163     cv::Mat output_img;
1164     cv::cvtColor(input_img, output_img, CV_RGB2HSV_FULL);
1165     for (int y = 0; y < output_img.cols; y++) {
1166       for (int x = 0; x < output_img.rows; x++) {
1167         uint8_t cur1 = output_img.at<cv::Vec3b>(cv::Point(y, x))[0];
1168         uint8_t h_hue = 0;
1169         h_hue = static_cast<uint8_t>(hue * MAX_BIT_VALUE);
1170         cur1 += h_hue;
1171         output_img.at<cv::Vec3b>(cv::Point(y, x))[0] = cur1;
1172       }
1173     }
1174     cv::cvtColor(output_img, output_cv->mat(), CV_HSV2RGB_FULL);
1175     *output = std::static_pointer_cast<Tensor>(output_cv);
1176   } catch (const cv::Exception &e) {
1177     RETURN_STATUS_UNEXPECTED("AdjustHue: " + std::string(e.what()));
1178   }
1179   return Status::OK();
1180 }
1181 
Equalize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1182 Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1183   try {
1184     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1185     if (!input_cv->mat().data) {
1186       RETURN_STATUS_UNEXPECTED("[Internal ERROR] Equalize: load image failed.");
1187     }
1188     if (input_cv->Rank() != DEFAULT_IMAGE_RANK && input_cv->Rank() != MIN_IMAGE_DIMENSION) {
1189       RETURN_STATUS_UNEXPECTED("Equalize: image rank should be 1 or 3,  but got: " + std::to_string(input_cv->Rank()));
1190     }
1191     // For greyscale images, extend dimension if rank is 2 and reshape output to be of rank 2.
1192     if (input_cv->Rank() == MIN_IMAGE_DIMENSION) {
1193       RETURN_IF_NOT_OK(input_cv->ExpandDim(MIN_IMAGE_DIMENSION));
1194     }
1195     // Get number of channels and image matrix
1196     std::size_t num_of_channels = input_cv->shape()[CHANNEL_INDEX];
1197     if (num_of_channels != MIN_IMAGE_CHANNELS && num_of_channels != DEFAULT_IMAGE_CHANNELS) {
1198       RETURN_STATUS_UNEXPECTED("Equalize: channel of input image should be 1 or 3, but got: " +
1199                                std::to_string(num_of_channels));
1200     }
1201     cv::Mat image = input_cv->mat();
1202     // Separate the image to channels
1203     std::vector<cv::Mat> planes(num_of_channels);
1204     cv::split(image, planes);
1205     // Equalize each channel separately
1206     std::vector<cv::Mat> image_result;
1207     for (std::size_t layer = 0; layer < planes.size(); layer++) {
1208       cv::Mat channel_result;
1209       cv::equalizeHist(planes[layer], channel_result);
1210       image_result.push_back(channel_result);
1211     }
1212     cv::Mat result;
1213     cv::merge(image_result, result);
1214     std::shared_ptr<CVTensor> output_cv;
1215     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
1216     (*output) = std::static_pointer_cast<Tensor>(output_cv);
1217     RETURN_IF_NOT_OK((*output)->Reshape(input_cv->shape()));
1218   } catch (const cv::Exception &e) {
1219     RETURN_STATUS_UNEXPECTED("Equalize: " + std::string(e.what()));
1220   }
1221   return Status::OK();
1222 }
1223 
// Erases num_patches rectangular regions from the image in place, filling each
// with either a constant RGB color or per-pixel random values. This routine is
// shared by CutOut (bounded == true) and RandomErasing (bounded == false),
// which is why the error messages say "CutOut" while the function is Erase.
//
// @param input: image tensor of shape <H, W, C> with C == 3; mutated in place.
// @param output: set to the (modified) input tensor.
// @param box_height / box_width: size of each erased patch.
// @param num_patches: how many patches to erase.
// @param bounded: true keeps patches fully inside the image; false allows a
//        patch to start off-image, in which case it is clipped to the image.
// @param random_color: fill each pixel with N(0, 1) draws instead of fill_*.
// @param rnd: random generator for patch placement and random fill.
// @param fill_r / fill_g / fill_b: constant fill color when !random_color.
Status Erase(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t box_height,
             int32_t box_width, int32_t num_patches, bool bounded, bool random_color, std::mt19937 *rnd, uint8_t fill_r,
             uint8_t fill_g, uint8_t fill_b) {
  try {
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    CHECK_FAIL_RETURN_UNEXPECTED(input_cv->shape().Size() > CHANNEL_INDEX, "Erase: shape is invalid.");
    int num_channels = input_cv->shape()[CHANNEL_INDEX];
    if (input_cv->mat().data == nullptr) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] CutOut: load image failed.");
    }
    if (input_cv->Rank() != DEFAULT_IMAGE_RANK || num_channels != DEFAULT_IMAGE_CHANNELS) {
      RETURN_STATUS_UNEXPECTED("CutOut: image shape is not <H,W,C> or channel is not 3, but got rank: " +
                               std::to_string(input_cv->Rank()) + ", and channel: " + std::to_string(num_channels));
    }
    cv::Mat input_img = input_cv->mat();
    int32_t image_h = input_cv->shape()[0];
    int32_t image_w = input_cv->shape()[1];
    // check if erase size is bigger than image itself
    if (box_height > image_h || box_width > image_w) {
      RETURN_STATUS_UNEXPECTED(
        "CutOut: box size is too large for image erase, got box height: " + std::to_string(box_height) +
        "box weight: " + std::to_string(box_width) + ", and image height: " + std::to_string(image_h) +
        ", image width: " + std::to_string(image_w));
    }

    // for random color
    // NOTE(review): the N(0, 1) draw below is cast to int, so random_color
    // almost always writes 0 — confirm whether scaling to [0, 255] was intended.
    std::normal_distribution<double> normal_distribution(0, 1);
    std::uniform_int_distribution<int> height_distribution_bound(0, image_h - box_height);
    std::uniform_int_distribution<int> width_distribution_bound(0, image_w - box_width);
    std::uniform_int_distribution<int> height_distribution_unbound(0, image_h + box_height);
    std::uniform_int_distribution<int> width_distribution_unbound(0, image_w + box_width);
    // core logic
    // update values based on random erasing or cutout

    for (int32_t i = 0; i < num_patches; i++) {
      // rows in cv mat refers to the height of the cropped box
      // we determine h_start and w_start using two different distributions as erasing is used by two different
      // image augmentations. The bounds are also different in each case.
      int32_t h_start = (bounded) ? height_distribution_bound(*rnd) : (height_distribution_unbound(*rnd) - box_height);
      int32_t w_start = (bounded) ? width_distribution_bound(*rnd) : (width_distribution_unbound(*rnd) - box_width);

      // Clip the patch to the image on the far edge (unbounded starts may overrun).
      int32_t max_width = (w_start + box_width > image_w) ? image_w : w_start + box_width;
      int32_t max_height = (h_start + box_height > image_h) ? image_h : h_start + box_height;
      // check for starting range >= 0, here the start range is checked after for cut out, for random erasing
      // w_start and h_start will never be less than 0.
      h_start = (h_start < 0) ? 0 : h_start;
      w_start = (w_start < 0) ? 0 : w_start;
      // y iterates columns and x iterates rows; cv::Point(col, row) below, so
      // the addressing is consistent despite the unconventional names.
      for (int y = w_start; y < max_width; y++) {
        for (int x = h_start; x < max_height; x++) {
          if (random_color) {
            // fill each box with a random value
            input_img.at<cv::Vec3b>(cv::Point(y, x))[0] = static_cast<int32_t>(normal_distribution(*rnd));
            input_img.at<cv::Vec3b>(cv::Point(y, x))[1] = static_cast<int32_t>(normal_distribution(*rnd));
            input_img.at<cv::Vec3b>(cv::Point(y, x))[2] = static_cast<int32_t>(normal_distribution(*rnd));
          } else {
            input_img.at<cv::Vec3b>(cv::Point(y, x))[0] = fill_r;
            input_img.at<cv::Vec3b>(cv::Point(y, x))[1] = fill_g;
            input_img.at<cv::Vec3b>(cv::Point(y, x))[2] = fill_b;
          }
        }
      }
    }
    *output = std::static_pointer_cast<Tensor>(input);
    return Status::OK();
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("CutOut: " + std::string(e.what()));
  }
}
1292 
Pad(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const int32_t & pad_top,const int32_t & pad_bottom,const int32_t & pad_left,const int32_t & pad_right,const BorderType & border_types,uint8_t fill_r,uint8_t fill_g,uint8_t fill_b)1293 Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top,
1294            const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types,
1295            uint8_t fill_r, uint8_t fill_g, uint8_t fill_b) {
1296   try {
1297     // input image
1298     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1299 
1300     // validate rank
1301     if (input_cv->Rank() == 1 || input_cv->mat().dims > MIN_IMAGE_DIMENSION) {
1302       RETURN_STATUS_UNEXPECTED("Pad: input shape is not <H,W,C> or <H, W>, got rank: " +
1303                                std::to_string(input_cv->Rank()));
1304     }
1305 
1306     // get the border type in openCV
1307     auto b_type = GetCVBorderType(border_types);
1308     // output image
1309     cv::Mat out_image;
1310     if (b_type == cv::BORDER_CONSTANT) {
1311       cv::Scalar fill_color = cv::Scalar(fill_b, fill_g, fill_r);
1312       cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type, fill_color);
1313     } else {
1314       cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type);
1315     }
1316     std::shared_ptr<CVTensor> output_cv;
1317     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_image, input_cv->Rank(), &output_cv));
1318     // pad the dimension if shape information is only 2 dimensional, this is grayscale
1319     int num_channels = input_cv->shape()[CHANNEL_INDEX];
1320     if (input_cv->Rank() == DEFAULT_IMAGE_RANK && num_channels == MIN_IMAGE_CHANNELS &&
1321         output_cv->Rank() == MIN_IMAGE_DIMENSION)
1322       RETURN_IF_NOT_OK(output_cv->ExpandDim(CHANNEL_INDEX));
1323     *output = std::static_pointer_cast<Tensor>(output_cv);
1324     return Status::OK();
1325   } catch (const cv::Exception &e) {
1326     RETURN_STATUS_UNEXPECTED("Pad: " + std::string(e.what()));
1327   }
1328 }
1329 
RgbaToRgb(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1330 Status RgbaToRgb(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1331   try {
1332     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
1333     int num_channels = input_cv->shape()[CHANNEL_INDEX];
1334     if (input_cv->shape().Size() != DEFAULT_IMAGE_CHANNELS || num_channels != 4) {
1335       std::string err_msg = "RgbaToRgb: rank of image is not: " + std::to_string(DEFAULT_IMAGE_CHANNELS) +
1336                             ", but got: " + std::to_string(input_cv->shape().Size()) +
1337                             ", or channels of image should be 4, but got: " + std::to_string(num_channels);
1338       RETURN_STATUS_UNEXPECTED(err_msg);
1339     }
1340     TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1], 3});
1341     std::shared_ptr<CVTensor> output_cv;
1342     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
1343     cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGBA2RGB));
1344     *output = std::static_pointer_cast<Tensor>(output_cv);
1345     return Status::OK();
1346   } catch (const cv::Exception &e) {
1347     RETURN_STATUS_UNEXPECTED("RgbaToRgb: " + std::string(e.what()));
1348   }
1349 }
1350 
RgbaToBgr(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1351 Status RgbaToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1352   try {
1353     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
1354     int num_channels = input_cv->shape()[CHANNEL_INDEX];
1355     if (input_cv->shape().Size() != DEFAULT_IMAGE_CHANNELS || num_channels != 4) {
1356       std::string err_msg = "RgbaToBgr: rank of image is not: " + std::to_string(DEFAULT_IMAGE_CHANNELS) +
1357                             ", but got: " + std::to_string(input_cv->shape().Size()) +
1358                             ", or channels of image should be 4, but got: " + std::to_string(num_channels);
1359       RETURN_STATUS_UNEXPECTED(err_msg);
1360     }
1361     TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1], 3});
1362     std::shared_ptr<CVTensor> output_cv;
1363     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
1364     cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGBA2BGR));
1365     *output = std::static_pointer_cast<Tensor>(output_cv);
1366     return Status::OK();
1367   } catch (const cv::Exception &e) {
1368     RETURN_STATUS_UNEXPECTED("RgbaToBgr: " + std::string(e.what()));
1369   }
1370 }
1371 
// Converts an RGB image tensor of shape <H, W, 3> to BGR by swapping channels
// 0 and 2. For 16/32/64-bit element types the swap is done manually with typed
// row pointers; cv::cvtColor handles the remaining (8-bit) element types.
Status RgbToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
  try {
    auto input_type = input->type();
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] RgbToBgr: load image failed.");
    }
    if (input_cv->Rank() != 3 || input_cv->shape()[2] != 3) {
      RETURN_STATUS_UNEXPECTED("RgbToBgr: input tensor is not in shape of <H,W,C> or channel is not 3, got rank: " +
                               std::to_string(input_cv->Rank()) +
                               ", and channel: " + std::to_string(input_cv->shape()[2]));
    }

    // Clone so the swap writes into a fresh buffer while reading the original.
    cv::Mat image = input_cv->mat().clone();
    // 16-bit types: swap channel lanes through Vec3s (16-bit) row pointers.
    if (input_type == DataType::DE_FLOAT16 || input_type == DataType::DE_INT16 || input_type == DataType::DE_UINT16) {
      for (int i = 0; i < input_cv->mat().rows; ++i) {
        cv::Vec3s *p1 = input_cv->mat().ptr<cv::Vec3s>(i);
        cv::Vec3s *p2 = image.ptr<cv::Vec3s>(i);
        for (int j = 0; j < input_cv->mat().cols; ++j) {
          p2[j][2] = p1[j][0];
          p2[j][1] = p1[j][1];
          p2[j][0] = p1[j][2];
        }
      }
      // 32-bit types: swap through Vec3f row pointers.
      // NOTE(review): DE_INT32 data is copied through float lvalues here; this
      // relies on the copy preserving arbitrary 32-bit patterns — confirm no
      // NaN canonicalization occurs on the target platform.
    } else if (input_type == DataType::DE_FLOAT32 || input_type == DataType::DE_INT32) {
      for (int i = 0; i < input_cv->mat().rows; ++i) {
        cv::Vec3f *p1 = input_cv->mat().ptr<cv::Vec3f>(i);
        cv::Vec3f *p2 = image.ptr<cv::Vec3f>(i);
        for (int j = 0; j < input_cv->mat().cols; ++j) {
          p2[j][2] = p1[j][0];
          p2[j][1] = p1[j][1];
          p2[j][0] = p1[j][2];
        }
      }
      // 64-bit floats: swap through Vec3d row pointers.
    } else if (input_type == DataType::DE_FLOAT64) {
      for (int i = 0; i < input_cv->mat().rows; ++i) {
        cv::Vec3d *p1 = input_cv->mat().ptr<cv::Vec3d>(i);
        cv::Vec3d *p2 = image.ptr<cv::Vec3d>(i);
        for (int j = 0; j < input_cv->mat().cols; ++j) {
          p2[j][2] = p1[j][0];
          p2[j][1] = p1[j][1];
          p2[j][0] = p1[j][2];
        }
      }
    } else {
      // All other element types are handled by OpenCV directly.
      cv::cvtColor(input_cv->mat(), image, cv::COLOR_RGB2BGR);
    }

    std::shared_ptr<CVTensor> output_cv;
    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(image, input_cv->Rank(), &output_cv));

    *output = std::static_pointer_cast<Tensor>(output_cv);
    return Status::OK();
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("RgbToBgr: " + std::string(e.what()));
  }
}
1429 
RgbToGray(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1430 Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1431   try {
1432     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
1433     if (input_cv->Rank() != DEFAULT_IMAGE_RANK || input_cv->shape()[CHANNEL_INDEX] != DEFAULT_IMAGE_CHANNELS) {
1434       RETURN_STATUS_UNEXPECTED(
1435         "RgbToGray: image shape is not <H,W,C> or channel is not 3, got rank: " + std::to_string(input_cv->Rank()) +
1436         ", and channel: " + std::to_string(input_cv->shape()[2]));
1437     }
1438     TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1]});
1439     std::shared_ptr<CVTensor> output_cv;
1440     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
1441     cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGB2GRAY));
1442     *output = std::static_pointer_cast<Tensor>(output_cv);
1443     return Status::OK();
1444   } catch (const cv::Exception &e) {
1445     RETURN_STATUS_UNEXPECTED("RgbToGray: " + std::string(e.what()));
1446   }
1447 }
1448 
// Reads only the JPEG header of the encoded buffer in `input` and reports the
// decoded output dimensions without decompressing any pixel data.
//
// @param input: tensor holding the raw JPEG byte stream.
// @param img_width / img_height: out-params set to the image dimensions.
Status GetJpegImageInfo(const std::shared_ptr<Tensor> &input, int *img_width, int *img_height) {
  struct jpeg_decompress_struct cinfo {};
  struct JpegErrorManagerCustom jerr {};
  // Install the custom error handler; the try/catch below implies it converts
  // libjpeg fatal errors into a thrown std::runtime_error — TODO confirm.
  cinfo.err = jpeg_std_error(&jerr.pub);
  jerr.pub.error_exit = JpegErrorExitCustom;
  try {
    jpeg_create_decompress(&cinfo);
    JpegSetSource(&cinfo, input->GetBuffer(), input->SizeInBytes());
    (void)jpeg_read_header(&cinfo, TRUE);
    // Compute output_width/output_height without decoding scan lines.
    jpeg_calc_output_dimensions(&cinfo);
  } catch (std::runtime_error &e) {
    // Release libjpeg state before propagating the failure.
    jpeg_destroy_decompress(&cinfo);
    RETURN_STATUS_UNEXPECTED(e.what());
  }
  *img_height = cinfo.output_height;
  *img_width = cinfo.output_width;
  jpeg_destroy_decompress(&cinfo);
  return Status::OK();
}
1468 
Affine(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const std::vector<float_t> & mat,InterpolationMode interpolation,uint8_t fill_r,uint8_t fill_g,uint8_t fill_b)1469 Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::vector<float_t> &mat,
1470               InterpolationMode interpolation, uint8_t fill_r, uint8_t fill_g, uint8_t fill_b) {
1471   try {
1472     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1473     RETURN_IF_NOT_OK(ValidateImageRank("Affine", input_cv->Rank()));
1474 
1475     cv::Mat affine_mat(mat);
1476     affine_mat = affine_mat.reshape(1, {2, 3});
1477 
1478     std::shared_ptr<CVTensor> output_cv;
1479     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1480     RETURN_UNEXPECTED_IF_NULL(output_cv);
1481     cv::warpAffine(input_cv->mat(), output_cv->mat(), affine_mat, input_cv->mat().size(),
1482                    GetCVInterpolationMode(interpolation), cv::BORDER_CONSTANT, cv::Scalar(fill_r, fill_g, fill_b));
1483     (*output) = std::static_pointer_cast<Tensor>(output_cv);
1484     return Status::OK();
1485   } catch (const cv::Exception &e) {
1486     RETURN_STATUS_UNEXPECTED("Affine: " + std::string(e.what()));
1487   }
1488 }
1489 
GaussianBlur(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t kernel_x,int32_t kernel_y,float sigma_x,float sigma_y)1490 Status GaussianBlur(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t kernel_x,
1491                     int32_t kernel_y, float sigma_x, float sigma_y) {
1492   try {
1493     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1494     if (input_cv->mat().data == nullptr) {
1495       RETURN_STATUS_UNEXPECTED("[Internal ERROR] GaussianBlur: load image failed.");
1496     }
1497     cv::Mat output_cv_mat;
1498     cv::GaussianBlur(input_cv->mat(), output_cv_mat, cv::Size(kernel_x, kernel_y), static_cast<double>(sigma_x),
1499                      static_cast<double>(sigma_y));
1500     std::shared_ptr<CVTensor> output_cv;
1501     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_cv_mat, input_cv->Rank(), &output_cv));
1502     (*output) = std::static_pointer_cast<Tensor>(output_cv);
1503     return Status::OK();
1504   } catch (const cv::Exception &e) {
1505     RETURN_STATUS_UNEXPECTED("GaussianBlur: " + std::string(e.what()));
1506   }
1507 }
1508 
ComputePatchSize(const std::shared_ptr<CVTensor> & input_cv,std::shared_ptr<std::pair<int32_t,int32_t>> * patch_size,int32_t num_height,int32_t num_width,SliceMode slice_mode)1509 Status ComputePatchSize(const std::shared_ptr<CVTensor> &input_cv,
1510                         std::shared_ptr<std::pair<int32_t, int32_t>> *patch_size, int32_t num_height, int32_t num_width,
1511                         SliceMode slice_mode) {
1512   if (input_cv->mat().data == nullptr) {
1513     RETURN_STATUS_UNEXPECTED("[Internal ERROR] SlicePatches: Tensor could not convert to CV Tensor.");
1514   }
1515   RETURN_IF_NOT_OK(ValidateImageRank("Affine", input_cv->Rank()));
1516 
1517   cv::Mat in_img = input_cv->mat();
1518   cv::Size s = in_img.size();
1519   if (num_height == 0 || num_height > s.height) {
1520     RETURN_STATUS_UNEXPECTED(
1521       "SlicePatches: The number of patches on height axis equals 0 or is greater than height, got number of patches:" +
1522       std::to_string(num_height));
1523   }
1524   if (num_width == 0 || num_width > s.width) {
1525     RETURN_STATUS_UNEXPECTED(
1526       "SlicePatches: The number of patches on width axis equals 0 or is greater than width, got number of patches:" +
1527       std::to_string(num_width));
1528   }
1529   int32_t patch_h = s.height / num_height;
1530   if (s.height % num_height != 0) {
1531     if (slice_mode == SliceMode::kPad) {
1532       patch_h += 1;  // patch_h * num_height - s.height
1533     }
1534   }
1535   int32_t patch_w = s.width / num_width;
1536   if (s.width % num_width != 0) {
1537     if (slice_mode == SliceMode::kPad) {
1538       patch_w += 1;  // patch_w * num_width - s.width
1539     }
1540   }
1541   (*patch_size)->first = patch_h;
1542   (*patch_size)->second = patch_w;
1543   return Status::OK();
1544 }
1545 
SlicePatches(const std::shared_ptr<Tensor> & input,std::vector<std::shared_ptr<Tensor>> * output,int32_t num_height,int32_t num_width,SliceMode slice_mode,uint8_t fill_value)1546 Status SlicePatches(const std::shared_ptr<Tensor> &input, std::vector<std::shared_ptr<Tensor>> *output,
1547                     int32_t num_height, int32_t num_width, SliceMode slice_mode, uint8_t fill_value) {
1548   if (num_height == DEFAULT_NUM_HEIGHT && num_width == DEFAULT_NUM_WIDTH) {
1549     (*output).push_back(input);
1550     return Status::OK();
1551   }
1552 
1553   auto patch_size = std::make_shared<std::pair<int32_t, int32_t>>(0, 0);
1554   int32_t patch_h = 0;
1555   int32_t patch_w = 0;
1556 
1557   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1558   RETURN_IF_NOT_OK(ComputePatchSize(input_cv, &patch_size, num_height, num_width, slice_mode));
1559   std::tie(patch_h, patch_w) = *patch_size;
1560 
1561   cv::Mat in_img = input_cv->mat();
1562   cv::Size s = in_img.size();
1563   try {
1564     cv::Mat out_img;
1565     if (slice_mode == SliceMode::kPad) {  // padding on right and bottom directions
1566       auto padding_h = patch_h * num_height - s.height;
1567       auto padding_w = patch_w * num_width - s.width;
1568       out_img = cv::Mat(s.height + padding_h, s.width + padding_w, in_img.type(), cv::Scalar::all(fill_value));
1569       in_img.copyTo(out_img(cv::Rect(0, 0, s.width, s.height)));
1570     } else {
1571       out_img = in_img;
1572     }
1573     for (int i = 0; i < num_height; ++i) {
1574       for (int j = 0; j < num_width; ++j) {
1575         std::shared_ptr<CVTensor> patch_cv;
1576         cv::Rect rect(j * patch_w, i * patch_h, patch_w, patch_h);
1577         cv::Mat patch(out_img(rect));
1578         RETURN_IF_NOT_OK(CVTensor::CreateFromMat(patch, input_cv->Rank(), &patch_cv));
1579         (*output).push_back(std::static_pointer_cast<Tensor>(patch_cv));
1580       }
1581     }
1582     return Status::OK();
1583   } catch (const cv::Exception &e) {
1584     RETURN_STATUS_UNEXPECTED("SlicePatches: " + std::string(e.what()));
1585   }
1586 }
1587 
ValidateImageRank(const std::string & op_name,int32_t rank)1588 Status ValidateImageRank(const std::string &op_name, int32_t rank) {
1589   if (rank != 2 && rank != 3) {
1590     std::string err_msg = op_name + ": image shape is not <H,W,C> or <H, W>, but got rank:" + std::to_string(rank);
1591     RETURN_STATUS_UNEXPECTED(err_msg);
1592   }
1593   return Status::OK();
1594 }
1595 }  // namespace dataset
1596 }  // namespace mindspore
1597