• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/kernels/image/lite_image_utils.h"
17 
18 #include <limits>
19 #include <stdexcept>
20 #include <utility>
21 #include <vector>
22 
23 #if defined(ENABLE_MINDDATA_PYTHON)
24 #include <opencv2/imgproc/types_c.h>
25 #include <opencv2/imgcodecs.hpp>
26 #include <opencv2/imgproc/imgproc.hpp>
27 #endif
28 
29 #if defined(ENABLE_MINDDATA_PYTHON)
30 #include "minddata/dataset/core/cv_tensor.h"
31 #endif
32 #include "minddata/dataset/core/tensor.h"
33 #include "minddata/dataset/core/tensor_shape.h"
34 #include "minddata/dataset/include/dataset/constants.h"
35 #include "minddata/dataset/kernels/image/lite_cv/image_process.h"
36 #include "minddata/dataset/kernels/image/lite_cv/lite_mat.h"
37 #include "minddata/dataset/kernels/image/math_utils.h"
38 #if defined(ENABLE_MINDDATA_PYTHON)
39 #include "minddata/dataset/kernels/image/resize_cubic_op.h"
40 #endif
41 #include "minddata/dataset/util/random.h"
42 
43 constexpr int64_t hw_shape = 2;
44 constexpr int64_t hwc_rank = 3;
45 
46 #define MAX_INT_PRECISION 16777216  // float int precision is 16777216
47 namespace mindspore {
48 namespace dataset {
49 #if defined(ENABLE_MINDDATA_PYTHON)
IsNonEmptyPNG(const std::shared_ptr<Tensor> & input)50 bool IsNonEmptyPNG(const std::shared_ptr<Tensor> &input) {
51   const unsigned char kPngMagic[] = "\x89\x50\x4E\x47";
52   constexpr dsize_t kPngMagicLen = 4;
53   return input->SizeInBytes() > kPngMagicLen && memcmp(input->GetBuffer(), kPngMagic, kPngMagicLen) == 0;
54 }
55 
Rescale(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float rescale,float shift)56 Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rescale, float shift) {
57   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
58   if (!input_cv->mat().data) {
59     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Rescale: load image failed.");
60   }
61   cv::Mat input_image = input_cv->mat();
62   std::shared_ptr<CVTensor> output_cv;
63   RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), DataType(DataType::DE_FLOAT32), &output_cv));
64   try {
65     input_image.convertTo(output_cv->mat(), CV_32F, rescale, shift);
66     *output = std::static_pointer_cast<Tensor>(output_cv);
67   } catch (const cv::Exception &e) {
68     RETURN_STATUS_UNEXPECTED("Rescale: " + std::string(e.what()));
69   }
70   return Status::OK();
71 }
72 
SwapRedAndBlue(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)73 Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
74   try {
75     RETURN_IF_NOT_OK(ValidateImage(input, "SwapRedBlue", {3, 5, 11}));
76     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
77     CHECK_FAIL_RETURN_UNEXPECTED(
78       input_cv->shape().Size() > kChannelIndexHWC,
79       "SwapRedAndBlue: rank of input data should be greater than:" + std::to_string(kChannelIndexHWC) +
80         ", but got:" + std::to_string(input_cv->shape().Size()));
81     int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
82     if (input_cv->shape().Size() != kDefaultImageRank || num_channels != kDefaultImageChannel) {
83       RETURN_STATUS_UNEXPECTED("SwapRedBlue: image shape should be in <H,W,C> format, but got:" +
84                                input_cv->shape().ToString());
85     }
86     std::shared_ptr<CVTensor> output_cv;
87     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
88 
89     cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_BGR2RGB));
90     *output = std::static_pointer_cast<Tensor>(output_cv);
91     return Status::OK();
92   } catch (const cv::Exception &e) {
93     RETURN_STATUS_UNEXPECTED("SwapRedBlue: " + std::string(e.what()));
94   }
95 }
96 #endif
97 
IsNonEmptyJPEG(const std::shared_ptr<Tensor> & input)98 bool IsNonEmptyJPEG(const std::shared_ptr<Tensor> &input) {
99   const unsigned char *kJpegMagic = (unsigned char *)"\xFF\xD8\xFF";
100   constexpr size_t kJpegMagicLen = 3;
101   return input->SizeInBytes() > kJpegMagicLen && memcmp(input->GetBuffer(), kJpegMagic, kJpegMagicLen) == 0;
102 }
103 
JpegInitSource(j_decompress_ptr cinfo)104 static void JpegInitSource(j_decompress_ptr cinfo) {}
105 
JpegFillInputBuffer(j_decompress_ptr cinfo)106 static boolean JpegFillInputBuffer(j_decompress_ptr cinfo) {
107   if (cinfo->src->bytes_in_buffer == 0) {
108     // Under ARM platform raise runtime_error may cause core problem,
109     // so we catch runtime_error and just return FALSE.
110     try {
111       ERREXIT(cinfo, JERR_INPUT_EMPTY);
112     } catch (const std::exception &e) {
113       return FALSE;
114     }
115     return FALSE;
116   }
117   return TRUE;
118 }
119 
JpegTermSource(j_decompress_ptr cinfo)120 static void JpegTermSource(j_decompress_ptr cinfo) {}
121 
JpegSkipInputData(j_decompress_ptr cinfo,int64_t jump)122 static void JpegSkipInputData(j_decompress_ptr cinfo, int64_t jump) {
123   if (jump < 0) {
124     return;
125   }
126   if (static_cast<size_t>(jump) > cinfo->src->bytes_in_buffer) {
127     cinfo->src->bytes_in_buffer = 0;
128     return;
129   } else {
130     cinfo->src->bytes_in_buffer -= jump;
131     cinfo->src->next_input_byte += jump;
132   }
133 }
134 
JpegSetSource(j_decompress_ptr cinfo,const void * data,int64_t datasize)135 void JpegSetSource(j_decompress_ptr cinfo, const void *data, int64_t datasize) {
136   cinfo->src = static_cast<struct jpeg_source_mgr *>(
137     (*cinfo->mem->alloc_small)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_PERMANENT, sizeof(struct jpeg_source_mgr)));
138   cinfo->src->init_source = JpegInitSource;
139   cinfo->src->fill_input_buffer = JpegFillInputBuffer;
140 #if defined(_WIN32) || defined(_WIN64) || defined(ENABLE_ARM32)
141   // the following line skips CI because it uses underlying C type
142   cinfo->src->skip_input_data = reinterpret_cast<void (*)(j_decompress_ptr, long)>(JpegSkipInputData);  // NOLINT.
143 #else
144   cinfo->src->skip_input_data = JpegSkipInputData;
145 #endif
146   cinfo->src->resync_to_restart = jpeg_resync_to_restart;
147   cinfo->src->term_source = JpegTermSource;
148   cinfo->src->bytes_in_buffer = datasize;
149   cinfo->src->next_input_byte = static_cast<const JOCTET *>(data);
150 }
151 
JpegReadScanlines(jpeg_decompress_struct * const cinfo,int max_scanlines_to_read,JSAMPLE * buffer,int buffer_size,int crop_w,int crop_w_aligned,int offset,int stride)152 static Status JpegReadScanlines(jpeg_decompress_struct *const cinfo, int max_scanlines_to_read, JSAMPLE *buffer,
153                                 int buffer_size, int crop_w, int crop_w_aligned, int offset, int stride) {
154   // scanlines will be read to this buffer first, must have the number
155   // of components equal to the number of components in the image
156   int64_t scanline_size = crop_w_aligned * cinfo->output_components;
157   std::vector<JSAMPLE> scanline(scanline_size);
158   JSAMPLE *scanline_ptr = &scanline[0];
159   while (cinfo->output_scanline < static_cast<unsigned int>(max_scanlines_to_read)) {
160     unsigned int num_lines_read = 0;
161     try {
162       num_lines_read = jpeg_read_scanlines(cinfo, &scanline_ptr, 1);
163     } catch (const std::exception &e) {
164       RETURN_STATUS_UNEXPECTED("Decode: jpeg_read_scanlines error.");
165     }
166     if (cinfo->out_color_space == JCS_CMYK && num_lines_read > 0) {
167       for (int i = 0; i < crop_w; ++i) {
168         const int cmyk_pixel = 4 * i + offset;
169         const int c = scanline_ptr[cmyk_pixel];
170         const int m = scanline_ptr[cmyk_pixel + 1];
171         const int y = scanline_ptr[cmyk_pixel + 2];
172         const int k = scanline_ptr[cmyk_pixel + 3];
173         int r, g, b;
174         if (cinfo->saw_Adobe_marker) {
175           r = (k * c) / kMaxPixelValue;
176           g = (k * m) / kMaxPixelValue;
177           b = (k * y) / kMaxPixelValue;
178         } else {
179           r = (kMaxPixelValue - c) * (kMaxPixelValue - k) / kMaxPixelValue;
180           g = (kMaxPixelValue - m) * (kMaxPixelValue - k) / kMaxPixelValue;
181           b = (kMaxPixelValue - y) * (kMaxPixelValue - k) / kMaxPixelValue;
182         }
183         constexpr int buffer_rgb_val_size = 3;
184         constexpr int channel_red = 0;
185         constexpr int channel_green = 1;
186         constexpr int channel_blue = 2;
187         buffer[buffer_rgb_val_size * i + channel_red] = r;
188         buffer[buffer_rgb_val_size * i + channel_green] = g;
189         buffer[buffer_rgb_val_size * i + channel_blue] = b;
190       }
191     } else if (num_lines_read > 0) {
192       auto copy_status = memcpy_s(buffer, buffer_size, scanline_ptr + offset, stride);
193       if (copy_status != 0) {
194         jpeg_destroy_decompress(cinfo);
195         RETURN_STATUS_UNEXPECTED("Decode: memcpy_s failed");
196       }
197     } else {
198       jpeg_destroy_decompress(cinfo);
199       std::string err_msg = "Decode: failed to decompress image.";
200       RETURN_STATUS_UNEXPECTED(err_msg);
201     }
202     buffer += stride;
203     buffer_size = buffer_size - stride;
204   }
205   return Status::OK();
206 }
207 
JpegSetColorSpace(jpeg_decompress_struct * cinfo)208 static Status JpegSetColorSpace(jpeg_decompress_struct *cinfo) {
209   switch (cinfo->num_components) {
210     case 1:
211       // we want to output 3 components if it's grayscale
212       cinfo->out_color_space = JCS_RGB;
213       return Status::OK();
214     case 3:
215       cinfo->out_color_space = JCS_RGB;
216       return Status::OK();
217     case 4:
218       // Need to manually convert to RGB
219       cinfo->out_color_space = JCS_CMYK;
220       return Status::OK();
221     default:
222       jpeg_destroy_decompress(cinfo);
223       std::string err_msg = "Decode: failed to decompress image.";
224       RETURN_STATUS_UNEXPECTED(err_msg);
225   }
226 }
227 
JpegErrorExitCustom(j_common_ptr cinfo)228 void JpegErrorExitCustom(j_common_ptr cinfo) {
229   char jpeg_last_error_msg[JMSG_LENGTH_MAX];
230   (*(cinfo->err->format_message))(cinfo, jpeg_last_error_msg);
231   throw std::runtime_error(jpeg_last_error_msg);
232 }
233 
JpegCropAndDecode(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int crop_x,int crop_y,int crop_w,int crop_h)234 Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int crop_x, int crop_y,
235                          int crop_w, int crop_h) {
236   struct jpeg_decompress_struct cinfo {};
237   auto DestroyDecompressAndReturnError = [&cinfo](const std::string &err) {
238     jpeg_destroy_decompress(&cinfo);
239     RETURN_STATUS_UNEXPECTED(err);
240   };
241   struct JpegErrorManagerCustom jerr {};
242   cinfo.err = jpeg_std_error(&jerr.pub);
243   jerr.pub.error_exit = JpegErrorExitCustom;
244   try {
245     jpeg_create_decompress(&cinfo);
246     JpegSetSource(&cinfo, input->GetBuffer(), input->SizeInBytes());
247     (void)jpeg_read_header(&cinfo, TRUE);
248     RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo));
249     jpeg_calc_output_dimensions(&cinfo);
250   } catch (const std::exception &e) {
251     return DestroyDecompressAndReturnError(e.what());
252   }
253   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_w) > crop_x, "invalid crop width");
254   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_h) > crop_y, "invalid crop height");
255   if (crop_x == 0 && crop_y == 0 && crop_w == 0 && crop_h == 0) {
256     crop_w = static_cast<int>(cinfo.output_width);
257     crop_h = static_cast<int>(cinfo.output_height);
258   } else if (crop_w == 0 || static_cast<unsigned int>(crop_w + crop_x) > cinfo.output_width || crop_h == 0 ||
259              static_cast<unsigned int>(crop_h + crop_y) > cinfo.output_height) {
260     return DestroyDecompressAndReturnError("Decode: invalid crop size");
261   }
262   const int mcu_size = cinfo.min_DCT_scaled_size;
263   CHECK_FAIL_RETURN_UNEXPECTED(mcu_size != 0, "Invalid data.");
264   unsigned int crop_x_aligned = (crop_x / mcu_size) * mcu_size;
265   unsigned int crop_w_aligned = crop_w + crop_x - crop_x_aligned;
266   try {
267     (void)jpeg_start_decompress(&cinfo);
268     jpeg_crop_scanline(&cinfo, &crop_x_aligned, &crop_w_aligned);
269   } catch (const std::exception &e) {
270     return DestroyDecompressAndReturnError(e.what());
271   }
272   JDIMENSION skipped_scanlines = jpeg_skip_scanlines(&cinfo, crop_y);
273   // three number of output components, always convert to RGB and output
274   constexpr int kOutNumComponents = 3;
275   TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents});
276   std::shared_ptr<Tensor> output_tensor;
277   RETURN_IF_NOT_OK(Tensor::CreateEmpty(ts, DataType(DataType::DE_UINT8), &output_tensor));
278   const int buffer_size = static_cast<int>(output_tensor->SizeInBytes());
279   JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>()));
280   // stride refers to output tensor, which has 3 components at most
281   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - skipped_scanlines) > crop_h,
282                                "Invalid crop height.");
283   const int max_scanlines_to_read = static_cast<int>(skipped_scanlines) + crop_h;
284   // stride refers to output tensor, which has 3 components at most
285   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() / crop_w) > kOutNumComponents,
286                                "Invalid crop width.");
287   const int stride = crop_w * kOutNumComponents;
288   // offset is calculated for scanlines read from the image, therefore
289   // has the same number of components as the image
290   const int offset = (crop_x - static_cast<int>(crop_x_aligned)) * cinfo.output_components;
291   RETURN_IF_NOT_OK(
292     JpegReadScanlines(&cinfo, max_scanlines_to_read, buffer, buffer_size, crop_w, crop_w_aligned, offset, stride));
293   *output = output_tensor;
294   jpeg_destroy_decompress(&cinfo);
295   return Status::OK();
296 }
297 
GetLiteCVDataType(const DataType & data_type)298 static LDataType GetLiteCVDataType(const DataType &data_type) {
299   if (data_type == DataType::DE_UINT8) {
300     return LDataType::UINT8;
301   } else if (data_type == DataType::DE_FLOAT32) {
302     return LDataType::FLOAT32;
303   } else {
304     return LDataType::UNKNOWN;
305   }
306 }
307 
308 #if defined(ENABLE_MINDDATA_PYTHON)
DecodeCv(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)309 Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
310   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
311   if (!input_cv->mat().data) {
312     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: load image failed.");
313   }
314   try {
315     cv::Mat img_mat = cv::imdecode(input_cv->mat(), cv::IMREAD_COLOR | cv::IMREAD_IGNORE_ORIENTATION);
316     if (img_mat.data == nullptr) {
317       std::string err = "Decode: image decode failed.";
318       RETURN_STATUS_UNEXPECTED(err);
319     }
320     cv::cvtColor(img_mat, img_mat, static_cast<int>(cv::COLOR_BGR2RGB));
321     std::shared_ptr<CVTensor> output_cv;
322     const dsize_t rank_num = 3;
323     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, rank_num, &output_cv));
324     *output = std::static_pointer_cast<Tensor>(output_cv);
325     return Status::OK();
326   } catch (const cv::Exception &e) {
327     RETURN_STATUS_UNEXPECTED("Decode: " + std::string(e.what()));
328   }
329 }
330 #endif
331 
Decode(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)332 Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
333   if (IsNonEmptyJPEG(input)) {
334     return JpegCropAndDecode(input, output);
335   } else {
336 #if defined(ENABLE_MINDDATA_PYTHON)
337     return DecodeCv(input, output);
338 #else
339     RETURN_STATUS_UNEXPECTED("Decode: Decode only supports jpeg for android");
340 #endif
341   }
342 }
343 
Crop(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int x,int y,int w,int h)344 Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h) {
345   if (input->Rank() != 3 && input->Rank() != 2) {
346     RETURN_STATUS_UNEXPECTED("Crop: image shape is not <H,W,C> or <H,W>");
347   }
348 
349   if (input->type() != DataType::DE_FLOAT32 && input->type() != DataType::DE_UINT8) {
350     RETURN_STATUS_UNEXPECTED("Crop: image datatype is not float32 or uint8");
351   }
352 
353   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - y) > h, "Invalid crop height.");
354   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - x) > w, "Invalid crop width.");
355   // account for integer overflow
356   if (y < 0 || (y + h) > input->shape()[0] || (y + h) < 0) {
357     RETURN_STATUS_UNEXPECTED(
358       "Crop: invalid y coordinate value for crop"
359       "y coordinate value exceeds the boundary of the image.");
360   }
361   // account for integer overflow
362   if (x < 0 || (x + w) > input->shape()[1] || (x + w) < 0) {
363     RETURN_STATUS_UNEXPECTED(
364       "Crop: invalid x coordinate value for crop"
365       "x coordinate value exceeds the boundary of the image.");
366   }
367 
368   try {
369     LiteMat lite_mat_rgb;
370     TensorShape shape{h, w};
371     int input_height = static_cast<int>(input->shape()[0]);
372     int input_width = static_cast<int>(input->shape()[1]);
373     int input_channel = static_cast<int>(input->shape()[2]);
374     if (input->Rank() == 2) {
375       lite_mat_rgb.Init(input_width, input_height,
376                         const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
377                         GetLiteCVDataType(input->type()));
378     } else {  // rank == 3
379       lite_mat_rgb.Init(input_width, input_height, input_channel,
380                         const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
381                         GetLiteCVDataType(input->type()));
382       shape = shape.AppendDim(input_channel);
383     }
384     CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_rgb.IsEmpty(), "Crop: Init image tensor failed, return empty tensor.");
385 
386     std::shared_ptr<Tensor> output_tensor;
387     RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, input->type(), &output_tensor));
388 
389     uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
390     LiteMat lite_mat_cut;
391 
392     lite_mat_cut.Init(w, h, lite_mat_rgb.channel_, reinterpret_cast<void *>(buffer), GetLiteCVDataType(input->type()));
393     CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_cut.IsEmpty(), "Crop: Init image tensor failed, return empty tensor.");
394 
395     bool ret = Crop(lite_mat_rgb, lite_mat_cut, x, y, w, h);
396     CHECK_FAIL_RETURN_UNEXPECTED(ret, "Crop: image crop failed.");
397 
398     *output = output_tensor;
399     return Status::OK();
400   } catch (const std::exception &e) {
401     RETURN_STATUS_UNEXPECTED("Crop: " + std::string(e.what()));
402   }
403   return Status::OK();
404 }
405 
GetJpegImageInfo(const std::shared_ptr<Tensor> & input,int * img_width,int * img_height)406 Status GetJpegImageInfo(const std::shared_ptr<Tensor> &input, int *img_width, int *img_height) {
407   struct jpeg_decompress_struct cinfo {};
408   struct JpegErrorManagerCustom jerr {};
409   cinfo.err = jpeg_std_error(&jerr.pub);
410   jerr.pub.error_exit = JpegErrorExitCustom;
411   try {
412     jpeg_create_decompress(&cinfo);
413     JpegSetSource(&cinfo, input->GetBuffer(), input->SizeInBytes());
414     (void)jpeg_read_header(&cinfo, TRUE);
415     jpeg_calc_output_dimensions(&cinfo);
416   } catch (const std::exception &e) {
417     jpeg_destroy_decompress(&cinfo);
418     RETURN_STATUS_UNEXPECTED(e.what());
419   }
420   *img_height = static_cast<int>(cinfo.output_height);
421   *img_width = static_cast<int>(cinfo.output_width);
422   jpeg_destroy_decompress(&cinfo);
423   return Status::OK();
424 }
425 
Normalize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const std::vector<float> & vec_mean,const std::vector<float> & vec_std)426 Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
427                  const std::vector<float> &vec_mean, const std::vector<float> &vec_std) {
428   if (input->Rank() != 3) {
429     RETURN_STATUS_UNEXPECTED("Normalize: image shape is not <H,W,C>.");
430   }
431 
432   if (input->type() != DataType::DE_UINT8 && input->type() != DataType::DE_FLOAT32) {
433     RETURN_STATUS_UNEXPECTED("Normalize: image datatype is not uint8 or float32.");
434   }
435 
436   try {
437     LiteMat lite_mat_norm;
438     bool ret = false;
439     int input_height = static_cast<int>(input->shape()[0]);
440     int input_width = static_cast<int>(input->shape()[1]);
441     int input_channel = static_cast<int>(input->shape()[2]);
442     LiteMat lite_mat_rgb(input_width, input_height, input_channel,
443                          const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
444                          GetLiteCVDataType(input->type()));
445 
446     if (input->type() == DataType::DE_UINT8) {
447       LiteMat lite_mat_float;
448       // change input to float
449       ret = ConvertTo(lite_mat_rgb, lite_mat_float, 1.0);
450       CHECK_FAIL_RETURN_UNEXPECTED(ret, "Normalize: convert to float datatype failed.");
451       ret = SubStractMeanNormalize(lite_mat_float, lite_mat_norm, vec_mean, vec_std);
452     } else {  // float32
453       ret = SubStractMeanNormalize(lite_mat_rgb, lite_mat_norm, vec_mean, vec_std);
454     }
455     CHECK_FAIL_RETURN_UNEXPECTED(ret, "Normalize: normalize failed.");
456 
457     std::shared_ptr<Tensor> output_tensor;
458     RETURN_IF_NOT_OK(Tensor::CreateFromMemory(input->shape(), DataType(DataType::DE_FLOAT32),
459                                               static_cast<uchar *>(lite_mat_norm.data_ptr_), &output_tensor));
460 
461     *output = output_tensor;
462   } catch (const std::exception &e) {
463     RETURN_STATUS_UNEXPECTED("Normalize: " + std::string(e.what()));
464   }
465   return Status::OK();
466 }
467 
468 #if defined(ENABLE_MINDDATA_PYTHON)
GetCVInterpolationMode(InterpolationMode mode)469 int GetCVInterpolationMode(InterpolationMode mode) {
470   switch (mode) {
471     case InterpolationMode::kLinear:
472       return static_cast<int>(cv::InterpolationFlags::INTER_LINEAR);
473     case InterpolationMode::kCubic:
474       return static_cast<int>(cv::InterpolationFlags::INTER_CUBIC);
475     case InterpolationMode::kArea:
476       return static_cast<int>(cv::InterpolationFlags::INTER_AREA);
477     case InterpolationMode::kNearestNeighbour:
478       return static_cast<int>(cv::InterpolationFlags::INTER_NEAREST);
479     default:
480       return static_cast<int>(cv::InterpolationFlags::INTER_LINEAR);
481   }
482 }
483 
Resize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t output_height,int32_t output_width,double fx,double fy,InterpolationMode mode)484 Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height,
485               int32_t output_width, double fx, double fy, InterpolationMode mode) {
486   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
487   if (!input_cv->mat().data) {
488     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Resize: load image failed.");
489   }
490   RETURN_IF_NOT_OK(ValidateImageRank("Resize", input_cv->Rank()));
491 
492   cv::Mat in_image = input_cv->mat();
493   const uint32_t kResizeShapeLimits = 1000;
494   // resize image too large or too small, 1000 is arbitrarily chosen here to prevent open cv from segmentation fault
495   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() / kResizeShapeLimits) > in_image.rows,
496                                "Resize: in_image rows out of bounds.");
497   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() / kResizeShapeLimits) > in_image.cols,
498                                "Resize: in_image cols out of bounds.");
499   if (output_height > in_image.rows * kResizeShapeLimits || output_width > in_image.cols * kResizeShapeLimits) {
500     RETURN_STATUS_ERROR(
501       StatusCode::kMDShapeMisMatch,
502       "Resize: the resizing width or height is too big, it's 1000 times bigger than the original image, got output "
503       "height: " +
504         std::to_string(output_height) + ", width: " + std::to_string(output_width) +
505         ", and original image size:" + std::to_string(in_image.rows) + ", " + std::to_string(in_image.cols));
506   }
507   if (output_height == 0 || output_width == 0) {
508     RETURN_STATUS_ERROR(StatusCode::kMDShapeMisMatch,
509                         "Resize: the input value of 'resize' is invalid, width or height is zero.");
510   }
511 
512   if (mode == InterpolationMode::kCubicPil) {
513     if (input_cv->shape().Size() != kDefaultImageChannel ||
514         input_cv->shape()[kChannelIndexHWC] != kDefaultImageChannel) {
515       RETURN_STATUS_UNEXPECTED("Resize: Interpolation mode PILCUBIC only supports image with 3 channels, but got: " +
516                                input_cv->shape().ToString());
517     }
518 
519     LiteMat im_in;
520     LiteMat im_out;
521     std::shared_ptr<Tensor> output_tensor;
522     TensorShape new_shape = TensorShape({output_height, output_width, 3});
523     RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input_cv->type(), &output_tensor));
524     uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
525     im_out.Init(output_width, output_height, static_cast<int>(input_cv->shape()[kChannelIndexHWC]),
526                 reinterpret_cast<void *>(buffer), LDataType::UINT8);
527     im_in.Init(static_cast<int>(input_cv->shape()[1]), static_cast<int>(input_cv->shape()[0]),
528                static_cast<int>(input_cv->shape()[kChannelIndexHWC]), input_cv->mat().data, LDataType::UINT8);
529     CHECK_FAIL_RETURN_UNEXPECTED(!im_out.IsEmpty(), "Resize: Init image tensor failed, return empty tensor.");
530     CHECK_FAIL_RETURN_UNEXPECTED(!im_in.IsEmpty(), "Resize: Init image tensor failed, return empty tensor.");
531     if (ResizeCubic(im_in, im_out, output_width, output_height) == false) {
532       RETURN_STATUS_UNEXPECTED("Resize: failed to do resize, please check the error msg.");
533     }
534     *output = output_tensor;
535     return Status::OK();
536   }
537   try {
538     TensorShape shape{output_height, output_width};
539     if (input_cv->Rank() == kDefaultImageRank) {
540       int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
541       shape = shape.AppendDim(num_channels);
542     }
543     std::shared_ptr<CVTensor> output_cv;
544     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));
545 
546     auto cv_mode = GetCVInterpolationMode(mode);
547     cv::resize(in_image, output_cv->mat(), cv::Size(output_width, output_height), fx, fy, cv_mode);
548     *output = std::static_pointer_cast<Tensor>(output_cv);
549     return Status::OK();
550   } catch (const cv::Exception &e) {
551     RETURN_STATUS_UNEXPECTED("Resize: " + std::string(e.what()));
552   }
553 }
554 
555 #else
Resize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t output_height,int32_t output_width,double fx,double fy,InterpolationMode mode)556 Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height,
557               int32_t output_width, double fx, double fy, InterpolationMode mode) {
558   if (mode != InterpolationMode::kLinear) {
559     RETURN_STATUS_UNEXPECTED("Resize: Only Liner interpolation is supported currently.");
560   }
561   if (input->Rank() != 3 && input->Rank() != 2) {
562     RETURN_STATUS_UNEXPECTED("Resize: input image is not in shape of <H,W,C> or <H,W>");
563   }
564   if (input->type() != DataType::DE_UINT8) {
565     RETURN_STATUS_UNEXPECTED("Resize: image datatype is not uint8.");
566   }
567   // resize image too large or too small
568   const int height_width_scale_limit = 1000;
569   if (output_height == 0 || output_height > input->shape()[0] * height_width_scale_limit || output_width == 0 ||
570       output_width > input->shape()[1] * height_width_scale_limit) {
571     std::string err_msg =
572       "Resize: the resizing width or height 1) is too big, it's up to "
573       "1000 times the original image; 2) can not be 0.";
574     return Status(StatusCode::kMDShapeMisMatch, err_msg);
575   }
576   try {
577     LiteMat lite_mat_rgb;
578     TensorShape shape{output_height, output_width};
579     if (input->Rank() == 2) {
580       lite_mat_rgb.Init(input->shape()[1], input->shape()[0],
581                         const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
582                         GetLiteCVDataType(input->type()));
583     } else {  // rank == 3
584       lite_mat_rgb.Init(input->shape()[1], input->shape()[0], input->shape()[2],
585                         const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
586                         GetLiteCVDataType(input->type()));
587       int num_channels = input->shape()[2];
588       shape = shape.AppendDim(num_channels);
589     }
590     CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_rgb.IsEmpty(), "Resize: Init image tensor failed, return empty tensor.");
591 
592     LiteMat lite_mat_resize;
593     std::shared_ptr<Tensor> output_tensor;
594     RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, input->type(), &output_tensor));
595 
596     uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
597 
598     lite_mat_resize.Init(output_width, output_height, lite_mat_rgb.channel_, reinterpret_cast<void *>(buffer),
599                          GetLiteCVDataType(input->type()));
600     CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_resize.IsEmpty(), "Resize: Init image tensor failed, return empty tensor.");
601 
602     bool ret = ResizeBilinear(lite_mat_rgb, lite_mat_resize, output_width, output_height);
603     CHECK_FAIL_RETURN_UNEXPECTED(ret, "Resize: bilinear resize failed.");
604 
605     *output = output_tensor;
606   } catch (const std::exception &e) {
607     RETURN_STATUS_UNEXPECTED("Resize: " + std::string(e.what()));
608   }
609   return Status::OK();
610 }
611 #endif
612 
ResizePreserve(const TensorRow & inputs,int32_t height,int32_t width,int32_t img_orientation,TensorRow * outputs)613 Status ResizePreserve(const TensorRow &inputs, int32_t height, int32_t width, int32_t img_orientation,
614                       TensorRow *outputs) {
615   constexpr int64_t size = 3;
616   outputs->resize(size);
617   CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() > 0,
618                                "Invalid input, should be greater than 0, but got " + std::to_string(inputs.size()));
619   const std::shared_ptr<Tensor> &input = inputs[0];
620   CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 3, "Invalid input shape, should be greater than 3 dimensions.");
621   int input_height = static_cast<int>(input->shape()[0]);
622   int input_width = static_cast<int>(input->shape()[1]);
623   int input_channel = static_cast<int>(input->shape()[2]);
624   LiteMat lite_mat_src(input_width, input_height, input_channel,
625                        const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
626                        GetLiteCVDataType(input->type()));
627 
628   LiteMat lite_mat_dst;
629   std::shared_ptr<Tensor> image_tensor;
630   TensorShape new_shape = TensorShape({height, width, input->shape()[2]});
631   RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, DataType(DataType::DE_FLOAT32), &image_tensor));
632   uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*image_tensor->begin<uint8_t>()));
633   lite_mat_dst.Init(width, height, input_channel, reinterpret_cast<void *>(buffer), LDataType::FLOAT32);
634   CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_dst.IsEmpty(), "Resize: Init image tensor failed, return empty tensor.");
635 
636   float ratioShiftWShiftH[3] = {0};
637   float invM[2][3] = {{0, 0, 0}, {0, 0, 0}};
638   bool ret =
639     ResizePreserveARWithFiller(lite_mat_src, lite_mat_dst, height, width, &ratioShiftWShiftH, &invM, img_orientation);
640   CHECK_FAIL_RETURN_UNEXPECTED(ret, "Resize: bilinear resize failed.");
641 
642   std::shared_ptr<Tensor> ratio_tensor;
643   TensorShape ratio_shape = TensorShape({3});
644   RETURN_IF_NOT_OK(Tensor::CreateFromMemory(ratio_shape, DataType(DataType::DE_FLOAT32),
645                                             reinterpret_cast<uint8_t *>(&ratioShiftWShiftH), &ratio_tensor));
646 
647   std::shared_ptr<Tensor> invM_tensor;
648   TensorShape invM_shape = TensorShape({2, 3});
649   RETURN_IF_NOT_OK(Tensor::CreateFromMemory(invM_shape, DataType(DataType::DE_FLOAT32),
650                                             reinterpret_cast<uint8_t *>(&invM), &invM_tensor));
651 
652   (*outputs)[0] = image_tensor;
653   (*outputs)[1] = ratio_tensor;
654   (*outputs)[2] = invM_tensor;
655   return Status::OK();
656 }
657 
RgbToBgr(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)658 Status RgbToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
659   if (input->Rank() != hwc_rank) {
660     RETURN_STATUS_UNEXPECTED("RgbToBgr: input image is not in shape of <H,W,C>");
661   }
662   if (input->type() != DataType::DE_UINT8) {
663     RETURN_STATUS_UNEXPECTED("RgbToBgr: image datatype is not uint8.");
664   }
665 
666   try {
667     int input_height = static_cast<int>(input->shape()[0]);
668     int input_width = static_cast<int>(input->shape()[1]);
669     int input_channel = static_cast<int>(input->shape()[1]);
670     LiteMat lite_mat_rgb(input_width, input_height, input_channel,
671                          const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
672                          GetLiteCVDataType(input->type()));
673     LiteMat lite_mat_convert;
674     std::shared_ptr<Tensor> output_tensor;
675     constexpr auto kInputChannel = 3;
676     TensorShape new_shape = TensorShape({input_height, input_width, kInputChannel});
677     RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor));
678     uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
679     lite_mat_convert.Init(input_width, input_height, kInputChannel, reinterpret_cast<void *>(buffer),
680                           GetLiteCVDataType(input->type()));
681     CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_convert.IsEmpty(),
682                                  "RgbToBgr: Init image tensor failed, return empty tensor.");
683 
684     bool ret =
685       ConvertRgbToBgr(lite_mat_rgb, GetLiteCVDataType(input->type()), input_width, input_height, lite_mat_convert);
686     CHECK_FAIL_RETURN_UNEXPECTED(ret, "RgbToBgr: RGBToBGR failed.");
687 
688     *output = output_tensor;
689   } catch (const std::exception &e) {
690     RETURN_STATUS_UNEXPECTED("RgbToBgr: " + std::string(e.what()));
691   }
692   return Status::OK();
693 }
694 
RgbToGray(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)695 Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
696   if (input->Rank() != 3) {
697     RETURN_STATUS_UNEXPECTED("RgbToGray: input image is not in shape of <H,W,C>");
698   }
699   if (input->type() != DataType::DE_UINT8) {
700     RETURN_STATUS_UNEXPECTED("RgbToGray: image datatype is not uint8.");
701   }
702 
703   try {
704     int input_height = static_cast<int>(input->shape()[0]);
705     int input_width = static_cast<int>(input->shape()[1]);
706     int input_channel = static_cast<int>(input->shape()[2]);
707     LiteMat lite_mat_rgb(input_width, input_height, input_channel,
708                          const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
709                          GetLiteCVDataType(input->type()));
710     LiteMat lite_mat_convert;
711     std::shared_ptr<Tensor> output_tensor;
712     TensorShape new_shape = TensorShape({input_height, input_width, 1});
713     RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor));
714     uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
715     lite_mat_convert.Init(input_width, input_height, 1, reinterpret_cast<void *>(buffer),
716                           GetLiteCVDataType(input->type()));
717     CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_convert.IsEmpty(),
718                                  "RgbToBgr: Init image tensor failed, return empty tensor.");
719 
720     bool ret =
721       ConvertRgbToGray(lite_mat_rgb, GetLiteCVDataType(input->type()), input_width, input_height, lite_mat_convert);
722     CHECK_FAIL_RETURN_UNEXPECTED(ret, "RgbToGray: RGBToGRAY failed.");
723 
724     *output = output_tensor;
725   } catch (const std::exception &e) {
726     RETURN_STATUS_UNEXPECTED("RgbToGray: " + std::string(e.what()));
727   }
728   return Status::OK();
729 }
730 
Pad(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const int32_t & pad_top,const int32_t & pad_bottom,const int32_t & pad_left,const int32_t & pad_right,const BorderType & border_types,uint8_t fill_r,uint8_t fill_g,uint8_t fill_b)731 Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top,
732            const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types,
733            uint8_t fill_r, uint8_t fill_g, uint8_t fill_b) {
734   if (input->Rank() != 3) {
735     RETURN_STATUS_UNEXPECTED("Pad: input image is not in shape of <H,W,C>");
736   }
737 
738   if (input->type() != DataType::DE_FLOAT32 && input->type() != DataType::DE_UINT8) {
739     RETURN_STATUS_UNEXPECTED("Pad: image datatype is not uint8 or float32.");
740   }
741 
742   if (pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0) {
743     RETURN_STATUS_UNEXPECTED(
744       "Pad: "
745       "the top, bottom, left, right of pad must be greater than 0.");
746   }
747 
748   try {
749     int input_height = static_cast<int>(input->shape()[0]);
750     int input_width = static_cast<int>(input->shape()[1]);
751     int input_channel = static_cast<int>(input->shape()[2]);
752     LiteMat lite_mat_rgb(input_width, input_height, input_channel,
753                          const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
754                          GetLiteCVDataType(input->type()));
755     LiteMat lite_mat_pad;
756 
757     std::shared_ptr<Tensor> output_tensor;
758 
759     CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.width_) > pad_left,
760                                  "Invalid pad width.");
761     CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.width_ + pad_left) > pad_right,
762                                  "Invalid pad width.");
763     int pad_width = lite_mat_rgb.width_ + pad_left + pad_right;
764     CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.height_) > pad_top,
765                                  "Invalid pad height.");
766     CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.height_ + pad_top) > pad_bottom,
767                                  "Invalid pad height.");
768     int pad_height = lite_mat_rgb.height_ + pad_top + pad_bottom;
769     TensorShape new_shape = TensorShape({pad_height, pad_width, input->shape()[2]});
770     RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor));
771 
772     uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
773 
774     lite_mat_pad.Init(pad_width, pad_height, lite_mat_rgb.channel_, reinterpret_cast<void *>(buffer),
775                       GetLiteCVDataType(input->type()));
776     CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_pad.IsEmpty(), "Pad: Init image tensor failed, return empty tensor.");
777 
778     bool ret = Pad(lite_mat_rgb, lite_mat_pad, pad_top, pad_bottom, pad_left, pad_right,
779                    PaddBorderType::PADD_BORDER_CONSTANT, fill_r, fill_g, fill_b);
780     CHECK_FAIL_RETURN_UNEXPECTED(ret, "Pad: pad failed.");
781 
782     *output = output_tensor;
783   } catch (const std::exception &e) {
784     RETURN_STATUS_UNEXPECTED("Pad: " + std::string(e.what()));
785   }
786   return Status::OK();
787 }
788 
RotateAngleWithOutMirror(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const uint64_t orientation)789 static Status RotateAngleWithOutMirror(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
790                                        const uint64_t orientation) {
791   try {
792     int height = 0;
793     int width = 0;
794     double M[6] = {};
795 
796     int input_height = static_cast<int>(input->shape()[0]);
797     int input_width = static_cast<int>(input->shape()[1]);
798     int input_channel = static_cast<int>(input->shape()[2]);
799     LiteMat lite_mat_rgb(input_width, input_height, input_channel,
800                          const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
801                          GetLiteCVDataType(input->type()));
802 
803     // The 2D affine transformation matrix consists of 6 parameters (a, b, c, d, e, f)
804     // 0, 1, 2, 3, 4, 5 is the 6 parameters
805     if (orientation == 3) {
806       height = lite_mat_rgb.height_;
807       width = lite_mat_rgb.width_;
808       M[0] = -1.0f;
809       M[1] = 0.0f;
810       M[2] = lite_mat_rgb.width_ - 1;
811       M[3] = 0.0f;
812       M[4] = -1.0f;
813       M[5] = lite_mat_rgb.height_ - 1;
814     } else if (orientation == 6) {
815       height = lite_mat_rgb.width_;
816       width = lite_mat_rgb.height_;
817       M[0] = 0.0f;
818       M[1] = -1.0f;
819       M[2] = lite_mat_rgb.height_ - 1;
820       M[3] = 1.0f;
821       M[4] = 0.0f;
822       M[5] = 0.0f;
823     } else if (orientation == 8) {
824       height = lite_mat_rgb.width_;
825       width = lite_mat_rgb.height_;
826       M[0] = 0.0f;
827       M[1] = 1.0f;
828       M[2] = 0.0f;
829       M[3] = -1.0f;
830       M[4] = 0.0f;
831       M[5] = static_cast<float>(lite_mat_rgb.width_) - 1.0f;
832     } else {
833     }
834 
835     std::vector<size_t> dsize;
836     dsize.push_back(width);
837     dsize.push_back(height);
838     LiteMat lite_mat_affine;
839     std::shared_ptr<Tensor> output_tensor;
840     TensorShape new_shape = TensorShape({height, width, input->shape()[2]});
841     RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor));
842     uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
843     lite_mat_affine.Init(width, height, lite_mat_rgb.channel_, reinterpret_cast<void *>(buffer),
844                          GetLiteCVDataType(input->type()));
845     CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_affine.IsEmpty(), "Rotate: Init image tensor failed, return empty tensor.");
846 
847     bool ret = Affine(lite_mat_rgb, lite_mat_affine, M, dsize, UINT8_C3(0, 0, 0));
848     CHECK_FAIL_RETURN_UNEXPECTED(ret, "Rotate: rotate failed.");
849 
850     *output = output_tensor;
851   } catch (const std::exception &e) {
852     RETURN_STATUS_UNEXPECTED("Rotate: " + std::string(e.what()));
853   }
854   return Status::OK();
855 }
856 
RotateAngleWithMirror(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const uint64_t orientation)857 static Status RotateAngleWithMirror(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
858                                     const uint64_t orientation) {
859   try {
860     int height = 0;
861     int width = 0;
862     double M[6] = {};
863     int input_height = static_cast<int>(input->shape()[0]);
864     int input_width = static_cast<int>(input->shape()[1]);
865     int input_channel = static_cast<int>(input->shape()[2]);
866     LiteMat lite_mat_rgb(input_width, input_height, input_channel,
867                          const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
868                          GetLiteCVDataType(input->type()));
869 
870     // The 2D affine transformation matrix consists of 6 parameters (a, b, c, d, e, f)
871     // 0, 1, 2, 3, 4, 5 is the 6 parameters
872     if (orientation == 2) {
873       height = lite_mat_rgb.height_;
874       width = lite_mat_rgb.width_;
875       M[0] = -1.0f;
876       M[1] = 0.0f;
877       M[2] = lite_mat_rgb.width_ - 1;
878       M[3] = 0.0f;
879       M[4] = 1.0f;
880       M[5] = 0.0f;
881     } else if (orientation == 5) {
882       height = lite_mat_rgb.width_;
883       width = lite_mat_rgb.height_;
884       M[0] = 0.0f;
885       M[1] = 1.0f;
886       M[2] = 0.0f;
887       M[3] = 1.0f;
888       M[4] = 0.0f;
889       M[5] = 0.0f;
890     } else if (orientation == 7) {
891       height = lite_mat_rgb.width_;
892       width = lite_mat_rgb.height_;
893       M[0] = 0.0f;
894       M[1] = -1.0f;
895       M[2] = lite_mat_rgb.height_ - 1;
896       M[3] = -1.0f;
897       M[4] = 0.0f;
898       M[5] = lite_mat_rgb.width_ - 1;
899     } else if (orientation == 4) {
900       height = lite_mat_rgb.height_;
901       width = lite_mat_rgb.width_;
902       M[0] = 1.0f;
903       M[1] = 0.0f;
904       M[2] = 0.0f;
905       M[3] = 0.0f;
906       M[4] = -1.0f;
907       M[5] = lite_mat_rgb.height_ - 1;
908     } else {
909     }
910     std::vector<size_t> dsize;
911     dsize.push_back(width);
912     dsize.push_back(height);
913     LiteMat lite_mat_affine;
914     std::shared_ptr<Tensor> output_tensor;
915     TensorShape new_shape = TensorShape({height, width, input->shape()[2]});
916     RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor));
917     uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
918     lite_mat_affine.Init(width, height, lite_mat_rgb.channel_, reinterpret_cast<void *>(buffer),
919                          GetLiteCVDataType(input->type()));
920     CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_affine.IsEmpty(), "Rotate: Init image tensor failed, return empty tensor.");
921 
922     bool ret = Affine(lite_mat_rgb, lite_mat_affine, M, dsize, UINT8_C3(0, 0, 0));
923     CHECK_FAIL_RETURN_UNEXPECTED(ret, "Rotate: rotate failed.");
924 
925     *output = output_tensor;
926   } catch (const std::exception &e) {
927     RETURN_STATUS_UNEXPECTED("Rotate: " + std::string(e.what()));
928   }
929   return Status::OK();
930 }
931 
// Returns true when the orientation value is one of 2, 4, 5 or 7, i.e. the values that Rotate
// routes to RotateAngleWithMirror; every other value yields false.
static bool IsMirror(int orientation) {
  switch (orientation) {
    case 2:
    case 4:
    case 5:
    case 7:
      return true;
    default:
      return false;
  }
}
938 // rotate the image by EXIF orientation
Rotate(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const uint64_t orientation)939 Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const uint64_t orientation) {
940   if (input->Rank() != hw_shape && input->Rank() != hwc_rank) {
941     RETURN_STATUS_UNEXPECTED("Rotate: input image is not in shape of <H,W,C> or <H,W>");
942   }
943 
944   if (input->type() != DataType::DE_FLOAT32 && input->type() != DataType::DE_UINT8) {
945     RETURN_STATUS_UNEXPECTED("Rotate: image datatype is not float32 or uint8.");
946   }
947 
948   if (!IsMirror(static_cast<int>(orientation))) {
949     return RotateAngleWithOutMirror(input, output, orientation);
950   } else {
951     return RotateAngleWithMirror(input, output, orientation);
952   }
953 }
954 
GetAffineMatrix(const std::shared_ptr<Tensor> & input,std::vector<float_t> * matrix,float_t degrees,const std::vector<float_t> & translation,float_t scale,const std::vector<float_t> & shear)955 Status GetAffineMatrix(const std::shared_ptr<Tensor> &input, std::vector<float_t> *matrix, float_t degrees,
956                        const std::vector<float_t> &translation, float_t scale, const std::vector<float_t> &shear) {
957   CHECK_FAIL_RETURN_UNEXPECTED(translation.size() >= 2, "AffineOp::Compute translation_ size should >= 2");
958   float_t translation_x = translation[0];
959   float_t translation_y = translation[1];
960   float_t degrees_tmp = 0.0;
961   RETURN_IF_NOT_OK(DegreesToRadians(degrees, &degrees_tmp));
962   float_t shear_x = shear[0];
963   float_t shear_y = shear[1];
964   RETURN_IF_NOT_OK(DegreesToRadians(shear_x, &shear_x));
965   RETURN_IF_NOT_OK(DegreesToRadians(-1 * shear_y, &shear_y));
966 
967   // Apply Affine Transformation
968   //       T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1]
969   //       C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1]
970   //       RSS is rotation with scale and shear matrix
971   //       RSS(a, s, (sx, sy)) =
972   //       = R(a) * S(s) * SHy(sy) * SHx(sx)
973   //       = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(x)/cos(y) - sin(a)), 0 ]
974   //         [ s*sin(a - sy)/cos(sy), s*(-sin(a - sy)*tan(x)/cos(y) + cos(a)), 0 ]
975   //         [ 0                    , 0                                      , 1 ]
976   //
977   // where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears:
978   // SHx(s) = [1, -tan(s)] and SHy(s) = [1      , 0]
979   //          [0, 1      ]              [-tan(s), 1]
980   //
981   // Thus, the affine matrix is M = T * C * RSS * C^-1
982 
983   // image is hwc, rows = shape()[0]
984   float_t cx = (static_cast<float_t>(input->shape()[1]) - 1.0F) / 2.0F;
985   float_t cy = (static_cast<float_t>(input->shape()[0]) - 1.0F) / 2.0F;
986 
987   CHECK_FAIL_RETURN_UNEXPECTED(cos(shear_y) != 0.0, "AffineOp: cos(shear_y) should not be zero.");
988 
989   // Calculate RSS
990   *matrix = std::vector<float_t>{
991     static_cast<float>(scale * cos(degrees_tmp + shear_y) / cos(shear_y)),
992     static_cast<float>(scale * (-1 * cos(degrees_tmp + shear_y) * tan(shear_x) / cos(shear_y) - sin(degrees_tmp))),
993     0,
994     static_cast<float>(scale * sin(degrees_tmp + shear_y) / cos(shear_y)),
995     static_cast<float>(scale * (-1 * sin(degrees_tmp + shear_y) * tan(shear_x) / cos(shear_y) + cos(degrees_tmp))),
996     0};
997   // Compute T * C * RSS * C^-1
998   // Compute T * C * RSS * C^-1
999   (*matrix)[2] = (1 - (*matrix)[0]) * cx - (*matrix)[1] * cy + translation_x;
1000   (*matrix)[5] = (1 - (*matrix)[4]) * cy - (*matrix)[3] * cx + translation_y;
1001   return Status::OK();
1002 }
1003 
Affine(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float_t degrees,const std::vector<float_t> & translation,float_t scale,const std::vector<float_t> & shear,InterpolationMode interpolation,const std::vector<uint8_t> & fill_value)1004 Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float_t degrees,
1005               const std::vector<float_t> &translation, float_t scale, const std::vector<float_t> &shear,
1006               InterpolationMode interpolation, const std::vector<uint8_t> &fill_value) {
1007   try {
1008     CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 3, "Invalid input shape, should be 3.");
1009     if (interpolation != InterpolationMode::kLinear) {
1010       MS_LOG(WARNING) << "Only Bilinear interpolation supported for now";
1011     }
1012     std::vector<float_t> matrix;
1013     RETURN_IF_NOT_OK(GetAffineMatrix(input, &matrix, degrees, translation, scale, shear));
1014     int height = 0;
1015     int width = 0;
1016     CHECK_FAIL_RETURN_UNEXPECTED(matrix.size() <= 6, "Invalid mat shape.");
1017     double M[6] = {};
1018     for (size_t i = 0; i < matrix.size(); i++) {
1019       M[i] = static_cast<double>(matrix[i]);
1020     }
1021     int input_height = static_cast<int>(input->shape()[0]);
1022     int input_width = static_cast<int>(input->shape()[1]);
1023     int input_channel = static_cast<int>(input->shape()[2]);
1024     LiteMat lite_mat_rgb(input_width, input_height, input_channel,
1025                          const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
1026                          GetLiteCVDataType(input->type()));
1027 
1028     height = lite_mat_rgb.height_;
1029     width = lite_mat_rgb.width_;
1030     std::vector<size_t> dsize;
1031     dsize.push_back(width);
1032     dsize.push_back(height);
1033     LiteMat lite_mat_affine;
1034     std::shared_ptr<Tensor> output_tensor;
1035     TensorShape new_shape = TensorShape({height, width, input->shape()[2]});
1036     RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor));
1037     uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
1038     lite_mat_affine.Init(width, height, lite_mat_rgb.channel_, reinterpret_cast<void *>(buffer),
1039                          GetLiteCVDataType(input->type()));
1040     CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_affine.IsEmpty(), "Affine: Init image tensor failed, return empty tensor.");
1041 
1042     bool ret = Affine(lite_mat_rgb, lite_mat_affine, M, dsize,
1043                       UINT8_C3(fill_value[kRIndex], fill_value[kGIndex], fill_value[kBIndex]));
1044     CHECK_FAIL_RETURN_UNEXPECTED(ret, "Affine: affine failed.");
1045 
1046     *output = output_tensor;
1047     return Status::OK();
1048   } catch (const std::exception &e) {
1049     RETURN_STATUS_UNEXPECTED("Affine: " + std::string(e.what()));
1050   }
1051 }
1052 
GaussianBlur(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t kernel_x,int32_t kernel_y,float sigma_x,float sigma_y)1053 Status GaussianBlur(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t kernel_x,
1054                     int32_t kernel_y, float sigma_x, float sigma_y) {
1055   try {
1056     LiteMat lite_mat_input;
1057     int input_height = static_cast<int>(input->shape()[0]);
1058     int input_width = static_cast<int>(input->shape()[1]);
1059     int input_channel = static_cast<int>(input->shape()[2]);
1060     if (input->Rank() == 3) {
1061       if (input->shape()[2] != 1 && input->shape()[2] != 3) {
1062         RETURN_STATUS_UNEXPECTED("GaussianBlur: input image is not in channel of 1 or 3");
1063       }
1064       lite_mat_input = LiteMat(input_width, input_height, input_channel,
1065                                const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
1066                                GetLiteCVDataType(input->type()));
1067     } else if (input->Rank() == 2) {
1068       lite_mat_input =
1069         LiteMat(input_width, input_height, const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
1070                 GetLiteCVDataType(input->type()));
1071     } else {
1072       RETURN_STATUS_UNEXPECTED("GaussianBlur: input image is not in shape of <H,W,C> or <H,W>");
1073     }
1074 
1075     std::shared_ptr<Tensor> output_tensor;
1076     RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), input->type(), &output_tensor));
1077     uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
1078     LiteMat lite_mat_output;
1079     lite_mat_output.Init(lite_mat_input.width_, lite_mat_input.height_, lite_mat_input.channel_,
1080                          reinterpret_cast<void *>(buffer), GetLiteCVDataType(input->type()));
1081     CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_output.IsEmpty(),
1082                                  "GaussianBlur: Init image tensor failed, return empty tensor.");
1083 
1084     bool ret = GaussianBlur(lite_mat_input, lite_mat_output, {kernel_x, kernel_y}, static_cast<double>(sigma_x),
1085                             static_cast<double>(sigma_y));
1086     CHECK_FAIL_RETURN_UNEXPECTED(ret, "GaussianBlur: GaussianBlur failed.");
1087     *output = output_tensor;
1088     return Status::OK();
1089   } catch (const std::exception &e) {
1090     RETURN_STATUS_UNEXPECTED("GaussianBlur: " + std::string(e.what()));
1091   }
1092 }
1093 
ImageNumChannels(const std::shared_ptr<Tensor> & image,dsize_t * channels)1094 Status ImageNumChannels(const std::shared_ptr<Tensor> &image, dsize_t *channels) {
1095   if (image->Rank() < kMinImageRank) {
1096     RETURN_STATUS_UNEXPECTED(
1097       "GetImageNumChannels: invalid parameter, image should have at least two dimensions, but got: " +
1098       std::to_string(image->Rank()));
1099   } else if (image->Rank() == kMinImageRank) {
1100     *channels = 1;
1101   } else {
1102     *channels = image->shape()[-1];
1103   }
1104   return Status::OK();
1105 }
1106 
ValidateImage(const std::shared_ptr<Tensor> & image,const std::string & op_name,const std::set<uint8_t> & valid_dtype,const std::set<dsize_t> & valid_rank,const std::set<dsize_t> & valid_channel)1107 Status ValidateImage(const std::shared_ptr<Tensor> &image, const std::string &op_name,
1108                      const std::set<uint8_t> &valid_dtype, const std::set<dsize_t> &valid_rank,
1109                      const std::set<dsize_t> &valid_channel) {
1110   // Validate image dtype
1111   if (!valid_dtype.empty()) {
1112     auto dtype = image->type();
1113     if (valid_dtype.find(dtype.value()) == valid_dtype.end()) {
1114       std::string err_msg = op_name + ": the data type of image tensor does not match the requirement of operator.";
1115       err_msg += " Expecting tensor in type of " + DataTypeSetToString(valid_dtype);
1116       err_msg += ". But got type " + dtype.ToString() + ".";
1117       RETURN_STATUS_UNEXPECTED(err_msg);
1118     }
1119   }
1120   // Validate image rank
1121   auto rank = image->Rank();
1122   if (!valid_rank.empty()) {
1123     if (valid_rank.find(rank) == valid_rank.end()) {
1124       std::string err_msg = op_name + ": the dimension of image tensor does not match the requirement of operator.";
1125       err_msg += " Expecting tensor in dimension of " + NumberSetToString(valid_rank);
1126       if (valid_rank == std::set<dsize_t>({kMinImageRank, kDefaultImageRank})) {
1127         err_msg += ", in shape of <H, W> or <H, W, C>";
1128       } else if (valid_rank == std::set<dsize_t>({kMinImageRank})) {
1129         err_msg += ", in shape of <H, W>";
1130       } else if (valid_rank == std::set<dsize_t>({kDefaultImageRank})) {
1131         err_msg += ", in shape of <H, W, C>";
1132       }
1133       err_msg += ". But got dimension " + std::to_string(rank) + ".";
1134       if (rank == 1) {
1135         err_msg += " You may need to perform Decode first.";
1136       }
1137       RETURN_STATUS_UNEXPECTED(err_msg);
1138     }
1139   } else {
1140     if (rank < kMinImageRank) {
1141       std::string err_msg =
1142         op_name + ": the image tensor should have at least two dimensions. You may need to perform Decode first.";
1143       RETURN_STATUS_UNEXPECTED(err_msg);
1144     }
1145   }
1146   // Validate image channel
1147   if (!valid_channel.empty()) {
1148     dsize_t channel = 1;
1149     RETURN_IF_NOT_OK(ImageNumChannels(image, &channel));
1150     if (valid_channel.find(channel) == valid_channel.end()) {
1151       std::string err_msg = op_name + ": the channel of image tensor does not match the requirement of operator.";
1152       err_msg += " Expecting tensor in channel of " + NumberSetToString(valid_channel);
1153       err_msg += ". But got channel " + std::to_string(channel) + ".";
1154       RETURN_STATUS_UNEXPECTED(err_msg);
1155     }
1156   }
1157   return Status::OK();
1158 }
1159 
ImageSize(const std::shared_ptr<Tensor> & image,std::vector<dsize_t> * size)1160 Status ImageSize(const std::shared_ptr<Tensor> &image, std::vector<dsize_t> *size) {
1161   RETURN_UNEXPECTED_IF_NULL(size);
1162   *size = std::vector<dsize_t>(kMinImageRank);
1163   if (image->Rank() < kMinImageRank) {
1164     RETURN_STATUS_UNEXPECTED("GetImageSize: invalid parameter, image should have at least two dimensions, but got: " +
1165                              std::to_string(image->Rank()));
1166   } else if (image->Rank() == kMinImageRank) {
1167     (*size)[0] = image->shape()[0];
1168     (*size)[1] = image->shape()[1];
1169   } else {
1170     const int32_t kHeightIndex = -3;
1171     const int32_t kWidthIndex = -2;
1172     (*size)[0] = image->shape()[kHeightIndex];
1173     (*size)[1] = image->shape()[kWidthIndex];
1174   }
1175   return Status::OK();
1176 }
1177 
ValidateImageRank(const std::string & op_name,int32_t rank)1178 Status ValidateImageRank(const std::string &op_name, int32_t rank) {
1179   if (rank != 2 && rank != 3) {
1180     std::string err_msg = op_name + ": image shape is not <H,W,C> or <H, W>, but got rank:" + std::to_string(rank);
1181     if (rank == 1) {
1182       err_msg = err_msg + ", may need to do Decode operation first.";
1183     }
1184     RETURN_STATUS_UNEXPECTED(err_msg);
1185   }
1186   return Status::OK();
1187 }
1188 
HwcToChw(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1189 Status HwcToChw(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1190   try {
1191     if (input->Rank() <= 3) {
1192       int input_height = static_cast<int>(input->shape()[0]);
1193       int input_width = static_cast<int>(input->shape()[1]);
1194       int input_channel = static_cast<int>(input->shape()[2]);
1195       LiteMat lite_mat_hwc(input_width, input_height, input_channel,
1196                            const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
1197                            GetLiteCVDataType(input->type()));
1198       LiteMat lite_mat_chw;
1199       std::shared_ptr<Tensor> output_tensor;
1200       TensorShape new_shape = TensorShape({input_channel, input_height, input_width});
1201       RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor));
1202       uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
1203       lite_mat_chw.Init(input_height, input_channel, input_width, reinterpret_cast<void *>(buffer),
1204                         GetLiteCVDataType(input->type()));
1205       CHECK_FAIL_RETURN_UNEXPECTED(!lite_mat_chw.IsEmpty(), "HwcToChw: Init image tensor failed, return empty tensor.");
1206 
1207       bool ret = HWC2CHW(lite_mat_hwc, lite_mat_chw);
1208       CHECK_FAIL_RETURN_UNEXPECTED(ret, "HwcToChw: HwcToChw failed.");
1209       *output = output_tensor;
1210     } else {
1211       RETURN_STATUS_UNEXPECTED("HwcToChw: input image is not in shape of <H,W,C> or <H,W>");
1212     }
1213   } catch (const std::exception &e) {
1214     RETURN_STATUS_UNEXPECTED("HwcToChw: " + std::string(e.what()));
1215   }
1216   return Status::OK();
1217 }
1218 }  // namespace dataset
1219 }  // namespace mindspore
1220