• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/kernels/image/image_utils.h"
17 
18 #include <opencv2/imgproc/types_c.h>
19 
20 #include <algorithm>
21 #include <fstream>
22 #include <limits>
23 #include <stdexcept>
24 #include <string>
25 #include <vector>
26 
27 #include <opencv2/imgcodecs.hpp>
28 
29 #include "minddata/dataset/core/cv_tensor.h"
30 #include "minddata/dataset/core/tensor.h"
31 #include "minddata/dataset/core/tensor_shape.h"
32 #include "minddata/dataset/include/dataset/constants.h"
33 #include "minddata/dataset/kernels/data/data_utils.h"
34 #include "minddata/dataset/kernels/image/affine_op.h"
35 #include "minddata/dataset/kernels/image/invert_op.h"
36 #include "minddata/dataset/kernels/image/math_utils.h"
37 #include "minddata/dataset/kernels/image/posterize_op.h"
38 #include "minddata/dataset/kernels/image/resize_cubic_op.h"
39 #include "minddata/dataset/kernels/image/sharpness_op.h"
40 #include "utils/file_utils.h"
41 #include "utils/ms_utils.h"
42 
const int32_t MAX_INT_PRECISION = 16777216;  // float int precision is 16777216 (2^24, largest int a float holds exactly)
const int32_t DOUBLING_FACTOR = 2;           // used as multiplier with MAX_INT_PRECISION
const int32_t DEFAULT_NUM_HEIGHT = 1;        // default number of vertical tiles
const int32_t DEFAULT_NUM_WIDTH = 1;         // default number of horizontal tiles
47 
48 namespace mindspore {
49 namespace dataset {
GetCVInterpolationMode(InterpolationMode mode)50 int GetCVInterpolationMode(InterpolationMode mode) {
51   switch (mode) {
52     case InterpolationMode::kLinear:
53       return static_cast<int>(cv::InterpolationFlags::INTER_LINEAR);
54     case InterpolationMode::kCubic:
55       return static_cast<int>(cv::InterpolationFlags::INTER_CUBIC);
56     case InterpolationMode::kArea:
57       return static_cast<int>(cv::InterpolationFlags::INTER_AREA);
58     case InterpolationMode::kNearestNeighbour:
59       return static_cast<int>(cv::InterpolationFlags::INTER_NEAREST);
60     default:
61       return static_cast<int>(cv::InterpolationFlags::INTER_LINEAR);
62   }
63 }
64 
GetCVBorderType(BorderType type)65 int GetCVBorderType(BorderType type) {
66   switch (type) {
67     case BorderType::kConstant:
68       return static_cast<int>(cv::BorderTypes::BORDER_CONSTANT);
69     case BorderType::kEdge:
70       return static_cast<int>(cv::BorderTypes::BORDER_REPLICATE);
71     case BorderType::kReflect:
72       return static_cast<int>(cv::BorderTypes::BORDER_REFLECT101);
73     case BorderType::kSymmetric:
74       return static_cast<int>(cv::BorderTypes::BORDER_REFLECT);
75     default:
76       return static_cast<int>(cv::BorderTypes::BORDER_CONSTANT);
77   }
78 }
79 
GetConvertShape(ConvertMode convert_mode,const std::shared_ptr<CVTensor> & input_cv,std::vector<dsize_t> * node)80 Status GetConvertShape(ConvertMode convert_mode, const std::shared_ptr<CVTensor> &input_cv,
81                        std::vector<dsize_t> *node) {
82   RETURN_UNEXPECTED_IF_NULL(node);
83   std::vector<ConvertMode> one_channels = {ConvertMode::COLOR_BGR2GRAY, ConvertMode::COLOR_RGB2GRAY,
84                                            ConvertMode::COLOR_BGRA2GRAY, ConvertMode::COLOR_RGBA2GRAY};
85   std::vector<ConvertMode> three_channels = {
86     ConvertMode::COLOR_BGRA2BGR, ConvertMode::COLOR_RGBA2RGB, ConvertMode::COLOR_RGBA2BGR, ConvertMode::COLOR_BGRA2RGB,
87     ConvertMode::COLOR_BGR2RGB,  ConvertMode::COLOR_RGB2BGR,  ConvertMode::COLOR_GRAY2BGR, ConvertMode::COLOR_GRAY2RGB};
88   std::vector<ConvertMode> four_channels = {ConvertMode::COLOR_BGR2BGRA,  ConvertMode::COLOR_RGB2RGBA,
89                                             ConvertMode::COLOR_BGR2RGBA,  ConvertMode::COLOR_RGB2BGRA,
90                                             ConvertMode::COLOR_BGRA2RGBA, ConvertMode::COLOR_RGBA2BGRA,
91                                             ConvertMode::COLOR_GRAY2BGRA, ConvertMode::COLOR_GRAY2RGBA};
92   if (std::find(three_channels.begin(), three_channels.end(), convert_mode) != three_channels.end()) {
93     *node = {input_cv->shape()[0], input_cv->shape()[1], 3};
94   } else if (std::find(four_channels.begin(), four_channels.end(), convert_mode) != four_channels.end()) {
95     *node = {input_cv->shape()[0], input_cv->shape()[1], 4};
96   } else if (std::find(one_channels.begin(), one_channels.end(), convert_mode) != one_channels.end()) {
97     *node = {input_cv->shape()[0], input_cv->shape()[1]};
98   } else {
99     RETURN_STATUS_UNEXPECTED(
100       "The mode of image channel conversion must be in ConvertMode, which mainly includes "
101       "conversion between RGB, BGR, GRAY, RGBA etc.");
102   }
103   return Status::OK();
104 }
105 
ImageNumChannels(const std::shared_ptr<Tensor> & image,dsize_t * channels)106 Status ImageNumChannels(const std::shared_ptr<Tensor> &image, dsize_t *channels) {
107   RETURN_UNEXPECTED_IF_NULL(channels);
108   if (image->Rank() < kMinImageRank) {
109     RETURN_STATUS_UNEXPECTED(
110       "GetImageNumChannels: invalid parameter, image should have at least two dimensions, but got: " +
111       std::to_string(image->Rank()));
112   } else if (image->Rank() == kMinImageRank) {
113     *channels = 1;
114   } else {
115     *channels = image->shape()[-1];
116   }
117   return Status::OK();
118 }
119 
ImageSize(const std::shared_ptr<Tensor> & image,std::vector<dsize_t> * size)120 Status ImageSize(const std::shared_ptr<Tensor> &image, std::vector<dsize_t> *size) {
121   RETURN_UNEXPECTED_IF_NULL(size);
122   *size = std::vector<dsize_t>(kMinImageRank);
123   if (image->Rank() < kMinImageRank) {
124     RETURN_STATUS_UNEXPECTED("GetImageSize: invalid parameter, image should have at least two dimensions, but got: " +
125                              std::to_string(image->Rank()));
126   } else if (image->Rank() == kMinImageRank) {
127     (*size)[0] = image->shape()[0];
128     (*size)[1] = image->shape()[1];
129   } else {
130     const int32_t kHeightIndexFromBack = -3;
131     const int32_t kWidthIndexFromBack = -2;
132     (*size)[0] = image->shape()[kHeightIndexFromBack];
133     (*size)[1] = image->shape()[kWidthIndexFromBack];
134   }
135   return Status::OK();
136 }
137 
ValidateImage(const std::shared_ptr<Tensor> & image,const std::string & op_name,const std::set<uint8_t> & valid_dtype,const std::set<dsize_t> & valid_rank,const std::set<dsize_t> & valid_channel)138 Status ValidateImage(const std::shared_ptr<Tensor> &image, const std::string &op_name,
139                      const std::set<uint8_t> &valid_dtype, const std::set<dsize_t> &valid_rank,
140                      const std::set<dsize_t> &valid_channel) {
141   // Validate image dtype
142   if (!valid_dtype.empty()) {
143     auto dtype = image->type();
144     if (valid_dtype.find(dtype.value()) == valid_dtype.end()) {
145       std::string err_msg = op_name + ": the data type of image tensor does not match the requirement of operator.";
146       err_msg += " Expecting tensor in type of " + DataTypeSetToString(valid_dtype);
147       err_msg += ". But got type " + dtype.ToString() + ".";
148       RETURN_STATUS_UNEXPECTED(err_msg);
149     }
150   }
151   // Validate image rank
152   auto rank = image->Rank();
153   if (!valid_rank.empty()) {
154     if (valid_rank.find(rank) == valid_rank.end()) {
155       std::string err_msg = op_name + ": the dimension of image tensor does not match the requirement of operator.";
156       err_msg += " Expecting tensor in dimension of " + NumberSetToString(valid_rank);
157       if (valid_rank == std::set<dsize_t>({kMinImageRank, kDefaultImageRank})) {
158         err_msg += ", in shape of <H, W> or <H, W, C>";
159       } else if (valid_rank == std::set<dsize_t>({kMinImageRank})) {
160         err_msg += ", in shape of <H, W>";
161       } else if (valid_rank == std::set<dsize_t>({kDefaultImageRank})) {
162         err_msg += ", in shape of <H, W, C>";
163       }
164       err_msg += ". But got dimension " + std::to_string(rank) + ".";
165       if (rank == 1) {
166         err_msg += " You may need to perform Decode first.";
167       }
168       RETURN_STATUS_UNEXPECTED(err_msg);
169     }
170   } else {
171     if (rank < kMinImageRank) {
172       std::string err_msg =
173         op_name + ": the image tensor should have at least two dimensions. You may need to perform Decode first.";
174       RETURN_STATUS_UNEXPECTED(err_msg);
175     }
176   }
177   // Validate image channel
178   if (!valid_channel.empty()) {
179     dsize_t channel = 1;
180     RETURN_IF_NOT_OK(ImageNumChannels(image, &channel));
181     if (valid_channel.find(channel) == valid_channel.end()) {
182       std::string err_msg = op_name + ": the channel of image tensor does not match the requirement of operator.";
183       err_msg += " Expecting tensor in channel of " + NumberSetToString(valid_channel);
184       err_msg += ". But got channel " + std::to_string(channel) + ".";
185       RETURN_STATUS_UNEXPECTED(err_msg);
186     }
187   }
188   return Status::OK();
189 }
190 
ValidateImageDtype(const std::string & op_name,DataType dtype)191 Status ValidateImageDtype(const std::string &op_name, DataType dtype) {
192   uint8_t type = dtype.AsCVType();
193   if (type == kCVInvalidType) {
194     std::string type_name = "unknown";
195     if (dtype.value() < DataType::NUM_OF_TYPES) {
196       type_name = std::string(DataType::kTypeInfo[dtype.value()].name_);
197     }
198     std::string err_msg = op_name + ": Cannot convert [" + type_name + "] to OpenCV type." +
199                           " Currently unsupported data type: [uint32, int64, uint64, string]";
200     RETURN_STATUS_UNEXPECTED(err_msg);
201   }
202   return Status::OK();
203 }
204 
ValidateImageRank(const std::string & op_name,int32_t rank)205 Status ValidateImageRank(const std::string &op_name, int32_t rank) {
206   if (rank != kMinImageRank && rank != kDefaultImageRank) {
207     std::string err_msg =
208       op_name + ": input tensor is not in shape of <H,W> or <H,W,C>, but got rank: " + std::to_string(rank);
209     if (rank == 1) {
210       err_msg = err_msg + ". You may need to perform Decode first.";
211     }
212     RETURN_STATUS_UNEXPECTED(err_msg);
213   }
214   return Status::OK();
215 }
216 
CheckTensorShape(const std::shared_ptr<Tensor> & tensor,const int & channel)217 bool CheckTensorShape(const std::shared_ptr<Tensor> &tensor, const int &channel) {
218   if (tensor == nullptr) {
219     return false;
220   }
221   bool rc = false;
222   if (tensor->shape().Size() <= channel) {
223     return false;
224   }
225   if (tensor->Rank() != kDefaultImageRank ||
226       (tensor->shape()[channel] != 1 && tensor->shape()[channel] != kDefaultImageChannel)) {
227     rc = true;
228   }
229   return rc;
230 }
231 
Flip(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output,int flip_code)232 Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int flip_code) {
233   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
234   if (!input_cv->mat().data) {
235     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Flip: load image failed.");
236   }
237 
238   std::shared_ptr<CVTensor> output_cv;
239   RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
240 
241   try {
242     cv::flip(input_cv->mat(), output_cv->mat(), flip_code);
243     *output = std::static_pointer_cast<Tensor>(output_cv);
244   } catch (const cv::Exception &e) {
245     RETURN_STATUS_UNEXPECTED("Flip: " + std::string(e.what()));
246   }
247   return Status::OK();
248 }
249 
HorizontalFlip(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)250 Status HorizontalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
251   return Flip(std::move(input), output, 1);
252 }
253 
VerticalFlip(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)254 Status VerticalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
255   return Flip(std::move(input), output, 0);
256 }
257 
Resize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t output_height,int32_t output_width,double fx,double fy,InterpolationMode mode)258 Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height,
259               int32_t output_width, double fx, double fy, InterpolationMode mode) {
260   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
261   if (!input_cv->mat().data) {
262     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Resize: load image failed.");
263   }
264   RETURN_IF_NOT_OK(ValidateImageRank("Resize", input_cv->Rank()));
265 
266   cv::Mat in_image = input_cv->mat();
267   const uint32_t kResizeShapeLimits = 1000;
268   // resize image too large or too small, 1000 is arbitrarily chosen here to prevent open cv from segmentation fault
269   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() / kResizeShapeLimits) > in_image.rows,
270                                "Resize: in_image rows out of bounds.");
271   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() / kResizeShapeLimits) > in_image.cols,
272                                "Resize: in_image cols out of bounds.");
273   if (output_height > in_image.rows * kResizeShapeLimits || output_width > in_image.cols * kResizeShapeLimits) {
274     RETURN_STATUS_ERROR(
275       StatusCode::kMDShapeMisMatch,
276       "Resize: the resizing width or height is too big, it's 1000 times bigger than the original image, got output "
277       "height: " +
278         std::to_string(output_height) + ", width: " + std::to_string(output_width) +
279         ", and original image size:" + std::to_string(in_image.rows) + ", " + std::to_string(in_image.cols));
280   }
281   if (output_height == 0 || output_width == 0) {
282     RETURN_STATUS_ERROR(StatusCode::kMDShapeMisMatch,
283                         "Resize: the input value of 'resize' is invalid, width or height is zero.");
284   }
285 
286   if (mode == InterpolationMode::kCubicPil) {
287     if (input_cv->shape().Size() != kDefaultImageChannel ||
288         input_cv->shape()[kChannelIndexHWC] != kDefaultImageChannel) {
289       RETURN_STATUS_UNEXPECTED("Resize: Interpolation mode PILCUBIC only supports image with 3 channels, but got: " +
290                                input_cv->shape().ToString());
291     }
292 
293     LiteMat imIn, imOut;
294     std::shared_ptr<Tensor> output_tensor;
295     TensorShape new_shape = TensorShape({output_height, output_width, 3});
296     RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input_cv->type(), &output_tensor));
297     uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
298     imOut.Init(output_width, output_height, static_cast<int>(input_cv->shape()[kChannelIndexHWC]),
299                reinterpret_cast<void *>(buffer), LDataType::UINT8);
300     imIn.Init(static_cast<int>(input_cv->shape()[1]), static_cast<int>(input_cv->shape()[0]),
301               static_cast<int>(input_cv->shape()[kChannelIndexHWC]), input_cv->mat().data, LDataType::UINT8);
302     if (ResizeCubic(imIn, imOut, output_width, output_height) == false) {
303       RETURN_STATUS_UNEXPECTED("Resize: failed to do resize, please check the error msg.");
304     }
305     *output = output_tensor;
306     return Status::OK();
307   }
308   try {
309     TensorShape shape{output_height, output_width};
310     if (input_cv->Rank() == kDefaultImageRank) {
311       int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
312       shape = shape.AppendDim(num_channels);
313     }
314     std::shared_ptr<CVTensor> output_cv;
315     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));
316 
317     auto cv_mode = GetCVInterpolationMode(mode);
318     cv::resize(in_image, output_cv->mat(), cv::Size(output_width, output_height), fx, fy, cv_mode);
319     *output = std::static_pointer_cast<Tensor>(output_cv);
320     return Status::OK();
321   } catch (const cv::Exception &e) {
322     RETURN_STATUS_UNEXPECTED("Resize: " + std::string(e.what()));
323   }
324 }
325 
// Magic-number prefixes used to sniff the encoding of raw image bytes.
const unsigned char kJpegMagic[] = "\xFF\xD8\xFF";  // JPEG SOI marker
constexpr dsize_t kJpegMagicLen = 3;
const unsigned char kPngMagic[] = "\x89\x50\x4E\x47";  // first four bytes of the PNG signature
constexpr dsize_t kPngMagicLen = 4;
330 
IsNonEmptyJPEG(const std::shared_ptr<Tensor> & input)331 bool IsNonEmptyJPEG(const std::shared_ptr<Tensor> &input) {
332   if (input->type() == DataType::DE_BYTES) {
333     uint32_t len = 0;
334     if (input->GetStringLength(&len) != Status::OK()) {
335       MS_LOG(ERROR) << "Get string length from bytes field failed.";
336       return false;
337     }
338     return len > kJpegMagicLen && memcmp(input->GetStringsBuffer(), kJpegMagic, kJpegMagicLen) == 0;
339   }
340   return input->SizeInBytes() > kJpegMagicLen && memcmp(input->GetMutableBuffer(), kJpegMagic, kJpegMagicLen) == 0;
341 }
342 
IsNonEmptyPNG(const std::shared_ptr<Tensor> & input)343 bool IsNonEmptyPNG(const std::shared_ptr<Tensor> &input) {
344   if (input->type() == DataType::DE_BYTES) {
345     uint32_t len = 0;
346     if (input->GetStringLength(&len) != Status::OK()) {
347       MS_LOG(ERROR) << "Get string length from bytes field failed.";
348       return false;
349     }
350     return len > kPngMagicLen && memcmp(input->GetStringsBuffer(), kPngMagic, kPngMagicLen) == 0;
351   }
352   return input->SizeInBytes() > kPngMagicLen && memcmp(input->GetMutableBuffer(), kPngMagic, kPngMagicLen) == 0;
353 }
354 
Decode(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)355 Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
356   RETURN_IF_NOT_OK(CheckUnsupportedImage(input));
357 
358   if (input->type() == DataType::DE_BYTES && input->shape().NumOfElements() != 1) {
359     RETURN_STATUS_UNEXPECTED("Decode: couldn't decode bytes field with multi dims.");
360   }
361 
362   Status ret;
363   if (IsNonEmptyJPEG(input)) {
364     ret = JpegCropAndDecode(input, output);
365   } else {
366     ret = DecodeCv(input, output);
367   }
368 
369   // decode failed and dump it
370   if (ret != Status::OK()) {
371     return DumpImageAndAppendStatus(input, ret);
372   }
373   return ret;
374 }
375 
DecodeCv(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)376 Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
377   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
378   if (!input_cv->mat().data) {
379     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: load image failed.");
380   }
381   try {
382     cv::Mat img_mat = cv::imdecode(input_cv->mat(), cv::IMREAD_COLOR | cv::IMREAD_IGNORE_ORIENTATION);
383     if (img_mat.data == nullptr) {
384       std::string err = "Decode: image decode failed.";
385       RETURN_STATUS_UNEXPECTED(err);
386     }
387     cv::cvtColor(img_mat, img_mat, static_cast<int>(cv::COLOR_BGR2RGB));
388     std::shared_ptr<CVTensor> output_cv;
389     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, 3, &output_cv));
390     *output = std::static_pointer_cast<Tensor>(output_cv);
391     return Status::OK();
392   } catch (const cv::Exception &e) {
393     RETURN_STATUS_UNEXPECTED("Decode: " + std::string(e.what()));
394   }
395 }
396 
JpegInitSource(j_decompress_ptr cinfo)397 static void JpegInitSource(j_decompress_ptr cinfo) {}
398 
// libjpeg fill_input_buffer callback. The whole JPEG is supplied up front
// by JpegSetSource, so an exhausted buffer means the stream is truncated:
// report the error and return FALSE instead of suspending input.
static boolean JpegFillInputBuffer(j_decompress_ptr cinfo) {
  if (cinfo->src->bytes_in_buffer == 0) {
    // Under ARM platform raise runtime_error may cause core problem,
    // so we catch runtime_error and just return FALSE.
    try {
      ERREXIT(cinfo, JERR_INPUT_EMPTY);
    } catch (std::runtime_error &e) {
      return FALSE;
    }
    return FALSE;
  }
  return TRUE;
}
412 
JpegTermSource(j_decompress_ptr cinfo)413 static void JpegTermSource(j_decompress_ptr cinfo) {}
414 
JpegSkipInputData(j_decompress_ptr cinfo,int64_t jump)415 static void JpegSkipInputData(j_decompress_ptr cinfo, int64_t jump) {
416   if (jump < 0) {
417     return;
418   }
419   if (static_cast<size_t>(jump) > cinfo->src->bytes_in_buffer) {
420     cinfo->src->bytes_in_buffer = 0;
421     return;
422   } else {
423     cinfo->src->bytes_in_buffer -= jump;
424     cinfo->src->next_input_byte += jump;
425   }
426 }
427 
// Install an in-memory data source on a libjpeg decompressor. The source
// manager is allocated from libjpeg's permanent pool so it lives as long
// as cinfo itself; `data` must stay valid for the whole decode.
void JpegSetSource(j_decompress_ptr cinfo, const void *data, int64_t datasize) {
  cinfo->src = static_cast<struct jpeg_source_mgr *>(
    (*cinfo->mem->alloc_small)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_PERMANENT, sizeof(struct jpeg_source_mgr)));
  cinfo->src->init_source = JpegInitSource;
  cinfo->src->fill_input_buffer = JpegFillInputBuffer;
#if defined(_WIN32) || defined(_WIN64) || defined(ENABLE_ARM32) || defined(__APPLE__)
  // On these platforms skip_input_data is declared taking `long`, so the
  // int64_t-taking helper is cast to match. NOTE(review): this relies on
  // long/int64_t layout compatibility per target — confirm for each platform.
  cinfo->src->skip_input_data = reinterpret_cast<void (*)(j_decompress_ptr, long)>(JpegSkipInputData);
#else
  cinfo->src->skip_input_data = JpegSkipInputData;
#endif
  cinfo->src->resync_to_restart = jpeg_resync_to_restart;  // stock resync handler
  cinfo->src->term_source = JpegTermSource;
  cinfo->src->bytes_in_buffer = datasize;
  cinfo->src->next_input_byte = static_cast<const JOCTET *>(data);
}
443 
444 thread_local std::vector<Status> jpeg_status;
445 
CheckJpegExit(jpeg_decompress_struct * cinfo)446 Status CheckJpegExit(jpeg_decompress_struct *cinfo) {
447   if (!jpeg_status.empty()) {
448     jpeg_destroy_decompress(cinfo);
449     Status s = jpeg_status[0];
450     jpeg_status.clear();
451     return s;
452   }
453   return Status::OK();
454 }
455 
// Read scanlines from an opened decompressor into `buffer` until the
// decompressor's output_scanline reaches max_scanlines_to_read, converting
// CMYK to RGB on the fly when needed. `offset` and `stride` are in samples
// of the (iMCU-aligned) decoded scanline / the 3-channel output row.
static Status JpegReadScanlines(jpeg_decompress_struct *const cinfo, int max_scanlines_to_read, JSAMPLE *buffer,
                                int buffer_size, int crop_w, int crop_w_aligned, int offset, int stride) {
  // scanlines will be read to this buffer first, must have the number
  // of components equal to the number of components in the image
  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int64_t>::max() / cinfo->output_components) > crop_w_aligned,
                               "JpegReadScanlines: multiplication out of bounds.");
  int64_t scanline_size = crop_w_aligned * cinfo->output_components;
  std::vector<JSAMPLE> scanline(scanline_size);
  JSAMPLE *scanline_ptr = &scanline[0];
  while (cinfo->output_scanline < static_cast<unsigned int>(max_scanlines_to_read)) {
    int num_lines_read = 0;
    try {
      // One scanline at a time; CheckJpegExit surfaces handler-recorded errors.
      num_lines_read = static_cast<int>(jpeg_read_scanlines(cinfo, &scanline_ptr, 1));
      RETURN_IF_NOT_OK(CheckJpegExit(cinfo));
    } catch (std::runtime_error &e) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: image decode failed.");
    }
    if (cinfo->out_color_space == JCS_CMYK && num_lines_read > 0) {
      // Manual CMYK -> RGB conversion, pixel by pixel.
      for (int i = 0; i < crop_w; ++i) {
        const int cmyk_pixel = 4 * i + offset;
        const int c = scanline_ptr[cmyk_pixel];
        const int m = scanline_ptr[cmyk_pixel + 1];
        const int y = scanline_ptr[cmyk_pixel + 2];
        const int k = scanline_ptr[cmyk_pixel + 3];
        int r, g, b;
        if (cinfo->saw_Adobe_marker) {
          // Adobe-flavoured JPEGs store inverted CMYK values.
          r = (k * c) / kMaxBitValue;
          g = (k * m) / kMaxBitValue;
          b = (k * y) / kMaxBitValue;
        } else {
          r = (kMaxBitValue - c) * (kMaxBitValue - k) / kMaxBitValue;
          g = (kMaxBitValue - m) * (kMaxBitValue - k) / kMaxBitValue;
          b = (kMaxBitValue - y) * (kMaxBitValue - k) / kMaxBitValue;
        }
        buffer[kDefaultImageChannel * i + kRIndex] = r;
        buffer[kDefaultImageChannel * i + kGIndex] = g;
        buffer[kDefaultImageChannel * i + kBIndex] = b;
      }
    } else if (num_lines_read > 0) {
      // Already RGB: copy the cropped span straight into the output row.
      int copy_status = memcpy_s(buffer, buffer_size, scanline_ptr + offset, stride);
      if (copy_status != 0) {
        jpeg_destroy_decompress(cinfo);
        RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: memcpy failed.");
      }
    } else {
      // jpeg_read_scanlines returned 0: the stream is truncated or broken.
      jpeg_destroy_decompress(cinfo);
      std::string err_msg = "[Internal ERROR] Decode: image decode failed.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
    // Advance one output row.
    buffer += stride;
    buffer_size = buffer_size - stride;
  }
  return Status::OK();
}
510 
JpegSetColorSpace(jpeg_decompress_struct * cinfo)511 static Status JpegSetColorSpace(jpeg_decompress_struct *cinfo) {
512   switch (cinfo->num_components) {
513     case 1:
514       // we want to output 3 components if it's grayscale
515       cinfo->out_color_space = JCS_RGB;
516       return Status::OK();
517     case 3:
518       cinfo->out_color_space = JCS_RGB;
519       return Status::OK();
520     case 4:
521       // Need to manually convert to RGB
522       cinfo->out_color_space = JCS_CMYK;
523       return Status::OK();
524     default:
525       jpeg_destroy_decompress(cinfo);
526       std::string err_msg = "[Internal ERROR] Decode: image decode failed.";
527       RETURN_STATUS_UNEXPECTED(err_msg);
528   }
529 }
530 
// Custom libjpeg error_exit handler: format the library's message and stash
// it in thread-local jpeg_status for CheckJpegExit to pick up.
void JpegErrorExitCustom(j_common_ptr cinfo) {
  char jpeg_error_msg[JMSG_LENGTH_MAX];
  (*(cinfo->err->format_message))(cinfo, jpeg_error_msg);
  // we encounter core dump when execute jpeg_start_decompress at arm platform,
  // so we collect Status instead of throwing exception.
  jpeg_status.emplace_back(
    STATUS_ERROR(StatusCode::kMDUnexpectedError, "Error raised by libjpeg: " + std::string(jpeg_error_msg)));
}
539 
// Decode (a region of) a JPEG held in `input` into an RGB uint8 tensor of
// shape <crop_h, crop_w, 3>. A crop of all zeros means "decode the whole
// image". libjpeg's jpeg_crop_scanline can only crop at iMCU boundaries, so
// rows are decoded at crop_w_aligned width and copied out at `offset`.
Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int crop_x, int crop_y,
                         int crop_w, int crop_h) {
  struct jpeg_decompress_struct cinfo {};
  auto DestroyDecompressAndReturnError = [&cinfo](const std::string &err) {
    jpeg_destroy_decompress(&cinfo);
    RETURN_STATUS_UNEXPECTED(err);
  };
  struct JpegErrorManagerCustom jerr {};
  cinfo.err = jpeg_std_error(&jerr.pub);
  // Record errors in jpeg_status instead of letting libjpeg exit the process.
  jerr.pub.error_exit = JpegErrorExitCustom;
  try {
    jpeg_create_decompress(&cinfo);
    if (input->type() == DataType::DE_BYTES) {
      uint32_t len = 0;
      RETURN_IF_NOT_OK(input->GetStringLength(&len));
      JpegSetSource(&cinfo, input->GetStringsBuffer(), len);
    } else {
      JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes());
    }
    (void)jpeg_read_header(&cinfo, TRUE);
    RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo));
    jpeg_calc_output_dimensions(&cinfo);
    RETURN_IF_NOT_OK(CheckJpegExit(&cinfo));
  } catch (std::runtime_error &e) {
    return DestroyDecompressAndReturnError(e.what());
  }
  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_w) > crop_x,
                               "JpegCropAndDecode: addition(crop x and crop width) out of bounds, got crop x:" +
                                 std::to_string(crop_x) + ", and crop width:" + std::to_string(crop_w));
  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_h) > crop_y,
                               "JpegCropAndDecode: addition(crop y and crop height) out of bounds, got crop y:" +
                                 std::to_string(crop_y) + ", and crop height:" + std::to_string(crop_h));
  if (crop_x == 0 && crop_y == 0 && crop_w == 0 && crop_h == 0) {
    // No crop requested: decode the full image.
    crop_w = static_cast<int>(cinfo.output_width);
    crop_h = static_cast<int>(cinfo.output_height);
  } else if (crop_w == 0 || static_cast<unsigned int>(crop_w + crop_x) > cinfo.output_width || crop_h == 0 ||
             static_cast<unsigned int>(crop_h + crop_y) > cinfo.output_height) {
    return DestroyDecompressAndReturnError(
      "Crop: invalid crop size, corresponding crop value equal to 0 or too big, got crop width: " +
      std::to_string(crop_w) + ", crop height:" + std::to_string(crop_h) +
      ", and crop x coordinate:" + std::to_string(crop_x) + ", crop y coordinate:" + std::to_string(crop_y));
  }
  // Align crop_x down to an iMCU boundary and widen the read window to cover
  // the requested region; jpeg_crop_scanline requires this.
  const int mcu_size = cinfo.min_DCT_scaled_size;
  CHECK_FAIL_RETURN_UNEXPECTED(mcu_size != 0, "JpegCropAndDecode: divisor mcu_size is zero.");
  unsigned int crop_x_aligned = (crop_x / mcu_size) * mcu_size;
  unsigned int crop_w_aligned = crop_w + crop_x - crop_x_aligned;
  try {
    bool status = jpeg_start_decompress(&cinfo);
    CHECK_FAIL_RETURN_UNEXPECTED(status, "JpegCropAndDecode: fail to decode, jpeg maybe a multi-scan file or broken.");
    RETURN_IF_NOT_OK(CheckJpegExit(&cinfo));
    jpeg_crop_scanline(&cinfo, &crop_x_aligned, &crop_w_aligned);
    RETURN_IF_NOT_OK(CheckJpegExit(&cinfo));
  } catch (std::runtime_error &e) {
    return DestroyDecompressAndReturnError(e.what());
  }
  JDIMENSION skipped_scanlines = jpeg_skip_scanlines(&cinfo, crop_y);
  // three number of output components, always convert to RGB and output
  constexpr int kOutNumComponents = 3;
  TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents});
  std::shared_ptr<Tensor> output_tensor;
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(ts, DataType(DataType::DE_UINT8), &output_tensor));
  const int buffer_size = static_cast<int>(output_tensor->SizeInBytes());
  JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>()));
  // NOTE(review): this guard compares against float max, so it is effectively
  // always true for int-range operands — confirm whether an int32-based
  // overflow check was intended here.
  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<float_t>::max() - skipped_scanlines) > crop_h,
                               "JpegCropAndDecode: addition out of bounds.");
  const int max_scanlines_to_read = static_cast<int>(skipped_scanlines) + crop_h;
  // stride refers to output tensor, which has 3 components at most
  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() / crop_w) > kOutNumComponents,
                               "JpegCropAndDecode: multiplication out of bounds.");
  const int stride = crop_w * kOutNumComponents;
  // offset is calculated for scanlines read from the image, therefore
  // has the same number of components as the image
  // NOTE(review): minius_value may be 0 when crop_x is already aligned; the
  // float division then yields +inf and the check passes — confirm intended.
  int minius_value = crop_x - static_cast<int>(crop_x_aligned);
  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<float_t>::max() / minius_value) > cinfo.output_components,
                               "JpegCropAndDecode: multiplication out of bounds.");
  const int offset = minius_value * cinfo.output_components;
  RETURN_IF_NOT_OK(
    JpegReadScanlines(&cinfo, max_scanlines_to_read, buffer, buffer_size, crop_w, crop_w_aligned, offset, stride));
  *output = output_tensor;
  jpeg_destroy_decompress(&cinfo);
  return Status::OK();
}
622 
Rescale(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float rescale,float shift)623 Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rescale, float shift) {
624   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
625   if (!input_cv->mat().data) {
626     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Rescale: load image failed.");
627   }
628   cv::Mat input_image = input_cv->mat();
629   std::shared_ptr<CVTensor> output_cv;
630   RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), DataType(DataType::DE_FLOAT32), &output_cv));
631   try {
632     input_image.convertTo(output_cv->mat(), CV_32F, rescale, shift);
633     *output = std::static_pointer_cast<Tensor>(output_cv);
634   } catch (const cv::Exception &e) {
635     RETURN_STATUS_UNEXPECTED("Rescale: " + std::string(e.what()));
636   }
637   return Status::OK();
638 }
639 
Crop(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int x,int y,int w,int h)640 Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h) {
641   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
642   if (!input_cv->mat().data) {
643     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Crop: load image failed.");
644   }
645   RETURN_IF_NOT_OK(ValidateImageRank("Crop", input_cv->Rank()));
646   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - y) > h,
647                                "Crop: addition(x and height) out of bounds, got height:" + std::to_string(h) +
648                                  ", and coordinate y:" + std::to_string(y));
649   // account for integer overflow
650   if (y < 0 || (y + h) > input_cv->shape()[0] || (y + h) < 0) {
651     RETURN_STATUS_UNEXPECTED(
652       "Crop: invalid y coordinate value for crop, y coordinate value exceeds the boundary of the image, got y: " +
653       std::to_string(y));
654   }
655   CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - x) > w, "Crop: addition out of bounds.");
656   // account for integer overflow
657   if (x < 0 || (x + w) > input_cv->shape()[1] || (x + w) < 0) {
658     RETURN_STATUS_UNEXPECTED(
659       "Crop: invalid x coordinate value for crop, "
660       "x coordinate value exceeds the boundary of the image, got x: " +
661       std::to_string(x));
662   }
663   try {
664     TensorShape shape{h, w};
665     if (input_cv->Rank() == kDefaultImageRank) {
666       int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
667       shape = shape.AppendDim(num_channels);
668     }
669     std::shared_ptr<CVTensor> output_cv;
670     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));
671     cv::Rect roi(x, y, w, h);
672     (input_cv->mat())(roi).copyTo(output_cv->mat());
673     *output = std::static_pointer_cast<Tensor>(output_cv);
674     return Status::OK();
675   } catch (const cv::Exception &e) {
676     RETURN_STATUS_UNEXPECTED("Crop: " + std::string(e.what()));
677   }
678 }
679 
ConvertColor(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,ConvertMode convert_mode)680 Status ConvertColor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, ConvertMode convert_mode) {
681   try {
682     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
683     RETURN_IF_NOT_OK(ValidateImageRank("ConvertColor", input_cv->Rank()));
684     if (!input_cv->mat().data) {
685       RETURN_STATUS_UNEXPECTED("[Internal ERROR] ConvertColor: load image failed.");
686     }
687     if (input_cv->Rank() == kDefaultImageRank) {
688       int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
689       if (num_channels != kMinImageChannel && num_channels != kDefaultImageChannel &&
690           num_channels != kMaxImageChannel) {
691         RETURN_STATUS_UNEXPECTED("ConvertColor: number of channels of image should be 1, 3, 4, but got:" +
692                                  std::to_string(num_channels));
693       }
694     }
695     std::vector<dsize_t> node;
696     RETURN_IF_NOT_OK(GetConvertShape(convert_mode, input_cv, &node));
697     if (node.empty()) {
698       RETURN_STATUS_UNEXPECTED(
699         "ConvertColor: convert mode must be in ConvertMode, which mainly includes conversion "
700         "between RGB, BGR, GRAY, RGBA etc.");
701     }
702     TensorShape out_shape = TensorShape(node);
703     std::shared_ptr<CVTensor> output_cv;
704     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
705     cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(convert_mode));
706     *output = std::static_pointer_cast<Tensor>(output_cv);
707     return Status::OK();
708   } catch (const cv::Exception &e) {
709     RETURN_STATUS_UNEXPECTED("ConvertColor: " + std::string(e.what()));
710   }
711 }
712 
HwcToChw(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)713 Status HwcToChw(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
714   try {
715     if (input->Rank() == kMinImageRank) {
716       // If input tensor is 2D, we assume we have hw dimensions
717       *output = input;
718       return Status::OK();
719     }
720     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
721     if (!input_cv->mat().data) {
722       RETURN_STATUS_UNEXPECTED("[Internal ERROR] HWC2CHW: load image failed.");
723     }
724     if (input_cv->Rank() != kDefaultImageRank) {
725       RETURN_STATUS_UNEXPECTED("HWC2CHW: image shape should be <H,W> or <H,W,C>, but got rank: " +
726                                std::to_string(input_cv->Rank()));
727     }
728     cv::Mat output_img;
729 
730     int height = static_cast<int>(input_cv->shape()[0]);
731     int width = static_cast<int>(input_cv->shape()[1]);
732     int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
733 
734     std::shared_ptr<CVTensor> output_cv;
735     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(TensorShape{num_channels, height, width}, input_cv->type(), &output_cv));
736 
737     for (int i = 0; i < num_channels; ++i) {
738       cv::Mat mat;
739       RETURN_IF_NOT_OK(output_cv->MatAtIndex({i}, &mat));
740       cv::extractChannel(input_cv->mat(), mat, i);
741     }
742     *output = std::move(output_cv);
743     return Status::OK();
744   } catch (const cv::Exception &e) {
745     RETURN_STATUS_UNEXPECTED("HWC2CHW: " + std::string(e.what()));
746   }
747 }
748 
MaskWithTensor(const std::shared_ptr<Tensor> & sub_mat,std::shared_ptr<Tensor> * input,int x,int y,int crop_width,int crop_height,ImageFormat image_format)749 Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Tensor> *input, int x, int y,
750                       int crop_width, int crop_height, ImageFormat image_format) {
751   constexpr int64_t input_shape = 2;
752   if (image_format == ImageFormat::HWC) {
753     if (CheckTensorShape(*input, input_shape)) {
754       RETURN_STATUS_UNEXPECTED(
755         "CutMixBatch: MaskWithTensor failed: "
756         "input shape doesn't match <H,W,C> format, got shape:" +
757         (*input)->shape().ToString());
758     }
759     if (CheckTensorShape(sub_mat, input_shape)) {
760       RETURN_STATUS_UNEXPECTED(
761         "CutMixBatch: MaskWithTensor failed: "
762         "sub_mat shape doesn't match <H,W,C> format, got shape:" +
763         (*input)->shape().ToString());
764     }
765     int number_of_channels = static_cast<int>((*input)->shape()[kChannelIndexHWC]);
766     for (int i = 0; i < crop_width; i++) {
767       for (int j = 0; j < crop_height; j++) {
768         for (int c = 0; c < number_of_channels; c++) {
769           RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {j, i, c}, {y + j, x + i, c}));
770         }
771       }
772     }
773   } else if (image_format == ImageFormat::CHW) {
774     if (CheckTensorShape(*input, 0)) {
775       RETURN_STATUS_UNEXPECTED(
776         "CutMixBatch: MaskWithTensor failed: "
777         "input shape doesn't match <C,H,W> format, got shape:" +
778         (*input)->shape().ToString());
779     }
780     if (CheckTensorShape(sub_mat, 0)) {
781       RETURN_STATUS_UNEXPECTED(
782         "CutMixBatch: MaskWithTensor failed: "
783         "sub_mat shape doesn't match <C,H,W> format, got shape:" +
784         (*input)->shape().ToString());
785     }
786     int number_of_channels = static_cast<int>((*input)->shape()[0]);
787     for (int i = 0; i < crop_width; i++) {
788       for (int j = 0; j < crop_height; j++) {
789         for (int c = 0; c < number_of_channels; c++) {
790           RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {c, j, i}, {c, y + j, x + i}));
791         }
792       }
793     }
794   } else if (image_format == ImageFormat::HW) {
795     if ((*input)->Rank() != kMinImageRank) {
796       RETURN_STATUS_UNEXPECTED(
797         "CutMixBatch: MaskWithTensor failed: "
798         "input shape doesn't match <H,W> format, got shape:" +
799         (*input)->shape().ToString());
800     }
801     if (sub_mat->Rank() != kMinImageRank) {
802       RETURN_STATUS_UNEXPECTED(
803         "CutMixBatch: MaskWithTensor failed: "
804         "sub_mat shape doesn't match <H,W> format, got shape:" +
805         (*input)->shape().ToString());
806     }
807     for (int i = 0; i < crop_width; i++) {
808       for (int j = 0; j < crop_height; j++) {
809         RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {j, i}, {y + j, x + i}));
810       }
811     }
812   } else {
813     RETURN_STATUS_UNEXPECTED(
814       "CutMixBatch: MaskWithTensor failed: "
815       "image format must be <C,H,W>, <H,W,C>, or <H,W>, got shape:" +
816       (*input)->shape().ToString());
817   }
818   return Status::OK();
819 }
820 
CopyTensorValue(const std::shared_ptr<Tensor> & source_tensor,std::shared_ptr<Tensor> * dest_tensor,const std::vector<int64_t> & source_indx,const std::vector<int64_t> & dest_indx)821 Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared_ptr<Tensor> *dest_tensor,
822                        const std::vector<int64_t> &source_indx, const std::vector<int64_t> &dest_indx) {
823   if (source_tensor->type() != (*dest_tensor)->type()) {
824     RETURN_STATUS_UNEXPECTED(
825       "CutMixBatch: CopyTensorValue failed: "
826       "source and destination tensor must have the same type.");
827   }
828   if (source_tensor->type() == DataType::DE_UINT8) {
829     uint8_t pixel_value = 0;
830     RETURN_IF_NOT_OK(source_tensor->GetItemAt(&pixel_value, source_indx));
831     RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, pixel_value));
832   } else if (source_tensor->type() == DataType::DE_FLOAT32) {
833     float pixel_value = 0;
834     RETURN_IF_NOT_OK(source_tensor->GetItemAt(&pixel_value, source_indx));
835     RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, pixel_value));
836   } else {
837     RETURN_STATUS_UNEXPECTED(
838       "CutMixBatch: CopyTensorValue failed: "
839       "Tensor type is not supported. Tensor type must be float32 or uint8.");
840   }
841   return Status::OK();
842 }
843 
SwapRedAndBlue(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)844 Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
845   try {
846     RETURN_IF_NOT_OK(ValidateImage(input, "SwapRedBlue", {3, 5, 11}));
847     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
848     CHECK_FAIL_RETURN_UNEXPECTED(
849       input_cv->shape().Size() > kChannelIndexHWC,
850       "SwapRedAndBlue: rank of input data should be greater than:" + std::to_string(kChannelIndexHWC) +
851         ", but got:" + std::to_string(input_cv->shape().Size()));
852     int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
853     if (input_cv->shape().Size() != kDefaultImageRank || num_channels != kDefaultImageChannel) {
854       RETURN_STATUS_UNEXPECTED("SwapRedBlue: image shape should be in <H,W,C> format, but got:" +
855                                input_cv->shape().ToString());
856     }
857     std::shared_ptr<CVTensor> output_cv;
858     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
859 
860     cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_BGR2RGB));
861     *output = std::static_pointer_cast<Tensor>(output_cv);
862     return Status::OK();
863   } catch (const cv::Exception &e) {
864     RETURN_STATUS_UNEXPECTED("SwapRedBlue: " + std::string(e.what()));
865   }
866 }
867 
CropAndResize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int x,int y,int crop_height,int crop_width,int target_height,int target_width,InterpolationMode mode)868 Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y,
869                      int crop_height, int crop_width, int target_height, int target_width, InterpolationMode mode) {
870   try {
871     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
872     if (!input_cv->mat().data) {
873       RETURN_STATUS_UNEXPECTED("[Internal ERROR] CropAndResize: load image failed.");
874     }
875     RETURN_IF_NOT_OK(ValidateImageRank("CropAndResize", input_cv->Rank()));
876     // image too large or too small, 1000 is arbitrary here to prevent opencv from segmentation fault
877     const uint32_t kCropShapeLimits = 1000;
878     CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() / kCropShapeLimits) > crop_height,
879                                  "CropAndResize: crop_height out of bounds.");
880     CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() / kCropShapeLimits) > crop_width,
881                                  "CropAndResize: crop_width out of bounds.");
882     if (crop_height == 0 || crop_width == 0 || target_height == 0 || target_height > crop_height * kCropShapeLimits ||
883         target_width == 0 || target_width > crop_width * kCropShapeLimits) {
884       std::string err_msg =
885         "CropAndResize: the resizing width or height 1) is too big, it's up to " + std::to_string(kCropShapeLimits) +
886         " times the original image; 2) can not be 0. Detail info is: crop_height: " + std::to_string(crop_height) +
887         ", crop_width: " + std::to_string(crop_width) + ", target_height: " + std::to_string(target_height) +
888         ", target_width: " + std::to_string(target_width);
889       RETURN_STATUS_UNEXPECTED(err_msg);
890     }
891     cv::Rect roi(x, y, crop_width, crop_height);
892     auto cv_mode = GetCVInterpolationMode(mode);
893     cv::Mat cv_in = input_cv->mat();
894 
895     if (mode == InterpolationMode::kCubicPil) {
896       if (input_cv->shape().Size() != kDefaultImageChannel ||
897           input_cv->shape()[kChannelIndexHWC] != kDefaultImageChannel) {
898         RETURN_STATUS_UNEXPECTED(
899           "CropAndResize: Interpolation mode PILCUBIC only supports image with 3 channels, but got: " +
900           input_cv->shape().ToString());
901       }
902 
903       cv::Mat input_roi = cv_in(roi);
904       std::shared_ptr<CVTensor> input_image;
905       RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_roi, input_cv->Rank(), &input_image));
906       LiteMat imIn, imOut;
907       std::shared_ptr<Tensor> output_tensor;
908       TensorShape new_shape = TensorShape({target_height, target_width, 3});
909       RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input_cv->type(), &output_tensor));
910       uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
911       int input_channel = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
912       imOut.Init(target_width, target_height, input_channel, reinterpret_cast<void *>(buffer), LDataType::UINT8);
913       int input_height = static_cast<int>(input_image->shape()[0]);
914       int input_width = static_cast<int>(input_image->shape()[1]);
915       imIn.Init(input_width, input_height, input_channel, input_image->mat().data, LDataType::UINT8);
916       if (ResizeCubic(imIn, imOut, target_width, target_height) == false) {
917         RETURN_STATUS_UNEXPECTED("Resize: failed to do resize, please check the error msg.");
918       }
919       *output = output_tensor;
920       return Status::OK();
921     }
922 
923     TensorShape shape{target_height, target_width};
924     if (input_cv->Rank() == kDefaultImageRank) {
925       int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
926       shape = shape.AppendDim(num_channels);
927     }
928     std::shared_ptr<CVTensor> cvt_out;
929     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &cvt_out));
930     cv::resize(cv_in(roi), cvt_out->mat(), cv::Size(target_width, target_height), 0, 0, cv_mode);
931     *output = std::static_pointer_cast<Tensor>(cvt_out);
932     return Status::OK();
933   } catch (const cv::Exception &e) {
934     RETURN_STATUS_UNEXPECTED("CropAndResize: " + std::string(e.what()));
935   }
936 }
937 
/// \brief Rotate an image by `degree` degrees around `center` with the given interpolation,
///        optionally expanding the canvas so no content is clipped.
/// \param[in] input Input image tensor of rank 2 or 3.
/// \param[out] output Rotated image; same shape as input when expand is false.
/// \param[in] center Rotation center {x, y}; empty means the geometric center of the image.
/// \param[in] degree Rotation angle in degrees.
/// \param[in] interpolation Interpolation mode (CUBIC is rejected for > 4 channels).
/// \param[in] expand When true, the output canvas grows to the rotated bounding box.
/// \param[in] fill_r Red fill value for the area outside the rotated image.
/// \param[in] fill_g Green fill value.
/// \param[in] fill_b Blue fill value.
/// \return Status code.
Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> center,
              float degree, InterpolationMode interpolation, bool expand, uint8_t fill_r, uint8_t fill_g,
              uint8_t fill_b) {
  try {
    RETURN_IF_NOT_OK(ValidateImageRank("Rotate", input->Rank()));
    dsize_t channel = 1;
    RETURN_IF_NOT_OK(ImageNumChannels(input, &channel));
    CHECK_FAIL_RETURN_UNEXPECTED(channel <= kMaxImageChannel || interpolation != InterpolationMode::kCubic,
                                 "Rotate: interpolation can not be CUBIC when image channel is greater than 4.");
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] Rotate: load image failed.");
    }

    cv::Mat input_img = input_cv->mat();
    // Reject dimensions beyond the float precision limit; the rotation center below is a
    // float and would lose precision for larger images.
    if (input_img.cols > (MAX_INT_PRECISION * DOUBLING_FACTOR) ||
        input_img.rows > (MAX_INT_PRECISION * DOUBLING_FACTOR)) {
      RETURN_STATUS_UNEXPECTED("Rotate: image is too large and center is not precise, got image width:" +
                               std::to_string(input_img.cols) + ", and image height:" + std::to_string(input_img.rows) +
                               ", both should be small than:" + std::to_string(MAX_INT_PRECISION * DOUBLING_FACTOR));
    }
    float fx = 0, fy = 0;
    if (center.empty()) {
      // default to center of image
      fx = (static_cast<float>(input_img.cols) - 1.0F) * kHalf;
      fy = (static_cast<float>(input_img.rows) - 1.0F) * kHalf;
    } else {
      fx = center[0];
      fy = center[1];
    }
    cv::Mat output_img;
    // Fill color is passed in RGB order but OpenCV scalars are BGR, hence the reversed order.
    cv::Scalar fill_color = cv::Scalar(fill_b, fill_g, fill_r);
    // maybe don't use uint32 for image dimension here
    cv::Point2f pc(fx, fy);
    // 2x3 affine matrix for rotation about pc with scale 1.
    cv::Mat rot = cv::getRotationMatrix2D(pc, degree, 1.0);
    std::shared_ptr<CVTensor> output_cv;
    if (!expand) {
      // this case means that the shape doesn't change, size stays the same
      // We may not need this memcpy if it is in place.
      RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
      // using inter_nearest to comply with python default
      cv::warpAffine(input_img, output_cv->mat(), rot, input_img.size(), GetCVInterpolationMode(interpolation),
                     cv::BORDER_CONSTANT, fill_color);
    } else {
      // we resize here since the shape changes
      // create a new bounding box with the rotate
      cv::Rect2f bbox = cv::RotatedRect(pc, input_img.size(), degree).boundingRect2f();
      // Shift the transform so the rotated content is centered in the enlarged canvas.
      rot.at<double>(0, 2) += bbox.width / 2.0 - input_img.cols / 2.0;
      rot.at<double>(1, 2) += bbox.height / 2.0 - input_img.rows / 2.0;
      // use memcpy and don't compute the new shape since openCV has a rounding problem
      cv::warpAffine(input_img, output_img, rot, bbox.size(), GetCVInterpolationMode(interpolation),
                     cv::BORDER_CONSTANT, fill_color);
      RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, input_cv->Rank(), &output_cv));
      RETURN_UNEXPECTED_IF_NULL(output_cv);
    }
    *output = std::static_pointer_cast<Tensor>(output_cv);
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("Rotate: " + std::string(e.what()));
  }
  return Status::OK();
}
999 
1000 template <typename T1, typename T2>
Normalize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,std::vector<float> mean,std::vector<float> std,bool is_hwc,bool pad=false)1001 void Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean,
1002                std::vector<float> std, bool is_hwc, bool pad = false) {
1003   // T1 is the type of input tensor, T2 is the type of output tensor
1004   auto itr_out = (*output)->begin<T2>();
1005   auto itr = input->begin<T1>();
1006   auto end = input->end<T1>();
1007   int64_t num_channels;
1008   if (is_hwc) {
1009     num_channels = (*output)->shape()[kChannelIndexHWC];
1010     while (itr != end) {
1011       for (size_t i = 0; i < num_channels - static_cast<int>(pad); i++) {
1012         *itr_out = static_cast<T2>((static_cast<float>(*itr) - mean[i]) / std[i]);
1013         ++itr_out;
1014         ++itr;
1015       }
1016     }
1017   } else {
1018     num_channels = (*output)->shape()[kChannelIndexCHW];
1019     int64_t height_index = 1;
1020     int64_t width_index = 2;
1021     int64_t channel_len = (*output)->shape()[height_index] * (*output)->shape()[width_index];
1022     while (itr != end) {
1023       for (size_t i = 0; i < num_channels - static_cast<int>(pad); i++) {
1024         for (int64_t j = 0; j < channel_len; j++) {
1025           *itr_out = static_cast<T2>((static_cast<float>(*itr) - mean[i]) / std[i]);
1026           ++itr_out;
1027           ++itr;
1028         }
1029       }
1030     }
1031   }
1032 }
1033 
1034 template <typename T>
Normalize_caller(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const std::vector<float> mean_v,const std::vector<float> std_v,bool is_hwc,bool pad)1035 Status Normalize_caller(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
1036                         const std::vector<float> mean_v, const std::vector<float> std_v, bool is_hwc, bool pad) {
1037   switch (static_cast<int>(input->type().value())) {
1038     case DataType::DE_BOOL:
1039       Normalize<bool, T>(input, output, mean_v, std_v, is_hwc, pad);
1040       break;
1041     case DataType::DE_INT8:
1042       Normalize<int8_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1043       break;
1044     case DataType::DE_UINT8:
1045       Normalize<uint8_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1046       break;
1047     case DataType::DE_INT16:
1048       Normalize<int16_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1049       break;
1050     case DataType::DE_UINT16:
1051       Normalize<uint16_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1052       break;
1053     case DataType::DE_INT32:
1054       Normalize<int32_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1055       break;
1056     case DataType::DE_UINT32:
1057       Normalize<uint32_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1058       break;
1059     case DataType::DE_INT64:
1060       Normalize<int64_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1061       break;
1062     case DataType::DE_UINT64:
1063       Normalize<uint64_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1064       break;
1065     case DataType::DE_FLOAT16:
1066       Normalize<float16, T>(input, output, mean_v, std_v, is_hwc, pad);
1067       break;
1068     case DataType::DE_FLOAT32:
1069       Normalize<float, T>(input, output, mean_v, std_v, is_hwc, pad);
1070       break;
1071     case DataType::DE_FLOAT64:
1072       Normalize<double, T>(input, output, mean_v, std_v, is_hwc, pad);
1073       break;
1074     default:
1075       std::string op_name = (pad) ? "NormalizePad" : "Normalize";
1076       RETURN_STATUS_UNEXPECTED(
1077         op_name + ": unsupported type, currently supported types include " +
1078         "[bool,int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,int64_t,uint64_t,float16,float,double].");
1079   }
1080   return Status::OK();
1081 }
1082 
Normalize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,std::vector<float> mean,std::vector<float> std,bool is_hwc)1083 Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean,
1084                  std::vector<float> std, bool is_hwc) {
1085   RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), DataType(DataType::DE_FLOAT32), output));
1086   if (input->Rank() == kMinImageRank) {
1087     RETURN_IF_NOT_OK((*output)->ExpandDim(kMinImageRank));
1088   }
1089 
1090   CHECK_FAIL_RETURN_UNEXPECTED((*output)->Rank() == kDefaultImageRank,
1091                                "Normalize: output image rank should be: " + std::to_string(kDefaultImageRank) +
1092                                  ", but got: " + std::to_string((*output)->Rank()));
1093   CHECK_FAIL_RETURN_UNEXPECTED(std.size() == mean.size(),
1094                                "Normalize: mean and std vectors are not of same size, got size of std: " +
1095                                  std::to_string(std.size()) + ", and mean size: " + std::to_string(mean.size()));
1096   int64_t channel_index;
1097   if (is_hwc) {
1098     channel_index = kChannelIndexHWC;
1099   } else {
1100     channel_index = kChannelIndexCHW;
1101   }
1102   // caller provided 1 mean/std value and there is more than one channel --> duplicate mean/std value
1103   if (mean.size() == 1 && (*output)->shape()[channel_index] != 1) {
1104     for (int64_t i = 0; i < (*output)->shape()[channel_index] - 1; i++) {
1105       mean.push_back(mean[0]);
1106       std.push_back(std[0]);
1107     }
1108   }
1109   CHECK_FAIL_RETURN_UNEXPECTED((*output)->shape()[channel_index] == static_cast<dsize_t>(mean.size()),
1110                                "Normalize: number of channels does not match the size of mean and std vectors, got "
1111                                "channels: " +
1112                                  std::to_string((*output)->shape()[channel_index]) +
1113                                  ", size of mean: " + std::to_string(mean.size()));
1114   RETURN_IF_NOT_OK(Normalize_caller<float>(input, output, mean, std, is_hwc, false));
1115 
1116   if (input->Rank() == kMinImageRank) {
1117     (*output)->Squeeze();
1118   }
1119   return Status::OK();
1120 }
1121 
NormalizePad(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,std::vector<float> mean,std::vector<float> std,const std::string & dtype,bool is_hwc)1122 Status NormalizePad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean,
1123                     std::vector<float> std, const std::string &dtype, bool is_hwc) {
1124   RETURN_IF_NOT_OK(ValidateImageRank("NormalizePad", input->Rank()));
1125   int64_t channel_index = kChannelIndexCHW;
1126   if (is_hwc) {
1127     channel_index = kChannelIndexHWC;
1128   }
1129   int32_t channels = 1;
1130   if (input->Rank() == kDefaultImageRank) {
1131     channels = static_cast<int>(input->shape()[channel_index]);
1132   }
1133 
1134   if (is_hwc) {
1135     TensorShape new_shape = TensorShape({input->shape()[0], input->shape()[1], channels + 1});
1136     RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, DataType(dtype), output));
1137     RETURN_IF_NOT_OK((*output)->Zero());
1138   } else {
1139     TensorShape new_shape = TensorShape({channels + 1, input->shape()[1], input->shape()[2]});
1140     RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, DataType(dtype), output));
1141     RETURN_IF_NOT_OK((*output)->Zero());
1142   }
1143 
1144   // caller provided 1 mean/std value and there are more than one channel --> duplicate mean/std value
1145   if (mean.size() == 1 && channels > 1) {
1146     while (mean.size() < channels) {
1147       mean.push_back(mean[0]);
1148       std.push_back(std[0]);
1149     }
1150   }
1151   CHECK_FAIL_RETURN_UNEXPECTED((*output)->shape()[channel_index] == static_cast<dsize_t>(mean.size()) + 1,
1152                                "NormalizePad: number of channels does not match the size of mean and std vectors, got "
1153                                "channels: " +
1154                                  std::to_string((*output)->shape()[channel_index] - 1) +
1155                                  ", size of mean: " + std::to_string(mean.size()));
1156   if (dtype == "float16") {
1157     RETURN_IF_NOT_OK(Normalize_caller<float16>(input, output, mean, std, is_hwc, true));
1158   } else {
1159     RETURN_IF_NOT_OK(Normalize_caller<float>(input, output, mean, std, is_hwc, true));
1160   }
1161   if (input->Rank() == kMinImageRank) {
1162     (*output)->Squeeze();
1163   }
1164   return Status::OK();
1165 }
1166 
AdjustBrightness(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float alpha)1167 Status AdjustBrightness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float alpha) {
1168   try {
1169     RETURN_IF_NOT_OK(ValidateImage(input, "AdjustBrightness", {1, 2, 3, 4, 5, 6, 10, 11, 12}, {2, 3}, {1, 3}));
1170     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1171     cv::Mat input_img = input_cv->mat();
1172     if (!input_cv->mat().data) {
1173       RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustBrightness: load image failed.");
1174     }
1175     std::shared_ptr<CVTensor> output_cv;
1176     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1177     output_cv->mat() = input_img * alpha;
1178     *output = std::static_pointer_cast<Tensor>(output_cv);
1179   } catch (const cv::Exception &e) {
1180     RETURN_STATUS_UNEXPECTED("AdjustBrightness: " + std::string(e.what()));
1181   }
1182   return Status::OK();
1183 }
1184 
/// \brief Adjust image contrast by blending the image with its mean gray level:
///        output = mean(gray) * (1 - alpha) + input * alpha, so alpha = 1 keeps the image
///        unchanged and alpha = 0 yields a uniform image at the mean gray value.
/// \param[in] input Input image tensor; treated as RGB channel order below.
/// \param[out] output Contrast-adjusted image, same shape and type as input.
/// \param[in] alpha Blend factor; larger values increase contrast.
/// \return Status code.
Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float alpha) {
  try {
    RETURN_IF_NOT_OK(ValidateImage(input, "AdjustContrast", {3, 5, 11}, {3}, {3}));
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    cv::Mat input_img = input_cv->mat();
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustContrast: load image failed.");
    }
    cv::Mat gray, output_img;
    // Compute the scalar mean of the grayscale version of the image.
    cv::cvtColor(input_img, gray, CV_RGB2GRAY);
    auto mean_img = cv::mean(gray).val[0];
    std::shared_ptr<CVTensor> output_cv;
    RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
    // thread safe: change cv::Mat::zeros to cv::Mat + setTo
    output_img = cv::Mat(input_img.rows, input_img.cols, input_img.depth());
    output_img.setTo(cv::Scalar::all(0));
    // Fill the single-channel image with the mean gray level, expand to 3 channels,
    // then blend with the original input.
    output_img = output_img + mean_img;
    cv::cvtColor(output_img, output_img, CV_GRAY2RGB);
    output_img = output_img * (1.0 - alpha) + input_img * alpha;
    output_img.copyTo(output_cv->mat());
    *output = std::static_pointer_cast<Tensor>(output_cv);
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("AdjustContrast: " + std::string(e.what()));
  }
  return Status::OK();
}
1211 
AdjustGamma(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float gamma,float gain)1212 Status AdjustGamma(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float gamma, float gain) {
1213   try {
1214     int num_channels = 1;
1215     if (input->Rank() < kMinImageRank) {
1216       RETURN_STATUS_UNEXPECTED("AdjustGamma: input tensor is not in shape of <...,H,W,C> or <H,W>, got shape:" +
1217                                input->shape().ToString());
1218     }
1219     if (input->Rank() > 2) {
1220       num_channels = static_cast<int>(input->shape()[-1]);
1221     }
1222     if (num_channels != 1 && num_channels != 3) {
1223       RETURN_STATUS_UNEXPECTED("AdjustGamma: channel of input image should be 1 or 3, but got: " +
1224                                std::to_string(num_channels));
1225     }
1226     if (input->type().IsFloat()) {
1227       for (auto itr = input->begin<float>(); itr != input->end<float>(); itr++) {
1228         *itr = pow((*itr) * gain, gamma);
1229         *itr = std::min(std::max((*itr), 0.0f), 1.0f);
1230       }
1231       *output = input;
1232     } else {
1233       RETURN_IF_NOT_OK(Tensor::CreateFromTensor(input, output));
1234       std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(*output);
1235       if (!input_cv->mat().data) {
1236         RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustGamma: load image failed.");
1237       }
1238       cv::Mat input_img = input_cv->mat();
1239       uchar LUT[256] = {};
1240       auto kMaxPixelValueFloat = static_cast<float>(kMaxBitValue);
1241       for (int i = 0; i <= kMaxBitValue; i++) {
1242         float f = static_cast<float>(i) / kMaxPixelValueFloat;
1243         f = pow(f, gamma);
1244         LUT[i] =
1245           static_cast<uchar>(floor(std::min(f * (kMaxPixelValueFloat + 1.f - 1e-3f) * gain, kMaxPixelValueFloat)));
1246       }
1247       if (input_img.channels() == 1) {
1248         cv::MatIterator_<uchar> it = input_img.begin<uchar>();
1249         cv::MatIterator_<uchar> it_end = input_img.end<uchar>();
1250         for (; it != it_end; ++it) {
1251           *it = LUT[(*it)];
1252         }
1253       } else {
1254         cv::MatIterator_<cv::Vec3b> it = input_img.begin<cv::Vec3b>();
1255         cv::MatIterator_<cv::Vec3b> it_end = input_img.end<cv::Vec3b>();
1256         for (; it != it_end; ++it) {
1257           (*it)[0] = LUT[(*it)[0]];
1258           (*it)[1] = LUT[(*it)[1]];
1259           (*it)[2] = LUT[(*it)[2]];
1260         }
1261       }
1262       *output = std::static_pointer_cast<Tensor>(input_cv);
1263     }
1264   } catch (const cv::Exception &e) {
1265     RETURN_STATUS_UNEXPECTED("AdjustGamma: " + std::string(e.what()));
1266   }
1267   return Status::OK();
1268 }
1269 
AdjustSharpness(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float alpha)1270 Status AdjustSharpness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float alpha) {
1271   try {
1272     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1273     cv::Mat input_img = input_cv->mat();
1274     if (!input_cv->mat().data) {
1275       RETURN_STATUS_UNEXPECTED("[Internal ERROR] Sharpness: load image failed.");
1276     }
1277 
1278     if (input_cv->Rank() == 1 || input_cv->mat().dims > 2) {
1279       RETURN_STATUS_UNEXPECTED("Sharpness: shape of input is not <H,W,C> or <H,W>, but got rank: " +
1280                                std::to_string(input_cv->Rank()));
1281     }
1282 
1283     /// creating a smoothing filter. 1, 1, 1,
1284     ///                              1, 5, 1,
1285     ///                              1, 1, 1
1286 
1287     const float filterMid = 5.0;
1288     const float filterSum = 13.0;
1289     cv::Mat filter = cv::Mat(3, 3, CV_32F, cv::Scalar::all(1.0 / filterSum));
1290     filter.at<float>(1, 1) = filterMid / filterSum;
1291 
1292     /// applying filter on channels
1293     cv::Mat result = cv::Mat();
1294     cv::filter2D(input_img, result, -1, filter);
1295 
1296     auto height = static_cast<int>(input_cv->shape()[0]);
1297     auto width = static_cast<int>(input_cv->shape()[1]);
1298 
1299     /// restoring the edges
1300     input_img.row(0).copyTo(result.row(0));
1301     input_img.row(height - 1).copyTo(result.row(height - 1));
1302     input_img.col(0).copyTo(result.col(0));
1303     input_img.col(width - 1).copyTo(result.col(width - 1));
1304 
1305     /// blend based on alpha : (alpha_ *input_img) +  ((1.0-alpha_) * result);
1306     cv::addWeighted(input_img, alpha, result, 1.0 - alpha, 0.0, result);
1307 
1308     std::shared_ptr<CVTensor> output_cv;
1309     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
1310     RETURN_UNEXPECTED_IF_NULL(output_cv);
1311 
1312     *output = std::static_pointer_cast<Tensor>(output_cv);
1313   } catch (const cv::Exception &e) {
1314     RETURN_STATUS_UNEXPECTED("Sharpness: " + std::string(e.what()));
1315   }
1316   return Status::OK();
1317 }
1318 
AutoContrast(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float cutoff,const std::vector<uint32_t> & ignore)1319 Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float cutoff,
1320                     const std::vector<uint32_t> &ignore) {
1321   try {
1322     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1323     if (!input_cv->mat().data) {
1324       RETURN_STATUS_UNEXPECTED("[Internal ERROR] AutoContrast: load image failed.");
1325     }
1326     if (input_cv->Rank() != kDefaultImageRank && input_cv->Rank() != kMinImageRank) {
1327       std::string err_msg = "AutoContrast: image rank should be 2 or 3,  but got: " + std::to_string(input_cv->Rank());
1328       if (input_cv->Rank() == 1) {
1329         err_msg = err_msg + ", may need to do Decode operation first.";
1330       }
1331       RETURN_STATUS_UNEXPECTED("AutoContrast: image rank should be 2 or 3,  but got: " +
1332                                std::to_string(input_cv->Rank()));
1333     }
1334     // Reshape to extend dimension if rank is 2 for algorithm to work. then reshape output to be of rank 2 like input
1335     auto input_rank = input_cv->Rank();
1336     if (input_cv->Rank() == kMinImageRank) {
1337       RETURN_IF_NOT_OK(input_cv->ExpandDim(kMinImageRank));
1338     }
1339     // Get number of channels and image matrix
1340     std::size_t num_of_channels = input_cv->shape()[static_cast<size_t>(kChannelIndexHWC)];
1341     if (num_of_channels != kMinImageChannel && num_of_channels != kDefaultImageChannel) {
1342       RETURN_STATUS_UNEXPECTED("AutoContrast: channel of input image should be 1 or 3, but got: " +
1343                                std::to_string(num_of_channels));
1344     }
1345     cv::Mat image = input_cv->mat();
1346     // Separate the image to channels
1347     std::vector<cv::Mat> planes(num_of_channels);
1348     cv::split(image, planes);
1349     cv::Mat b_hist, g_hist, r_hist;
1350     // Establish the number of bins and set variables for histogram
1351     int32_t hist_size = 256;
1352     int32_t channels = 0;
1353     float range[] = {0, 256};
1354     const float *hist_range[] = {range};
1355     bool uniform = true, accumulate = false;
1356     // Set up lookup table for LUT(Look up table algorithm)
1357     std::vector<int32_t> table;
1358     std::vector<cv::Mat> image_result;
1359     for (std::size_t layer = 0; layer < planes.size(); layer++) {
1360       // Reset lookup table
1361       table = std::vector<int32_t>{};
1362       // Calculate Histogram for channel
1363       cv::Mat hist;
1364       cv::calcHist(&planes[layer], 1, &channels, cv::Mat(), hist, 1, &hist_size, hist_range, uniform, accumulate);
1365       hist.convertTo(hist, CV_32SC1);
1366       std::vector<int32_t> hist_vec;
1367       hist.col(0).copyTo(hist_vec);
1368       // Ignore values in ignore
1369       for (const auto &item : ignore) {
1370         hist_vec[item] = 0;
1371       }
1372       int32_t hi = kMaxBitValue;
1373       int32_t lo = 0;
1374       RETURN_IF_NOT_OK(ComputeUpperAndLowerPercentiles(&hist_vec, cutoff, cutoff, &hi, &lo));
1375       if (hi <= lo) {
1376         for (int32_t i = 0; i < 256; i++) {
1377           table.push_back(i);
1378         }
1379       } else {
1380         const float scale = static_cast<float>(kMaxBitValue) / static_cast<float>(hi - lo);
1381         const float offset = static_cast<float>(-1 * lo) * scale;
1382         for (int32_t i = 0; i < 256; i++) {
1383           auto ix = static_cast<int32_t>(static_cast<float>(i) * scale + offset);
1384           ix = std::max(ix, 0);
1385           ix = std::min(ix, kMaxBitValue);
1386           table.push_back(ix);
1387         }
1388       }
1389       cv::Mat result_layer;
1390       cv::LUT(planes[layer], table, result_layer);
1391       image_result.push_back(result_layer);
1392     }
1393     cv::Mat result;
1394     cv::merge(image_result, result);
1395     result.convertTo(result, input_cv->mat().type());
1396     std::shared_ptr<CVTensor> output_cv;
1397     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
1398     (*output) = std::static_pointer_cast<Tensor>(output_cv);
1399     if (input_rank == kMinImageRank) {
1400       (*output)->Squeeze();
1401     }
1402   } catch (const cv::Exception &e) {
1403     RETURN_STATUS_UNEXPECTED("AutoContrast: " + std::string(e.what()));
1404   }
1405   return Status::OK();
1406 }
1407 
AdjustSaturation(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float alpha)1408 Status AdjustSaturation(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float alpha) {
1409   try {
1410     RETURN_IF_NOT_OK(ValidateImage(input, "AdjustSaturation", {3, 5, 11}, {3}, {3}));
1411     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1412     cv::Mat input_img = input_cv->mat();
1413     if (!input_cv->mat().data) {
1414       RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustSaturation: load image failed.");
1415     }
1416     std::shared_ptr<CVTensor> output_cv;
1417     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1418     cv::Mat output_img = output_cv->mat();
1419     cv::Mat gray;
1420     cv::cvtColor(input_img, gray, CV_RGB2GRAY);
1421     cv::cvtColor(gray, output_img, CV_GRAY2RGB);
1422     output_img = output_img * (1.0 - alpha) + input_img * alpha;
1423     output_img.copyTo(output_cv->mat());
1424     *output = std::static_pointer_cast<Tensor>(output_cv);
1425   } catch (const cv::Exception &e) {
1426     RETURN_STATUS_UNEXPECTED("AdjustSaturation: " + std::string(e.what()));
1427   }
1428   return Status::OK();
1429 }
1430 
AdjustHue(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float hue)1431 Status AdjustHue(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float hue) {
1432   try {
1433     RETURN_IF_NOT_OK(ValidateImage(input, "AdjustHue", {3, 11}, {3}, {3}));
1434     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1435     cv::Mat input_img = input_cv->mat();
1436     if (!input_cv->mat().data) {
1437       RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustHue: load image failed.");
1438     }
1439     std::shared_ptr<CVTensor> output_cv;
1440     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1441     cv::Mat output_img;
1442     cv::cvtColor(input_img, output_img, CV_RGB2HSV_FULL);
1443     for (int x = 0; x < output_img.cols; x++) {
1444       for (int y = 0; y < output_img.rows; y++) {
1445         uint8_t cur1 = output_img.at<cv::Vec3b>(cv::Point(x, y))[0];
1446         uint8_t h_hue = 0;
1447         h_hue = static_cast<uint8_t>(hue * kMaxBitValue);
1448         cur1 += h_hue;
1449         output_img.at<cv::Vec3b>(cv::Point(x, y))[0] = cur1;
1450       }
1451     }
1452     cv::cvtColor(output_img, output_cv->mat(), CV_HSV2RGB_FULL);
1453     *output = std::static_pointer_cast<Tensor>(output_cv);
1454   } catch (const cv::Exception &e) {
1455     RETURN_STATUS_UNEXPECTED("AdjustHue: " + std::string(e.what()));
1456   }
1457   return Status::OK();
1458 }
1459 
Equalize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1460 Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1461   try {
1462     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1463     if (!input_cv->mat().data) {
1464       RETURN_STATUS_UNEXPECTED("[Internal ERROR] Equalize: load image failed.");
1465     }
1466     if (input_cv->Rank() != kDefaultImageRank && input_cv->Rank() != kMinImageRank) {
1467       RETURN_STATUS_UNEXPECTED("Equalize: image rank should be 2 or 3,  but got: " + std::to_string(input_cv->Rank()));
1468     }
1469     // For greyscale images, extend dimension if rank is 2 and reshape output to be of rank 2.
1470     auto input_rank = input_cv->Rank();
1471     if (input_cv->Rank() == kMinImageRank) {
1472       RETURN_IF_NOT_OK(input_cv->ExpandDim(kMinImageRank));
1473     }
1474     // Get number of channels and image matrix
1475     std::size_t num_of_channels = input_cv->shape()[kChannelIndexHWC];
1476     if (num_of_channels != kMinImageChannel && num_of_channels != kDefaultImageChannel) {
1477       RETURN_STATUS_UNEXPECTED("Equalize: channel of input image should be 1 or 3, but got: " +
1478                                std::to_string(num_of_channels));
1479     }
1480     cv::Mat image = input_cv->mat();
1481     // Separate the image to channels
1482     std::vector<cv::Mat> planes(num_of_channels);
1483     cv::split(image, planes);
1484     // Equalize each channel separately
1485     std::vector<cv::Mat> image_result;
1486     for (auto &plane : planes) {
1487       cv::Mat channel_result;
1488       cv::equalizeHist(plane, channel_result);
1489       image_result.push_back(channel_result);
1490     }
1491     cv::Mat result;
1492     cv::merge(image_result, result);
1493     std::shared_ptr<CVTensor> output_cv;
1494     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
1495     (*output) = std::static_pointer_cast<Tensor>(output_cv);
1496     if (input_rank == kMinImageRank) {
1497       (*output)->Squeeze();
1498     }
1499   } catch (const cv::Exception &e) {
1500     RETURN_STATUS_UNEXPECTED("Equalize: " + std::string(e.what()));
1501   }
1502   return Status::OK();
1503 }
1504 
Invert(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1505 Status Invert(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1506   try {
1507     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1508     cv::Mat input_img = input_cv->mat();
1509     if (!input_cv->mat().data) {
1510       RETURN_STATUS_UNEXPECTED("[Internal ERROR] Invert: load image failed.");
1511     }
1512 
1513     std::shared_ptr<CVTensor> output_cv;
1514     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1515     RETURN_UNEXPECTED_IF_NULL(output_cv);
1516 
1517     constexpr auto kMaxPixel = 255.0;
1518     output_cv->mat() = cv::Scalar::all(kMaxPixel) - input_img;
1519     *output = std::static_pointer_cast<Tensor>(output_cv);
1520   } catch (const cv::Exception &e) {
1521     RETURN_STATUS_UNEXPECTED("Invert: " + std::string(e.what()));
1522   }
1523   return Status::OK();
1524 }
1525 
Posterize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,uint8_t bits)1526 Status Posterize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, uint8_t bits) {
1527   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1528   if (!input_cv->mat().data) {
1529     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Posterize: load image failed.");
1530   }
1531   if (input_cv->Rank() != 3 && input_cv->Rank() != 2) {
1532     RETURN_STATUS_UNEXPECTED("Posterize: input image is not in shape of <H,W,C> or <H,W>, but got rank: " +
1533                              std::to_string(input_cv->Rank()));
1534   }
1535   uint8_t mask_value = ~((uint8_t)(1 << (8 - bits)) - 1);
1536   std::vector<uint8_t> lut_vector;
1537   for (std::size_t i = 0; i < 256; i++) {
1538     lut_vector.push_back(i & mask_value);
1539   }
1540   cv::Mat in_image = input_cv->mat();
1541 
1542   cv::Mat output_img;
1543   CHECK_FAIL_RETURN_UNEXPECTED(in_image.depth() == CV_8U || in_image.depth() == CV_8S,
1544                                "Posterize: data type of input image should be int8 or uint8, "
1545                                "but got " +
1546                                  input_cv->type().ToString());
1547   cv::LUT(in_image, lut_vector, output_img);
1548   std::shared_ptr<CVTensor> result_tensor;
1549 
1550   RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, input_cv->Rank(), &result_tensor));
1551   *output = std::static_pointer_cast<Tensor>(result_tensor);
1552   return Status::OK();
1553 }
1554 
ValidateCutOutImage(const std::shared_ptr<Tensor> & input,bool is_hwc,int32_t box_height,int32_t box_width)1555 Status ValidateCutOutImage(const std::shared_ptr<Tensor> &input, bool is_hwc, int32_t box_height, int32_t box_width) {
1556   uint32_t channel_index = is_hwc ? kChannelIndexHWC : kChannelIndexCHW;
1557   uint32_t height_index = is_hwc ? 0 : 1;
1558   uint32_t width_index = is_hwc ? 1 : 2;
1559   std::string right_shape = is_hwc ? "<H,W,C>" : "<C,H,W>";
1560   int64_t image_h = input->shape()[height_index];
1561   int64_t image_w = input->shape()[width_index];
1562 
1563   CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() > channel_index, "CutOut: shape is invalid.");
1564 
1565   if (input->Rank() != kDefaultImageRank) {
1566     RETURN_STATUS_UNEXPECTED("CutOut: image shape is not " + right_shape +
1567                              ", but got rank: " + std::to_string(input->Rank()));
1568   }
1569 
1570   if (box_height > image_h || box_width > image_w) {
1571     RETURN_STATUS_UNEXPECTED(
1572       "CutOut: box size is too large for image erase, got box height: " + std::to_string(box_height) +
1573       "box weight: " + std::to_string(box_width) + ", and image height: " + std::to_string(image_h) +
1574       ", image width: " + std::to_string(image_w));
1575   }
1576   return Status::OK();
1577 }
1578 
GetPtr(const std::shared_ptr<Tensor> & tensor)1579 uchar *GetPtr(const std::shared_ptr<Tensor> &tensor) {
1580   switch (tensor->type().value()) {
1581     case DataType::DE_BOOL:
1582       return reinterpret_cast<uchar *>(&(*tensor->begin<bool>()));
1583     case DataType::DE_INT8:
1584       return reinterpret_cast<uchar *>(&(*tensor->begin<int8_t>()));
1585     case DataType::DE_UINT8:
1586       return reinterpret_cast<uchar *>(&(*tensor->begin<uint8_t>()));
1587     case DataType::DE_INT16:
1588       return reinterpret_cast<uchar *>(&(*tensor->begin<int16_t>()));
1589     case DataType::DE_UINT16:
1590       return reinterpret_cast<uchar *>(&(*tensor->begin<uint16_t>()));
1591     case DataType::DE_INT32:
1592       return reinterpret_cast<uchar *>(&(*tensor->begin<int32_t>()));
1593     case DataType::DE_UINT32:
1594       return reinterpret_cast<uchar *>(&(*tensor->begin<uint32_t>()));
1595     case DataType::DE_INT64:
1596       return reinterpret_cast<uchar *>(&(*tensor->begin<int64_t>()));
1597     case DataType::DE_UINT64:
1598       return reinterpret_cast<uchar *>(&(*tensor->begin<uint64_t>()));
1599     case DataType::DE_FLOAT16:
1600       return reinterpret_cast<uchar *>(&(*tensor->begin<float16>()));
1601     case DataType::DE_FLOAT32:
1602       return reinterpret_cast<uchar *>(&(*tensor->begin<float>()));
1603     case DataType::DE_FLOAT64:
1604       return reinterpret_cast<uchar *>(&(*tensor->begin<double>()));
1605     default:
1606       return nullptr;
1607   }
1608 }
1609 
// Erase `num_patches` rectangular patches of size box_height x box_width from the image.
// `bounded` == true keeps each patch fully inside the image (RandomErasing-style);
// `bounded` == false samples over an extended range and clips (CutOut-style), so a
// patch may be partially off-image.
// NOTE(review): `random_color`, `normal_distribution` and the validated `fill_colors`
// are currently unused — every patch is zero-filled via memset_s below. Confirm
// whether colored fill is implemented elsewhere or still pending.
Status CutOut(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t box_height,
              int32_t box_width, int32_t num_patches, bool bounded, bool random_color, std::mt19937 *rnd,
              std::vector<uint8_t> fill_colors, bool is_hwc) {
  try {
    // Work on a copy; the input tensor is never modified.
    RETURN_IF_NOT_OK(Tensor::CreateFromTensor(input, output));
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(*output);
    RETURN_IF_NOT_OK(ValidateCutOutImage(input_cv, is_hwc, box_height, box_width));
    uint32_t channel_index = is_hwc ? kChannelIndexHWC : kChannelIndexCHW;
    uint32_t height_index = is_hwc ? 0 : 1;
    uint32_t width_index = is_hwc ? 1 : 2;
    uint64_t num_channels = input_cv->shape()[channel_index];
    int64_t image_h = input_cv->shape()[height_index];
    int64_t image_w = input_cv->shape()[width_index];
    uint8_t type_size = input_cv->type().SizeInBytes();
    // for random color
    std::normal_distribution<double> normal_distribution(0, 1);
    // Bounded: start positions keep the whole patch on-image.
    std::uniform_int_distribution<int> height_distribution_bound(0, static_cast<int>(image_h) - box_height);
    std::uniform_int_distribution<int> width_distribution_bound(0, static_cast<int>(image_w) - box_width);
    // Unbounded: sampled over [0, size + box], later shifted by -box and clipped.
    std::uniform_int_distribution<int> height_distribution_unbound(0, static_cast<int>(image_h) + box_height);
    std::uniform_int_distribution<int> width_distribution_unbound(0, static_cast<int>(image_w) + box_width);

    if (fill_colors.empty()) {
      fill_colors = std::vector<uint8_t>(num_channels, 0);
    }
    CHECK_FAIL_RETURN_UNEXPECTED(fill_colors.size() == num_channels,
                                 "Number of fill colors (" + std::to_string(fill_colors.size()) +
                                   ") does not match the number of channels (" + std::to_string(num_channels) + ").");
    // core logic
    // update values based on random erasing or cutout
    for (int32_t i = 0; i < num_patches; i++) {
      // rows in cv mat refers to the height of the cropped box
      // we determine h_start and w_start using two different distributions as erasing is used by two different
      // image augmentations. The bounds are also different in each case.
      int32_t h_start = (bounded) ? height_distribution_bound(*rnd) : (height_distribution_unbound(*rnd) - box_height);
      int32_t w_start = (bounded) ? width_distribution_bound(*rnd) : (width_distribution_unbound(*rnd) - box_width);

      // Clip the patch to the image extents on the far edge...
      int64_t max_width = (w_start + box_width > image_w) ? image_w : w_start + box_width;
      int64_t max_height = (h_start + box_height > image_h) ? image_h : h_start + box_height;
      // check for starting range >= 0, here the start range is checked after for cut out, for random erasing
      // w_start and h_start will never be less than 0.
      h_start = (h_start < 0) ? 0 : h_start;
      w_start = (w_start < 0) ? 0 : w_start;

      if (is_hwc) {
        // HWC: each erased row is one contiguous run of (channels * row width) elements.
        uchar *buffer = GetPtr(input_cv);
        int64_t num_bytes = type_size * static_cast<int64_t>(num_channels) * (max_width - w_start);
        for (int x = h_start; x < max_height; x++) {
          auto ret = memset_s(buffer + (x * image_w + w_start) * num_channels * type_size, num_bytes, 0, num_bytes);
          if (ret != EOK) {
            RETURN_STATUS_UNEXPECTED("CutOut: memset_s failed for HWC scenario.");
          }
        }
      } else {
        // CHW: erase the same row span separately within each channel plane.
        int64_t num_bytes = type_size * (max_width - w_start);
        for (uint64_t c = 0; c < num_channels; c++) {
          uchar *buffer = GetPtr(input_cv) + (type_size * c * image_h * image_w);
          for (int x = h_start; x < max_height; x++) {
            auto ret = memset_s(buffer + (x * image_w + w_start) * type_size, num_bytes, 0, num_bytes);
            if (ret != EOK) {
              RETURN_STATUS_UNEXPECTED("CutOut: memset_s failed for CHW scenario.");
            }
          }
        }
      }
    }

    *output = std::static_pointer_cast<Tensor>(input_cv);
    return Status::OK();
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("CutOut: " + std::string(e.what()));
  }

  return Status::OK();
}
1684 
Erase(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t top,int32_t left,int32_t height,int32_t width,const std::vector<float> & value,bool inplace)1685 Status Erase(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t top, int32_t left,
1686              int32_t height, int32_t width, const std::vector<float> &value, bool inplace) {
1687   try {
1688     std::vector<dsize_t> size;
1689     RETURN_IF_NOT_OK(ImageSize(input, &size));
1690     int64_t image_h = size[kHeightIndex];
1691     int64_t image_w = size[kWidthIndex];
1692     if (height > image_h || width > image_w) {
1693       RETURN_STATUS_UNEXPECTED(
1694         "Erase: box size is too large for image erase, got box height: " + std::to_string(height) +
1695         "box weight: " + std::to_string(width) + ", and image height: " + std::to_string(image_h) +
1696         ", image width: " + std::to_string(image_w));
1697     }
1698 
1699     std::shared_ptr<CVTensor> input_cv;
1700     if (!inplace) {
1701       RETURN_IF_NOT_OK(Tensor::CreateFromTensor(input, output));
1702       input_cv = CVTensor::AsCVTensor(*output);
1703     } else {
1704       input_cv = CVTensor::AsCVTensor(input);
1705     }
1706     cv::Mat input_img = input_cv->mat();
1707 
1708     int32_t h_start = top;
1709     int32_t w_start = left;
1710     h_start = (h_start < 0) ? 0 : h_start;
1711     w_start = (w_start < 0) ? 0 : w_start;
1712 
1713     int32_t max_width = (w_start + width > image_w) ? static_cast<int32_t>(image_w) : w_start + width;
1714     int32_t max_height = (h_start + height > image_h) ? static_cast<int32_t>(image_h) : h_start + height;
1715     int32_t true_width = max_width - w_start;
1716     int32_t true_height = max_height - h_start;
1717 
1718     float fill_r = value[kRIndex];
1719     float fill_g = value[kRIndex];
1720     float fill_b = value[kRIndex];
1721     const size_t kMaxFillValuesSize = 3;
1722     if (value.size() == kMaxFillValuesSize) {
1723       fill_r = value[kRIndex];
1724       fill_g = value[kGIndex];
1725       fill_b = value[kBIndex];
1726     }
1727 
1728     cv::Rect idx = cv::Rect(w_start, h_start, true_width, true_height);
1729     cv::Scalar fill_color = cv::Scalar(fill_r, fill_g, fill_b);
1730     (void)input_img(idx).setTo(fill_color);
1731 
1732     if (!inplace) {
1733       *output = std::static_pointer_cast<Tensor>(input_cv);
1734     } else {
1735       std::shared_ptr<CVTensor> output_cv;
1736       RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_img, input_cv->Rank(), &output_cv));
1737       *output = std::static_pointer_cast<Tensor>(output_cv);
1738     }
1739 
1740     return Status::OK();
1741   } catch (const cv::Exception &e) {
1742     RETURN_STATUS_UNEXPECTED("Erase: " + std::string(e.what()));
1743   }
1744 
1745   return Status::OK();
1746 }
1747 
Pad(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const int32_t & pad_top,const int32_t & pad_bottom,const int32_t & pad_left,const int32_t & pad_right,const BorderType & border_types,uint8_t fill_r,uint8_t fill_g,uint8_t fill_b)1748 Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top,
1749            const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types,
1750            uint8_t fill_r, uint8_t fill_g, uint8_t fill_b) {
1751   try {
1752     RETURN_IF_NOT_OK(ValidateImage(input, "Pad", {1, 2, 3, 4, 5, 6, 10, 11, 12}, {2, 3}, {1, 3}));
1753 
1754     // input image
1755     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1756 
1757     if (!input_cv->mat().data) {
1758       RETURN_STATUS_UNEXPECTED("[Internal ERROR] Pad: load image failed.");
1759     }
1760 
1761     // get the border type in openCV
1762     auto b_type = GetCVBorderType(border_types);
1763     // output image
1764     cv::Mat out_image;
1765     if (b_type == cv::BORDER_CONSTANT) {
1766       cv::Scalar fill_color = cv::Scalar(fill_r, fill_g, fill_b);
1767       cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type, fill_color);
1768     } else {
1769       cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type);
1770     }
1771     std::shared_ptr<CVTensor> output_cv;
1772     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_image, input_cv->Rank(), &output_cv));
1773     // pad the dimension if shape information is only 2 dimensional, this is grayscale
1774     if (input_cv->Rank() == kDefaultImageRank && input_cv->shape()[kChannelIndexHWC] == kMinImageChannel &&
1775         output_cv->Rank() == kMinImageRank) {
1776       RETURN_IF_NOT_OK(output_cv->ExpandDim(kChannelIndexHWC));
1777     }
1778     *output = std::static_pointer_cast<Tensor>(output_cv);
1779     return Status::OK();
1780   } catch (const cv::Exception &e) {
1781     RETURN_STATUS_UNEXPECTED("Pad: " + std::string(e.what()));
1782   }
1783 }
1784 
Perspective(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const std::vector<std::vector<int32_t>> & start_points,const std::vector<std::vector<int32_t>> & end_points,InterpolationMode interpolation)1785 Status Perspective(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
1786                    const std::vector<std::vector<int32_t>> &start_points,
1787                    const std::vector<std::vector<int32_t>> &end_points, InterpolationMode interpolation) {
1788   try {
1789     RETURN_IF_NOT_OK(ValidateImage(input, "Perspective", {1, 2, 3, 4, 5, 6, 10, 11, 12}, {2, 3}));
1790     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1791     if (!input_cv->mat().data) {
1792       RETURN_STATUS_UNEXPECTED("[Internal ERROR] Perspective: load image failed.");
1793     }
1794     const int kListSize = 4;
1795     // Get Point
1796     cv::Point2f cv_src_point[kListSize];
1797     cv::Point2f cv_dst_point[kListSize];
1798     for (int i = 0; i < kListSize; i++) {
1799       cv_src_point[i] = cv::Point2f(static_cast<float>(start_points[i][0]), static_cast<float>(start_points[i][1]));
1800       cv_dst_point[i] = cv::Point2f(static_cast<float>(end_points[i][0]), static_cast<float>(end_points[i][1]));
1801     }
1802 
1803     // Perspective Operation
1804     std::shared_ptr<CVTensor> output_cv;
1805     cv::Mat M = cv::getPerspectiveTransform(cv_src_point, cv_dst_point, cv::DECOMP_LU);
1806     cv::Mat src_img = input_cv->mat();
1807 
1808     cv::Mat dst_img;
1809     cv::warpPerspective(src_img, dst_img, M, src_img.size(), GetCVInterpolationMode(interpolation));
1810     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(dst_img, input_cv->Rank(), &output_cv));
1811     *output = std::static_pointer_cast<Tensor>(output_cv);
1812     return Status::OK();
1813   } catch (const cv::Exception &e) {
1814     RETURN_STATUS_UNEXPECTED("Perspective: " + std::string(e.what()));
1815   }
1816 }
1817 
RandomLighting(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float rnd_r,float rnd_g,float rnd_b)1818 Status RandomLighting(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rnd_r, float rnd_g,
1819                       float rnd_b) {
1820   try {
1821     RETURN_IF_NOT_OK(Tensor::CreateFromTensor(input, output));
1822     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(*output);
1823     cv::Mat input_img = input_cv->mat();
1824 
1825     if (!input_cv->mat().data) {
1826       RETURN_STATUS_UNEXPECTED(
1827         "RandomLighting: Cannot convert from OpenCV type, unknown "
1828         "CV type. Currently supported data type: [int8, uint8, int16, uint16, "
1829         "int32, float16, float32, float64].");
1830     }
1831 
1832     if (input_cv->Rank() != kDefaultImageRank || input_cv->shape()[kChannelIndexHWC] != kDefaultImageChannel) {
1833       RETURN_STATUS_UNEXPECTED(
1834         "RandomLighting: input tensor is not in shape of <H,W,C> or channel is not 3, got rank: " +
1835         std::to_string(input_cv->Rank()) + ", and channel: " + std::to_string(input_cv->shape()[kChannelIndexHWC]));
1836     }
1837     auto input_type = input->type();
1838     CHECK_FAIL_RETURN_UNEXPECTED(input_type != DataType::DE_UINT32 && input_type != DataType::DE_UINT64 &&
1839                                    input_type != DataType::DE_INT64 && !input_type.IsString(),
1840                                  "RandomLighting: invalid tensor type of uint32, int64, uint64, string or bytes.");
1841 
1842     std::vector<std::vector<float>> eig = {{55.46 * -0.5675, 4.794 * 0.7192, 1.148 * 0.4009},
1843                                            {55.46 * -0.5808, 4.794 * -0.0045, 1.148 * -0.8140},
1844                                            {55.46 * -0.5836, 4.794 * -0.6948, 1.148 * 0.4203}};
1845 
1846     float pca_r = eig[0][0] * rnd_r + eig[0][1] * rnd_g + eig[0][2] * rnd_b;
1847     float pca_g = eig[1][0] * rnd_r + eig[1][1] * rnd_g + eig[1][2] * rnd_b;
1848     float pca_b = eig[2][0] * rnd_r + eig[2][1] * rnd_g + eig[2][2] * rnd_b;
1849     for (int row = 0; row < input_img.rows; row++) {
1850       for (int col = 0; col < input_img.cols; col++) {
1851         auto r = static_cast<float>(input_img.at<cv::Vec3b>(row, col)[0]);
1852         auto g = static_cast<float>(input_img.at<cv::Vec3b>(row, col)[1]);
1853         auto b = static_cast<float>(input_img.at<cv::Vec3b>(row, col)[2]);
1854         input_img.at<cv::Vec3b>(row, col)[kRIndex] = cv::saturate_cast<uchar>(r + pca_r);
1855         input_img.at<cv::Vec3b>(row, col)[kGIndex] = cv::saturate_cast<uchar>(g + pca_g);
1856         input_img.at<cv::Vec3b>(row, col)[kBIndex] = cv::saturate_cast<uchar>(b + pca_b);
1857       }
1858     }
1859 
1860     *output = std::static_pointer_cast<Tensor>(input_cv);
1861     return Status::OK();
1862   } catch (const cv::Exception &e) {
1863     RETURN_STATUS_UNEXPECTED("RandomLighting: " + std::string(e.what()));
1864   }
1865 }
1866 
RgbaToRgb(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1867 Status RgbaToRgb(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1868   try {
1869     RETURN_IF_NOT_OK(ValidateImage(input, "RgbaToRgb", {3, 5, 11}));
1870     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1871     if (input_cv->shape().Size() != kDefaultImageChannel || input_cv->shape()[kChannelIndexHWC] != kMaxImageChannel) {
1872       std::string err_msg =
1873         "RgbaToRgb: rank of image is not: " + std::to_string(kDefaultImageChannel) +
1874         ", but got: " + std::to_string(input_cv->shape().Size()) +
1875         ", or channels of image should be 4, but got: " + std::to_string(input_cv->shape()[kChannelIndexHWC]);
1876       RETURN_STATUS_UNEXPECTED(err_msg);
1877     }
1878     TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1], 3});
1879     std::shared_ptr<CVTensor> output_cv;
1880     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
1881     cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGBA2RGB));
1882     *output = std::static_pointer_cast<Tensor>(output_cv);
1883     return Status::OK();
1884   } catch (const cv::Exception &e) {
1885     RETURN_STATUS_UNEXPECTED("RgbaToRgb: " + std::string(e.what()));
1886   }
1887 }
1888 
RgbaToBgr(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1889 Status RgbaToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1890   try {
1891     RETURN_IF_NOT_OK(ValidateImage(input, "RgbaToBgr", {3, 5, 11}));
1892     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1893     if (input_cv->shape().Size() != kDefaultImageChannel || input_cv->shape()[kChannelIndexHWC] != kMaxImageChannel) {
1894       std::string err_msg =
1895         "RgbaToBgr: rank of image is not: " + std::to_string(kDefaultImageChannel) +
1896         ", but got: " + std::to_string(input_cv->shape().Size()) +
1897         ", or channels of image should be 4, but got: " + std::to_string(input_cv->shape()[kChannelIndexHWC]);
1898       RETURN_STATUS_UNEXPECTED(err_msg);
1899     }
1900     TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1], 3});
1901     std::shared_ptr<CVTensor> output_cv;
1902     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
1903     cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGBA2BGR));
1904     *output = std::static_pointer_cast<Tensor>(output_cv);
1905     return Status::OK();
1906   } catch (const cv::Exception &e) {
1907     RETURN_STATUS_UNEXPECTED("RgbaToBgr: " + std::string(e.what()));
1908   }
1909 }
1910 
RgbToBgr(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1911 Status RgbToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1912   try {
1913     RETURN_IF_NOT_OK(ValidateImage(input, "RgbToBgr", {3, 4, 5, 6, 10, 11, 12}));
1914     auto input_type = input->type();
1915     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1916     if (!input_cv->mat().data) {
1917       RETURN_STATUS_UNEXPECTED("[Internal ERROR] RgbToBgr: load image failed.");
1918     }
1919     if (input_cv->Rank() != kDefaultImageRank || input_cv->shape()[kChannelIndexHWC] != kDefaultImageChannel) {
1920       RETURN_STATUS_UNEXPECTED("RgbToBgr: input tensor is not in shape of <H,W,C> or channel is not 3, got rank: " +
1921                                std::to_string(input_cv->Rank()) +
1922                                ", and channel: " + std::to_string(input_cv->shape()[2]));
1923     }
1924 
1925     cv::Mat image = input_cv->mat().clone();
1926     if (input_type == DataType::DE_FLOAT16 || input_type == DataType::DE_INT16 || input_type == DataType::DE_UINT16) {
1927       for (int i = 0; i < input_cv->mat().rows; ++i) {
1928         auto *p1 = input_cv->mat().ptr<cv::Vec3s>(i);
1929         auto *p2 = image.ptr<cv::Vec3s>(i);
1930         for (int j = 0; j < input_cv->mat().cols; ++j) {
1931           p2[j][kBIndex] = p1[j][kRIndex];
1932           p2[j][kGIndex] = p1[j][kGIndex];
1933           p2[j][kRIndex] = p1[j][kBIndex];
1934         }
1935       }
1936     } else if (input_type == DataType::DE_FLOAT32 || input_type == DataType::DE_INT32) {
1937       for (int i = 0; i < input_cv->mat().rows; ++i) {
1938         auto *p1 = input_cv->mat().ptr<cv::Vec3f>(i);
1939         auto *p2 = image.ptr<cv::Vec3f>(i);
1940         for (int j = 0; j < input_cv->mat().cols; ++j) {
1941           p2[j][kBIndex] = p1[j][kRIndex];
1942           p2[j][kGIndex] = p1[j][kGIndex];
1943           p2[j][kRIndex] = p1[j][kBIndex];
1944         }
1945       }
1946     } else if (input_type == DataType::DE_FLOAT64) {
1947       for (int i = 0; i < input_cv->mat().rows; ++i) {
1948         auto *p1 = input_cv->mat().ptr<cv::Vec3d>(i);
1949         auto *p2 = image.ptr<cv::Vec3d>(i);
1950         for (int j = 0; j < input_cv->mat().cols; ++j) {
1951           p2[j][kBIndex] = p1[j][kRIndex];
1952           p2[j][kGIndex] = p1[j][kGIndex];
1953           p2[j][kRIndex] = p1[j][kBIndex];
1954         }
1955       }
1956     } else {
1957       cv::cvtColor(input_cv->mat(), image, cv::COLOR_RGB2BGR);
1958     }
1959 
1960     std::shared_ptr<CVTensor> output_cv;
1961     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(image, input_cv->Rank(), &output_cv));
1962 
1963     *output = std::static_pointer_cast<Tensor>(output_cv);
1964     return Status::OK();
1965   } catch (const cv::Exception &e) {
1966     RETURN_STATUS_UNEXPECTED("RgbToBgr: " + std::string(e.what()));
1967   }
1968 }
1969 
RgbToGray(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1970 Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1971   try {
1972     RETURN_IF_NOT_OK(ValidateImage(input, "RgbToGray", {3, 5, 11}));
1973     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1974     if (input_cv->Rank() != kDefaultImageRank || input_cv->shape()[kChannelIndexHWC] != kDefaultImageChannel) {
1975       RETURN_STATUS_UNEXPECTED("RgbToGray: image shape is not <H,W,C> or channel is not 3, got rank: " +
1976                                std::to_string(input_cv->Rank()) + ", and shape: " + input_cv->shape().ToString());
1977     }
1978     TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1]});
1979     std::shared_ptr<CVTensor> output_cv;
1980     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
1981     cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGB2GRAY));
1982     *output = std::static_pointer_cast<Tensor>(output_cv);
1983     return Status::OK();
1984   } catch (const cv::Exception &e) {
1985     RETURN_STATUS_UNEXPECTED("RgbToGray: " + std::string(e.what()));
1986   }
1987 }
1988 
// Read only the JPEG header of `input` and report the decoded output dimensions,
// without decompressing the pixel data.
// @param input: tensor holding the raw JPEG byte stream (DE_BYTES or raw buffer).
// @param img_width / img_height: out-params receiving the image dimensions.
// @return Status - error if libjpeg reports a failure while parsing the header.
Status GetJpegImageInfo(const std::shared_ptr<Tensor> &input, int *img_width, int *img_height) {
  struct jpeg_decompress_struct cinfo {};
  struct JpegErrorManagerCustom jerr {};
  // Install the custom error handler so libjpeg errors surface as C++ exceptions
  // (via JpegErrorExitCustom) instead of calling exit().
  cinfo.err = jpeg_std_error(&jerr.pub);
  jerr.pub.error_exit = JpegErrorExitCustom;
  try {
    jpeg_create_decompress(&cinfo);
    // Point libjpeg at the in-memory buffer; DE_BYTES tensors store their payload
    // behind a string header, so the length must be queried separately.
    if (input->type() == DataType::DE_BYTES) {
      uint32_t len = 0;
      RETURN_IF_NOT_OK(input->GetStringLength(&len));
      JpegSetSource(&cinfo, input->GetStringsBuffer(), len);
    } else {
      JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes());
    }
    (void)jpeg_read_header(&cinfo, TRUE);
    // Computes cinfo.output_width/output_height from the header fields.
    jpeg_calc_output_dimensions(&cinfo);
    RETURN_IF_NOT_OK(CheckJpegExit(&cinfo));
  } catch (std::runtime_error &e) {
    // Release libjpeg state before propagating the converted error.
    jpeg_destroy_decompress(&cinfo);
    RETURN_STATUS_UNEXPECTED(e.what());
  }
  *img_height = static_cast<int>(cinfo.output_height);
  *img_width = static_cast<int>(cinfo.output_width);
  jpeg_destroy_decompress(&cinfo);
  return Status::OK();
}
2015 
// Build the 2x3 inverse affine matrix (rotation + scale + shear + translation,
// centered on the image) used by cv::warpAffine.
// @param input: image tensor in <H,W,C> layout; only its shape is read (for the center).
// @param matrix: out-param receiving the 6 row-major coefficients.
// @param degrees: rotation angle in degrees.
// @param translation: {tx, ty} translation in pixels; must have at least 2 elements.
// @param scale: isotropic scale factor.
// @param shear: {shear_x, shear_y} shear angles in degrees; must have at least 2 elements.
Status GetAffineMatrix(const std::shared_ptr<Tensor> &input, std::vector<float_t> *matrix, float_t degrees,
                       const std::vector<float_t> &translation, float_t scale, const std::vector<float_t> &shear) {
  CHECK_FAIL_RETURN_UNEXPECTED(translation.size() >= 2, "AffineOp::Compute translation_ size should >= 2");
  float_t translation_x = translation[0];
  float_t translation_y = translation[1];
  float_t degrees_tmp = 0.0;
  RETURN_IF_NOT_OK(DegreesToRadians(degrees, &degrees_tmp));
  CHECK_FAIL_RETURN_UNEXPECTED(shear.size() >= 2, "AffineOp::Compute shear_ size should >= 2");
  float_t shear_x = shear[0];
  float_t shear_y = shear[1];
  // Angles are converted to radians; shear_y is negated to match the sign convention
  // of the RSS derivation below.
  RETURN_IF_NOT_OK(DegreesToRadians(shear_x, &shear_x));
  RETURN_IF_NOT_OK(DegreesToRadians(-1 * shear_y, &shear_y));

  // Apply Affine Transformation
  //       T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1]
  //       C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1]
  //       RSS is rotation with scale and shear matrix
  //       RSS(a, s, (sx, sy)) =
  //       = R(a) * S(s) * SHy(sy) * SHx(sx)
  //       = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(x)/cos(y) - sin(a)), 0 ]
  //         [ s*sin(a - sy)/cos(sy), s*(-sin(a - sy)*tan(x)/cos(y) + cos(a)), 0 ]
  //         [ 0                    , 0                                      , 1 ]
  //
  // where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears:
  // SHx(s) = [1, -tan(s)] and SHy(s) = [1      , 0]
  //          [0, 1      ]              [-tan(s), 1]
  //
  // Thus, the affine matrix is M = T * C * RSS * C^-1

  // image is hwc, rows = shape()[0]
  // Center of the image in pixel coordinates (pixel centers, hence the -1).
  float_t cx = static_cast<float_t>(input->shape()[1] - 1) / 2.0F;
  float_t cy = static_cast<float_t>(input->shape()[0] - 1) / 2.0F;

  // The RSS formula divides by cos(shear_y); reject degenerate shear angles.
  CHECK_FAIL_RETURN_UNEXPECTED(cos(shear_y) != 0.0, "AffineOp: cos(shear_y) should not be zero.");

  // Calculate RSS
  *matrix = std::vector<float_t>{
    static_cast<float>(scale * cos(degrees_tmp + shear_y) / cos(shear_y)),
    static_cast<float>(scale * (-1 * cos(degrees_tmp + shear_y) * tan(shear_x) / cos(shear_y) - sin(degrees_tmp))),
    0,
    static_cast<float>(scale * sin(degrees_tmp + shear_y) / cos(shear_y)),
    static_cast<float>(scale * (-1 * sin(degrees_tmp + shear_y) * tan(shear_x) / cos(shear_y) + cos(degrees_tmp))),
    0};
  // Compute T * C * RSS * C^-1: fold the centering and translation terms into the
  // last column of the 2x3 matrix.
  (*matrix)[2] = (1 - (*matrix)[0]) * cx - (*matrix)[1] * cy + translation_x;
  (*matrix)[5] = (1 - (*matrix)[4]) * cy - (*matrix)[3] * cx + translation_y;
  return Status::OK();
}
2065 
Affine(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float_t degrees,const std::vector<float_t> & translation,float_t scale,const std::vector<float_t> & shear,InterpolationMode interpolation,const std::vector<uint8_t> & fill_value)2066 Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float_t degrees,
2067               const std::vector<float_t> &translation, float_t scale, const std::vector<float_t> &shear,
2068               InterpolationMode interpolation, const std::vector<uint8_t> &fill_value) {
2069   try {
2070     RETURN_IF_NOT_OK(ValidateImageRank("Affine", input->Rank()));
2071     dsize_t channel = 1;
2072     RETURN_IF_NOT_OK(ImageNumChannels(input, &channel));
2073     CHECK_FAIL_RETURN_UNEXPECTED(channel <= kMaxImageChannel || interpolation != InterpolationMode::kCubic,
2074                                  "Affine: interpolation can not be CUBIC when image channel is greater than 4.");
2075     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
2076     if (!input_cv->mat().data) {
2077       RETURN_STATUS_UNEXPECTED("[Internal ERROR] Affine: load image failed.");
2078     }
2079 
2080     std::vector<float_t> matrix;
2081     RETURN_IF_NOT_OK(GetAffineMatrix(input_cv, &matrix, degrees, translation, scale, shear));
2082     cv::Mat affine_mat(matrix);
2083     affine_mat = affine_mat.reshape(1, {2, 3});
2084 
2085     std::shared_ptr<CVTensor> output_cv;
2086     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
2087     RETURN_UNEXPECTED_IF_NULL(output_cv);
2088     cv::warpAffine(input_cv->mat(), output_cv->mat(), affine_mat, input_cv->mat().size(),
2089                    GetCVInterpolationMode(interpolation), cv::BORDER_CONSTANT,
2090                    cv::Scalar(fill_value[kRIndex], fill_value[kGIndex], fill_value[kBIndex]));
2091     (*output) = std::static_pointer_cast<Tensor>(output_cv);
2092     return Status::OK();
2093   } catch (const cv::Exception &e) {
2094     RETURN_STATUS_UNEXPECTED("Affine: " + std::string(e.what()));
2095   }
2096 }
2097 
GaussianBlur(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t kernel_x,int32_t kernel_y,float sigma_x,float sigma_y)2098 Status GaussianBlur(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t kernel_x,
2099                     int32_t kernel_y, float sigma_x, float sigma_y) {
2100   try {
2101     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
2102     if (input_cv->mat().data == nullptr) {
2103       RETURN_STATUS_UNEXPECTED("[Internal ERROR] GaussianBlur: load image failed.");
2104     }
2105     cv::Mat output_cv_mat;
2106     cv::GaussianBlur(input_cv->mat(), output_cv_mat, cv::Size(kernel_x, kernel_y), static_cast<double>(sigma_x),
2107                      static_cast<double>(sigma_y));
2108     std::shared_ptr<CVTensor> output_cv;
2109     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_cv_mat, input_cv->Rank(), &output_cv));
2110     (*output) = std::static_pointer_cast<Tensor>(output_cv);
2111     return Status::OK();
2112   } catch (const cv::Exception &e) {
2113     RETURN_STATUS_UNEXPECTED("GaussianBlur: " + std::string(e.what()));
2114   }
2115 }
2116 
ComputePatchSize(const std::shared_ptr<CVTensor> & input_cv,std::shared_ptr<std::pair<int32_t,int32_t>> * patch_size,int32_t num_height,int32_t num_width,SliceMode slice_mode)2117 Status ComputePatchSize(const std::shared_ptr<CVTensor> &input_cv,
2118                         std::shared_ptr<std::pair<int32_t, int32_t>> *patch_size, int32_t num_height, int32_t num_width,
2119                         SliceMode slice_mode) {
2120   if (input_cv->mat().data == nullptr) {
2121     RETURN_STATUS_UNEXPECTED("[Internal ERROR] SlicePatches: Tensor could not convert to CV Tensor.");
2122   }
2123   RETURN_IF_NOT_OK(ValidateImageRank("Affine", input_cv->Rank()));
2124 
2125   cv::Mat in_img = input_cv->mat();
2126   cv::Size s = in_img.size();
2127   if (num_height == 0 || num_height > s.height) {
2128     RETURN_STATUS_UNEXPECTED(
2129       "SlicePatches: The number of patches on height axis equals 0 or is greater than height, got number of patches:" +
2130       std::to_string(num_height));
2131   }
2132   if (num_width == 0 || num_width > s.width) {
2133     RETURN_STATUS_UNEXPECTED(
2134       "SlicePatches: The number of patches on width axis equals 0 or is greater than width, got number of patches:" +
2135       std::to_string(num_width));
2136   }
2137   int32_t patch_h = s.height / num_height;
2138   if (s.height % num_height != 0) {
2139     if (slice_mode == SliceMode::kPad) {
2140       patch_h += 1;  // patch_h * num_height - s.height
2141     }
2142   }
2143   int32_t patch_w = s.width / num_width;
2144   if (s.width % num_width != 0) {
2145     if (slice_mode == SliceMode::kPad) {
2146       patch_w += 1;  // patch_w * num_width - s.width
2147     }
2148   }
2149   (*patch_size)->first = patch_h;
2150   (*patch_size)->second = patch_w;
2151   return Status::OK();
2152 }
2153 
SlicePatches(const std::shared_ptr<Tensor> & input,std::vector<std::shared_ptr<Tensor>> * output,int32_t num_height,int32_t num_width,SliceMode slice_mode,uint8_t fill_value)2154 Status SlicePatches(const std::shared_ptr<Tensor> &input, std::vector<std::shared_ptr<Tensor>> *output,
2155                     int32_t num_height, int32_t num_width, SliceMode slice_mode, uint8_t fill_value) {
2156   if (num_height == DEFAULT_NUM_HEIGHT && num_width == DEFAULT_NUM_WIDTH) {
2157     (*output).push_back(input);
2158     return Status::OK();
2159   }
2160 
2161   auto patch_size = std::make_shared<std::pair<int32_t, int32_t>>(0, 0);
2162   int32_t patch_h = 0;
2163   int32_t patch_w = 0;
2164 
2165   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
2166   RETURN_IF_NOT_OK(ComputePatchSize(input_cv, &patch_size, num_height, num_width, slice_mode));
2167   std::tie(patch_h, patch_w) = *patch_size;
2168 
2169   cv::Mat in_img = input_cv->mat();
2170   cv::Size s = in_img.size();
2171   try {
2172     cv::Mat out_img;
2173     if (slice_mode == SliceMode::kPad) {  // padding on right and bottom directions
2174       auto padding_h = patch_h * num_height - s.height;
2175       auto padding_w = patch_w * num_width - s.width;
2176       out_img = cv::Mat(s.height + padding_h, s.width + padding_w, in_img.type(), cv::Scalar::all(fill_value));
2177       in_img.copyTo(out_img(cv::Rect(0, 0, s.width, s.height)));
2178     } else {
2179       out_img = in_img;
2180     }
2181     for (int i = 0; i < num_height; ++i) {
2182       for (int j = 0; j < num_width; ++j) {
2183         std::shared_ptr<CVTensor> patch_cv;
2184         cv::Rect rect(j * patch_w, i * patch_h, patch_w, patch_h);
2185         cv::Mat patch(out_img(rect));
2186         RETURN_IF_NOT_OK(CVTensor::CreateFromMat(patch, input_cv->Rank(), &patch_cv));
2187         (*output).push_back(std::static_pointer_cast<Tensor>(patch_cv));
2188       }
2189     }
2190     return Status::OK();
2191   } catch (const cv::Exception &e) {
2192     RETURN_STATUS_UNEXPECTED("SlicePatches: " + std::string(e.what()));
2193   }
2194 }
2195 
Solarize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const std::vector<float> & threshold)2196 Status Solarize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
2197                 const std::vector<float> &threshold) {
2198   try {
2199     RETURN_IF_NOT_OK(ValidateImage(input, "Solarize", {1, 2, 3, 4, 5, 6, 11, 12}, {2, 3}, {1, 3}));
2200     std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
2201     cv::Mat input_img = input_cv->mat();
2202     if (!input_cv->mat().data) {
2203       RETURN_STATUS_UNEXPECTED("Solarize: load image failed.");
2204     }
2205 
2206     std::shared_ptr<CVTensor> mask_mat_tensor;
2207     std::shared_ptr<CVTensor> output_cv_tensor;
2208     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_img, input_cv->Rank(), &mask_mat_tensor));
2209 
2210     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv_tensor));
2211     RETURN_UNEXPECTED_IF_NULL(mask_mat_tensor);
2212     RETURN_UNEXPECTED_IF_NULL(output_cv_tensor);
2213 
2214     auto threshold_min = threshold[0], threshold_max = threshold[1];
2215 
2216     if (threshold_min == threshold_max) {
2217       mask_mat_tensor->mat().setTo(0, ~(input_cv->mat() >= threshold_min));
2218     } else {
2219       mask_mat_tensor->mat().setTo(0, ~((input_cv->mat() >= threshold_min) & (input_cv->mat() <= threshold_max)));
2220     }
2221 
2222     // solarize desired portion
2223     const float max_size = 255.f;
2224     output_cv_tensor->mat() = cv::Scalar::all(max_size) - mask_mat_tensor->mat();
2225     input_cv->mat().copyTo(output_cv_tensor->mat(), input_cv->mat() < threshold_min);
2226     if (threshold_min < threshold_max) {
2227       input_cv->mat().copyTo(output_cv_tensor->mat(), input_cv->mat() > threshold_max);
2228     }
2229 
2230     *output = std::static_pointer_cast<Tensor>(output_cv_tensor);
2231   }
2232 
2233   catch (const cv::Exception &e) {
2234     RETURN_STATUS_UNEXPECTED("Solarize: " + std::string(e.what()));
2235   }
2236   return Status::OK();
2237 }
2238 
// Convert an <H,W,C> (or <H,W>) image to a CHW float tensor scaled to [0, 1],
// then optionally cast to the requested output data type.
Status ToTensor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type) {
  try {
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] ToTensor: load image failed.");
    }
    if (input_cv->Rank() == kMinImageRank) {
      // If input tensor is 2D, we assume we have HW dimensions; add a trailing
      // channel axis so the rest of the code can treat it as <H,W,1>.
      RETURN_IF_NOT_OK(input_cv->ExpandDim(kMinImageRank));
    }
    CHECK_FAIL_RETURN_UNEXPECTED(
      input_cv->shape().Size() > kChannelIndexHWC,
      "ToTensor: rank of input data should be greater than: " + std::to_string(kChannelIndexHWC) +
        ", but got:" + std::to_string(input_cv->shape().Size()));
    int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
    if (input_cv->shape().Size() != kDefaultImageRank) {
      RETURN_STATUS_UNEXPECTED("ToTensor: image shape should be <H,W,C>, but got rank: " +
                               std::to_string(input_cv->shape().Size()));
    }

    int height = static_cast<int>(input_cv->shape()[0]);
    int width = static_cast<int>(input_cv->shape()[1]);

    // OpenCv has a bug in extractChannel when the type is float16.
    // To avoid the segfault, we cast to float32 first.
    if (input_cv->type() == DataType(DataType::DE_FLOAT16)) {
      RETURN_IF_NOT_OK(TypeCast(input_cv, output, DataType(DataType::DE_FLOAT32)));
      input_cv = CVTensor::AsCVTensor(*output);
    }

    std::shared_ptr<CVTensor> output_cv;
    // Reshape from HWC to CHW
    RETURN_IF_NOT_OK(
      CVTensor::CreateEmpty(TensorShape{num_channels, height, width}, DataType(DataType::DE_FLOAT32), &output_cv));
    // Rescale tensor by dividing by 255: copy each channel plane into the output
    // (MatAtIndex yields a view into output_cv, so convertTo writes in place).
    const auto kMaxBitValueinFloat = static_cast<float>(kMaxBitValue);
    for (int i = 0; i < num_channels; ++i) {
      cv::Mat mat_t;
      cv::extractChannel(input_cv->mat(), mat_t, i);
      cv::Mat mat;
      RETURN_IF_NOT_OK(output_cv->MatAtIndex({i}, &mat));
      mat_t.convertTo(mat, CV_32F, 1 / kMaxBitValueinFloat, 0);
    }

    // Process tensor output according to desired output data type
    if (data_type != DataType(DataType::DE_FLOAT32)) {
      RETURN_IF_NOT_OK(TypeCast(output_cv, output, data_type));
    } else {
      *output = std::move(output_cv);
    }
    return Status::OK();
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("ToTensor: " + std::string(e.what()));
  }
}
2294 
2295 // round half to even
Round(float value)2296 float Round(float value) {
2297   const int32_t kEven = 2;
2298   float rnd = round(value);
2299   float rnd_l = floor(value);
2300   float rnd_h = ceil(value);
2301   if (value - rnd_l == kHalf) {
2302     if (common::IsDoubleEqual(fmod(rnd, kEven), 0.0)) {
2303       return rnd;
2304     } else if (value > 0) {
2305       return rnd_l;
2306     } else {
2307       return rnd_h;
2308     }
2309   }
2310   return rnd;
2311 }
2312 
Linspace(float start,float end,int n,float scale,float offset,bool round)2313 std::vector<float> Linspace(float start, float end, int n, float scale, float offset, bool round) {
2314   std::vector<float> linear(n);
2315   float step = (n == 1) ? 0 : (end - start) / static_cast<float>(n - 1);
2316   for (size_t i = 0; i < linear.size(); ++i) {
2317     linear[i] = (start + static_cast<float>(i) * step) * scale + offset;
2318     if (round) {
2319       linear[i] = Round(linear[i]);
2320     }
2321   }
2322   return linear;
2323 }
2324 
ApplyAugment(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const std::string & op_name,float magnitude,InterpolationMode interpolation,const std::vector<uint8_t> & fill_value)2325 Status ApplyAugment(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::string &op_name,
2326                     float magnitude, InterpolationMode interpolation, const std::vector<uint8_t> &fill_value) {
2327   if (op_name == "ShearX") {
2328     float_t shear = magnitude * 180.F / CV_PI;
2329     AffineOp affine(0.0, {0, 0}, 1.0, {shear, 0.0}, interpolation, fill_value);
2330     RETURN_IF_NOT_OK(affine.Compute(input, output));
2331   } else if (op_name == "ShearY") {
2332     float_t shear = magnitude * 180.F / CV_PI;
2333     AffineOp affine(0.0, {0, 0}, 1.0, {0.0, shear}, interpolation, fill_value);
2334     RETURN_IF_NOT_OK(affine.Compute(input, output));
2335   } else if (op_name == "TranslateX") {
2336     float_t translate = magnitude;
2337     AffineOp affine(0.0, {translate, 0}, 1.0, {0.0, 0.0}, interpolation, fill_value);
2338     RETURN_IF_NOT_OK(affine.Compute(input, output));
2339   } else if (op_name == "TranslateY") {
2340     float_t translate = magnitude;
2341     AffineOp affine(0.0, {0, translate}, 1.0, {0.0, 0.0}, interpolation, fill_value);
2342     RETURN_IF_NOT_OK(affine.Compute(input, output));
2343   } else if (op_name == "Rotate") {
2344     RETURN_IF_NOT_OK(Rotate(input, output, {}, magnitude, interpolation, false, fill_value[kRIndex],
2345                             fill_value[kBIndex], fill_value[kGIndex]));
2346   } else if (op_name == "Brightness") {
2347     RETURN_IF_NOT_OK(AdjustBrightness(input, output, 1 + magnitude));
2348   } else if (op_name == "Color") {
2349     RETURN_IF_NOT_OK(AdjustSaturation(input, output, 1 + magnitude));
2350   } else if (op_name == "Contrast") {
2351     RETURN_IF_NOT_OK(AdjustContrast(input, output, 1 + magnitude));
2352   } else if (op_name == "Sharpness") {
2353     SharpnessOp sharpness(1 + magnitude);
2354     RETURN_IF_NOT_OK(sharpness.Compute(input, output));
2355   } else if (op_name == "Posterize") {
2356     PosterizeOp posterize(static_cast<int>(magnitude));
2357     RETURN_IF_NOT_OK(posterize.Compute(input, output));
2358   } else if (op_name == "Solarize") {
2359     RETURN_IF_NOT_OK(Solarize(input, output, {magnitude, magnitude}));
2360   } else if (op_name == "AutoContrast") {
2361     RETURN_IF_NOT_OK(AutoContrast(input, output, 0.0, {}));
2362   } else if (op_name == "Equalize") {
2363     RETURN_IF_NOT_OK(Equalize(input, output));
2364   } else if (op_name == "Identity") {
2365     *output = std::static_pointer_cast<Tensor>(input);
2366   } else if (op_name == "Invert") {
2367     InvertOp invert;
2368     RETURN_IF_NOT_OK(invert.Compute(input, output));
2369   } else {
2370     RETURN_STATUS_UNEXPECTED("ApplyAugment: the provided operator " + op_name + " is not supported.");
2371   }
2372   return Status::OK();
2373 }
2374 
EncodeJpeg(const std::shared_ptr<Tensor> & image,std::shared_ptr<Tensor> * output,int quality)2375 Status EncodeJpeg(const std::shared_ptr<Tensor> &image, std::shared_ptr<Tensor> *output, int quality) {
2376   RETURN_UNEXPECTED_IF_NULL(output);
2377 
2378   std::string err_msg;
2379   if (image->type() != DataType::DE_UINT8) {
2380     err_msg = "EncodeJpeg: The type of the image data should be UINT8, but got " + image->type().ToString() + ".";
2381     RETURN_STATUS_UNEXPECTED(err_msg);
2382   }
2383 
2384   TensorShape shape = image->shape();
2385   int rank = static_cast<int>(shape.Rank());
2386   if (rank < kMinImageRank || rank > kDefaultImageRank) {
2387     err_msg = "EncodeJpeg: The image has invalid dimensions. It should have two or three dimensions, but got ";
2388     err_msg += std::to_string(rank) + " dimensions.";
2389     RETURN_STATUS_UNEXPECTED(err_msg);
2390   }
2391   int channels;
2392   if (rank == kDefaultImageRank) {
2393     channels = static_cast<int>(shape[kMinImageRank]);
2394     if (channels != kMinImageChannel && channels != kDefaultImageChannel) {
2395       err_msg = "EncodeJpeg: The image has invalid channels. It should have 1 or 3 channels, but got ";
2396       err_msg += std::to_string(channels) + " channels.";
2397       RETURN_STATUS_UNEXPECTED(err_msg);
2398     }
2399   } else {
2400     channels = 1;
2401   }
2402 
2403   if (quality < kMinJpegQuality || quality > kMaxJpegQuality) {
2404     err_msg = "EncodeJpeg: Invalid quality " + std::to_string(quality) + ", should be in range of [" +
2405               std::to_string(kMinJpegQuality) + ", " + std::to_string(kMaxJpegQuality) + "].";
2406 
2407     RETURN_STATUS_UNEXPECTED(err_msg);
2408   }
2409 
2410   std::vector<int> params = {cv::IMWRITE_JPEG_QUALITY,  quality, cv::IMWRITE_JPEG_PROGRESSIVE,  0,
2411                              cv::IMWRITE_JPEG_OPTIMIZE, 0,       cv::IMWRITE_JPEG_RST_INTERVAL, 0};
2412 
2413   std::vector<unsigned char> buffer;
2414   cv::Mat image_matrix;
2415 
2416   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(image);
2417   image_matrix = input_cv->mat();
2418   if (!image_matrix.data) {
2419     RETURN_STATUS_UNEXPECTED("EncodeJpeg: Load the image tensor failed.");
2420   }
2421 
2422   if (channels == kMinImageChannel) {
2423     CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".JPEG", image_matrix, buffer, params),
2424                                  "EncodeJpeg: Failed to encode image.");
2425   } else {
2426     cv::Mat image_bgr;
2427     cv::cvtColor(image_matrix, image_bgr, cv::COLOR_RGB2BGR);
2428     CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".JPEG", image_bgr, buffer, params),
2429                                  "EncodeJpeg: Failed to encode image.");
2430   }
2431 
2432   TensorShape tensor_shape = TensorShape({(long int)buffer.size()});
2433   RETURN_IF_NOT_OK(Tensor::CreateFromMemory(tensor_shape, DataType(DataType::DE_UINT8), buffer.data(), output));
2434 
2435   return Status::OK();
2436 }
2437 
EncodePng(const std::shared_ptr<Tensor> & image,std::shared_ptr<Tensor> * output,int compression_level)2438 Status EncodePng(const std::shared_ptr<Tensor> &image, std::shared_ptr<Tensor> *output, int compression_level) {
2439   RETURN_UNEXPECTED_IF_NULL(output);
2440 
2441   std::string err_msg;
2442   if (image->type() != DataType::DE_UINT8) {
2443     err_msg = "EncodePng: The type of the image data should be UINT8, but got " + image->type().ToString() + ".";
2444     RETURN_STATUS_UNEXPECTED(err_msg);
2445   }
2446 
2447   TensorShape shape = image->shape();
2448   int rank = static_cast<int>(shape.Rank());
2449   if (rank < kMinImageRank || rank > kDefaultImageRank) {
2450     err_msg = "EncodePng: The image has invalid dimensions. It should have two or three dimensions, but got ";
2451     err_msg += std::to_string(rank) + " dimensions.";
2452     RETURN_STATUS_UNEXPECTED(err_msg);
2453   }
2454   int channels;
2455   if (rank == kDefaultImageRank) {
2456     channels = static_cast<int>(shape[kMinImageRank]);
2457     if (channels != kMinImageChannel && channels != kDefaultImageChannel) {
2458       err_msg = "EncodePng: The image has invalid channels. It should have 1 or 3 channels, but got ";
2459       err_msg += std::to_string(channels) + " channels.";
2460       RETURN_STATUS_UNEXPECTED(err_msg);
2461     }
2462   } else {
2463     channels = 1;
2464   }
2465 
2466   if (compression_level < kMinPngCompression || compression_level > kMaxPngCompression) {
2467     err_msg = "EncodePng: Invalid compression_level " + std::to_string(compression_level) +
2468               ", should be in range of [" + std::to_string(kMinPngCompression) + ", " +
2469               std::to_string(kMaxPngCompression) + "].";
2470     RETURN_STATUS_UNEXPECTED(err_msg);
2471   }
2472 
2473   std::vector<int> params = {cv::IMWRITE_PNG_COMPRESSION, compression_level, cv::IMWRITE_PNG_STRATEGY,
2474                              cv::IMWRITE_PNG_STRATEGY_RLE};
2475   std::vector<unsigned char> buffer;
2476   cv::Mat image_matrix;
2477 
2478   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(image);
2479   image_matrix = input_cv->mat();
2480   if (!image_matrix.data) {
2481     RETURN_STATUS_UNEXPECTED("EncodePng: Load the image tensor failed.");
2482   }
2483 
2484   if (channels == kMinImageChannel) {
2485     CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".PNG", image_matrix, buffer, params),
2486                                  "EncodePng: Failed to encode image.");
2487   } else {
2488     cv::Mat image_bgr;
2489     cv::cvtColor(image_matrix, image_bgr, cv::COLOR_RGB2BGR);
2490     CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".PNG", image_bgr, buffer, params), "EncodePng: Failed to encode image.");
2491   }
2492 
2493   TensorShape tensor_shape = TensorShape({(long int)buffer.size()});
2494   RETURN_IF_NOT_OK(Tensor::CreateFromMemory(tensor_shape, DataType(DataType::DE_UINT8), buffer.data(), output));
2495 
2496   return Status::OK();
2497 }
2498 
ReadFile(const std::string & filename,std::shared_ptr<Tensor> * output)2499 Status ReadFile(const std::string &filename, std::shared_ptr<Tensor> *output) {
2500   RETURN_UNEXPECTED_IF_NULL(output);
2501 
2502   auto realpath = FileUtils::GetRealPath(filename.c_str());
2503   if (!realpath.has_value()) {
2504     RETURN_STATUS_UNEXPECTED("ReadFile: Invalid file path, " + filename + " does not exist.");
2505   }
2506   if (!Path(realpath.value()).IsFile()) {
2507     RETURN_STATUS_UNEXPECTED("ReadFile: Invalid file path, " + filename + " is not a regular file.");
2508   }
2509 
2510   RETURN_IF_NOT_OK(Tensor::CreateFromFile(realpath.value(), output));
2511   return Status::OK();
2512 }
2513 
ReadImage(const std::string & filename,std::shared_ptr<Tensor> * output,ImageReadMode mode)2514 Status ReadImage(const std::string &filename, std::shared_ptr<Tensor> *output, ImageReadMode mode) {
2515   RETURN_UNEXPECTED_IF_NULL(output);
2516 
2517   auto realpath = FileUtils::GetRealPath(filename.c_str());
2518   if (!realpath.has_value()) {
2519     std::string err_msg = "ReadImage: Invalid file path, " + filename + " does not exist.";
2520     RETURN_STATUS_UNEXPECTED(err_msg);
2521   }
2522   if (!Path(realpath.value()).IsFile()) {
2523     RETURN_STATUS_UNEXPECTED("ReadImage: Invalid file path, " + filename + " is not a regular file.");
2524   }
2525 
2526   cv::Mat image;
2527   int cv_mode = static_cast<int>(mode) - 1;
2528   image = cv::imread(realpath.value(), cv_mode);
2529   if (image.data == nullptr) {
2530     RETURN_STATUS_UNEXPECTED("ReadImage: Failed to read file " + filename);
2531   }
2532 
2533   std::shared_ptr<CVTensor> output_cv;
2534   if (mode == ImageReadMode::kCOLOR || image.channels() > 1) {
2535     cv::Mat image_rgb;
2536     cv::cvtColor(image, image_rgb, cv::COLOR_BGRA2RGB);
2537     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(image_rgb, kDefaultImageRank, &output_cv));
2538   } else {
2539     RETURN_IF_NOT_OK(CVTensor::CreateFromMat(image, kDefaultImageRank, &output_cv));
2540   }
2541   *output = std::static_pointer_cast<Tensor>(output_cv);
2542 
2543   return Status::OK();
2544 }
2545 
Status WriteFile(const std::string &filename, const std::shared_ptr<Tensor> &data) {
  // Write a 1-D uint8 tensor to disk as a binary file (creating the file when
  // absent) and restrict its permissions to owner read/write.
  // @param filename Destination path.
  // @param data 1-D uint8 tensor (or DE_BYTES) whose raw bytes are written.
  // @return Status::OK() on success, an unexpected-error status otherwise.
  std::string err_msg;

  if (data->type() != DataType::DE_UINT8) {
    err_msg = "WriteFile: The type of the elements of data should be UINT8, but got " + data->type().ToString() + ".";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }

  long int data_size = data->Size();
  // Only assigned inside the data_size > 0 branch below; it is also only
  // dereferenced (by fs.write) inside a matching data_size > 0 branch.
  const char *data_buffer;
  if (data_size >= kDeMaxDim || data_size < 0) {
    err_msg = "WriteFile: Invalid data->Size() , should be >= 0 && < " + std::to_string(kDeMaxDim);
    err_msg += " , but got " + std::to_string(data_size) + " for " + filename;
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  if (data_size > 0) {
    // DE_BYTES tensors keep their payload in a separate string buffer.
    if (data->type() == DataType::DE_BYTES) {
      data_buffer = (const char *)data->GetStringsBuffer();
    } else {
      data_buffer = (const char *)data->GetMutableBuffer();
    }
    if (data_buffer == nullptr) {
      err_msg = "WriteFile: Invalid data->GetBufferSize() , should not be nullptr.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
    TensorShape shape = data->shape();
    int rank = static_cast<int>(shape.Rank());
    // NOTE(review): kMinImageChannel (value 1) is reused here as "rank must be
    // exactly one dimension"; a dedicated rank constant would be clearer.
    if (rank != kMinImageChannel) {
      err_msg = "WriteFile: The data has invalid dimensions. It should have only one dimension, but got ";
      err_msg += std::to_string(rank) + " dimensions.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
  }

  // Create the file first so GetRealPath below can resolve it (realpath fails
  // for non-existent paths).
  Path file(filename);
  if (!file.Exists()) {
    int file_descriptor;
    RETURN_IF_NOT_OK(file.CreateFile(&file_descriptor));
    RETURN_IF_NOT_OK(file.CloseFile(file_descriptor));
  }
  auto realpath = FileUtils::GetRealPath(filename.c_str());
  if (!realpath.has_value()) {
    RETURN_STATUS_UNEXPECTED("WriteFile: Invalid file path, " + filename + " failed to get the real path.");
  }
  if (!Path(realpath.value()).IsFile()) {
    RETURN_STATUS_UNEXPECTED("WriteFile: Invalid file path, " + filename + " is not a regular file.");
  }

  // Truncate any existing content and write the raw bytes.
  std::ofstream fs(realpath.value().c_str(), std::ios::out | std::ios::trunc | std::ios::binary);
  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "WriteFile: Failed to open the file: " + filename + " for writing.");

  if (data_size > 0) {
    fs.write(data_buffer, data_size);
    if (fs.fail()) {
      err_msg = "WriteFile: Failed to write the file " + filename;
      fs.close();
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
  }
  fs.close();
  // Restrict permissions to owner read/write.
  ChangeFileMode(realpath.value(), S_IRUSR | S_IWUSR);
  return Status::OK();
}
2609 
Status WriteJpeg(const std::string &filename, const std::shared_ptr<Tensor> &image, int quality) {
  // Encode an image tensor as JPEG and write it to disk, then restrict the
  // file permissions to owner read/write.
  // @param filename Destination path.
  // @param image Input image, uint8, rank 2 (HW) or rank 3 (HWC with 1 or 3 channels, RGB order).
  // @param quality JPEG quality in [kMinJpegQuality, kMaxJpegQuality].
  // @return Status::OK() on success, an unexpected-error status otherwise.
  std::string err_msg;

  if (image->type() != DataType::DE_UINT8) {
    err_msg = "WriteJpeg: The type of the elements of image should be UINT8, but got " + image->type().ToString() + ".";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  TensorShape shape = image->shape();
  int rank = static_cast<int>(shape.Rank());
  if (rank < kMinImageRank || rank > kDefaultImageRank) {
    err_msg = "WriteJpeg: The image has invalid dimensions. It should have two or three dimensions, but got ";
    err_msg += std::to_string(rank) + " dimensions.";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  int channels;
  if (rank == kDefaultImageRank) {
    channels = static_cast<int>(shape[kMinImageRank]);
    if (channels != kMinImageChannel && channels != kDefaultImageChannel) {
      err_msg = "WriteJpeg: The image has invalid channels. It should have 1 or 3 channels, but got ";
      err_msg += std::to_string(channels) + " channels.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
  } else {
    channels = 1;  // a rank-2 (HW) image is implicitly single channel
  }

  if (quality < kMinJpegQuality || quality > kMaxJpegQuality) {
    err_msg = "WriteJpeg: Invalid quality " + std::to_string(quality) + ", should be in range of [" +
              std::to_string(kMinJpegQuality) + ", " + std::to_string(kMaxJpegQuality) + "].";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }

  // Baseline (non-progressive, non-optimized) JPEG with no restart markers.
  std::vector<int> params = {cv::IMWRITE_JPEG_QUALITY,  quality, cv::IMWRITE_JPEG_PROGRESSIVE,  0,
                             cv::IMWRITE_JPEG_OPTIMIZE, 0,       cv::IMWRITE_JPEG_RST_INTERVAL, 0};

  std::vector<unsigned char> buffer;
  cv::Mat image_matrix;

  std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(image);
  image_matrix = input_cv->mat();
  if (!image_matrix.data) {
    RETURN_STATUS_UNEXPECTED("WriteJpeg: Load the image tensor failed.");
  }

  if (channels == kMinImageChannel) {
    CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".JPEG", image_matrix, buffer, params),
                                 "WriteJpeg: Failed to encode image.");
  } else {
    // The tensor stores RGB while OpenCV encoders expect BGR channel order.
    cv::Mat image_bgr;
    cv::cvtColor(image_matrix, image_bgr, cv::COLOR_RGB2BGR);
    CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".JPEG", image_bgr, buffer, params),
                                 "WriteJpeg: Failed to encode image.");
  }

  // Create the file first so GetRealPath below can resolve it (realpath fails
  // for non-existent paths).
  Path file(filename);
  if (!file.Exists()) {
    int file_descriptor;
    RETURN_IF_NOT_OK(file.CreateFile(&file_descriptor));
    RETURN_IF_NOT_OK(file.CloseFile(file_descriptor));
  }
  auto realpath = FileUtils::GetRealPath(filename.c_str());
  if (!realpath.has_value()) {
    RETURN_STATUS_UNEXPECTED("WriteJpeg: Invalid file path, " + filename + " failed to get the real path.");
  }
  if (!Path(realpath.value()).IsFile()) {
    RETURN_STATUS_UNEXPECTED("WriteJpeg: Invalid file path, " + filename + " is not a regular file.");
  }

  std::ofstream fs(realpath.value().c_str(), std::ios::out | std::ios::trunc | std::ios::binary);
  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "WriteJpeg: Failed to open the file " + filename + " for writing.");

  fs.write((const char *)buffer.data(), (long int)buffer.size());
  if (fs.fail()) {
    fs.close();
    RETURN_STATUS_UNEXPECTED("WriteJpeg: Failed to write the file " + filename);
  }
  fs.close();
  // Restrict permissions to owner read/write.
  ChangeFileMode(realpath.value(), S_IRUSR | S_IWUSR);
  return Status::OK();
}
2690 
WritePng(const std::string & filename,const std::shared_ptr<Tensor> & image,int compression_level)2691 Status WritePng(const std::string &filename, const std::shared_ptr<Tensor> &image, int compression_level) {
2692   std::string err_msg;
2693 
2694   if (image->type() != DataType::DE_UINT8) {
2695     err_msg = "WritePng: The type of the elements of image should be UINT8, but got " + image->type().ToString() + ".";
2696     RETURN_STATUS_UNEXPECTED(err_msg);
2697   }
2698   TensorShape shape = image->shape();
2699   int rank = static_cast<int>(shape.Rank());
2700   if (rank < kMinImageRank || rank > kDefaultImageRank) {
2701     err_msg = "WritePng: The image has invalid dimensions. It should have two or three dimensions, but got ";
2702     err_msg += std::to_string(rank) + " dimensions.";
2703     RETURN_STATUS_UNEXPECTED(err_msg);
2704   }
2705   int channels;
2706   if (rank == kDefaultImageRank) {
2707     channels = static_cast<int>(shape[kMinImageRank]);
2708     if (channels != kMinImageChannel && channels != kDefaultImageChannel) {
2709       err_msg = "WritePng: The image has invalid channels. It should have 1 or 3 channels, but got ";
2710       err_msg += std::to_string(channels) + " channels.";
2711       RETURN_STATUS_UNEXPECTED(err_msg);
2712     }
2713   } else {
2714     channels = 1;
2715   }
2716 
2717   if (compression_level < kMinPngCompression || compression_level > kMaxPngCompression) {
2718     err_msg = "WritePng: Invalid compression_level " + std::to_string(compression_level) + ", should be in range of [" +
2719               std::to_string(kMinPngCompression) + ", " + std::to_string(kMaxPngCompression) + "].";
2720     RETURN_STATUS_UNEXPECTED(err_msg);
2721   }
2722 
2723   std::vector<int> params = {cv::IMWRITE_PNG_COMPRESSION, compression_level, cv::IMWRITE_PNG_STRATEGY,
2724                              cv::IMWRITE_PNG_STRATEGY_RLE};
2725   std::vector<unsigned char> buffer;
2726   cv::Mat image_matrix;
2727 
2728   std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(image);
2729   image_matrix = input_cv->mat();
2730   if (!image_matrix.data) {
2731     RETURN_STATUS_UNEXPECTED("WritePng: Load the image tensor failed.");
2732   }
2733 
2734   if (channels == kMinImageChannel) {
2735     CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".PNG", image_matrix, buffer, params),
2736                                  "WritePng: Failed to encode image.");
2737   } else {
2738     cv::Mat image_bgr;
2739     cv::cvtColor(image_matrix, image_bgr, cv::COLOR_RGB2BGR);
2740     CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".PNG", image_bgr, buffer, params), "WritePng: Failed to encode image.");
2741   }
2742 
2743   Path file(filename);
2744   if (!file.Exists()) {
2745     int file_descriptor;
2746     RETURN_IF_NOT_OK(file.CreateFile(&file_descriptor));
2747     RETURN_IF_NOT_OK(file.CloseFile(file_descriptor));
2748   }
2749   auto realpath = FileUtils::GetRealPath(filename.c_str());
2750   if (!realpath.has_value()) {
2751     RETURN_STATUS_UNEXPECTED("WritePng: Invalid file path, " + filename + " failed to get the real path.");
2752   }
2753   struct stat sb {};
2754   stat(realpath.value().c_str(), &sb);
2755   if (S_ISREG(sb.st_mode) == 0) {
2756     RETURN_STATUS_UNEXPECTED("WritePng: Invalid file path, " + filename + " is not a regular file.");
2757   }
2758 
2759   std::ofstream fs(realpath.value().c_str(), std::ios::out | std::ios::trunc | std::ios::binary);
2760   CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "WritePng: Failed to open the file " + filename + " for writing.");
2761 
2762   fs.write((const char *)buffer.data(), (long int)buffer.size());
2763   if (fs.fail()) {
2764     fs.close();
2765     RETURN_STATUS_UNEXPECTED("WritePng: Failed to write the file " + filename);
2766   }
2767   fs.close();
2768   ChangeFileMode(realpath.value(), S_IRUSR | S_IWUSR);
2769   return Status::OK();
2770 }
2771 
// support list
// Leading file signatures (magic numbers) of image containers that the decode
// pipeline can handle; used to recognize the format of a raw byte buffer.
const unsigned char kBmpMagic[] = "\x42\x4D";  // "BM"
constexpr dsize_t kBmpMagicLen = 2;
const unsigned char kTiffMagic1[] = "\x4D\x4D";  // "MM": big-endian TIFF
const unsigned char kTiffMagic2[] = "\x49\x49";  // "II": little-endian TIFF
constexpr dsize_t kTiffMagicLen = 2;
2778 
Status DumpImageAndAppendStatus(const std::shared_ptr<Tensor> &image, const Status &status) {
  // Best effort: dump the raw bytes of an image that failed to process into
  // the current working directory (named by its detected format) and append a
  // hint about the dump location to the incoming error status.
  // @param image Raw (still encoded) image buffer.
  // @param status The original failure status to annotate and return.
  Status local_status = status;
  std::string file_name = "./abnormal_image.";
  std::string file_suffix;
  std::string error_info = local_status.GetErrDescription();

  uint32_t image_length = 0;
  uchar *image_ptr = nullptr;
  if (image->type() == DataType::DE_BYTES) {
    RETURN_IF_NOT_OK(image->GetStringLength(&image_length));
    image_ptr = image->GetStringsBuffer();
  } else {
    // NOTE(review): SizeInBytes() is wider than uint32_t, so buffers over
    // 4 GiB would truncate here — presumably acceptable for encoded images.
    image_length = image->SizeInBytes();
    image_ptr = image->GetMutableBuffer();
  }

  if (image_length == 0) {
    return local_status;
  }

  // Only run memcmp when the buffer actually holds enough bytes for the magic
  // number, so a truncated file cannot trigger an out-of-bounds read.
  auto magic_matches = [image_ptr, image_length](const void *magic, dsize_t magic_len) {
    return static_cast<dsize_t>(image_length) >= magic_len && memcmp(image_ptr, magic, magic_len) == 0;
  };

  if (magic_matches(kJpegMagic, kJpegMagicLen)) {  // support
    file_suffix = "jpg";
  } else if (magic_matches(kPngMagic, kPngMagicLen)) {  // support
    file_suffix = "png";
  } else if (magic_matches(kBmpMagic, kBmpMagicLen)) {  // support
    file_suffix = "bmp";
  } else if (magic_matches(kTiffMagic1, kTiffMagicLen) ||  // support
             magic_matches(kTiffMagic2, kTiffMagicLen)) {
    file_suffix = "tif";
  } else {
    file_suffix = "exception";
    error_info += " Unknown image type.";
  }

  // Dump failures are non-fatal: the hint is only appended when writing works.
  auto ret = WriteFile(file_name + file_suffix, image);
  if (ret == Status::OK()) {
    error_info += " Dump the abnormal image to [" + (file_name + file_suffix) +
                  "]. You can check this image first through the image viewer. If you find that " +
                  "the image is abnormal, delete it from the dataset and re-run.";
  }
  local_status.SetErrDescription(error_info);
  return local_status;
}
2822 
// unsupported list
// Magic numbers of formats the pipeline cannot decode. kWebpMagic ("\0WEB")
// is matched at offset 7, i.e. the "WEB" tag that begins at byte 8 of a RIFF
// container plus the preceding byte — presumably relying on the high byte of
// the RIFF chunk size being 0 for typical files; see CheckUnsupportedImage.
const unsigned char kGifMagic[] = "\x47\x49\x46";
constexpr dsize_t kGifMagicLen = 3;
const unsigned char kWebpMagic[] = "\x00\x57\x45\x42";
constexpr dsize_t kWebpMagicLen = 4;
2828 
CheckUnsupportedImage(const std::shared_ptr<Tensor> & image)2829 Status CheckUnsupportedImage(const std::shared_ptr<Tensor> &image) {
2830   bool unsupport_flag = false;
2831 
2832   std::string file_name = "./unsupported_image.";
2833   std::string file_suffix;
2834   if (image->SizeInBytes() == 0) {
2835     RETURN_STATUS_UNEXPECTED("Image file size is 0.");
2836   }
2837 
2838   uchar *image_ptr = nullptr;
2839   if (image->type() == DataType::DE_BYTES) {
2840     image_ptr = image->GetStringsBuffer();
2841   } else {
2842     image_ptr = image->GetMutableBuffer();
2843   }
2844 
2845   if (memcmp(image_ptr, kGifMagic, kGifMagicLen) == 0) {  // unsupported
2846     file_suffix = "gif";
2847     unsupport_flag = true;
2848   } else if (memcmp(image_ptr + 7, kWebpMagic, kWebpMagicLen) == 0) {  // unsupported: skip the 7 bytes
2849     file_suffix = "webp";
2850     unsupport_flag = true;
2851   }
2852 
2853   if (unsupport_flag) {
2854     auto ret = WriteFile(file_name + file_suffix, image);
2855     if (ret == Status::OK()) {
2856       RETURN_STATUS_UNEXPECTED("Unsupported image type [" + file_suffix + "] and dump the image to [" +
2857                                (file_name + file_suffix) + "]. Please delete it from the dataset and re-run.");
2858     } else {
2859       ret.SetErrDescription("Unsupported image type [" + file_suffix + "], but dump the image failed. " +
2860                             "Error info: " + ret.GetErrDescription());
2861       return ret;
2862     }
2863   }
2864   return Status::OK();
2865 }
2866 }  // namespace dataset
2867 }  // namespace mindspore
2868