1 /**
2 * Copyright 2019 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "minddata/dataset/kernels/image/image_utils.h"
17 #include <opencv2/imgproc/types_c.h>
18 #include <algorithm>
19 #include <limits>
20 #include <vector>
21 #include <stdexcept>
22 #include <opencv2/imgcodecs.hpp>
23 #include "utils/ms_utils.h"
24 #include "minddata/dataset/core/cv_tensor.h"
25 #include "minddata/dataset/core/tensor.h"
26 #include "minddata/dataset/core/tensor_shape.h"
27 #include "minddata/dataset/include/dataset/constants.h"
28 #include "minddata/dataset/kernels/image/math_utils.h"
29 #include "minddata/dataset/kernels/image/resize_cubic_op.h"
30
// 2^24: the largest integer N such that every integer in [0, N] is exactly
// representable in a 32-bit float; guards float-based integer math.
const int32_t MAX_INT_PRECISION = 16777216;  // float int precision is 16777216
// Default 1x1 grid — presumably consumed by slice/patch ops defined later in
// this file; confirm at the call sites.
const int32_t DEFAULT_NUM_HEIGHT = 1;
const int32_t DEFAULT_NUM_WIDTH = 1;
34
35 namespace mindspore {
36 namespace dataset {
GetCVInterpolationMode(InterpolationMode mode)37 int GetCVInterpolationMode(InterpolationMode mode) {
38 switch (mode) {
39 case InterpolationMode::kLinear:
40 return static_cast<int>(cv::InterpolationFlags::INTER_LINEAR);
41 case InterpolationMode::kCubic:
42 return static_cast<int>(cv::InterpolationFlags::INTER_CUBIC);
43 case InterpolationMode::kArea:
44 return static_cast<int>(cv::InterpolationFlags::INTER_AREA);
45 case InterpolationMode::kNearestNeighbour:
46 return static_cast<int>(cv::InterpolationFlags::INTER_NEAREST);
47 default:
48 return static_cast<int>(cv::InterpolationFlags::INTER_LINEAR);
49 }
50 }
51
GetCVBorderType(BorderType type)52 int GetCVBorderType(BorderType type) {
53 switch (type) {
54 case BorderType::kConstant:
55 return static_cast<int>(cv::BorderTypes::BORDER_CONSTANT);
56 case BorderType::kEdge:
57 return static_cast<int>(cv::BorderTypes::BORDER_REPLICATE);
58 case BorderType::kReflect:
59 return static_cast<int>(cv::BorderTypes::BORDER_REFLECT101);
60 case BorderType::kSymmetric:
61 return static_cast<int>(cv::BorderTypes::BORDER_REFLECT);
62 default:
63 return static_cast<int>(cv::BorderTypes::BORDER_CONSTANT);
64 }
65 }
66
GetConvertShape(ConvertMode convert_mode,const std::shared_ptr<CVTensor> & input_cv,std::vector<dsize_t> * node)67 Status GetConvertShape(ConvertMode convert_mode, const std::shared_ptr<CVTensor> &input_cv,
68 std::vector<dsize_t> *node) {
69 std::vector<ConvertMode> one_channels = {ConvertMode::COLOR_BGR2GRAY, ConvertMode::COLOR_RGB2GRAY,
70 ConvertMode::COLOR_BGRA2GRAY, ConvertMode::COLOR_RGBA2GRAY};
71 std::vector<ConvertMode> three_channels = {
72 ConvertMode::COLOR_BGRA2BGR, ConvertMode::COLOR_RGBA2RGB, ConvertMode::COLOR_RGBA2BGR, ConvertMode::COLOR_BGRA2RGB,
73 ConvertMode::COLOR_BGR2RGB, ConvertMode::COLOR_RGB2BGR, ConvertMode::COLOR_GRAY2BGR, ConvertMode::COLOR_GRAY2RGB};
74 std::vector<ConvertMode> four_channels = {ConvertMode::COLOR_BGR2BGRA, ConvertMode::COLOR_RGB2RGBA,
75 ConvertMode::COLOR_BGR2RGBA, ConvertMode::COLOR_RGB2BGRA,
76 ConvertMode::COLOR_BGRA2RGBA, ConvertMode::COLOR_RGBA2BGRA,
77 ConvertMode::COLOR_GRAY2BGRA, ConvertMode::COLOR_GRAY2RGBA};
78 if (std::find(three_channels.begin(), three_channels.end(), convert_mode) != three_channels.end()) {
79 *node = {input_cv->shape()[0], input_cv->shape()[1], 3};
80 } else if (std::find(four_channels.begin(), four_channels.end(), convert_mode) != four_channels.end()) {
81 *node = {input_cv->shape()[0], input_cv->shape()[1], 4};
82 } else if (std::find(one_channels.begin(), one_channels.end(), convert_mode) != one_channels.end()) {
83 *node = {input_cv->shape()[0], input_cv->shape()[1]};
84 } else {
85 RETURN_STATUS_UNEXPECTED(
86 "The mode of image channel conversion must be in ConvertMode, which mainly includes "
87 "conversion between RGB, BGR, GRAY, RGBA etc.");
88 }
89 return Status::OK();
90 }
91
CheckTensorShape(const std::shared_ptr<Tensor> & tensor,const int & channel)92 bool CheckTensorShape(const std::shared_ptr<Tensor> &tensor, const int &channel) {
93 if (tensor == nullptr) {
94 return false;
95 }
96 bool rc = false;
97 if (tensor->shape().Size() <= channel) {
98 return false;
99 }
100 if (tensor->Rank() != DEFAULT_IMAGE_RANK ||
101 (tensor->shape()[channel] != 1 && tensor->shape()[channel] != DEFAULT_IMAGE_CHANNELS)) {
102 rc = true;
103 }
104 return rc;
105 }
106
Flip(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output,int flip_code)107 Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int flip_code) {
108 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
109
110 if (input_cv->Rank() == 1 || input_cv->mat().dims > 2) {
111 RETURN_STATUS_UNEXPECTED("Flip: shape of input is not <H,W,C> or <H,W>, but got rank:" +
112 std::to_string(input_cv->Rank()));
113 }
114
115 std::shared_ptr<CVTensor> output_cv;
116 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
117
118 if (input_cv->mat().data) {
119 try {
120 cv::flip(input_cv->mat(), output_cv->mat(), flip_code);
121 *output = std::static_pointer_cast<Tensor>(output_cv);
122 return Status::OK();
123 } catch (const cv::Exception &e) {
124 RETURN_STATUS_UNEXPECTED("Flip: " + std::string(e.what()));
125 }
126 } else {
127 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Flip: allocate memory failed.");
128 }
129 }
130
HorizontalFlip(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)131 Status HorizontalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
132 return Flip(std::move(input), output, 1);
133 }
134
VerticalFlip(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)135 Status VerticalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
136 return Flip(std::move(input), output, 0);
137 }
138
Resize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t output_height,int32_t output_width,double fx,double fy,InterpolationMode mode)139 Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height,
140 int32_t output_width, double fx, double fy, InterpolationMode mode) {
141 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
142 if (!input_cv->mat().data) {
143 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Resize: load image failed.");
144 }
145 RETURN_IF_NOT_OK(ValidateImageRank("Resize", input_cv->Rank()));
146
147 cv::Mat in_image = input_cv->mat();
148 const uint32_t kResizeShapeLimits = 1000;
149 // resize image too large or too small, 1000 is arbitrarily chosen here to prevent open cv from segmentation fault
150 if (output_height > in_image.rows * kResizeShapeLimits || output_width > in_image.cols * kResizeShapeLimits) {
151 std::string err_msg =
152 "Resize: the resizing width or height is too big, it's 1000 times bigger than the original image, got output "
153 "height: " +
154 std::to_string(output_height) + ", width: " + std::to_string(output_width) +
155 ", and original image size:" + std::to_string(in_image.rows) + ", " + std::to_string(in_image.cols);
156 return Status(StatusCode::kMDShapeMisMatch, err_msg);
157 }
158 if (output_height == 0 || output_width == 0) {
159 std::string err_msg = "Resize: the resizing width or height is invalid, width or height is zero.";
160 return Status(StatusCode::kMDShapeMisMatch, err_msg);
161 }
162
163 if (mode == InterpolationMode::kCubicPil) {
164 LiteMat imIn, imOut;
165 std::shared_ptr<Tensor> output_tensor;
166 TensorShape new_shape = TensorShape({output_height, output_width, 3});
167 RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input_cv->type(), &output_tensor));
168 uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
169 imOut.Init(output_width, output_height, input_cv->shape()[2], reinterpret_cast<void *>(buffer), LDataType::UINT8);
170 imIn.Init(input_cv->shape()[1], input_cv->shape()[0], input_cv->shape()[2], input_cv->mat().data, LDataType::UINT8);
171 if (ResizeCubic(imIn, imOut, output_width, output_height) == false) {
172 RETURN_STATUS_UNEXPECTED("Resize: failed to do resize, please check the error msg.");
173 }
174 *output = output_tensor;
175 return Status::OK();
176 }
177 try {
178 TensorShape shape{output_height, output_width};
179 int num_channels = input_cv->shape()[CHANNEL_INDEX];
180 if (input_cv->Rank() == DEFAULT_IMAGE_RANK) shape = shape.AppendDim(num_channels);
181 std::shared_ptr<CVTensor> output_cv;
182 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));
183
184 auto cv_mode = GetCVInterpolationMode(mode);
185 cv::resize(in_image, output_cv->mat(), cv::Size(output_width, output_height), fx, fy, cv_mode);
186 *output = std::static_pointer_cast<Tensor>(output_cv);
187 return Status::OK();
188 } catch (const cv::Exception &e) {
189 RETURN_STATUS_UNEXPECTED("Resize: " + std::string(e.what()));
190 }
191 }
192
IsNonEmptyJPEG(const std::shared_ptr<Tensor> & input)193 bool IsNonEmptyJPEG(const std::shared_ptr<Tensor> &input) {
194 const unsigned char *kJpegMagic = (unsigned char *)"\xFF\xD8\xFF";
195 constexpr dsize_t kJpegMagicLen = 3;
196 return input->SizeInBytes() > kJpegMagicLen && memcmp(input->GetBuffer(), kJpegMagic, kJpegMagicLen) == 0;
197 }
198
IsNonEmptyPNG(const std::shared_ptr<Tensor> & input)199 bool IsNonEmptyPNG(const std::shared_ptr<Tensor> &input) {
200 const unsigned char *kPngMagic = (unsigned char *)"\x89\x50\x4E\x47";
201 constexpr dsize_t kPngMagicLen = 4;
202 return input->SizeInBytes() > kPngMagicLen && memcmp(input->GetBuffer(), kPngMagic, kPngMagicLen) == 0;
203 }
204
Decode(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)205 Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
206 if (IsNonEmptyJPEG(input)) {
207 return JpegCropAndDecode(input, output);
208 } else {
209 return DecodeCv(input, output);
210 }
211 }
212
DecodeCv(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)213 Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
214 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
215 if (!input_cv->mat().data) {
216 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: load image failed.");
217 }
218 try {
219 cv::Mat img_mat = cv::imdecode(input_cv->mat(), cv::IMREAD_COLOR | cv::IMREAD_IGNORE_ORIENTATION);
220 if (img_mat.data == nullptr) {
221 std::string err = "Decode: image decode failed.";
222 RETURN_STATUS_UNEXPECTED(err);
223 }
224 cv::cvtColor(img_mat, img_mat, static_cast<int>(cv::COLOR_BGR2RGB));
225 std::shared_ptr<CVTensor> output_cv;
226 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, 3, &output_cv));
227 *output = std::static_pointer_cast<Tensor>(output_cv);
228 return Status::OK();
229 } catch (const cv::Exception &e) {
230 RETURN_STATUS_UNEXPECTED("Decode: " + std::string(e.what()));
231 }
232 }
233
JpegInitSource(j_decompress_ptr cinfo)234 static void JpegInitSource(j_decompress_ptr cinfo) {}
235
// libjpeg source-manager callback invoked when the decoder wants more bytes.
// The whole stream is supplied up front, so an empty buffer means truncated
// input: report JERR_INPUT_EMPTY (which throws via JpegErrorExitCustom) and
// tell libjpeg no data was provided.
static boolean JpegFillInputBuffer(j_decompress_ptr cinfo) {
  if (cinfo->src->bytes_in_buffer == 0) {
    // Under ARM platform raise runtime_error may cause core problem,
    // so we catch runtime_error and just return FALSE.
    try {
      ERREXIT(cinfo, JERR_INPUT_EMPTY);
    } catch (std::runtime_error &e) {
      return FALSE;
    }
    return FALSE;
  }
  return TRUE;
}
249
JpegTermSource(j_decompress_ptr cinfo)250 static void JpegTermSource(j_decompress_ptr cinfo) {}
251
JpegSkipInputData(j_decompress_ptr cinfo,int64_t jump)252 static void JpegSkipInputData(j_decompress_ptr cinfo, int64_t jump) {
253 if (jump < 0) {
254 return;
255 }
256 if (static_cast<size_t>(jump) > cinfo->src->bytes_in_buffer) {
257 cinfo->src->bytes_in_buffer = 0;
258 return;
259 } else {
260 cinfo->src->bytes_in_buffer -= jump;
261 cinfo->src->next_input_byte += jump;
262 }
263 }
264
// Install an in-memory data source on `cinfo` so libjpeg decodes directly
// from `data`/`datasize` without file I/O. The source-manager struct is
// allocated from libjpeg's JPOOL_PERMANENT pool and is freed together with
// the decompress object.
void JpegSetSource(j_decompress_ptr cinfo, const void *data, int64_t datasize) {
  cinfo->src = static_cast<struct jpeg_source_mgr *>(
    (*cinfo->mem->alloc_small)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_PERMANENT, sizeof(struct jpeg_source_mgr)));
  cinfo->src->init_source = JpegInitSource;
  cinfo->src->fill_input_buffer = JpegFillInputBuffer;
#if defined(_WIN32) || defined(_WIN64) || defined(ENABLE_ARM32) || defined(__APPLE__)
  // On these platforms the skip_input_data slot is declared with `long`
  // (32-bit there), so the int64_t-based callback is force-cast.
  // NOTE(review): casting between mismatched function-pointer types is
  // technically undefined behavior — kept as-is to match the existing builds.
  cinfo->src->skip_input_data = reinterpret_cast<void (*)(j_decompress_ptr, long)>(JpegSkipInputData);
#else
  cinfo->src->skip_input_data = JpegSkipInputData;
#endif
  cinfo->src->resync_to_restart = jpeg_resync_to_restart;
  cinfo->src->term_source = JpegTermSource;
  cinfo->src->bytes_in_buffer = datasize;
  cinfo->src->next_input_byte = static_cast<const JOCTET *>(data);
}
280
// Read scanlines from `cinfo` until output_scanline reaches
// `max_scanlines_to_read`, writing RGB rows into `buffer` (`buffer_size`
// bytes total). `crop_w` is the wanted pixel width, `crop_w_aligned` the
// MCU-aligned width actually decoded, `offset` the byte offset of the wanted
// region within a decoded scanline, and `stride` the output bytes per row.
// On decode failure the decompress object is destroyed before returning.
static Status JpegReadScanlines(jpeg_decompress_struct *const cinfo, int max_scanlines_to_read, JSAMPLE *buffer,
                                int buffer_size, int crop_w, int crop_w_aligned, int offset, int stride) {
  // scanlines will be read to this buffer first, must have the number
  // of components equal to the number of components in the image
  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int64_t>::max() / cinfo->output_components) > crop_w_aligned,
                               "JpegReadScanlines: multiplication out of bounds.");
  int64_t scanline_size = crop_w_aligned * cinfo->output_components;
  std::vector<JSAMPLE> scanline(scanline_size);
  JSAMPLE *scanline_ptr = &scanline[0];
  while (cinfo->output_scanline < static_cast<unsigned int>(max_scanlines_to_read)) {
    int num_lines_read = 0;
    try {
      // May longjmp through JpegErrorExitCustom, which rethrows as
      // std::runtime_error.
      num_lines_read = jpeg_read_scanlines(cinfo, &scanline_ptr, 1);
    } catch (std::runtime_error &e) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: image decode failed.");
    }
    if (cinfo->out_color_space == JCS_CMYK && num_lines_read > 0) {
      // Convert CMYK to RGB per pixel. The two formulas branch on
      // saw_Adobe_marker — presumably Adobe files store inverted CMYK;
      // verify against libjpeg documentation if this path is changed.
      for (int i = 0; i < crop_w; ++i) {
        const int cmyk_pixel = 4 * i + offset;
        const int c = scanline_ptr[cmyk_pixel];
        const int m = scanline_ptr[cmyk_pixel + 1];
        const int y = scanline_ptr[cmyk_pixel + 2];
        const int k = scanline_ptr[cmyk_pixel + 3];
        int r, g, b;
        if (cinfo->saw_Adobe_marker) {
          r = (k * c) / 255;
          g = (k * m) / 255;
          b = (k * y) / 255;
        } else {
          r = (255 - c) * (255 - k) / 255;
          g = (255 - m) * (255 - k) / 255;
          b = (255 - y) * (255 - k) / 255;
        }
        buffer[3 * i + 0] = r;
        buffer[3 * i + 1] = g;
        buffer[3 * i + 2] = b;
      }
    } else if (num_lines_read > 0) {
      // Non-CMYK data is already in output order: copy only the crop columns.
      int copy_status = memcpy_s(buffer, buffer_size, scanline_ptr + offset, stride);
      if (copy_status != 0) {
        jpeg_destroy_decompress(cinfo);
        RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: memcpy failed.");
      }
    } else {
      // Decoder made no progress: release libjpeg state and fail.
      jpeg_destroy_decompress(cinfo);
      std::string err_msg = "[Internal ERROR] Decode: image decode failed.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
    buffer += stride;
    buffer_size = buffer_size - stride;
  }
  return Status::OK();
}
334
JpegSetColorSpace(jpeg_decompress_struct * cinfo)335 static Status JpegSetColorSpace(jpeg_decompress_struct *cinfo) {
336 switch (cinfo->num_components) {
337 case 1:
338 // we want to output 3 components if it's grayscale
339 cinfo->out_color_space = JCS_RGB;
340 return Status::OK();
341 case 3:
342 cinfo->out_color_space = JCS_RGB;
343 return Status::OK();
344 case 4:
345 // Need to manually convert to RGB
346 cinfo->out_color_space = JCS_CMYK;
347 return Status::OK();
348 default:
349 jpeg_destroy_decompress(cinfo);
350 std::string err_msg = "[Internal ERROR] Decode: image decode failed.";
351 RETURN_STATUS_UNEXPECTED(err_msg);
352 }
353 }
354
// Custom libjpeg fatal-error handler: format the library's message and throw
// it as std::runtime_error instead of letting libjpeg call exit(). Every
// libjpeg call site in this file wraps the call in try/catch to absorb it.
void JpegErrorExitCustom(j_common_ptr cinfo) {
  char jpeg_last_error_msg[JMSG_LENGTH_MAX];
  (*(cinfo->err->format_message))(cinfo, jpeg_last_error_msg);
  throw std::runtime_error(jpeg_last_error_msg);
}
360
JpegCropAndDecode(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int crop_x,int crop_y,int crop_w,int crop_h)361 Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int crop_x, int crop_y,
362 int crop_w, int crop_h) {
363 struct jpeg_decompress_struct cinfo;
364 auto DestroyDecompressAndReturnError = [&cinfo](const std::string &err) {
365 jpeg_destroy_decompress(&cinfo);
366 RETURN_STATUS_UNEXPECTED(err);
367 };
368 struct JpegErrorManagerCustom jerr;
369 cinfo.err = jpeg_std_error(&jerr.pub);
370 jerr.pub.error_exit = JpegErrorExitCustom;
371 try {
372 jpeg_create_decompress(&cinfo);
373 JpegSetSource(&cinfo, input->GetBuffer(), input->SizeInBytes());
374 (void)jpeg_read_header(&cinfo, TRUE);
375 RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo));
376 jpeg_calc_output_dimensions(&cinfo);
377 } catch (std::runtime_error &e) {
378 return DestroyDecompressAndReturnError(e.what());
379 }
380 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_w) > crop_x,
381 "JpegCropAndDecode: addition(crop x and crop width) out of bounds.");
382 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_h) > crop_y,
383 "JpegCropAndDecode: addition(crop y and crop height) out of bounds.");
384 if (crop_x == 0 && crop_y == 0 && crop_w == 0 && crop_h == 0) {
385 crop_w = cinfo.output_width;
386 crop_h = cinfo.output_height;
387 } else if (crop_w == 0 || static_cast<unsigned int>(crop_w + crop_x) > cinfo.output_width || crop_h == 0 ||
388 static_cast<unsigned int>(crop_h + crop_y) > cinfo.output_height) {
389 return DestroyDecompressAndReturnError("Crop: invalid crop size.");
390 }
391 const int mcu_size = cinfo.min_DCT_scaled_size;
392 CHECK_FAIL_RETURN_UNEXPECTED(mcu_size != 0, "JpegCropAndDecode: divisor mcu_size is zero.");
393 unsigned int crop_x_aligned = (crop_x / mcu_size) * mcu_size;
394 unsigned int crop_w_aligned = crop_w + crop_x - crop_x_aligned;
395 try {
396 (void)jpeg_start_decompress(&cinfo);
397 jpeg_crop_scanline(&cinfo, &crop_x_aligned, &crop_w_aligned);
398 } catch (std::runtime_error &e) {
399 return DestroyDecompressAndReturnError(e.what());
400 }
401 JDIMENSION skipped_scanlines = jpeg_skip_scanlines(&cinfo, crop_y);
402 // three number of output components, always convert to RGB and output
403 constexpr int kOutNumComponents = 3;
404 TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents});
405 std::shared_ptr<Tensor> output_tensor;
406 RETURN_IF_NOT_OK(Tensor::CreateEmpty(ts, DataType(DataType::DE_UINT8), &output_tensor));
407 const int buffer_size = output_tensor->SizeInBytes();
408 JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>()));
409 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<float_t>::max() - skipped_scanlines) > crop_h,
410 "JpegCropAndDecode: addition out of bounds.");
411 const int max_scanlines_to_read = skipped_scanlines + crop_h;
412 // stride refers to output tensor, which has 3 components at most
413 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() / crop_w) > kOutNumComponents,
414 "JpegCropAndDecode: multiplication out of bounds.");
415 const int stride = crop_w * kOutNumComponents;
416 // offset is calculated for scanlines read from the image, therefore
417 // has the same number of components as the image
418 int minius_value = crop_x - crop_x_aligned;
419 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<float_t>::max() / minius_value) > cinfo.output_components,
420 "JpegCropAndDecode: multiplication out of bounds.");
421 const int offset = minius_value * cinfo.output_components;
422 RETURN_IF_NOT_OK(
423 JpegReadScanlines(&cinfo, max_scanlines_to_read, buffer, buffer_size, crop_w, crop_w_aligned, offset, stride));
424 *output = output_tensor;
425 jpeg_destroy_decompress(&cinfo);
426 return Status::OK();
427 }
428
Rescale(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float rescale,float shift)429 Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rescale, float shift) {
430 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
431 if (!input_cv->mat().data) {
432 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Rescale: load image failed.");
433 }
434 cv::Mat input_image = input_cv->mat();
435 std::shared_ptr<CVTensor> output_cv;
436 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), DataType(DataType::DE_FLOAT32), &output_cv));
437 try {
438 input_image.convertTo(output_cv->mat(), CV_32F, rescale, shift);
439 *output = std::static_pointer_cast<Tensor>(output_cv);
440 } catch (const cv::Exception &e) {
441 RETURN_STATUS_UNEXPECTED("Rescale: " + std::string(e.what()));
442 }
443 return Status::OK();
444 }
445
Crop(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int x,int y,int w,int h)446 Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h) {
447 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
448 if (!input_cv->mat().data) {
449 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Crop: load image failed.");
450 }
451 RETURN_IF_NOT_OK(ValidateImageRank("Crop", input_cv->Rank()));
452 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - y) > h,
453 "Crop: addition(x and height) out of bounds.");
454 // account for integer overflow
455 if (y < 0 || (y + h) > input_cv->shape()[0] || (y + h) < 0) {
456 RETURN_STATUS_UNEXPECTED(
457 "Crop: invalid y coordinate value for crop, y coordinate value exceeds the boundary of the image, got y: " +
458 std::to_string(y));
459 }
460 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - x) > w, "Crop: addition out of bounds.");
461 // account for integer overflow
462 if (x < 0 || (x + w) > input_cv->shape()[1] || (x + w) < 0) {
463 RETURN_STATUS_UNEXPECTED(
464 "Crop: invalid x coordinate value for crop, "
465 "x coordinate value exceeds the boundary of the image, got x: " +
466 std::to_string(x));
467 }
468 try {
469 TensorShape shape{h, w};
470 if (input_cv->Rank() == DEFAULT_IMAGE_RANK) {
471 int num_channels = input_cv->shape()[CHANNEL_INDEX];
472 shape = shape.AppendDim(num_channels);
473 }
474 std::shared_ptr<CVTensor> output_cv;
475 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));
476 cv::Rect roi(x, y, w, h);
477 (input_cv->mat())(roi).copyTo(output_cv->mat());
478 *output = std::static_pointer_cast<Tensor>(output_cv);
479 return Status::OK();
480 } catch (const cv::Exception &e) {
481 RETURN_STATUS_UNEXPECTED("Crop: " + std::string(e.what()));
482 }
483 }
484
ConvertColor(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,ConvertMode convert_mode)485 Status ConvertColor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, ConvertMode convert_mode) {
486 try {
487 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
488 RETURN_IF_NOT_OK(ValidateImageRank("ConvertColor", input_cv->Rank()));
489 if (!input_cv->mat().data) {
490 RETURN_STATUS_UNEXPECTED("[Internal ERROR] ConvertColor: load image failed.");
491 }
492 if (input_cv->Rank() == DEFAULT_IMAGE_RANK) {
493 int num_channels = input_cv->shape()[CHANNEL_INDEX];
494 if (num_channels != DEFAULT_IMAGE_CHANNELS && num_channels != MAX_IMAGE_CHANNELS) {
495 RETURN_STATUS_UNEXPECTED("ConvertColor: number of channels of image should be 3 or 4, but got:" +
496 std::to_string(num_channels));
497 }
498 }
499 std::vector<dsize_t> node;
500 RETURN_IF_NOT_OK(GetConvertShape(convert_mode, input_cv, &node));
501 if (node.empty()) {
502 RETURN_STATUS_UNEXPECTED(
503 "ConvertColor: convert mode must be in ConvertMode, which mainly includes conversion "
504 "between RGB, BGR, GRAY, RGBA etc.");
505 }
506 TensorShape out_shape = TensorShape(node);
507 std::shared_ptr<CVTensor> output_cv;
508 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
509 cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(convert_mode));
510 *output = std::static_pointer_cast<Tensor>(output_cv);
511 return Status::OK();
512 } catch (const cv::Exception &e) {
513 RETURN_STATUS_UNEXPECTED("ConvertColor: " + std::string(e.what()));
514 }
515 }
516
HwcToChw(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)517 Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
518 try {
519 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
520 if (!input_cv->mat().data) {
521 RETURN_STATUS_UNEXPECTED("[Internal ERROR] HWC2CHW: load image failed.");
522 }
523 if (input_cv->Rank() == 2) {
524 // If input tensor is 2D, we assume we have hw dimensions
525 *output = input;
526 return Status::OK();
527 }
528 CHECK_FAIL_RETURN_UNEXPECTED(input_cv->shape().Size() > CHANNEL_INDEX, "HWC2CHW: invalid shape.");
529 int num_channels = input_cv->shape()[CHANNEL_INDEX];
530 if (input_cv->shape().Size() < MIN_IMAGE_DIMENSION || input_cv->shape().Size() > DEFAULT_IMAGE_CHANNELS ||
531 (input_cv->shape().Size() == DEFAULT_IMAGE_CHANNELS && num_channels != DEFAULT_IMAGE_CHANNELS &&
532 num_channels != MIN_IMAGE_CHANNELS)) {
533 RETURN_STATUS_UNEXPECTED("HWC2CHW: image shape is not <H,W,C>, but got rank: " +
534 std::to_string(input_cv->shape().Size()));
535 }
536 cv::Mat output_img;
537
538 int height = input_cv->shape()[0];
539 int width = input_cv->shape()[1];
540
541 std::shared_ptr<CVTensor> output_cv;
542 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(TensorShape{num_channels, height, width}, input_cv->type(), &output_cv));
543 for (int i = 0; i < num_channels; ++i) {
544 cv::Mat mat;
545 RETURN_IF_NOT_OK(output_cv->MatAtIndex({i}, &mat));
546 cv::extractChannel(input_cv->mat(), mat, i);
547 }
548 *output = std::move(output_cv);
549 return Status::OK();
550 } catch (const cv::Exception &e) {
551 RETURN_STATUS_UNEXPECTED("HWC2CHW: " + std::string(e.what()));
552 }
553 }
554
MaskWithTensor(const std::shared_ptr<Tensor> & sub_mat,std::shared_ptr<Tensor> * input,int x,int y,int crop_width,int crop_height,ImageFormat image_format)555 Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Tensor> *input, int x, int y,
556 int crop_width, int crop_height, ImageFormat image_format) {
557 if (image_format == ImageFormat::HWC) {
558 if (CheckTensorShape(*input, 2)) {
559 RETURN_STATUS_UNEXPECTED(
560 "CutMixBatch: MaskWithTensor failed: "
561 "input shape doesn't match <H,W,C> format.");
562 }
563 if (CheckTensorShape(sub_mat, 2)) {
564 RETURN_STATUS_UNEXPECTED(
565 "CutMixBatch: MaskWithTensor failed: "
566 "sub_mat shape doesn't match <H,W,C> format.");
567 }
568 int number_of_channels = (*input)->shape()[CHANNEL_INDEX];
569 for (int i = 0; i < crop_width; i++) {
570 for (int j = 0; j < crop_height; j++) {
571 for (int c = 0; c < number_of_channels; c++) {
572 RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {j, i, c}, {y + j, x + i, c}));
573 }
574 }
575 }
576 } else if (image_format == ImageFormat::CHW) {
577 if (CheckTensorShape(*input, 0)) {
578 RETURN_STATUS_UNEXPECTED(
579 "CutMixBatch: MaskWithTensor failed: "
580 "input shape doesn't match <C,H,W> format.");
581 }
582 if (CheckTensorShape(sub_mat, 0)) {
583 RETURN_STATUS_UNEXPECTED(
584 "CutMixBatch: MaskWithTensor failed: "
585 "sub_mat shape doesn't match <C,H,W> format.");
586 }
587 int number_of_channels = (*input)->shape()[0];
588 for (int i = 0; i < crop_width; i++) {
589 for (int j = 0; j < crop_height; j++) {
590 for (int c = 0; c < number_of_channels; c++) {
591 RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {c, j, i}, {c, y + j, x + i}));
592 }
593 }
594 }
595 } else if (image_format == ImageFormat::HW) {
596 if ((*input)->Rank() != MIN_IMAGE_DIMENSION) {
597 RETURN_STATUS_UNEXPECTED(
598 "CutMixBatch: MaskWithTensor failed: "
599 "input shape doesn't match <H,W> format.");
600 }
601 if (sub_mat->Rank() != MIN_IMAGE_DIMENSION) {
602 RETURN_STATUS_UNEXPECTED(
603 "CutMixBatch: MaskWithTensor failed: "
604 "sub_mat shape doesn't match <H,W> format.");
605 }
606 for (int i = 0; i < crop_width; i++) {
607 for (int j = 0; j < crop_height; j++) {
608 RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {j, i}, {y + j, x + i}));
609 }
610 }
611 } else {
612 RETURN_STATUS_UNEXPECTED(
613 "CutMixBatch: MaskWithTensor failed: "
614 "image format must be <C,H,W>, <H,W,C>, or <H,W>.");
615 }
616 return Status::OK();
617 }
618
CopyTensorValue(const std::shared_ptr<Tensor> & source_tensor,std::shared_ptr<Tensor> * dest_tensor,const std::vector<int64_t> & source_indx,const std::vector<int64_t> & dest_indx)619 Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared_ptr<Tensor> *dest_tensor,
620 const std::vector<int64_t> &source_indx, const std::vector<int64_t> &dest_indx) {
621 if (source_tensor->type() != (*dest_tensor)->type())
622 RETURN_STATUS_UNEXPECTED(
623 "CutMixBatch: CopyTensorValue failed: "
624 "source and destination tensor must have the same type.");
625 if (source_tensor->type() == DataType::DE_UINT8) {
626 uint8_t pixel_value = 0;
627 RETURN_IF_NOT_OK(source_tensor->GetItemAt(&pixel_value, source_indx));
628 RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, pixel_value));
629 } else if (source_tensor->type() == DataType::DE_FLOAT32) {
630 float pixel_value = 0;
631 RETURN_IF_NOT_OK(source_tensor->GetItemAt(&pixel_value, source_indx));
632 RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, pixel_value));
633 } else {
634 RETURN_STATUS_UNEXPECTED(
635 "CutMixBatch: CopyTensorValue failed: "
636 "Tensor type is not supported. Tensor type must be float32 or uint8.");
637 }
638 return Status::OK();
639 }
640
SwapRedAndBlue(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)641 Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
642 try {
643 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
644 CHECK_FAIL_RETURN_UNEXPECTED(input_cv->shape().Size() > CHANNEL_INDEX, "SwapRedAndBlue: shape is invalid.");
645 int num_channels = input_cv->shape()[CHANNEL_INDEX];
646 if (input_cv->shape().Size() != 3 || num_channels != DEFAULT_IMAGE_CHANNELS) {
647 RETURN_STATUS_UNEXPECTED("SwapRedBlue: image shape is not <H,W,C>.");
648 }
649 std::shared_ptr<CVTensor> output_cv;
650 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
651
652 cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_BGR2RGB));
653 *output = std::static_pointer_cast<Tensor>(output_cv);
654 return Status::OK();
655 } catch (const cv::Exception &e) {
656 RETURN_STATUS_UNEXPECTED("SwapRedBlue: " + std::string(e.what()));
657 }
658 }
659
// Crop the region (x, y, crop_width, crop_height) out of `input` and resize it
// to target_width x target_height with the requested interpolation mode.
// Returns a non-OK Status on invalid sizes or OpenCV failures.
Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y,
                     int crop_height, int crop_width, int target_height, int target_width, InterpolationMode mode) {
  try {
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] CropAndResize: load image failed.");
    }
    RETURN_IF_NOT_OK(ValidateImageRank("CropAndResize", input_cv->Rank()));
    // image too large or too small, 1000 is arbitrary here to prevent opencv from segmentation fault
    const uint32_t kCropShapeLimits = 1000;
    if (crop_height == 0 || crop_width == 0 || target_height == 0 || target_height > crop_height * kCropShapeLimits ||
        target_width == 0 || target_width > crop_width * kCropShapeLimits) {
      std::string err_msg =
        "CropAndResize: the resizing width or height 1) is too big, it's up to " + std::to_string(kCropShapeLimits) +
        " times the original image; 2) can not be 0. Detail info is: crop_height: " + std::to_string(crop_height) +
        ", crop_width: " + std::to_string(crop_width) + ", target_height: " + std::to_string(target_height) +
        ", target_width: " + std::to_string(target_width);
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
    // Region of interest inside the source image; cv_in(roi) views it without copying.
    cv::Rect roi(x, y, crop_width, crop_height);
    auto cv_mode = GetCVInterpolationMode(mode);
    cv::Mat cv_in = input_cv->mat();

    if (mode == InterpolationMode::kCubicPil) {
      // PIL-compatible cubic interpolation goes through the LiteMat ResizeCubic
      // path instead of cv::resize.
      // NOTE(review): this branch hard-codes a 3-channel uint8 output shape —
      // assumes the input is an <H,W,3> uint8 image; confirm against callers.
      cv::Mat input_roi = cv_in(roi);
      std::shared_ptr<CVTensor> input_image;
      RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_roi, input_cv->Rank(), &input_image));
      LiteMat imIn, imOut;
      std::shared_ptr<Tensor> output_tensor;
      TensorShape new_shape = TensorShape({target_height, target_width, 3});
      RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input_cv->type(), &output_tensor));
      // Wrap the output tensor's buffer so ResizeCubic writes directly into it.
      uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
      imOut.Init(target_width, target_height, input_cv->shape()[2], reinterpret_cast<void *>(buffer), LDataType::UINT8);
      imIn.Init(input_image->shape()[1], input_image->shape()[0], input_image->shape()[2], input_image->mat().data,
                LDataType::UINT8);
      if (ResizeCubic(imIn, imOut, target_width, target_height) == false) {
        RETURN_STATUS_UNEXPECTED("Resize: failed to do resize, please check the error msg.");
      }
      *output = output_tensor;
      return Status::OK();
    }

    // Generic OpenCV path: crop via the ROI view, then resize into a fresh tensor.
    TensorShape shape{target_height, target_width};
    int num_channels = input_cv->shape()[CHANNEL_INDEX];
    // Keep the channel dimension when the input is <H,W,C>.
    if (input_cv->Rank() == DEFAULT_IMAGE_RANK) shape = shape.AppendDim(num_channels);
    std::shared_ptr<CVTensor> cvt_out;
    RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &cvt_out));
    cv::resize(cv_in(roi), cvt_out->mat(), cv::Size(target_width, target_height), 0, 0, cv_mode);
    *output = std::static_pointer_cast<Tensor>(cvt_out);
    return Status::OK();
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("CropAndResize: " + std::string(e.what()));
  }
}
714
// Rotate `input` by `degree` degrees around `center` (defaults to the image
// center when `center` is empty). When `expand` is true the output canvas is
// enlarged to fit the rotated image; otherwise the original size is kept and
// corners may be clipped. Uncovered areas are filled with (fill_r, fill_g, fill_b).
Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> center,
              float degree, InterpolationMode interpolation, bool expand, uint8_t fill_r, uint8_t fill_g,
              uint8_t fill_b) {
  try {
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] Rotate: load image failed.");
    }
    RETURN_IF_NOT_OK(ValidateImageRank("Rotate", input_cv->Rank()));

    cv::Mat input_img = input_cv->mat();
    // Beyond 2 * 2^24 pixels per side, float32 can no longer represent the
    // center coordinate exactly (see MAX_INT_PRECISION at the top of the file).
    if (input_img.cols > (MAX_INT_PRECISION * 2) || input_img.rows > (MAX_INT_PRECISION * 2)) {
      RETURN_STATUS_UNEXPECTED("Rotate: image is too large and center is not precise.");
    }
    float fx = 0, fy = 0;
    if (center.empty()) {
      // default to center of image
      fx = (input_img.cols - 1) / 2.0;
      fy = (input_img.rows - 1) / 2.0;
    } else {
      fx = center[0];
      fy = center[1];
    }
    cv::Mat output_img;
    // Scalar is built in (b, g, r) order to match OpenCV's channel layout here.
    cv::Scalar fill_color = cv::Scalar(fill_b, fill_g, fill_r);
    // maybe don't use uint32 for image dimension here
    cv::Point2f pc(fx, fy);
    // 2x3 affine matrix for the rotation about pc with scale 1.0.
    cv::Mat rot = cv::getRotationMatrix2D(pc, degree, 1.0);
    std::shared_ptr<CVTensor> output_cv;
    if (!expand) {
      // this case means that the shape doesn't change, size stays the same
      // We may not need this memcpy if it is in place.
      RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
      // using inter_nearest to comply with python default
      cv::warpAffine(input_img, output_cv->mat(), rot, input_img.size(), GetCVInterpolationMode(interpolation),
                     cv::BORDER_CONSTANT, fill_color);
    } else {
      // we resize here since the shape changes
      // create a new bounding box with the rotate
      cv::Rect2f bbox = cv::RotatedRect(pc, input_img.size(), degree).boundingRect2f();
      // Shift the transform so the rotated image is centered in the new canvas.
      rot.at<double>(0, 2) += bbox.width / 2.0 - input_img.cols / 2.0;
      rot.at<double>(1, 2) += bbox.height / 2.0 - input_img.rows / 2.0;
      // use memcpy and don't compute the new shape since openCV has a rounding problem
      cv::warpAffine(input_img, output_img, rot, bbox.size(), GetCVInterpolationMode(interpolation),
                     cv::BORDER_CONSTANT, fill_color);
      RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, input_cv->Rank(), &output_cv));
      RETURN_UNEXPECTED_IF_NULL(output_cv);
    }
    *output = std::static_pointer_cast<Tensor>(output_cv);
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("Rotate: " + std::string(e.what()));
  }
  return Status::OK();
}
769
770 template <typename T>
Normalize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,std::vector<float> mean,std::vector<float> std)771 void Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean,
772 std::vector<float> std) {
773 auto itr_out = (*output)->begin<float>();
774 auto itr = input->begin<T>();
775 auto end = input->end<T>();
776 int64_t num_channels = (*output)->shape()[CHANNEL_INDEX];
777
778 while (itr != end) {
779 for (int64_t i = 0; i < num_channels; i++) {
780 *itr_out = static_cast<float>(*itr) / std[i] - mean[i];
781 ++itr_out;
782 ++itr;
783 }
784 }
785 }
786
// Normalize `input` into a newly allocated float32 tensor using per-channel
// `mean` and `std` values, dispatching on the input dtype to the typed
// Normalize<T> helper above. A rank-2 <H,W> input is temporarily expanded to
// <H,W,1> and squeezed back before returning. A single mean/std value is
// broadcast across all channels.
Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean,
                 std::vector<float> std) {
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), DataType(DataType::DE_FLOAT32), output));
  // Grayscale <H,W> input: add a trailing channel dim so the per-channel loop works.
  if (input->Rank() == MIN_IMAGE_DIMENSION) {
    RETURN_IF_NOT_OK((*output)->ExpandDim(MIN_IMAGE_DIMENSION));
  }

  CHECK_FAIL_RETURN_UNEXPECTED((*output)->Rank() == DEFAULT_IMAGE_RANK, "Normalize: image shape is not <H,W,C>.");
  CHECK_FAIL_RETURN_UNEXPECTED(std.size() == mean.size(),
                               "Normalize: mean and std vectors are not of same size, got size of std:" +
                                 std::to_string(std.size()) + ", and mean size:" + std::to_string(mean.size()));

  // caller provided 1 mean/std value and there are more than one channel --> duplicate mean/std value
  if (mean.size() == 1 && (*output)->shape()[CHANNEL_INDEX] != 1) {
    for (int64_t i = 0; i < (*output)->shape()[CHANNEL_INDEX] - 1; i++) {
      mean.push_back(mean[0]);
      std.push_back(std[0]);
    }
  }
  CHECK_FAIL_RETURN_UNEXPECTED((*output)->shape()[CHANNEL_INDEX] == mean.size(),
                               "Normalize: number of channels does not match the size of mean and std vectors, got "
                               "channels: " +
                                 std::to_string((*output)->shape()[CHANNEL_INDEX]) +
                                 ", size of mean:" + std::to_string(mean.size()));

  // Dispatch to the typed helper matching the input element type.
  switch (input->type().value()) {
    case DataType::DE_BOOL:
      Normalize<bool>(input, output, mean, std);
      break;
    case DataType::DE_INT8:
      Normalize<int8_t>(input, output, mean, std);
      break;
    case DataType::DE_UINT8:
      Normalize<uint8_t>(input, output, mean, std);
      break;
    case DataType::DE_INT16:
      Normalize<int16_t>(input, output, mean, std);
      break;
    case DataType::DE_UINT16:
      Normalize<uint16_t>(input, output, mean, std);
      break;
    case DataType::DE_INT32:
      Normalize<int32_t>(input, output, mean, std);
      break;
    case DataType::DE_UINT32:
      Normalize<uint32_t>(input, output, mean, std);
      break;
    case DataType::DE_INT64:
      Normalize<int64_t>(input, output, mean, std);
      break;
    case DataType::DE_UINT64:
      Normalize<uint64_t>(input, output, mean, std);
      break;
    case DataType::DE_FLOAT16:
      Normalize<float16>(input, output, mean, std);
      break;
    case DataType::DE_FLOAT32:
      Normalize<float>(input, output, mean, std);
      break;
    case DataType::DE_FLOAT64:
      Normalize<double>(input, output, mean, std);
      break;
    default:
      RETURN_STATUS_UNEXPECTED(
        "Normalize: unsupported type, currently supported types include "
        "[bool,int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,int64_t,uint64_t,float16,float,double].");
  }

  // Undo the temporary channel expansion for rank-2 input.
  if (input->Rank() == MIN_IMAGE_DIMENSION) {
    (*output)->Squeeze();
  }
  return Status::OK();
}
860
NormalizePad(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const std::shared_ptr<Tensor> & mean,const std::shared_ptr<Tensor> & std,const std::string & dtype)861 Status NormalizePad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
862 const std::shared_ptr<Tensor> &mean, const std::shared_ptr<Tensor> &std, const std::string &dtype) {
863 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
864 if (!(input_cv->mat().data && input_cv->Rank() == DEFAULT_IMAGE_CHANNELS)) {
865 RETURN_STATUS_UNEXPECTED("[Internal ERROR] NormalizePad: load image failed.");
866 }
867 DataType tensor_type = DataType(DataType::DE_FLOAT32);
868 int compute_type = CV_32F;
869 int channel_type = CV_32FC1;
870 if (dtype == "float16") {
871 compute_type = CV_16F;
872 channel_type = CV_16FC1;
873 tensor_type = DataType(DataType::DE_FLOAT16);
874 }
875 cv::Mat in_image = input_cv->mat();
876 std::shared_ptr<CVTensor> output_cv;
877 TensorShape new_shape({input_cv->shape()[0], input_cv->shape()[1], input_cv->shape()[2] + 1});
878 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(new_shape, tensor_type, &output_cv));
879 mean->Squeeze();
880 if (mean->type() != DataType::DE_FLOAT32 || mean->Rank() != 1 || mean->shape()[0] != DEFAULT_IMAGE_CHANNELS) {
881 std::string err_msg =
882 "NormalizePad: mean tensor should be of size 3 and type float, but got rank: " + std::to_string(mean->Rank()) +
883 ", and type: " + mean->type().ToString();
884 return Status(StatusCode::kMDShapeMisMatch, err_msg);
885 }
886 std->Squeeze();
887 if (std->type() != DataType::DE_FLOAT32 || std->Rank() != 1 || std->shape()[0] != DEFAULT_IMAGE_CHANNELS) {
888 std::string err_msg =
889 "NormalizePad: std tensor should be of size 3 and type float, but got rank: " + std::to_string(std->Rank()) +
890 ", and type: " + std->type().ToString();
891 return Status(StatusCode::kMDShapeMisMatch, err_msg);
892 }
893 try {
894 // NOTE: We are assuming the input image is in RGB and the mean
895 // and std are in RGB
896 std::vector<cv::Mat> rgb;
897 cv::split(in_image, rgb);
898 if (rgb.size() != DEFAULT_IMAGE_CHANNELS) {
899 RETURN_STATUS_UNEXPECTED("NormalizePad: input image is not in RGB, got rank: " + std::to_string(in_image.dims));
900 }
901 for (int8_t i = 0; i < DEFAULT_IMAGE_CHANNELS; i++) {
902 float mean_c, std_c;
903 RETURN_IF_NOT_OK(mean->GetItemAt<float>(&mean_c, {i}));
904 RETURN_IF_NOT_OK(std->GetItemAt<float>(&std_c, {i}));
905 rgb[i].convertTo(rgb[i], compute_type, 1.0 / std_c, (-mean_c / std_c));
906 }
907 rgb.push_back(cv::Mat::zeros(in_image.rows, in_image.cols, channel_type));
908 cv::merge(rgb, output_cv->mat());
909 *output = std::static_pointer_cast<Tensor>(output_cv);
910 return Status::OK();
911 } catch (const cv::Exception &e) {
912 RETURN_STATUS_UNEXPECTED("NormalizePad: " + std::string(e.what()));
913 }
914 }
915
AdjustBrightness(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const float & alpha)916 Status AdjustBrightness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha) {
917 try {
918 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
919 cv::Mat input_img = input_cv->mat();
920 if (!input_cv->mat().data) {
921 RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustBrightness: load image failed.");
922 }
923 CHECK_FAIL_RETURN_UNEXPECTED(
924 input_cv->shape().Size() > CHANNEL_INDEX,
925 "AdjustBrightness: image rank should not bigger than:" + std::to_string(CHANNEL_INDEX) +
926 ", but got: " + std::to_string(input_cv->shape().Size()));
927 int num_channels = input_cv->shape()[CHANNEL_INDEX];
928 // Rank of the image represents how many dimensions, image is expected to be HWC
929 if (input_cv->Rank() != DEFAULT_IMAGE_RANK || num_channels != DEFAULT_IMAGE_CHANNELS) {
930 RETURN_STATUS_UNEXPECTED("AdjustBrightness: image shape is not <H,W,C> or channel is not 3, got image rank: " +
931 std::to_string(input_cv->Rank()) + ", and channel:" + std::to_string(num_channels));
932 }
933 std::shared_ptr<CVTensor> output_cv;
934 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
935 output_cv->mat() = input_img * alpha;
936 *output = std::static_pointer_cast<Tensor>(output_cv);
937 } catch (const cv::Exception &e) {
938 RETURN_STATUS_UNEXPECTED("AdjustBrightness: " + std::string(e.what()));
939 }
940 return Status::OK();
941 }
942
AdjustContrast(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const float & alpha)943 Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha) {
944 try {
945 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
946 cv::Mat input_img = input_cv->mat();
947 if (!input_cv->mat().data) {
948 RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustContrast: load image failed.");
949 }
950 CHECK_FAIL_RETURN_UNEXPECTED(input_cv->shape().Size() > CHANNEL_INDEX,
951 "AdjustContrast: image rank should not bigger than:" + std::to_string(CHANNEL_INDEX) +
952 ", but got: " + std::to_string(input_cv->shape().Size()));
953 int num_channels = input_cv->shape()[CHANNEL_INDEX];
954 if (input_cv->Rank() != DEFAULT_IMAGE_CHANNELS || num_channels != DEFAULT_IMAGE_CHANNELS) {
955 RETURN_STATUS_UNEXPECTED("AdjustContrast: image shape is not <H,W,C> or channel is not 3, got image rank: " +
956 std::to_string(input_cv->Rank()) + ", and channel:" + std::to_string(num_channels));
957 }
958 cv::Mat gray, output_img;
959 cv::cvtColor(input_img, gray, CV_RGB2GRAY);
960 int mean_img = static_cast<int>(cv::mean(gray).val[0] + 0.5);
961 std::shared_ptr<CVTensor> output_cv;
962 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
963 output_img = cv::Mat::zeros(input_img.rows, input_img.cols, CV_8UC1);
964 output_img = output_img + mean_img;
965 cv::cvtColor(output_img, output_img, CV_GRAY2RGB);
966 output_cv->mat() = output_img * (1.0 - alpha) + input_img * alpha;
967 *output = std::static_pointer_cast<Tensor>(output_cv);
968 } catch (const cv::Exception &e) {
969 RETURN_STATUS_UNEXPECTED("AdjustContrast: " + std::string(e.what()));
970 }
971 return Status::OK();
972 }
973
// Gamma-correct `input`. Float images are modified in place (result clamped
// to [0, 1]) and the output aliases the input tensor; uint8 images are
// remapped through a 256-entry lookup table into a new tensor.
Status AdjustGamma(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &gamma,
                   const float &gain) {
  try {
    int num_channels = 1;
    if (input->Rank() < 2) {
      RETURN_STATUS_UNEXPECTED("AdjustGamma: input tensor is not in shape of <...,H,W,C> or <H,W>.");
    }
    // Rank > 2 means a trailing channel dimension is present.
    if (input->Rank() > 2) {
      num_channels = input->shape()[-1];
    }
    if (num_channels != 1 && num_channels != 3) {
      RETURN_STATUS_UNEXPECTED("AdjustGamma: channel of input image should be 1 or 3, but got: " +
                               std::to_string(num_channels));
    }
    if (input->type().IsFloat()) {
      // Float path: out = clamp((in * gain) ** gamma, 0, 1), applied in place.
      for (auto itr = input->begin<float>(); itr != input->end<float>(); itr++) {
        *itr = pow((*itr) * gain, gamma);
        *itr = std::min(std::max((*itr), 0.0f), 1.0f);
      }
      // NOTE: output aliases the mutated input tensor on this path.
      *output = input;
    } else {
      // Integer path: precompute a lookup table for all 256 byte values.
      std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
      if (!input_cv->mat().data) {
        RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustGamma: load image failed.");
      }
      cv::Mat input_img = input_cv->mat();
      std::shared_ptr<CVTensor> output_cv;
      RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
      uchar LUT[256] = {};
      // NOTE(review): here gain scales the result AFTER pow, while the float
      // path applies gain BEFORE pow — confirm the asymmetry is intended.
      for (int i = 0; i < 256; i++) {
        float f = i / 255.0;
        f = pow(f, gamma);
        // The (255 + 1 - 1e-3) factor spreads values over 0..255 before flooring.
        LUT[i] = static_cast<uchar>(floor(std::min(f * (255.0 + 1 - 1e-3) * gain, 255.0)));
      }
      // Apply the LUT in place on the input matrix, 1-channel or 3-channel.
      if (input_img.channels() == 1) {
        cv::MatIterator_<uchar> it = input_img.begin<uchar>();
        cv::MatIterator_<uchar> it_end = input_img.end<uchar>();
        for (; it != it_end; ++it) {
          *it = LUT[(*it)];
        }
      } else {
        cv::MatIterator_<cv::Vec3b> it = input_img.begin<cv::Vec3b>();
        cv::MatIterator_<cv::Vec3b> it_end = input_img.end<cv::Vec3b>();
        for (; it != it_end; ++it) {
          (*it)[0] = LUT[(*it)[0]];
          (*it)[1] = LUT[(*it)[1]];
          (*it)[2] = LUT[(*it)[2]];
        }
      }
      // Copy the remapped pixels into the freshly allocated output tensor.
      output_cv->mat() = input_img * 1;
      *output = std::static_pointer_cast<Tensor>(output_cv);
    }
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("AdjustGamma: " + std::string(e.what()));
  }
  return Status::OK();
}
1031
// Maximize (auto-stretch) image contrast per channel: compute each channel's
// histogram, clip `cutoff` percent from both ends, then remap the remaining
// range linearly onto [0, 255]. Gray values listed in `ignore` are excluded
// from the histogram. Accepts rank-2 <H,W> or rank-3 <H,W,C> input.
Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &cutoff,
                    const std::vector<uint32_t> &ignore) {
  try {
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] AutoContrast: load image failed.");
    }
    if (input_cv->Rank() != DEFAULT_IMAGE_RANK && input_cv->Rank() != MIN_IMAGE_DIMENSION) {
      RETURN_STATUS_UNEXPECTED("AutoContrast: image channel should be 1 or 3, but got: " +
                               std::to_string(input_cv->Rank()));
    }
    // Reshape to extend dimension if rank is 2 for algorithm to work. then reshape output to be of rank 2 like input
    if (input_cv->Rank() == MIN_IMAGE_DIMENSION) {
      RETURN_IF_NOT_OK(input_cv->ExpandDim(MIN_IMAGE_DIMENSION));
    }
    // Get number of channels and image matrix
    std::size_t num_of_channels = input_cv->shape()[CHANNEL_INDEX];
    if (num_of_channels != MIN_IMAGE_CHANNELS && num_of_channels != DEFAULT_IMAGE_CHANNELS) {
      RETURN_STATUS_UNEXPECTED("AutoContrast: channel of input image should be 1 or 3, but got: " +
                               std::to_string(num_of_channels));
    }
    cv::Mat image = input_cv->mat();
    // Separate the image to channels
    std::vector<cv::Mat> planes(num_of_channels);
    cv::split(image, planes);
    cv::Mat b_hist, g_hist, r_hist;
    // Establish the number of bins and set variables for histogram
    int32_t hist_size = 256;
    int32_t channels = 0;
    float range[] = {0, 256};
    const float *hist_range[] = {range};
    bool uniform = true, accumulate = false;
    // Set up lookup table for LUT(Look up table algorithm)
    std::vector<int32_t> table;
    std::vector<cv::Mat> image_result;
    for (std::size_t layer = 0; layer < planes.size(); layer++) {
      // Reset lookup table
      table = std::vector<int32_t>{};
      // Calculate Histogram for channel
      cv::Mat hist;
      cv::calcHist(&planes[layer], 1, &channels, cv::Mat(), hist, 1, &hist_size, hist_range, uniform, accumulate);
      hist.convertTo(hist, CV_32SC1);
      std::vector<int32_t> hist_vec;
      hist.col(0).copyTo(hist_vec);
      // Ignore values in ignore
      for (const auto &item : ignore) hist_vec[item] = 0;
      int32_t hi = 255;
      int32_t lo = 0;
      // hi/lo become the gray levels at the cutoff percentiles from each end.
      RETURN_IF_NOT_OK(ComputeUpperAndLowerPercentiles(&hist_vec, cutoff, cutoff, &hi, &lo));
      if (hi <= lo) {
        // Degenerate range: identity mapping leaves the channel unchanged.
        for (int32_t i = 0; i < 256; i++) {
          table.push_back(i);
        }
      } else {
        // Linear remap of [lo, hi] onto [0, 255], clamped at both ends.
        const float scale = 255.0 / (hi - lo);
        const float offset = -1 * lo * scale;
        for (int32_t i = 0; i < 256; i++) {
          int32_t ix = static_cast<int32_t>(i * scale + offset);
          ix = std::max(ix, 0);
          ix = std::min(ix, MAX_BIT_VALUE);
          table.push_back(ix);
        }
      }
      cv::Mat result_layer;
      cv::LUT(planes[layer], table, result_layer);
      image_result.push_back(result_layer);
    }
    cv::Mat result;
    cv::merge(image_result, result);
    result.convertTo(result, input_cv->mat().type());
    std::shared_ptr<CVTensor> output_cv;
    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
    (*output) = std::static_pointer_cast<Tensor>(output_cv);
    // Reshape the output to input_cv's shape (input_cv may have been expanded above).
    RETURN_IF_NOT_OK((*output)->Reshape(input_cv->shape()));
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("AutoContrast: " + std::string(e.what()));
  }
  return Status::OK();
}
1111
AdjustSaturation(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const float & alpha)1112 Status AdjustSaturation(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha) {
1113 try {
1114 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1115 cv::Mat input_img = input_cv->mat();
1116 if (!input_cv->mat().data) {
1117 RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustSaturation: load image failed.");
1118 }
1119 CHECK_FAIL_RETURN_UNEXPECTED(
1120 input_cv->shape().Size() > CHANNEL_INDEX,
1121 "AdjustSaturation: image rank should not bigger than: " + std::to_string(CHANNEL_INDEX) +
1122 ", but got: " + std::to_string(input_cv->shape().Size()));
1123 int num_channels = input_cv->shape()[CHANNEL_INDEX];
1124 if (input_cv->Rank() != DEFAULT_IMAGE_RANK || num_channels != DEFAULT_IMAGE_CHANNELS) {
1125 RETURN_STATUS_UNEXPECTED("AdjustSaturation: image shape is not <H,W,C> or channel is not 3, but got rank: " +
1126 std::to_string(input_cv->Rank()) + ", and channel: " + std::to_string(num_channels));
1127 }
1128 std::shared_ptr<CVTensor> output_cv;
1129 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1130 cv::Mat output_img = output_cv->mat();
1131 cv::Mat gray;
1132 cv::cvtColor(input_img, gray, CV_RGB2GRAY);
1133 cv::cvtColor(gray, output_img, CV_GRAY2RGB);
1134 output_cv->mat() = output_img * (1.0 - alpha) + input_img * alpha;
1135 *output = std::static_pointer_cast<Tensor>(output_cv);
1136 } catch (const cv::Exception &e) {
1137 RETURN_STATUS_UNEXPECTED("AdjustSaturation: " + std::string(e.what()));
1138 }
1139 return Status::OK();
1140 }
1141
AdjustHue(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const float & hue)1142 Status AdjustHue(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &hue) {
1143 if (hue > 0.5 || hue < -0.5) {
1144 RETURN_STATUS_UNEXPECTED("AdjustHue: invalid parameter, hue should within [-0.5, 0.5], but got: " +
1145 std::to_string(hue));
1146 }
1147 try {
1148 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1149 cv::Mat input_img = input_cv->mat();
1150 if (!input_cv->mat().data) {
1151 RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustHue: load image failed.");
1152 }
1153 CHECK_FAIL_RETURN_UNEXPECTED(input_cv->shape().Size() > 2,
1154 "AdjustHue: image rank should not bigger than:" + std::to_string(2) +
1155 ", but got: " + std::to_string(input_cv->shape().Size()));
1156 int num_channels = input_cv->shape()[2];
1157 if (input_cv->Rank() != DEFAULT_IMAGE_RANK || num_channels != DEFAULT_IMAGE_CHANNELS) {
1158 RETURN_STATUS_UNEXPECTED("AdjustHue: image shape is not <H,W,C> or channel is not 3, but got rank: " +
1159 std::to_string(input_cv->Rank()) + ", and channel: " + std::to_string(num_channels));
1160 }
1161 std::shared_ptr<CVTensor> output_cv;
1162 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1163 cv::Mat output_img;
1164 cv::cvtColor(input_img, output_img, CV_RGB2HSV_FULL);
1165 for (int y = 0; y < output_img.cols; y++) {
1166 for (int x = 0; x < output_img.rows; x++) {
1167 uint8_t cur1 = output_img.at<cv::Vec3b>(cv::Point(y, x))[0];
1168 uint8_t h_hue = 0;
1169 h_hue = static_cast<uint8_t>(hue * MAX_BIT_VALUE);
1170 cur1 += h_hue;
1171 output_img.at<cv::Vec3b>(cv::Point(y, x))[0] = cur1;
1172 }
1173 }
1174 cv::cvtColor(output_img, output_cv->mat(), CV_HSV2RGB_FULL);
1175 *output = std::static_pointer_cast<Tensor>(output_cv);
1176 } catch (const cv::Exception &e) {
1177 RETURN_STATUS_UNEXPECTED("AdjustHue: " + std::string(e.what()));
1178 }
1179 return Status::OK();
1180 }
1181
Equalize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1182 Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1183 try {
1184 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1185 if (!input_cv->mat().data) {
1186 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Equalize: load image failed.");
1187 }
1188 if (input_cv->Rank() != DEFAULT_IMAGE_RANK && input_cv->Rank() != MIN_IMAGE_DIMENSION) {
1189 RETURN_STATUS_UNEXPECTED("Equalize: image rank should be 1 or 3, but got: " + std::to_string(input_cv->Rank()));
1190 }
1191 // For greyscale images, extend dimension if rank is 2 and reshape output to be of rank 2.
1192 if (input_cv->Rank() == MIN_IMAGE_DIMENSION) {
1193 RETURN_IF_NOT_OK(input_cv->ExpandDim(MIN_IMAGE_DIMENSION));
1194 }
1195 // Get number of channels and image matrix
1196 std::size_t num_of_channels = input_cv->shape()[CHANNEL_INDEX];
1197 if (num_of_channels != MIN_IMAGE_CHANNELS && num_of_channels != DEFAULT_IMAGE_CHANNELS) {
1198 RETURN_STATUS_UNEXPECTED("Equalize: channel of input image should be 1 or 3, but got: " +
1199 std::to_string(num_of_channels));
1200 }
1201 cv::Mat image = input_cv->mat();
1202 // Separate the image to channels
1203 std::vector<cv::Mat> planes(num_of_channels);
1204 cv::split(image, planes);
1205 // Equalize each channel separately
1206 std::vector<cv::Mat> image_result;
1207 for (std::size_t layer = 0; layer < planes.size(); layer++) {
1208 cv::Mat channel_result;
1209 cv::equalizeHist(planes[layer], channel_result);
1210 image_result.push_back(channel_result);
1211 }
1212 cv::Mat result;
1213 cv::merge(image_result, result);
1214 std::shared_ptr<CVTensor> output_cv;
1215 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
1216 (*output) = std::static_pointer_cast<Tensor>(output_cv);
1217 RETURN_IF_NOT_OK((*output)->Reshape(input_cv->shape()));
1218 } catch (const cv::Exception &e) {
1219 RETURN_STATUS_UNEXPECTED("Equalize: " + std::string(e.what()));
1220 }
1221 return Status::OK();
1222 }
1223
// Erase `num_patches` rectangular regions of size box_height x box_width from
// the image, IN PLACE (the output tensor aliases the input). `bounded` selects
// CutOut-style placement fully inside the image vs RandomErasing-style
// placement that may be clipped at the borders. Patches are filled either with
// per-pixel standard-normal samples (random_color) or the fixed RGB fill color.
Status Erase(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t box_height,
             int32_t box_width, int32_t num_patches, bool bounded, bool random_color, std::mt19937 *rnd, uint8_t fill_r,
             uint8_t fill_g, uint8_t fill_b) {
  try {
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    CHECK_FAIL_RETURN_UNEXPECTED(input_cv->shape().Size() > CHANNEL_INDEX, "Erase: shape is invalid.");
    int num_channels = input_cv->shape()[CHANNEL_INDEX];
    if (input_cv->mat().data == nullptr) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] CutOut: load image failed.");
    }
    if (input_cv->Rank() != DEFAULT_IMAGE_RANK || num_channels != DEFAULT_IMAGE_CHANNELS) {
      RETURN_STATUS_UNEXPECTED("CutOut: image shape is not <H,W,C> or channel is not 3, but got rank: " +
                               std::to_string(input_cv->Rank()) + ", and channel: " + std::to_string(num_channels));
    }
    cv::Mat input_img = input_cv->mat();
    int32_t image_h = input_cv->shape()[0];
    int32_t image_w = input_cv->shape()[1];
    // check if erase size is bigger than image itself
    if (box_height > image_h || box_width > image_w) {
      RETURN_STATUS_UNEXPECTED(
        "CutOut: box size is too large for image erase, got box height: " + std::to_string(box_height) +
        "box weight: " + std::to_string(box_width) + ", and image height: " + std::to_string(image_h) +
        ", image width: " + std::to_string(image_w));
    }

    // for random color
    std::normal_distribution<double> normal_distribution(0, 1);
    // Bounded: the whole box must lie inside the image.
    std::uniform_int_distribution<int> height_distribution_bound(0, image_h - box_height);
    std::uniform_int_distribution<int> width_distribution_bound(0, image_w - box_width);
    // Unbounded: the box center ranges over the image, so edges may be clipped.
    std::uniform_int_distribution<int> height_distribution_unbound(0, image_h + box_height);
    std::uniform_int_distribution<int> width_distribution_unbound(0, image_w + box_width);
    // core logic
    // update values based on random erasing or cutout

    for (int32_t i = 0; i < num_patches; i++) {
      // rows in cv mat refers to the height of the cropped box
      // we determine h_start and w_start using two different distributions as erasing is used by two different
      // image augmentations. The bounds are also different in each case.
      int32_t h_start = (bounded) ? height_distribution_bound(*rnd) : (height_distribution_unbound(*rnd) - box_height);
      int32_t w_start = (bounded) ? width_distribution_bound(*rnd) : (width_distribution_unbound(*rnd) - box_width);

      // Clip the box to the image on the far edge...
      int32_t max_width = (w_start + box_width > image_w) ? image_w : w_start + box_width;
      int32_t max_height = (h_start + box_height > image_h) ? image_h : h_start + box_height;
      // check for starting range >= 0, here the start range is checked after for cut out, for random erasing
      // w_start and h_start will never be less than 0.
      h_start = (h_start < 0) ? 0 : h_start;
      w_start = (w_start < 0) ? 0 : w_start;
      // Fill every pixel of the (clipped) box. cv::Point takes (col, row),
      // so y iterates columns and x iterates rows here.
      for (int y = w_start; y < max_width; y++) {
        for (int x = h_start; x < max_height; x++) {
          if (random_color) {
            // fill each box with a random value
            input_img.at<cv::Vec3b>(cv::Point(y, x))[0] = static_cast<int32_t>(normal_distribution(*rnd));
            input_img.at<cv::Vec3b>(cv::Point(y, x))[1] = static_cast<int32_t>(normal_distribution(*rnd));
            input_img.at<cv::Vec3b>(cv::Point(y, x))[2] = static_cast<int32_t>(normal_distribution(*rnd));
          } else {
            input_img.at<cv::Vec3b>(cv::Point(y, x))[0] = fill_r;
            input_img.at<cv::Vec3b>(cv::Point(y, x))[1] = fill_g;
            input_img.at<cv::Vec3b>(cv::Point(y, x))[2] = fill_b;
          }
        }
      }
    }
    // The input was modified in place; the output aliases it.
    *output = std::static_pointer_cast<Tensor>(input);
    return Status::OK();
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("CutOut: " + std::string(e.what()));
  }
}
1292
// Pad the image by the given number of pixels on each side using the requested
// border mode; BORDER_CONSTANT fills with (fill_r, fill_g, fill_b).
// Accepts <H,W> or <H,W,C> input.
Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top,
           const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types,
           uint8_t fill_r, uint8_t fill_g, uint8_t fill_b) {
  try {
    // input image
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);

    // validate rank
    if (input_cv->Rank() == 1 || input_cv->mat().dims > MIN_IMAGE_DIMENSION) {
      RETURN_STATUS_UNEXPECTED("Pad: input shape is not <H,W,C> or <H, W>, got rank: " +
                               std::to_string(input_cv->Rank()));
    }

    // get the border type in openCV
    auto b_type = GetCVBorderType(border_types);
    // output image
    cv::Mat out_image;
    if (b_type == cv::BORDER_CONSTANT) {
      // Scalar in (b, g, r) order to match OpenCV's channel layout here.
      cv::Scalar fill_color = cv::Scalar(fill_b, fill_g, fill_r);
      cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type, fill_color);
    } else {
      cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type);
    }
    std::shared_ptr<CVTensor> output_cv;
    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_image, input_cv->Rank(), &output_cv));
    // pad the dimension if shape information is only 2 dimensional, this is grayscale
    // NOTE(review): shape()[CHANNEL_INDEX] is read unconditionally, but a rank-2
    // <H,W> input has no index 2 — confirm TensorShape tolerates out-of-range reads.
    int num_channels = input_cv->shape()[CHANNEL_INDEX];
    if (input_cv->Rank() == DEFAULT_IMAGE_RANK && num_channels == MIN_IMAGE_CHANNELS &&
        output_cv->Rank() == MIN_IMAGE_DIMENSION)
      RETURN_IF_NOT_OK(output_cv->ExpandDim(CHANNEL_INDEX));
    *output = std::static_pointer_cast<Tensor>(output_cv);
    return Status::OK();
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("Pad: " + std::string(e.what()));
  }
}
1329
RgbaToRgb(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1330 Status RgbaToRgb(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1331 try {
1332 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
1333 int num_channels = input_cv->shape()[CHANNEL_INDEX];
1334 if (input_cv->shape().Size() != DEFAULT_IMAGE_CHANNELS || num_channels != 4) {
1335 std::string err_msg = "RgbaToRgb: rank of image is not: " + std::to_string(DEFAULT_IMAGE_CHANNELS) +
1336 ", but got: " + std::to_string(input_cv->shape().Size()) +
1337 ", or channels of image should be 4, but got: " + std::to_string(num_channels);
1338 RETURN_STATUS_UNEXPECTED(err_msg);
1339 }
1340 TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1], 3});
1341 std::shared_ptr<CVTensor> output_cv;
1342 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
1343 cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGBA2RGB));
1344 *output = std::static_pointer_cast<Tensor>(output_cv);
1345 return Status::OK();
1346 } catch (const cv::Exception &e) {
1347 RETURN_STATUS_UNEXPECTED("RgbaToRgb: " + std::string(e.what()));
1348 }
1349 }
1350
RgbaToBgr(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1351 Status RgbaToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1352 try {
1353 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
1354 int num_channels = input_cv->shape()[CHANNEL_INDEX];
1355 if (input_cv->shape().Size() != DEFAULT_IMAGE_CHANNELS || num_channels != 4) {
1356 std::string err_msg = "RgbaToBgr: rank of image is not: " + std::to_string(DEFAULT_IMAGE_CHANNELS) +
1357 ", but got: " + std::to_string(input_cv->shape().Size()) +
1358 ", or channels of image should be 4, but got: " + std::to_string(num_channels);
1359 RETURN_STATUS_UNEXPECTED(err_msg);
1360 }
1361 TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1], 3});
1362 std::shared_ptr<CVTensor> output_cv;
1363 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
1364 cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGBA2BGR));
1365 *output = std::static_pointer_cast<Tensor>(output_cv);
1366 return Status::OK();
1367 } catch (const cv::Exception &e) {
1368 RETURN_STATUS_UNEXPECTED("RgbaToBgr: " + std::string(e.what()));
1369 }
1370 }
1371
RgbToBgr(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1372 Status RgbToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1373 try {
1374 auto input_type = input->type();
1375 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1376 if (!input_cv->mat().data) {
1377 RETURN_STATUS_UNEXPECTED("[Internal ERROR] RgbToBgr: load image failed.");
1378 }
1379 if (input_cv->Rank() != 3 || input_cv->shape()[2] != 3) {
1380 RETURN_STATUS_UNEXPECTED("RgbToBgr: input tensor is not in shape of <H,W,C> or channel is not 3, got rank: " +
1381 std::to_string(input_cv->Rank()) +
1382 ", and channel: " + std::to_string(input_cv->shape()[2]));
1383 }
1384
1385 cv::Mat image = input_cv->mat().clone();
1386 if (input_type == DataType::DE_FLOAT16 || input_type == DataType::DE_INT16 || input_type == DataType::DE_UINT16) {
1387 for (int i = 0; i < input_cv->mat().rows; ++i) {
1388 cv::Vec3s *p1 = input_cv->mat().ptr<cv::Vec3s>(i);
1389 cv::Vec3s *p2 = image.ptr<cv::Vec3s>(i);
1390 for (int j = 0; j < input_cv->mat().cols; ++j) {
1391 p2[j][2] = p1[j][0];
1392 p2[j][1] = p1[j][1];
1393 p2[j][0] = p1[j][2];
1394 }
1395 }
1396 } else if (input_type == DataType::DE_FLOAT32 || input_type == DataType::DE_INT32) {
1397 for (int i = 0; i < input_cv->mat().rows; ++i) {
1398 cv::Vec3f *p1 = input_cv->mat().ptr<cv::Vec3f>(i);
1399 cv::Vec3f *p2 = image.ptr<cv::Vec3f>(i);
1400 for (int j = 0; j < input_cv->mat().cols; ++j) {
1401 p2[j][2] = p1[j][0];
1402 p2[j][1] = p1[j][1];
1403 p2[j][0] = p1[j][2];
1404 }
1405 }
1406 } else if (input_type == DataType::DE_FLOAT64) {
1407 for (int i = 0; i < input_cv->mat().rows; ++i) {
1408 cv::Vec3d *p1 = input_cv->mat().ptr<cv::Vec3d>(i);
1409 cv::Vec3d *p2 = image.ptr<cv::Vec3d>(i);
1410 for (int j = 0; j < input_cv->mat().cols; ++j) {
1411 p2[j][2] = p1[j][0];
1412 p2[j][1] = p1[j][1];
1413 p2[j][0] = p1[j][2];
1414 }
1415 }
1416 } else {
1417 cv::cvtColor(input_cv->mat(), image, cv::COLOR_RGB2BGR);
1418 }
1419
1420 std::shared_ptr<CVTensor> output_cv;
1421 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(image, input_cv->Rank(), &output_cv));
1422
1423 *output = std::static_pointer_cast<Tensor>(output_cv);
1424 return Status::OK();
1425 } catch (const cv::Exception &e) {
1426 RETURN_STATUS_UNEXPECTED("RgbToBgr: " + std::string(e.what()));
1427 }
1428 }
1429
RgbToGray(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1430 Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1431 try {
1432 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
1433 if (input_cv->Rank() != DEFAULT_IMAGE_RANK || input_cv->shape()[CHANNEL_INDEX] != DEFAULT_IMAGE_CHANNELS) {
1434 RETURN_STATUS_UNEXPECTED(
1435 "RgbToGray: image shape is not <H,W,C> or channel is not 3, got rank: " + std::to_string(input_cv->Rank()) +
1436 ", and channel: " + std::to_string(input_cv->shape()[2]));
1437 }
1438 TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1]});
1439 std::shared_ptr<CVTensor> output_cv;
1440 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
1441 cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGB2GRAY));
1442 *output = std::static_pointer_cast<Tensor>(output_cv);
1443 return Status::OK();
1444 } catch (const cv::Exception &e) {
1445 RETURN_STATUS_UNEXPECTED("RgbToGray: " + std::string(e.what()));
1446 }
1447 }
1448
// Reads the header of an in-memory JPEG stream and reports its decoded
// dimensions without decompressing the pixel data.
// @param input: tensor holding the raw, encoded JPEG byte stream.
// @param img_width: [out] decoded output width in pixels.
// @param img_height: [out] decoded output height in pixels.
// @return Status - OK, or unexpected-error status if header parsing fails.
Status GetJpegImageInfo(const std::shared_ptr<Tensor> &input, int *img_width, int *img_height) {
  struct jpeg_decompress_struct cinfo {};
  // Custom error manager: JpegErrorExitCustom presumably raises a C++
  // std::runtime_error instead of libjpeg's default exit() — matches the
  // catch clause below; confirm against its definition.
  struct JpegErrorManagerCustom jerr {};
  cinfo.err = jpeg_std_error(&jerr.pub);
  jerr.pub.error_exit = JpegErrorExitCustom;
  try {
    jpeg_create_decompress(&cinfo);
    // Feed libjpeg directly from the tensor's buffer (no copy).
    JpegSetSource(&cinfo, input->GetBuffer(), input->SizeInBytes());
    (void)jpeg_read_header(&cinfo, TRUE);
    // Populates cinfo.output_width / cinfo.output_height from the header.
    jpeg_calc_output_dimensions(&cinfo);
  } catch (std::runtime_error &e) {
    // Release decompress state even on failure to avoid leaking libjpeg memory.
    jpeg_destroy_decompress(&cinfo);
    RETURN_STATUS_UNEXPECTED(e.what());
  }
  *img_height = cinfo.output_height;
  *img_width = cinfo.output_width;
  jpeg_destroy_decompress(&cinfo);
  return Status::OK();
}
1468
Affine(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const std::vector<float_t> & mat,InterpolationMode interpolation,uint8_t fill_r,uint8_t fill_g,uint8_t fill_b)1469 Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::vector<float_t> &mat,
1470 InterpolationMode interpolation, uint8_t fill_r, uint8_t fill_g, uint8_t fill_b) {
1471 try {
1472 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1473 RETURN_IF_NOT_OK(ValidateImageRank("Affine", input_cv->Rank()));
1474
1475 cv::Mat affine_mat(mat);
1476 affine_mat = affine_mat.reshape(1, {2, 3});
1477
1478 std::shared_ptr<CVTensor> output_cv;
1479 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1480 RETURN_UNEXPECTED_IF_NULL(output_cv);
1481 cv::warpAffine(input_cv->mat(), output_cv->mat(), affine_mat, input_cv->mat().size(),
1482 GetCVInterpolationMode(interpolation), cv::BORDER_CONSTANT, cv::Scalar(fill_r, fill_g, fill_b));
1483 (*output) = std::static_pointer_cast<Tensor>(output_cv);
1484 return Status::OK();
1485 } catch (const cv::Exception &e) {
1486 RETURN_STATUS_UNEXPECTED("Affine: " + std::string(e.what()));
1487 }
1488 }
1489
GaussianBlur(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t kernel_x,int32_t kernel_y,float sigma_x,float sigma_y)1490 Status GaussianBlur(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t kernel_x,
1491 int32_t kernel_y, float sigma_x, float sigma_y) {
1492 try {
1493 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1494 if (input_cv->mat().data == nullptr) {
1495 RETURN_STATUS_UNEXPECTED("[Internal ERROR] GaussianBlur: load image failed.");
1496 }
1497 cv::Mat output_cv_mat;
1498 cv::GaussianBlur(input_cv->mat(), output_cv_mat, cv::Size(kernel_x, kernel_y), static_cast<double>(sigma_x),
1499 static_cast<double>(sigma_y));
1500 std::shared_ptr<CVTensor> output_cv;
1501 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_cv_mat, input_cv->Rank(), &output_cv));
1502 (*output) = std::static_pointer_cast<Tensor>(output_cv);
1503 return Status::OK();
1504 } catch (const cv::Exception &e) {
1505 RETURN_STATUS_UNEXPECTED("GaussianBlur: " + std::string(e.what()));
1506 }
1507 }
1508
ComputePatchSize(const std::shared_ptr<CVTensor> & input_cv,std::shared_ptr<std::pair<int32_t,int32_t>> * patch_size,int32_t num_height,int32_t num_width,SliceMode slice_mode)1509 Status ComputePatchSize(const std::shared_ptr<CVTensor> &input_cv,
1510 std::shared_ptr<std::pair<int32_t, int32_t>> *patch_size, int32_t num_height, int32_t num_width,
1511 SliceMode slice_mode) {
1512 if (input_cv->mat().data == nullptr) {
1513 RETURN_STATUS_UNEXPECTED("[Internal ERROR] SlicePatches: Tensor could not convert to CV Tensor.");
1514 }
1515 RETURN_IF_NOT_OK(ValidateImageRank("Affine", input_cv->Rank()));
1516
1517 cv::Mat in_img = input_cv->mat();
1518 cv::Size s = in_img.size();
1519 if (num_height == 0 || num_height > s.height) {
1520 RETURN_STATUS_UNEXPECTED(
1521 "SlicePatches: The number of patches on height axis equals 0 or is greater than height, got number of patches:" +
1522 std::to_string(num_height));
1523 }
1524 if (num_width == 0 || num_width > s.width) {
1525 RETURN_STATUS_UNEXPECTED(
1526 "SlicePatches: The number of patches on width axis equals 0 or is greater than width, got number of patches:" +
1527 std::to_string(num_width));
1528 }
1529 int32_t patch_h = s.height / num_height;
1530 if (s.height % num_height != 0) {
1531 if (slice_mode == SliceMode::kPad) {
1532 patch_h += 1; // patch_h * num_height - s.height
1533 }
1534 }
1535 int32_t patch_w = s.width / num_width;
1536 if (s.width % num_width != 0) {
1537 if (slice_mode == SliceMode::kPad) {
1538 patch_w += 1; // patch_w * num_width - s.width
1539 }
1540 }
1541 (*patch_size)->first = patch_h;
1542 (*patch_size)->second = patch_w;
1543 return Status::OK();
1544 }
1545
SlicePatches(const std::shared_ptr<Tensor> & input,std::vector<std::shared_ptr<Tensor>> * output,int32_t num_height,int32_t num_width,SliceMode slice_mode,uint8_t fill_value)1546 Status SlicePatches(const std::shared_ptr<Tensor> &input, std::vector<std::shared_ptr<Tensor>> *output,
1547 int32_t num_height, int32_t num_width, SliceMode slice_mode, uint8_t fill_value) {
1548 if (num_height == DEFAULT_NUM_HEIGHT && num_width == DEFAULT_NUM_WIDTH) {
1549 (*output).push_back(input);
1550 return Status::OK();
1551 }
1552
1553 auto patch_size = std::make_shared<std::pair<int32_t, int32_t>>(0, 0);
1554 int32_t patch_h = 0;
1555 int32_t patch_w = 0;
1556
1557 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1558 RETURN_IF_NOT_OK(ComputePatchSize(input_cv, &patch_size, num_height, num_width, slice_mode));
1559 std::tie(patch_h, patch_w) = *patch_size;
1560
1561 cv::Mat in_img = input_cv->mat();
1562 cv::Size s = in_img.size();
1563 try {
1564 cv::Mat out_img;
1565 if (slice_mode == SliceMode::kPad) { // padding on right and bottom directions
1566 auto padding_h = patch_h * num_height - s.height;
1567 auto padding_w = patch_w * num_width - s.width;
1568 out_img = cv::Mat(s.height + padding_h, s.width + padding_w, in_img.type(), cv::Scalar::all(fill_value));
1569 in_img.copyTo(out_img(cv::Rect(0, 0, s.width, s.height)));
1570 } else {
1571 out_img = in_img;
1572 }
1573 for (int i = 0; i < num_height; ++i) {
1574 for (int j = 0; j < num_width; ++j) {
1575 std::shared_ptr<CVTensor> patch_cv;
1576 cv::Rect rect(j * patch_w, i * patch_h, patch_w, patch_h);
1577 cv::Mat patch(out_img(rect));
1578 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(patch, input_cv->Rank(), &patch_cv));
1579 (*output).push_back(std::static_pointer_cast<Tensor>(patch_cv));
1580 }
1581 }
1582 return Status::OK();
1583 } catch (const cv::Exception &e) {
1584 RETURN_STATUS_UNEXPECTED("SlicePatches: " + std::string(e.what()));
1585 }
1586 }
1587
ValidateImageRank(const std::string & op_name,int32_t rank)1588 Status ValidateImageRank(const std::string &op_name, int32_t rank) {
1589 if (rank != 2 && rank != 3) {
1590 std::string err_msg = op_name + ": image shape is not <H,W,C> or <H, W>, but got rank:" + std::to_string(rank);
1591 RETURN_STATUS_UNEXPECTED(err_msg);
1592 }
1593 return Status::OK();
1594 }
1595 } // namespace dataset
1596 } // namespace mindspore
1597