1 /**
2 * Copyright 2020-2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "minddata/dataset/kernels/image/image_utils.h"
17
18 #include <opencv2/imgproc/types_c.h>
19
20 #include <algorithm>
21 #include <fstream>
22 #include <limits>
23 #include <stdexcept>
24 #include <string>
25 #include <vector>
26
27 #include <opencv2/imgcodecs.hpp>
28
29 #include "minddata/dataset/core/cv_tensor.h"
30 #include "minddata/dataset/core/tensor.h"
31 #include "minddata/dataset/core/tensor_shape.h"
32 #include "minddata/dataset/include/dataset/constants.h"
33 #include "minddata/dataset/kernels/data/data_utils.h"
34 #include "minddata/dataset/kernels/image/affine_op.h"
35 #include "minddata/dataset/kernels/image/invert_op.h"
36 #include "minddata/dataset/kernels/image/math_utils.h"
37 #include "minddata/dataset/kernels/image/posterize_op.h"
38 #include "minddata/dataset/kernels/image/resize_cubic_op.h"
39 #include "minddata/dataset/kernels/image/sharpness_op.h"
40 #include "utils/file_utils.h"
41 #include "utils/ms_utils.h"
42
// Largest integer a 32-bit float can represent exactly (2^24); used when guarding
// float <-> int conversions in image arithmetic.
const int32_t MAX_INT_PRECISION = 16777216;  // float int precision is 16777216
const int32_t DOUBLING_FACTOR = 2;           // used as multiplier with MAX_INT_PRECISION
const int32_t DEFAULT_NUM_HEIGHT = 1;
const int32_t DEFAULT_NUM_WIDTH = 1;
47
48 namespace mindspore {
49 namespace dataset {
GetCVInterpolationMode(InterpolationMode mode)50 int GetCVInterpolationMode(InterpolationMode mode) {
51 switch (mode) {
52 case InterpolationMode::kLinear:
53 return static_cast<int>(cv::InterpolationFlags::INTER_LINEAR);
54 case InterpolationMode::kCubic:
55 return static_cast<int>(cv::InterpolationFlags::INTER_CUBIC);
56 case InterpolationMode::kArea:
57 return static_cast<int>(cv::InterpolationFlags::INTER_AREA);
58 case InterpolationMode::kNearestNeighbour:
59 return static_cast<int>(cv::InterpolationFlags::INTER_NEAREST);
60 default:
61 return static_cast<int>(cv::InterpolationFlags::INTER_LINEAR);
62 }
63 }
64
GetCVBorderType(BorderType type)65 int GetCVBorderType(BorderType type) {
66 switch (type) {
67 case BorderType::kConstant:
68 return static_cast<int>(cv::BorderTypes::BORDER_CONSTANT);
69 case BorderType::kEdge:
70 return static_cast<int>(cv::BorderTypes::BORDER_REPLICATE);
71 case BorderType::kReflect:
72 return static_cast<int>(cv::BorderTypes::BORDER_REFLECT101);
73 case BorderType::kSymmetric:
74 return static_cast<int>(cv::BorderTypes::BORDER_REFLECT);
75 default:
76 return static_cast<int>(cv::BorderTypes::BORDER_CONSTANT);
77 }
78 }
79
GetConvertShape(ConvertMode convert_mode,const std::shared_ptr<CVTensor> & input_cv,std::vector<dsize_t> * node)80 Status GetConvertShape(ConvertMode convert_mode, const std::shared_ptr<CVTensor> &input_cv,
81 std::vector<dsize_t> *node) {
82 RETURN_UNEXPECTED_IF_NULL(node);
83 std::vector<ConvertMode> one_channels = {ConvertMode::COLOR_BGR2GRAY, ConvertMode::COLOR_RGB2GRAY,
84 ConvertMode::COLOR_BGRA2GRAY, ConvertMode::COLOR_RGBA2GRAY};
85 std::vector<ConvertMode> three_channels = {
86 ConvertMode::COLOR_BGRA2BGR, ConvertMode::COLOR_RGBA2RGB, ConvertMode::COLOR_RGBA2BGR, ConvertMode::COLOR_BGRA2RGB,
87 ConvertMode::COLOR_BGR2RGB, ConvertMode::COLOR_RGB2BGR, ConvertMode::COLOR_GRAY2BGR, ConvertMode::COLOR_GRAY2RGB};
88 std::vector<ConvertMode> four_channels = {ConvertMode::COLOR_BGR2BGRA, ConvertMode::COLOR_RGB2RGBA,
89 ConvertMode::COLOR_BGR2RGBA, ConvertMode::COLOR_RGB2BGRA,
90 ConvertMode::COLOR_BGRA2RGBA, ConvertMode::COLOR_RGBA2BGRA,
91 ConvertMode::COLOR_GRAY2BGRA, ConvertMode::COLOR_GRAY2RGBA};
92 if (std::find(three_channels.begin(), three_channels.end(), convert_mode) != three_channels.end()) {
93 *node = {input_cv->shape()[0], input_cv->shape()[1], 3};
94 } else if (std::find(four_channels.begin(), four_channels.end(), convert_mode) != four_channels.end()) {
95 *node = {input_cv->shape()[0], input_cv->shape()[1], 4};
96 } else if (std::find(one_channels.begin(), one_channels.end(), convert_mode) != one_channels.end()) {
97 *node = {input_cv->shape()[0], input_cv->shape()[1]};
98 } else {
99 RETURN_STATUS_UNEXPECTED(
100 "The mode of image channel conversion must be in ConvertMode, which mainly includes "
101 "conversion between RGB, BGR, GRAY, RGBA etc.");
102 }
103 return Status::OK();
104 }
105
ImageNumChannels(const std::shared_ptr<Tensor> & image,dsize_t * channels)106 Status ImageNumChannels(const std::shared_ptr<Tensor> &image, dsize_t *channels) {
107 RETURN_UNEXPECTED_IF_NULL(channels);
108 if (image->Rank() < kMinImageRank) {
109 RETURN_STATUS_UNEXPECTED(
110 "GetImageNumChannels: invalid parameter, image should have at least two dimensions, but got: " +
111 std::to_string(image->Rank()));
112 } else if (image->Rank() == kMinImageRank) {
113 *channels = 1;
114 } else {
115 *channels = image->shape()[-1];
116 }
117 return Status::OK();
118 }
119
ImageSize(const std::shared_ptr<Tensor> & image,std::vector<dsize_t> * size)120 Status ImageSize(const std::shared_ptr<Tensor> &image, std::vector<dsize_t> *size) {
121 RETURN_UNEXPECTED_IF_NULL(size);
122 *size = std::vector<dsize_t>(kMinImageRank);
123 if (image->Rank() < kMinImageRank) {
124 RETURN_STATUS_UNEXPECTED("GetImageSize: invalid parameter, image should have at least two dimensions, but got: " +
125 std::to_string(image->Rank()));
126 } else if (image->Rank() == kMinImageRank) {
127 (*size)[0] = image->shape()[0];
128 (*size)[1] = image->shape()[1];
129 } else {
130 const int32_t kHeightIndexFromBack = -3;
131 const int32_t kWidthIndexFromBack = -2;
132 (*size)[0] = image->shape()[kHeightIndexFromBack];
133 (*size)[1] = image->shape()[kWidthIndexFromBack];
134 }
135 return Status::OK();
136 }
137
// Validate an image tensor's dtype, rank and channel count against the allowed
// sets for the named operator. An empty set disables that particular check
// (except rank, where a minimum of two dimensions is still enforced).
// Returns a non-OK Status with a user-facing message on the first mismatch.
Status ValidateImage(const std::shared_ptr<Tensor> &image, const std::string &op_name,
                     const std::set<uint8_t> &valid_dtype, const std::set<dsize_t> &valid_rank,
                     const std::set<dsize_t> &valid_channel) {
  // Validate image dtype
  if (!valid_dtype.empty()) {
    auto dtype = image->type();
    if (valid_dtype.find(dtype.value()) == valid_dtype.end()) {
      std::string err_msg = op_name + ": the data type of image tensor does not match the requirement of operator.";
      err_msg += " Expecting tensor in type of " + DataTypeSetToString(valid_dtype);
      err_msg += ". But got type " + dtype.ToString() + ".";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
  }
  // Validate image rank
  auto rank = image->Rank();
  if (!valid_rank.empty()) {
    if (valid_rank.find(rank) == valid_rank.end()) {
      std::string err_msg = op_name + ": the dimension of image tensor does not match the requirement of operator.";
      err_msg += " Expecting tensor in dimension of " + NumberSetToString(valid_rank);
      // Spell out the expected layout for the common rank combinations.
      if (valid_rank == std::set<dsize_t>({kMinImageRank, kDefaultImageRank})) {
        err_msg += ", in shape of <H, W> or <H, W, C>";
      } else if (valid_rank == std::set<dsize_t>({kMinImageRank})) {
        err_msg += ", in shape of <H, W>";
      } else if (valid_rank == std::set<dsize_t>({kDefaultImageRank})) {
        err_msg += ", in shape of <H, W, C>";
      }
      err_msg += ". But got dimension " + std::to_string(rank) + ".";
      // Rank 1 usually means the tensor still holds encoded bytes.
      if (rank == 1) {
        err_msg += " You may need to perform Decode first.";
      }
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
  } else {
    if (rank < kMinImageRank) {
      std::string err_msg =
        op_name + ": the image tensor should have at least two dimensions. You may need to perform Decode first.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
  }
  // Validate image channel
  if (!valid_channel.empty()) {
    dsize_t channel = 1;
    RETURN_IF_NOT_OK(ImageNumChannels(image, &channel));
    if (valid_channel.find(channel) == valid_channel.end()) {
      std::string err_msg = op_name + ": the channel of image tensor does not match the requirement of operator.";
      err_msg += " Expecting tensor in channel of " + NumberSetToString(valid_channel);
      err_msg += ". But got channel " + std::to_string(channel) + ".";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
  }
  return Status::OK();
}
190
ValidateImageDtype(const std::string & op_name,DataType dtype)191 Status ValidateImageDtype(const std::string &op_name, DataType dtype) {
192 uint8_t type = dtype.AsCVType();
193 if (type == kCVInvalidType) {
194 std::string type_name = "unknown";
195 if (dtype.value() < DataType::NUM_OF_TYPES) {
196 type_name = std::string(DataType::kTypeInfo[dtype.value()].name_);
197 }
198 std::string err_msg = op_name + ": Cannot convert [" + type_name + "] to OpenCV type." +
199 " Currently unsupported data type: [uint32, int64, uint64, string]";
200 RETURN_STATUS_UNEXPECTED(err_msg);
201 }
202 return Status::OK();
203 }
204
ValidateImageRank(const std::string & op_name,int32_t rank)205 Status ValidateImageRank(const std::string &op_name, int32_t rank) {
206 if (rank != kMinImageRank && rank != kDefaultImageRank) {
207 std::string err_msg =
208 op_name + ": input tensor is not in shape of <H,W> or <H,W,C>, but got rank: " + std::to_string(rank);
209 if (rank == 1) {
210 err_msg = err_msg + ". You may need to perform Decode first.";
211 }
212 RETURN_STATUS_UNEXPECTED(err_msg);
213 }
214 return Status::OK();
215 }
216
CheckTensorShape(const std::shared_ptr<Tensor> & tensor,const int & channel)217 bool CheckTensorShape(const std::shared_ptr<Tensor> &tensor, const int &channel) {
218 if (tensor == nullptr) {
219 return false;
220 }
221 bool rc = false;
222 if (tensor->shape().Size() <= channel) {
223 return false;
224 }
225 if (tensor->Rank() != kDefaultImageRank ||
226 (tensor->shape()[channel] != 1 && tensor->shape()[channel] != kDefaultImageChannel)) {
227 rc = true;
228 }
229 return rc;
230 }
231
Flip(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output,int flip_code)232 Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int flip_code) {
233 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
234 if (!input_cv->mat().data) {
235 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Flip: load image failed.");
236 }
237
238 std::shared_ptr<CVTensor> output_cv;
239 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
240
241 try {
242 cv::flip(input_cv->mat(), output_cv->mat(), flip_code);
243 *output = std::static_pointer_cast<Tensor>(output_cv);
244 } catch (const cv::Exception &e) {
245 RETURN_STATUS_UNEXPECTED("Flip: " + std::string(e.what()));
246 }
247 return Status::OK();
248 }
249
HorizontalFlip(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)250 Status HorizontalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
251 return Flip(std::move(input), output, 1);
252 }
253
VerticalFlip(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)254 Status VerticalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
255 return Flip(std::move(input), output, 0);
256 }
257
// Resize an image tensor (<H, W> or <H, W, C>) to (output_height, output_width).
// fx/fy are forwarded to cv::resize as scale factors. kCubicPil takes a
// separate PIL-compatible cubic path that only supports 3-channel uint8 input;
// all other modes go through cv::resize.
Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height,
              int32_t output_width, double fx, double fy, InterpolationMode mode) {
  std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
  if (!input_cv->mat().data) {
    RETURN_STATUS_UNEXPECTED("[Internal ERROR] Resize: load image failed.");
  }
  RETURN_IF_NOT_OK(ValidateImageRank("Resize", input_cv->Rank()));

  cv::Mat in_image = input_cv->mat();
  const uint32_t kResizeShapeLimits = 1000;
  // resize image too large or too small, 1000 is arbitrarily chosen here to prevent open cv from segmentation fault
  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() / kResizeShapeLimits) > in_image.rows,
                               "Resize: in_image rows out of bounds.");
  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() / kResizeShapeLimits) > in_image.cols,
                               "Resize: in_image cols out of bounds.");
  if (output_height > in_image.rows * kResizeShapeLimits || output_width > in_image.cols * kResizeShapeLimits) {
    RETURN_STATUS_ERROR(
      StatusCode::kMDShapeMisMatch,
      "Resize: the resizing width or height is too big, it's 1000 times bigger than the original image, got output "
      "height: " +
        std::to_string(output_height) + ", width: " + std::to_string(output_width) +
        ", and original image size:" + std::to_string(in_image.rows) + ", " + std::to_string(in_image.cols));
  }
  if (output_height == 0 || output_width == 0) {
    RETURN_STATUS_ERROR(StatusCode::kMDShapeMisMatch,
                        "Resize: the input value of 'resize' is invalid, width or height is zero.");
  }

  if (mode == InterpolationMode::kCubicPil) {
    // NOTE(review): the rank is compared against kDefaultImageChannel (3) —
    // presumably equal to kDefaultImageRank; confirm the intended constant.
    if (input_cv->shape().Size() != kDefaultImageChannel ||
        input_cv->shape()[kChannelIndexHWC] != kDefaultImageChannel) {
      RETURN_STATUS_UNEXPECTED("Resize: Interpolation mode PILCUBIC only supports image with 3 channels, but got: " +
                               input_cv->shape().ToString());
    }

    // Wrap the output tensor's buffer and the input mat in LiteMat views and
    // run the PIL-style cubic resampler directly on those buffers.
    LiteMat imIn, imOut;
    std::shared_ptr<Tensor> output_tensor;
    TensorShape new_shape = TensorShape({output_height, output_width, 3});
    RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input_cv->type(), &output_tensor));
    uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
    imOut.Init(output_width, output_height, static_cast<int>(input_cv->shape()[kChannelIndexHWC]),
               reinterpret_cast<void *>(buffer), LDataType::UINT8);
    imIn.Init(static_cast<int>(input_cv->shape()[1]), static_cast<int>(input_cv->shape()[0]),
              static_cast<int>(input_cv->shape()[kChannelIndexHWC]), input_cv->mat().data, LDataType::UINT8);
    if (ResizeCubic(imIn, imOut, output_width, output_height) == false) {
      RETURN_STATUS_UNEXPECTED("Resize: failed to do resize, please check the error msg.");
    }
    *output = output_tensor;
    return Status::OK();
  }
  try {
    // Preserve the channel dimension (if any) in the output shape.
    TensorShape shape{output_height, output_width};
    if (input_cv->Rank() == kDefaultImageRank) {
      int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
      shape = shape.AppendDim(num_channels);
    }
    std::shared_ptr<CVTensor> output_cv;
    RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));

    auto cv_mode = GetCVInterpolationMode(mode);
    cv::resize(in_image, output_cv->mat(), cv::Size(output_width, output_height), fx, fy, cv_mode);
    *output = std::static_pointer_cast<Tensor>(output_cv);
    return Status::OK();
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("Resize: " + std::string(e.what()));
  }
}
325
// Magic-number prefixes used to sniff the container format of a raw image buffer.
const unsigned char kJpegMagic[] = "\xFF\xD8\xFF";  // JPEG SOI marker
constexpr dsize_t kJpegMagicLen = 3;
const unsigned char kPngMagic[] = "\x89\x50\x4E\x47";  // first four bytes of the PNG signature
constexpr dsize_t kPngMagicLen = 4;
330
IsNonEmptyJPEG(const std::shared_ptr<Tensor> & input)331 bool IsNonEmptyJPEG(const std::shared_ptr<Tensor> &input) {
332 if (input->type() == DataType::DE_BYTES) {
333 uint32_t len = 0;
334 if (input->GetStringLength(&len) != Status::OK()) {
335 MS_LOG(ERROR) << "Get string length from bytes field failed.";
336 return false;
337 }
338 return len > kJpegMagicLen && memcmp(input->GetStringsBuffer(), kJpegMagic, kJpegMagicLen) == 0;
339 }
340 return input->SizeInBytes() > kJpegMagicLen && memcmp(input->GetMutableBuffer(), kJpegMagic, kJpegMagicLen) == 0;
341 }
342
IsNonEmptyPNG(const std::shared_ptr<Tensor> & input)343 bool IsNonEmptyPNG(const std::shared_ptr<Tensor> &input) {
344 if (input->type() == DataType::DE_BYTES) {
345 uint32_t len = 0;
346 if (input->GetStringLength(&len) != Status::OK()) {
347 MS_LOG(ERROR) << "Get string length from bytes field failed.";
348 return false;
349 }
350 return len > kPngMagicLen && memcmp(input->GetStringsBuffer(), kPngMagic, kPngMagicLen) == 0;
351 }
352 return input->SizeInBytes() > kPngMagicLen && memcmp(input->GetMutableBuffer(), kPngMagic, kPngMagicLen) == 0;
353 }
354
Decode(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)355 Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
356 RETURN_IF_NOT_OK(CheckUnsupportedImage(input));
357
358 if (input->type() == DataType::DE_BYTES && input->shape().NumOfElements() != 1) {
359 RETURN_STATUS_UNEXPECTED("Decode: couldn't decode bytes field with multi dims.");
360 }
361
362 Status ret;
363 if (IsNonEmptyJPEG(input)) {
364 ret = JpegCropAndDecode(input, output);
365 } else {
366 ret = DecodeCv(input, output);
367 }
368
369 // decode failed and dump it
370 if (ret != Status::OK()) {
371 return DumpImageAndAppendStatus(input, ret);
372 }
373 return ret;
374 }
375
DecodeCv(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)376 Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
377 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
378 if (!input_cv->mat().data) {
379 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: load image failed.");
380 }
381 try {
382 cv::Mat img_mat = cv::imdecode(input_cv->mat(), cv::IMREAD_COLOR | cv::IMREAD_IGNORE_ORIENTATION);
383 if (img_mat.data == nullptr) {
384 std::string err = "Decode: image decode failed.";
385 RETURN_STATUS_UNEXPECTED(err);
386 }
387 cv::cvtColor(img_mat, img_mat, static_cast<int>(cv::COLOR_BGR2RGB));
388 std::shared_ptr<CVTensor> output_cv;
389 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, 3, &output_cv));
390 *output = std::static_pointer_cast<Tensor>(output_cv);
391 return Status::OK();
392 } catch (const cv::Exception &e) {
393 RETURN_STATUS_UNEXPECTED("Decode: " + std::string(e.what()));
394 }
395 }
396
// libjpeg source-manager callback: nothing to initialize, the whole compressed
// image is already resident in memory.
static void JpegInitSource(j_decompress_ptr cinfo) {}
398
// libjpeg source-manager callback invoked when the decoder wants more data.
// The entire compressed image is supplied up front, so an empty buffer means
// the stream is truncated/corrupt; signal failure rather than refilling.
static boolean JpegFillInputBuffer(j_decompress_ptr cinfo) {
  if (cinfo->src->bytes_in_buffer == 0) {
    // Under ARM platform raise runtime_error may cause core problem,
    // so we catch runtime_error and just return FALSE.
    try {
      ERREXIT(cinfo, JERR_INPUT_EMPTY);
    } catch (std::runtime_error &e) {
      return FALSE;
    }
    return FALSE;
  }
  return TRUE;
}
412
// libjpeg source-manager callback: no cleanup is needed for an in-memory source.
static void JpegTermSource(j_decompress_ptr cinfo) {}
414
JpegSkipInputData(j_decompress_ptr cinfo,int64_t jump)415 static void JpegSkipInputData(j_decompress_ptr cinfo, int64_t jump) {
416 if (jump < 0) {
417 return;
418 }
419 if (static_cast<size_t>(jump) > cinfo->src->bytes_in_buffer) {
420 cinfo->src->bytes_in_buffer = 0;
421 return;
422 } else {
423 cinfo->src->bytes_in_buffer -= jump;
424 cinfo->src->next_input_byte += jump;
425 }
426 }
427
// Install a custom in-memory source manager on the decompress struct so libjpeg
// reads directly from `data` (`datasize` bytes) instead of a stdio stream. The
// manager is allocated from libjpeg's permanent pool and freed with cinfo.
void JpegSetSource(j_decompress_ptr cinfo, const void *data, int64_t datasize) {
  cinfo->src = static_cast<struct jpeg_source_mgr *>(
    (*cinfo->mem->alloc_small)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_PERMANENT, sizeof(struct jpeg_source_mgr)));
  cinfo->src->init_source = JpegInitSource;
  cinfo->src->fill_input_buffer = JpegFillInputBuffer;
#if defined(_WIN32) || defined(_WIN64) || defined(ENABLE_ARM32) || defined(__APPLE__)
  // On these platforms the libjpeg callback takes `long` (32-bit there), while
  // our implementation uses int64_t; cast the function pointer to match the ABI.
  cinfo->src->skip_input_data = reinterpret_cast<void (*)(j_decompress_ptr, long)>(JpegSkipInputData);
#else
  cinfo->src->skip_input_data = JpegSkipInputData;
#endif
  cinfo->src->resync_to_restart = jpeg_resync_to_restart;
  cinfo->src->term_source = JpegTermSource;
  cinfo->src->bytes_in_buffer = datasize;
  cinfo->src->next_input_byte = static_cast<const JOCTET *>(data);
}
443
// Errors reported by JpegErrorExitCustom are collected here instead of being
// thrown; thread_local because multiple workers may decode concurrently.
thread_local std::vector<Status> jpeg_status;
445
CheckJpegExit(jpeg_decompress_struct * cinfo)446 Status CheckJpegExit(jpeg_decompress_struct *cinfo) {
447 if (!jpeg_status.empty()) {
448 jpeg_destroy_decompress(cinfo);
449 Status s = jpeg_status[0];
450 jpeg_status.clear();
451 return s;
452 }
453 return Status::OK();
454 }
455
// Read decoded scanlines from libjpeg into `buffer` until `max_scanlines_to_read`
// is reached. Each scanline is first read into a temporary row (sized for the
// source's component count), then either CMYK->RGB converted pixel-by-pixel or
// memcpy'd (offset/stride select the cropped window within the aligned row).
static Status JpegReadScanlines(jpeg_decompress_struct *const cinfo, int max_scanlines_to_read, JSAMPLE *buffer,
                                int buffer_size, int crop_w, int crop_w_aligned, int offset, int stride) {
  // scanlines will be read to this buffer first, must have the number
  // of components equal to the number of components in the image
  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int64_t>::max() / cinfo->output_components) > crop_w_aligned,
                               "JpegReadScanlines: multiplication out of bounds.");
  int64_t scanline_size = crop_w_aligned * cinfo->output_components;
  std::vector<JSAMPLE> scanline(scanline_size);
  JSAMPLE *scanline_ptr = &scanline[0];
  while (cinfo->output_scanline < static_cast<unsigned int>(max_scanlines_to_read)) {
    int num_lines_read = 0;
    try {
      num_lines_read = static_cast<int>(jpeg_read_scanlines(cinfo, &scanline_ptr, 1));
      RETURN_IF_NOT_OK(CheckJpegExit(cinfo));
    } catch (std::runtime_error &e) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: image decode failed.");
    }
    if (cinfo->out_color_space == JCS_CMYK && num_lines_read > 0) {
      // Manual CMYK -> RGB conversion; each source pixel is 4 components wide.
      for (int i = 0; i < crop_w; ++i) {
        const int cmyk_pixel = 4 * i + offset;
        const int c = scanline_ptr[cmyk_pixel];
        const int m = scanline_ptr[cmyk_pixel + 1];
        const int y = scanline_ptr[cmyk_pixel + 2];
        const int k = scanline_ptr[cmyk_pixel + 3];
        int r, g, b;
        // Adobe-marked JPEGs store CMYK with inverted values, hence the two
        // different conversion formulas below.
        if (cinfo->saw_Adobe_marker) {
          r = (k * c) / kMaxBitValue;
          g = (k * m) / kMaxBitValue;
          b = (k * y) / kMaxBitValue;
        } else {
          r = (kMaxBitValue - c) * (kMaxBitValue - k) / kMaxBitValue;
          g = (kMaxBitValue - m) * (kMaxBitValue - k) / kMaxBitValue;
          b = (kMaxBitValue - y) * (kMaxBitValue - k) / kMaxBitValue;
        }
        buffer[kDefaultImageChannel * i + kRIndex] = r;
        buffer[kDefaultImageChannel * i + kGIndex] = g;
        buffer[kDefaultImageChannel * i + kBIndex] = b;
      }
    } else if (num_lines_read > 0) {
      // RGB source: copy the cropped window of the row straight into the output.
      int copy_status = memcpy_s(buffer, buffer_size, scanline_ptr + offset, stride);
      if (copy_status != 0) {
        jpeg_destroy_decompress(cinfo);
        RETURN_STATUS_UNEXPECTED("[Internal ERROR] Decode: memcpy failed.");
      }
    } else {
      // jpeg_read_scanlines returned no data: the stream is broken.
      jpeg_destroy_decompress(cinfo);
      std::string err_msg = "[Internal ERROR] Decode: image decode failed.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
    // Advance the output cursor by one row.
    buffer += stride;
    buffer_size = buffer_size - stride;
  }
  return Status::OK();
}
510
JpegSetColorSpace(jpeg_decompress_struct * cinfo)511 static Status JpegSetColorSpace(jpeg_decompress_struct *cinfo) {
512 switch (cinfo->num_components) {
513 case 1:
514 // we want to output 3 components if it's grayscale
515 cinfo->out_color_space = JCS_RGB;
516 return Status::OK();
517 case 3:
518 cinfo->out_color_space = JCS_RGB;
519 return Status::OK();
520 case 4:
521 // Need to manually convert to RGB
522 cinfo->out_color_space = JCS_CMYK;
523 return Status::OK();
524 default:
525 jpeg_destroy_decompress(cinfo);
526 std::string err_msg = "[Internal ERROR] Decode: image decode failed.";
527 RETURN_STATUS_UNEXPECTED(err_msg);
528 }
529 }
530
// Custom libjpeg error_exit handler. Instead of letting libjpeg abort the
// process, format the message and record it in the thread-local jpeg_status
// list so callers can surface it as a Status via CheckJpegExit.
void JpegErrorExitCustom(j_common_ptr cinfo) {
  char jpeg_error_msg[JMSG_LENGTH_MAX];
  (*(cinfo->err->format_message))(cinfo, jpeg_error_msg);
  // we encounter core dump when execute jpeg_start_decompress at arm platform,
  // so we collect Status instead of throwing exception.
  jpeg_status.emplace_back(
    STATUS_ERROR(StatusCode::kMDUnexpectedError, "Error raised by libjpeg: " + std::string(jpeg_error_msg)));
}
539
JpegCropAndDecode(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int crop_x,int crop_y,int crop_w,int crop_h)540 Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int crop_x, int crop_y,
541 int crop_w, int crop_h) {
542 struct jpeg_decompress_struct cinfo {};
543 auto DestroyDecompressAndReturnError = [&cinfo](const std::string &err) {
544 jpeg_destroy_decompress(&cinfo);
545 RETURN_STATUS_UNEXPECTED(err);
546 };
547 struct JpegErrorManagerCustom jerr {};
548 cinfo.err = jpeg_std_error(&jerr.pub);
549 jerr.pub.error_exit = JpegErrorExitCustom;
550 try {
551 jpeg_create_decompress(&cinfo);
552 if (input->type() == DataType::DE_BYTES) {
553 uint32_t len = 0;
554 RETURN_IF_NOT_OK(input->GetStringLength(&len));
555 JpegSetSource(&cinfo, input->GetStringsBuffer(), len);
556 } else {
557 JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes());
558 }
559 (void)jpeg_read_header(&cinfo, TRUE);
560 RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo));
561 jpeg_calc_output_dimensions(&cinfo);
562 RETURN_IF_NOT_OK(CheckJpegExit(&cinfo));
563 } catch (std::runtime_error &e) {
564 return DestroyDecompressAndReturnError(e.what());
565 }
566 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_w) > crop_x,
567 "JpegCropAndDecode: addition(crop x and crop width) out of bounds, got crop x:" +
568 std::to_string(crop_x) + ", and crop width:" + std::to_string(crop_w));
569 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_h) > crop_y,
570 "JpegCropAndDecode: addition(crop y and crop height) out of bounds, got crop y:" +
571 std::to_string(crop_y) + ", and crop height:" + std::to_string(crop_h));
572 if (crop_x == 0 && crop_y == 0 && crop_w == 0 && crop_h == 0) {
573 crop_w = static_cast<int>(cinfo.output_width);
574 crop_h = static_cast<int>(cinfo.output_height);
575 } else if (crop_w == 0 || static_cast<unsigned int>(crop_w + crop_x) > cinfo.output_width || crop_h == 0 ||
576 static_cast<unsigned int>(crop_h + crop_y) > cinfo.output_height) {
577 return DestroyDecompressAndReturnError(
578 "Crop: invalid crop size, corresponding crop value equal to 0 or too big, got crop width: " +
579 std::to_string(crop_w) + ", crop height:" + std::to_string(crop_h) +
580 ", and crop x coordinate:" + std::to_string(crop_x) + ", crop y coordinate:" + std::to_string(crop_y));
581 }
582 const int mcu_size = cinfo.min_DCT_scaled_size;
583 CHECK_FAIL_RETURN_UNEXPECTED(mcu_size != 0, "JpegCropAndDecode: divisor mcu_size is zero.");
584 unsigned int crop_x_aligned = (crop_x / mcu_size) * mcu_size;
585 unsigned int crop_w_aligned = crop_w + crop_x - crop_x_aligned;
586 try {
587 bool status = jpeg_start_decompress(&cinfo);
588 CHECK_FAIL_RETURN_UNEXPECTED(status, "JpegCropAndDecode: fail to decode, jpeg maybe a multi-scan file or broken.");
589 RETURN_IF_NOT_OK(CheckJpegExit(&cinfo));
590 jpeg_crop_scanline(&cinfo, &crop_x_aligned, &crop_w_aligned);
591 RETURN_IF_NOT_OK(CheckJpegExit(&cinfo));
592 } catch (std::runtime_error &e) {
593 return DestroyDecompressAndReturnError(e.what());
594 }
595 JDIMENSION skipped_scanlines = jpeg_skip_scanlines(&cinfo, crop_y);
596 // three number of output components, always convert to RGB and output
597 constexpr int kOutNumComponents = 3;
598 TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents});
599 std::shared_ptr<Tensor> output_tensor;
600 RETURN_IF_NOT_OK(Tensor::CreateEmpty(ts, DataType(DataType::DE_UINT8), &output_tensor));
601 const int buffer_size = static_cast<int>(output_tensor->SizeInBytes());
602 JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>()));
603 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<float_t>::max() - skipped_scanlines) > crop_h,
604 "JpegCropAndDecode: addition out of bounds.");
605 const int max_scanlines_to_read = static_cast<int>(skipped_scanlines) + crop_h;
606 // stride refers to output tensor, which has 3 components at most
607 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() / crop_w) > kOutNumComponents,
608 "JpegCropAndDecode: multiplication out of bounds.");
609 const int stride = crop_w * kOutNumComponents;
610 // offset is calculated for scanlines read from the image, therefore
611 // has the same number of components as the image
612 int minius_value = crop_x - static_cast<int>(crop_x_aligned);
613 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<float_t>::max() / minius_value) > cinfo.output_components,
614 "JpegCropAndDecode: multiplication out of bounds.");
615 const int offset = minius_value * cinfo.output_components;
616 RETURN_IF_NOT_OK(
617 JpegReadScanlines(&cinfo, max_scanlines_to_read, buffer, buffer_size, crop_w, crop_w_aligned, offset, stride));
618 *output = output_tensor;
619 jpeg_destroy_decompress(&cinfo);
620 return Status::OK();
621 }
622
Rescale(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float rescale,float shift)623 Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rescale, float shift) {
624 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
625 if (!input_cv->mat().data) {
626 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Rescale: load image failed.");
627 }
628 cv::Mat input_image = input_cv->mat();
629 std::shared_ptr<CVTensor> output_cv;
630 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), DataType(DataType::DE_FLOAT32), &output_cv));
631 try {
632 input_image.convertTo(output_cv->mat(), CV_32F, rescale, shift);
633 *output = std::static_pointer_cast<Tensor>(output_cv);
634 } catch (const cv::Exception &e) {
635 RETURN_STATUS_UNEXPECTED("Rescale: " + std::string(e.what()));
636 }
637 return Status::OK();
638 }
639
Crop(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int x,int y,int w,int h)640 Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h) {
641 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
642 if (!input_cv->mat().data) {
643 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Crop: load image failed.");
644 }
645 RETURN_IF_NOT_OK(ValidateImageRank("Crop", input_cv->Rank()));
646 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - y) > h,
647 "Crop: addition(x and height) out of bounds, got height:" + std::to_string(h) +
648 ", and coordinate y:" + std::to_string(y));
649 // account for integer overflow
650 if (y < 0 || (y + h) > input_cv->shape()[0] || (y + h) < 0) {
651 RETURN_STATUS_UNEXPECTED(
652 "Crop: invalid y coordinate value for crop, y coordinate value exceeds the boundary of the image, got y: " +
653 std::to_string(y));
654 }
655 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - x) > w, "Crop: addition out of bounds.");
656 // account for integer overflow
657 if (x < 0 || (x + w) > input_cv->shape()[1] || (x + w) < 0) {
658 RETURN_STATUS_UNEXPECTED(
659 "Crop: invalid x coordinate value for crop, "
660 "x coordinate value exceeds the boundary of the image, got x: " +
661 std::to_string(x));
662 }
663 try {
664 TensorShape shape{h, w};
665 if (input_cv->Rank() == kDefaultImageRank) {
666 int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
667 shape = shape.AppendDim(num_channels);
668 }
669 std::shared_ptr<CVTensor> output_cv;
670 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));
671 cv::Rect roi(x, y, w, h);
672 (input_cv->mat())(roi).copyTo(output_cv->mat());
673 *output = std::static_pointer_cast<Tensor>(output_cv);
674 return Status::OK();
675 } catch (const cv::Exception &e) {
676 RETURN_STATUS_UNEXPECTED("Crop: " + std::string(e.what()));
677 }
678 }
679
ConvertColor(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,ConvertMode convert_mode)680 Status ConvertColor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, ConvertMode convert_mode) {
681 try {
682 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
683 RETURN_IF_NOT_OK(ValidateImageRank("ConvertColor", input_cv->Rank()));
684 if (!input_cv->mat().data) {
685 RETURN_STATUS_UNEXPECTED("[Internal ERROR] ConvertColor: load image failed.");
686 }
687 if (input_cv->Rank() == kDefaultImageRank) {
688 int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
689 if (num_channels != kMinImageChannel && num_channels != kDefaultImageChannel &&
690 num_channels != kMaxImageChannel) {
691 RETURN_STATUS_UNEXPECTED("ConvertColor: number of channels of image should be 1, 3, 4, but got:" +
692 std::to_string(num_channels));
693 }
694 }
695 std::vector<dsize_t> node;
696 RETURN_IF_NOT_OK(GetConvertShape(convert_mode, input_cv, &node));
697 if (node.empty()) {
698 RETURN_STATUS_UNEXPECTED(
699 "ConvertColor: convert mode must be in ConvertMode, which mainly includes conversion "
700 "between RGB, BGR, GRAY, RGBA etc.");
701 }
702 TensorShape out_shape = TensorShape(node);
703 std::shared_ptr<CVTensor> output_cv;
704 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
705 cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(convert_mode));
706 *output = std::static_pointer_cast<Tensor>(output_cv);
707 return Status::OK();
708 } catch (const cv::Exception &e) {
709 RETURN_STATUS_UNEXPECTED("ConvertColor: " + std::string(e.what()));
710 }
711 }
712
HwcToChw(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)713 Status HwcToChw(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
714 try {
715 if (input->Rank() == kMinImageRank) {
716 // If input tensor is 2D, we assume we have hw dimensions
717 *output = input;
718 return Status::OK();
719 }
720 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
721 if (!input_cv->mat().data) {
722 RETURN_STATUS_UNEXPECTED("[Internal ERROR] HWC2CHW: load image failed.");
723 }
724 if (input_cv->Rank() != kDefaultImageRank) {
725 RETURN_STATUS_UNEXPECTED("HWC2CHW: image shape should be <H,W> or <H,W,C>, but got rank: " +
726 std::to_string(input_cv->Rank()));
727 }
728 cv::Mat output_img;
729
730 int height = static_cast<int>(input_cv->shape()[0]);
731 int width = static_cast<int>(input_cv->shape()[1]);
732 int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
733
734 std::shared_ptr<CVTensor> output_cv;
735 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(TensorShape{num_channels, height, width}, input_cv->type(), &output_cv));
736
737 for (int i = 0; i < num_channels; ++i) {
738 cv::Mat mat;
739 RETURN_IF_NOT_OK(output_cv->MatAtIndex({i}, &mat));
740 cv::extractChannel(input_cv->mat(), mat, i);
741 }
742 *output = std::move(output_cv);
743 return Status::OK();
744 } catch (const cv::Exception &e) {
745 RETURN_STATUS_UNEXPECTED("HWC2CHW: " + std::string(e.what()));
746 }
747 }
748
MaskWithTensor(const std::shared_ptr<Tensor> & sub_mat,std::shared_ptr<Tensor> * input,int x,int y,int crop_width,int crop_height,ImageFormat image_format)749 Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Tensor> *input, int x, int y,
750 int crop_width, int crop_height, ImageFormat image_format) {
751 constexpr int64_t input_shape = 2;
752 if (image_format == ImageFormat::HWC) {
753 if (CheckTensorShape(*input, input_shape)) {
754 RETURN_STATUS_UNEXPECTED(
755 "CutMixBatch: MaskWithTensor failed: "
756 "input shape doesn't match <H,W,C> format, got shape:" +
757 (*input)->shape().ToString());
758 }
759 if (CheckTensorShape(sub_mat, input_shape)) {
760 RETURN_STATUS_UNEXPECTED(
761 "CutMixBatch: MaskWithTensor failed: "
762 "sub_mat shape doesn't match <H,W,C> format, got shape:" +
763 (*input)->shape().ToString());
764 }
765 int number_of_channels = static_cast<int>((*input)->shape()[kChannelIndexHWC]);
766 for (int i = 0; i < crop_width; i++) {
767 for (int j = 0; j < crop_height; j++) {
768 for (int c = 0; c < number_of_channels; c++) {
769 RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {j, i, c}, {y + j, x + i, c}));
770 }
771 }
772 }
773 } else if (image_format == ImageFormat::CHW) {
774 if (CheckTensorShape(*input, 0)) {
775 RETURN_STATUS_UNEXPECTED(
776 "CutMixBatch: MaskWithTensor failed: "
777 "input shape doesn't match <C,H,W> format, got shape:" +
778 (*input)->shape().ToString());
779 }
780 if (CheckTensorShape(sub_mat, 0)) {
781 RETURN_STATUS_UNEXPECTED(
782 "CutMixBatch: MaskWithTensor failed: "
783 "sub_mat shape doesn't match <C,H,W> format, got shape:" +
784 (*input)->shape().ToString());
785 }
786 int number_of_channels = static_cast<int>((*input)->shape()[0]);
787 for (int i = 0; i < crop_width; i++) {
788 for (int j = 0; j < crop_height; j++) {
789 for (int c = 0; c < number_of_channels; c++) {
790 RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {c, j, i}, {c, y + j, x + i}));
791 }
792 }
793 }
794 } else if (image_format == ImageFormat::HW) {
795 if ((*input)->Rank() != kMinImageRank) {
796 RETURN_STATUS_UNEXPECTED(
797 "CutMixBatch: MaskWithTensor failed: "
798 "input shape doesn't match <H,W> format, got shape:" +
799 (*input)->shape().ToString());
800 }
801 if (sub_mat->Rank() != kMinImageRank) {
802 RETURN_STATUS_UNEXPECTED(
803 "CutMixBatch: MaskWithTensor failed: "
804 "sub_mat shape doesn't match <H,W> format, got shape:" +
805 (*input)->shape().ToString());
806 }
807 for (int i = 0; i < crop_width; i++) {
808 for (int j = 0; j < crop_height; j++) {
809 RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {j, i}, {y + j, x + i}));
810 }
811 }
812 } else {
813 RETURN_STATUS_UNEXPECTED(
814 "CutMixBatch: MaskWithTensor failed: "
815 "image format must be <C,H,W>, <H,W,C>, or <H,W>, got shape:" +
816 (*input)->shape().ToString());
817 }
818 return Status::OK();
819 }
820
CopyTensorValue(const std::shared_ptr<Tensor> & source_tensor,std::shared_ptr<Tensor> * dest_tensor,const std::vector<int64_t> & source_indx,const std::vector<int64_t> & dest_indx)821 Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared_ptr<Tensor> *dest_tensor,
822 const std::vector<int64_t> &source_indx, const std::vector<int64_t> &dest_indx) {
823 if (source_tensor->type() != (*dest_tensor)->type()) {
824 RETURN_STATUS_UNEXPECTED(
825 "CutMixBatch: CopyTensorValue failed: "
826 "source and destination tensor must have the same type.");
827 }
828 if (source_tensor->type() == DataType::DE_UINT8) {
829 uint8_t pixel_value = 0;
830 RETURN_IF_NOT_OK(source_tensor->GetItemAt(&pixel_value, source_indx));
831 RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, pixel_value));
832 } else if (source_tensor->type() == DataType::DE_FLOAT32) {
833 float pixel_value = 0;
834 RETURN_IF_NOT_OK(source_tensor->GetItemAt(&pixel_value, source_indx));
835 RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, pixel_value));
836 } else {
837 RETURN_STATUS_UNEXPECTED(
838 "CutMixBatch: CopyTensorValue failed: "
839 "Tensor type is not supported. Tensor type must be float32 or uint8.");
840 }
841 return Status::OK();
842 }
843
SwapRedAndBlue(std::shared_ptr<Tensor> input,std::shared_ptr<Tensor> * output)844 Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
845 try {
846 RETURN_IF_NOT_OK(ValidateImage(input, "SwapRedBlue", {3, 5, 11}));
847 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
848 CHECK_FAIL_RETURN_UNEXPECTED(
849 input_cv->shape().Size() > kChannelIndexHWC,
850 "SwapRedAndBlue: rank of input data should be greater than:" + std::to_string(kChannelIndexHWC) +
851 ", but got:" + std::to_string(input_cv->shape().Size()));
852 int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
853 if (input_cv->shape().Size() != kDefaultImageRank || num_channels != kDefaultImageChannel) {
854 RETURN_STATUS_UNEXPECTED("SwapRedBlue: image shape should be in <H,W,C> format, but got:" +
855 input_cv->shape().ToString());
856 }
857 std::shared_ptr<CVTensor> output_cv;
858 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
859
860 cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_BGR2RGB));
861 *output = std::static_pointer_cast<Tensor>(output_cv);
862 return Status::OK();
863 } catch (const cv::Exception &e) {
864 RETURN_STATUS_UNEXPECTED("SwapRedBlue: " + std::string(e.what()));
865 }
866 }
867
CropAndResize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int x,int y,int crop_height,int crop_width,int target_height,int target_width,InterpolationMode mode)868 Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y,
869 int crop_height, int crop_width, int target_height, int target_width, InterpolationMode mode) {
870 try {
871 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
872 if (!input_cv->mat().data) {
873 RETURN_STATUS_UNEXPECTED("[Internal ERROR] CropAndResize: load image failed.");
874 }
875 RETURN_IF_NOT_OK(ValidateImageRank("CropAndResize", input_cv->Rank()));
876 // image too large or too small, 1000 is arbitrary here to prevent opencv from segmentation fault
877 const uint32_t kCropShapeLimits = 1000;
878 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() / kCropShapeLimits) > crop_height,
879 "CropAndResize: crop_height out of bounds.");
880 CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() / kCropShapeLimits) > crop_width,
881 "CropAndResize: crop_width out of bounds.");
882 if (crop_height == 0 || crop_width == 0 || target_height == 0 || target_height > crop_height * kCropShapeLimits ||
883 target_width == 0 || target_width > crop_width * kCropShapeLimits) {
884 std::string err_msg =
885 "CropAndResize: the resizing width or height 1) is too big, it's up to " + std::to_string(kCropShapeLimits) +
886 " times the original image; 2) can not be 0. Detail info is: crop_height: " + std::to_string(crop_height) +
887 ", crop_width: " + std::to_string(crop_width) + ", target_height: " + std::to_string(target_height) +
888 ", target_width: " + std::to_string(target_width);
889 RETURN_STATUS_UNEXPECTED(err_msg);
890 }
891 cv::Rect roi(x, y, crop_width, crop_height);
892 auto cv_mode = GetCVInterpolationMode(mode);
893 cv::Mat cv_in = input_cv->mat();
894
895 if (mode == InterpolationMode::kCubicPil) {
896 if (input_cv->shape().Size() != kDefaultImageChannel ||
897 input_cv->shape()[kChannelIndexHWC] != kDefaultImageChannel) {
898 RETURN_STATUS_UNEXPECTED(
899 "CropAndResize: Interpolation mode PILCUBIC only supports image with 3 channels, but got: " +
900 input_cv->shape().ToString());
901 }
902
903 cv::Mat input_roi = cv_in(roi);
904 std::shared_ptr<CVTensor> input_image;
905 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_roi, input_cv->Rank(), &input_image));
906 LiteMat imIn, imOut;
907 std::shared_ptr<Tensor> output_tensor;
908 TensorShape new_shape = TensorShape({target_height, target_width, 3});
909 RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input_cv->type(), &output_tensor));
910 uint8_t *buffer = reinterpret_cast<uint8_t *>(&(*output_tensor->begin<uint8_t>()));
911 int input_channel = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
912 imOut.Init(target_width, target_height, input_channel, reinterpret_cast<void *>(buffer), LDataType::UINT8);
913 int input_height = static_cast<int>(input_image->shape()[0]);
914 int input_width = static_cast<int>(input_image->shape()[1]);
915 imIn.Init(input_width, input_height, input_channel, input_image->mat().data, LDataType::UINT8);
916 if (ResizeCubic(imIn, imOut, target_width, target_height) == false) {
917 RETURN_STATUS_UNEXPECTED("Resize: failed to do resize, please check the error msg.");
918 }
919 *output = output_tensor;
920 return Status::OK();
921 }
922
923 TensorShape shape{target_height, target_width};
924 if (input_cv->Rank() == kDefaultImageRank) {
925 int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
926 shape = shape.AppendDim(num_channels);
927 }
928 std::shared_ptr<CVTensor> cvt_out;
929 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &cvt_out));
930 cv::resize(cv_in(roi), cvt_out->mat(), cv::Size(target_width, target_height), 0, 0, cv_mode);
931 *output = std::static_pointer_cast<Tensor>(cvt_out);
932 return Status::OK();
933 } catch (const cv::Exception &e) {
934 RETURN_STATUS_UNEXPECTED("CropAndResize: " + std::string(e.what()));
935 }
936 }
937
/// \brief Rotate the input image by `degree` around `center`.
/// \param input Image tensor in <H,W> or <H,W,C> format.
/// \param[out] output Rotated image; same shape as input when expand is false, otherwise
///        resized to the rotated bounding box.
/// \param center Rotation center {x, y}; when empty, the geometric center of the image is used.
/// \param degree Rotation angle in degrees (OpenCV convention).
/// \param interpolation Interpolation mode; kCubic is rejected for images with more than 4 channels.
/// \param expand When true, enlarge the output canvas so the whole rotated image fits.
/// \param fill_r Red component of the border fill color.
/// \param fill_g Green component of the border fill color.
/// \param fill_b Blue component of the border fill color.
Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> center,
              float degree, InterpolationMode interpolation, bool expand, uint8_t fill_r, uint8_t fill_g,
              uint8_t fill_b) {
  try {
    RETURN_IF_NOT_OK(ValidateImageRank("Rotate", input->Rank()));
    dsize_t channel = 1;
    RETURN_IF_NOT_OK(ImageNumChannels(input, &channel));
    // Cubic interpolation is only allowed for images with at most kMaxImageChannel channels.
    CHECK_FAIL_RETURN_UNEXPECTED(channel <= kMaxImageChannel || interpolation != InterpolationMode::kCubic,
                                 "Rotate: interpolation can not be CUBIC when image channel is greater than 4.");
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] Rotate: load image failed.");
    }

    cv::Mat input_img = input_cv->mat();
    // Beyond 2 * 2^24 pixels per side, float arithmetic can no longer represent the
    // center coordinates exactly (see MAX_INT_PRECISION at the top of this file).
    if (input_img.cols > (MAX_INT_PRECISION * DOUBLING_FACTOR) ||
        input_img.rows > (MAX_INT_PRECISION * DOUBLING_FACTOR)) {
      RETURN_STATUS_UNEXPECTED("Rotate: image is too large and center is not precise, got image width:" +
                               std::to_string(input_img.cols) + ", and image height:" + std::to_string(input_img.rows) +
                               ", both should be small than:" + std::to_string(MAX_INT_PRECISION * DOUBLING_FACTOR));
    }
    float fx = 0, fy = 0;
    if (center.empty()) {
      // default to center of image
      fx = (static_cast<float>(input_img.cols) - 1.0F) * kHalf;
      fy = (static_cast<float>(input_img.rows) - 1.0F) * kHalf;
    } else {
      fx = center[0];
      fy = center[1];
    }
    cv::Mat output_img;
    // cv::Scalar is BGR-ordered, hence (b, g, r).
    cv::Scalar fill_color = cv::Scalar(fill_b, fill_g, fill_r);
    // maybe don't use uint32 for image dimension here
    cv::Point2f pc(fx, fy);
    cv::Mat rot = cv::getRotationMatrix2D(pc, degree, 1.0);
    std::shared_ptr<CVTensor> output_cv;
    if (!expand) {
      // this case means that the shape doesn't change, size stays the same
      // We may not need this memcpy if it is in place.
      RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
      // using inter_nearest to comply with python default
      cv::warpAffine(input_img, output_cv->mat(), rot, input_img.size(), GetCVInterpolationMode(interpolation),
                     cv::BORDER_CONSTANT, fill_color);
    } else {
      // we resize here since the shape changes
      // create a new bounding box with the rotate
      cv::Rect2f bbox = cv::RotatedRect(pc, input_img.size(), degree).boundingRect2f();
      // shift the rotation matrix so the rotated image is centered in the enlarged canvas
      rot.at<double>(0, 2) += bbox.width / 2.0 - input_img.cols / 2.0;
      rot.at<double>(1, 2) += bbox.height / 2.0 - input_img.rows / 2.0;
      // use memcpy and don't compute the new shape since openCV has a rounding problem
      cv::warpAffine(input_img, output_img, rot, bbox.size(), GetCVInterpolationMode(interpolation),
                     cv::BORDER_CONSTANT, fill_color);
      RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, input_cv->Rank(), &output_cv));
      RETURN_UNEXPECTED_IF_NULL(output_cv);
    }
    *output = std::static_pointer_cast<Tensor>(output_cv);
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("Rotate: " + std::string(e.what()));
  }
  return Status::OK();
}
999
1000 template <typename T1, typename T2>
Normalize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,std::vector<float> mean,std::vector<float> std,bool is_hwc,bool pad=false)1001 void Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean,
1002 std::vector<float> std, bool is_hwc, bool pad = false) {
1003 // T1 is the type of input tensor, T2 is the type of output tensor
1004 auto itr_out = (*output)->begin<T2>();
1005 auto itr = input->begin<T1>();
1006 auto end = input->end<T1>();
1007 int64_t num_channels;
1008 if (is_hwc) {
1009 num_channels = (*output)->shape()[kChannelIndexHWC];
1010 while (itr != end) {
1011 for (size_t i = 0; i < num_channels - static_cast<int>(pad); i++) {
1012 *itr_out = static_cast<T2>((static_cast<float>(*itr) - mean[i]) / std[i]);
1013 ++itr_out;
1014 ++itr;
1015 }
1016 }
1017 } else {
1018 num_channels = (*output)->shape()[kChannelIndexCHW];
1019 int64_t height_index = 1;
1020 int64_t width_index = 2;
1021 int64_t channel_len = (*output)->shape()[height_index] * (*output)->shape()[width_index];
1022 while (itr != end) {
1023 for (size_t i = 0; i < num_channels - static_cast<int>(pad); i++) {
1024 for (int64_t j = 0; j < channel_len; j++) {
1025 *itr_out = static_cast<T2>((static_cast<float>(*itr) - mean[i]) / std[i]);
1026 ++itr_out;
1027 ++itr;
1028 }
1029 }
1030 }
1031 }
1032 }
1033
1034 template <typename T>
Normalize_caller(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const std::vector<float> mean_v,const std::vector<float> std_v,bool is_hwc,bool pad)1035 Status Normalize_caller(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
1036 const std::vector<float> mean_v, const std::vector<float> std_v, bool is_hwc, bool pad) {
1037 switch (static_cast<int>(input->type().value())) {
1038 case DataType::DE_BOOL:
1039 Normalize<bool, T>(input, output, mean_v, std_v, is_hwc, pad);
1040 break;
1041 case DataType::DE_INT8:
1042 Normalize<int8_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1043 break;
1044 case DataType::DE_UINT8:
1045 Normalize<uint8_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1046 break;
1047 case DataType::DE_INT16:
1048 Normalize<int16_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1049 break;
1050 case DataType::DE_UINT16:
1051 Normalize<uint16_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1052 break;
1053 case DataType::DE_INT32:
1054 Normalize<int32_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1055 break;
1056 case DataType::DE_UINT32:
1057 Normalize<uint32_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1058 break;
1059 case DataType::DE_INT64:
1060 Normalize<int64_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1061 break;
1062 case DataType::DE_UINT64:
1063 Normalize<uint64_t, T>(input, output, mean_v, std_v, is_hwc, pad);
1064 break;
1065 case DataType::DE_FLOAT16:
1066 Normalize<float16, T>(input, output, mean_v, std_v, is_hwc, pad);
1067 break;
1068 case DataType::DE_FLOAT32:
1069 Normalize<float, T>(input, output, mean_v, std_v, is_hwc, pad);
1070 break;
1071 case DataType::DE_FLOAT64:
1072 Normalize<double, T>(input, output, mean_v, std_v, is_hwc, pad);
1073 break;
1074 default:
1075 std::string op_name = (pad) ? "NormalizePad" : "Normalize";
1076 RETURN_STATUS_UNEXPECTED(
1077 op_name + ": unsupported type, currently supported types include " +
1078 "[bool,int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,int64_t,uint64_t,float16,float,double].");
1079 }
1080 return Status::OK();
1081 }
1082
/// \brief Normalize the image: per channel, output = (input - mean[c]) / std[c], as float32.
/// \param input Image tensor of rank 2 (<H,W>) or 3.
/// \param[out] output Normalized float32 tensor with the same shape as the input.
/// \param mean Per-channel mean values; a single value is broadcast to all channels.
/// \param std Per-channel standard deviations; same broadcast rule as mean.
/// \param is_hwc True when the channel dimension is last (<H,W,C>), false for <C,H,W>.
Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean,
                 std::vector<float> std, bool is_hwc) {
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), DataType(DataType::DE_FLOAT32), output));
  // Treat a 2-D image as single-channel by temporarily appending a channel dimension of 1;
  // it is squeezed back out at the end of this function.
  if (input->Rank() == kMinImageRank) {
    RETURN_IF_NOT_OK((*output)->ExpandDim(kMinImageRank));
  }

  CHECK_FAIL_RETURN_UNEXPECTED((*output)->Rank() == kDefaultImageRank,
                               "Normalize: output image rank should be: " + std::to_string(kDefaultImageRank) +
                                 ", but got: " + std::to_string((*output)->Rank()));
  CHECK_FAIL_RETURN_UNEXPECTED(std.size() == mean.size(),
                               "Normalize: mean and std vectors are not of same size, got size of std: " +
                                 std::to_string(std.size()) + ", and mean size: " + std::to_string(mean.size()));
  int64_t channel_index;
  if (is_hwc) {
    channel_index = kChannelIndexHWC;
  } else {
    channel_index = kChannelIndexCHW;
  }
  // caller provided 1 mean/std value and there is more than one channel --> duplicate mean/std value
  if (mean.size() == 1 && (*output)->shape()[channel_index] != 1) {
    for (int64_t i = 0; i < (*output)->shape()[channel_index] - 1; i++) {
      mean.push_back(mean[0]);
      std.push_back(std[0]);
    }
  }
  CHECK_FAIL_RETURN_UNEXPECTED((*output)->shape()[channel_index] == static_cast<dsize_t>(mean.size()),
                               "Normalize: number of channels does not match the size of mean and std vectors, got "
                               "channels: " +
                                 std::to_string((*output)->shape()[channel_index]) +
                                 ", size of mean: " + std::to_string(mean.size()));
  // Element-wise work is dispatched on the input's runtime element type; output is float32.
  RETURN_IF_NOT_OK(Normalize_caller<float>(input, output, mean, std, is_hwc, false));

  // Drop the temporary channel dimension added above for originally 2-D inputs.
  if (input->Rank() == kMinImageRank) {
    (*output)->Squeeze();
  }
  return Status::OK();
}
1121
/// \brief Normalize the image and append one extra zero-filled channel to the output.
/// \param input Image tensor of rank 2 or 3.
/// \param[out] output Normalized tensor of type `dtype` with one more channel than the input.
/// \param mean Per-channel mean values; a single value is broadcast to all channels.
/// \param std Per-channel standard deviations; same broadcast rule as mean.
/// \param dtype Output element type name: "float16" or anything else means float32.
/// \param is_hwc True when the channel dimension is last, false for <C,H,W>.
Status NormalizePad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean,
                    std::vector<float> std, const std::string &dtype, bool is_hwc) {
  RETURN_IF_NOT_OK(ValidateImageRank("NormalizePad", input->Rank()));
  int64_t channel_index = kChannelIndexCHW;
  if (is_hwc) {
    channel_index = kChannelIndexHWC;
  }
  // A rank-2 input is treated as a single-channel image.
  int32_t channels = 1;
  if (input->Rank() == kDefaultImageRank) {
    channels = static_cast<int>(input->shape()[channel_index]);
  }

  // Allocate the output with one extra channel and zero it, so the pad channel stays all-zero.
  if (is_hwc) {
    TensorShape new_shape = TensorShape({input->shape()[0], input->shape()[1], channels + 1});
    RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, DataType(dtype), output));
    RETURN_IF_NOT_OK((*output)->Zero());
  } else {
    // NOTE(review): for a rank-2 input this indexes input->shape()[2], which does not exist
    // for a <H,W> tensor — confirm whether CHW callers ever pass rank-2 input here.
    TensorShape new_shape = TensorShape({channels + 1, input->shape()[1], input->shape()[2]});
    RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, DataType(dtype), output));
    RETURN_IF_NOT_OK((*output)->Zero());
  }

  // caller provided 1 mean/std value and there are more than one channel --> duplicate mean/std value
  if (mean.size() == 1 && channels > 1) {
    while (mean.size() < channels) {
      mean.push_back(mean[0]);
      std.push_back(std[0]);
    }
  }
  CHECK_FAIL_RETURN_UNEXPECTED((*output)->shape()[channel_index] == static_cast<dsize_t>(mean.size()) + 1,
                               "NormalizePad: number of channels does not match the size of mean and std vectors, got "
                               "channels: " +
                                 std::to_string((*output)->shape()[channel_index] - 1) +
                                 ", size of mean: " + std::to_string(mean.size()));
  // pad=true tells the Normalize kernel to process one fewer channel than the output has,
  // leaving the extra (zeroed) channel untouched.
  if (dtype == "float16") {
    RETURN_IF_NOT_OK(Normalize_caller<float16>(input, output, mean, std, is_hwc, true));
  } else {
    RETURN_IF_NOT_OK(Normalize_caller<float>(input, output, mean, std, is_hwc, true));
  }
  if (input->Rank() == kMinImageRank) {
    (*output)->Squeeze();
  }
  return Status::OK();
}
1166
AdjustBrightness(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float alpha)1167 Status AdjustBrightness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float alpha) {
1168 try {
1169 RETURN_IF_NOT_OK(ValidateImage(input, "AdjustBrightness", {1, 2, 3, 4, 5, 6, 10, 11, 12}, {2, 3}, {1, 3}));
1170 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1171 cv::Mat input_img = input_cv->mat();
1172 if (!input_cv->mat().data) {
1173 RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustBrightness: load image failed.");
1174 }
1175 std::shared_ptr<CVTensor> output_cv;
1176 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1177 output_cv->mat() = input_img * alpha;
1178 *output = std::static_pointer_cast<Tensor>(output_cv);
1179 } catch (const cv::Exception &e) {
1180 RETURN_STATUS_UNEXPECTED("AdjustBrightness: " + std::string(e.what()));
1181 }
1182 return Status::OK();
1183 }
1184
/// \brief Adjust the contrast of an RGB image.
/// output = gray_mean * (1 - alpha) + input * alpha, where gray_mean is the mean intensity
/// of the grayscale version of the image: alpha = 1 returns the original image, alpha = 0
/// a uniform image at the mean gray level.
/// \param input RGB image tensor.
/// \param[out] output Contrast-adjusted image, same shape and type as the input.
/// \param alpha Contrast (blend) factor.
Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float alpha) {
  try {
    RETURN_IF_NOT_OK(ValidateImage(input, "AdjustContrast", {3, 5, 11}, {3}, {3}));
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    cv::Mat input_img = input_cv->mat();
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustContrast: load image failed.");
    }
    // The blend target is the mean intensity of the grayscale image.
    cv::Mat gray, output_img;
    cv::cvtColor(input_img, gray, CV_RGB2GRAY);
    auto mean_img = cv::mean(gray).val[0];
    std::shared_ptr<CVTensor> output_cv;
    RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
    // thread safe: change cv::Mat::zeros to cv::Mat + setTo
    output_img = cv::Mat(input_img.rows, input_img.cols, input_img.depth());
    output_img.setTo(cv::Scalar::all(0));
    // Build a constant image at the mean gray level, widened back to 3 channels.
    output_img = output_img + mean_img;
    cv::cvtColor(output_img, output_img, CV_GRAY2RGB);
    // Linear blend between the constant-mean image and the original.
    output_img = output_img * (1.0 - alpha) + input_img * alpha;
    output_img.copyTo(output_cv->mat());
    *output = std::static_pointer_cast<Tensor>(output_cv);
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("AdjustContrast: " + std::string(e.what()));
  }
  return Status::OK();
}
1211
/// \brief Apply gamma correction to the image: out = gain * in ^ gamma.
/// Float input is corrected IN PLACE (clamped to [0, 1]) and the input tensor itself is
/// returned as the output; integer input is corrected through a 256-entry lookup table
/// applied to a copy, so the original input is untouched on that path.
/// \param input Image tensor, <H,W> or <...,H,W,C> with 1 or 3 channels.
/// \param[out] output Gamma-corrected image.
/// \param gamma Gamma exponent.
/// \param gain Multiplicative gain.
Status AdjustGamma(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float gamma, float gain) {
  try {
    int num_channels = 1;
    if (input->Rank() < kMinImageRank) {
      RETURN_STATUS_UNEXPECTED("AdjustGamma: input tensor is not in shape of <...,H,W,C> or <H,W>, got shape:" +
                               input->shape().ToString());
    }
    // For rank > 2, the channel count is the last dimension.
    if (input->Rank() > 2) {
      num_channels = static_cast<int>(input->shape()[-1]);
    }
    if (num_channels != 1 && num_channels != 3) {
      RETURN_STATUS_UNEXPECTED("AdjustGamma: channel of input image should be 1 or 3, but got: " +
                               std::to_string(num_channels));
    }
    if (input->type().IsFloat()) {
      // Float path: mutate the input tensor element-wise and hand it back as the output.
      for (auto itr = input->begin<float>(); itr != input->end<float>(); itr++) {
        *itr = pow((*itr) * gain, gamma);
        // clamp to the valid float-image range [0, 1]
        *itr = std::min(std::max((*itr), 0.0f), 1.0f);
      }
      *output = input;
    } else {
      // Integer path: operate on a copy so the caller's input stays intact.
      RETURN_IF_NOT_OK(Tensor::CreateFromTensor(input, output));
      std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(*output);
      if (!input_cv->mat().data) {
        RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustGamma: load image failed.");
      }
      cv::Mat input_img = input_cv->mat();
      // Precompute the gamma curve over the 8-bit value range.
      uchar LUT[256] = {};
      auto kMaxPixelValueFloat = static_cast<float>(kMaxBitValue);
      for (int i = 0; i <= kMaxBitValue; i++) {
        float f = static_cast<float>(i) / kMaxPixelValueFloat;
        f = pow(f, gamma);
        // scale back to [0, 255]; the (+1 - 1e-3) keeps 255 reachable without overflow
        LUT[i] =
          static_cast<uchar>(floor(std::min(f * (kMaxPixelValueFloat + 1.f - 1e-3f) * gain, kMaxPixelValueFloat)));
      }
      // Apply the LUT to every pixel (per channel for 3-channel images).
      if (input_img.channels() == 1) {
        cv::MatIterator_<uchar> it = input_img.begin<uchar>();
        cv::MatIterator_<uchar> it_end = input_img.end<uchar>();
        for (; it != it_end; ++it) {
          *it = LUT[(*it)];
        }
      } else {
        cv::MatIterator_<cv::Vec3b> it = input_img.begin<cv::Vec3b>();
        cv::MatIterator_<cv::Vec3b> it_end = input_img.end<cv::Vec3b>();
        for (; it != it_end; ++it) {
          (*it)[0] = LUT[(*it)[0]];
          (*it)[1] = LUT[(*it)[1]];
          (*it)[2] = LUT[(*it)[2]];
        }
      }
      *output = std::static_pointer_cast<Tensor>(input_cv);
    }
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("AdjustGamma: " + std::string(e.what()));
  }
  return Status::OK();
}
1269
/// \brief Adjust the sharpness of the input image by blending it with a smoothed copy:
/// result = alpha * input + (1 - alpha) * smoothed. alpha = 1 keeps the original image,
/// alpha = 0 yields the fully smoothed image, alpha > 1 sharpens.
/// \param input Image tensor in <H,W> or <H,W,C> format.
/// \param[out] output Sharpness-adjusted image, same shape as the input.
/// \param alpha Blend factor between the original and the smoothed image.
Status AdjustSharpness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float alpha) {
  try {
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    cv::Mat input_img = input_cv->mat();
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] Sharpness: load image failed.");
    }

    if (input_cv->Rank() == 1 || input_cv->mat().dims > 2) {
      RETURN_STATUS_UNEXPECTED("Sharpness: shape of input is not <H,W,C> or <H,W>, but got rank: " +
                               std::to_string(input_cv->Rank()));
    }

    /// creating a smoothing filter. 1, 1, 1,
    ///                              1, 5, 1,
    ///                              1, 1, 1
    /// (normalized by the kernel sum of 13)
    const float filterMid = 5.0;
    const float filterSum = 13.0;
    cv::Mat filter = cv::Mat(3, 3, CV_32F, cv::Scalar::all(1.0 / filterSum));
    filter.at<float>(1, 1) = filterMid / filterSum;

    /// applying filter on channels
    cv::Mat result = cv::Mat();
    cv::filter2D(input_img, result, -1, filter);

    auto height = static_cast<int>(input_cv->shape()[0]);
    auto width = static_cast<int>(input_cv->shape()[1]);

    /// restoring the edges: copy the original border rows/columns back so filter2D's
    /// border extrapolation does not affect the result
    input_img.row(0).copyTo(result.row(0));
    input_img.row(height - 1).copyTo(result.row(height - 1));
    input_img.col(0).copyTo(result.col(0));
    input_img.col(width - 1).copyTo(result.col(width - 1));

    /// blend based on alpha : (alpha_ *input_img) + ((1.0-alpha_) * result);
    cv::addWeighted(input_img, alpha, result, 1.0 - alpha, 0.0, result);

    std::shared_ptr<CVTensor> output_cv;
    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
    RETURN_UNEXPECTED_IF_NULL(output_cv);

    *output = std::static_pointer_cast<Tensor>(output_cv);
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("Sharpness: " + std::string(e.what()));
  }
  return Status::OK();
}
1318
AutoContrast(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float cutoff,const std::vector<uint32_t> & ignore)1319 Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float cutoff,
1320 const std::vector<uint32_t> &ignore) {
1321 try {
1322 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1323 if (!input_cv->mat().data) {
1324 RETURN_STATUS_UNEXPECTED("[Internal ERROR] AutoContrast: load image failed.");
1325 }
1326 if (input_cv->Rank() != kDefaultImageRank && input_cv->Rank() != kMinImageRank) {
1327 std::string err_msg = "AutoContrast: image rank should be 2 or 3, but got: " + std::to_string(input_cv->Rank());
1328 if (input_cv->Rank() == 1) {
1329 err_msg = err_msg + ", may need to do Decode operation first.";
1330 }
1331 RETURN_STATUS_UNEXPECTED("AutoContrast: image rank should be 2 or 3, but got: " +
1332 std::to_string(input_cv->Rank()));
1333 }
1334 // Reshape to extend dimension if rank is 2 for algorithm to work. then reshape output to be of rank 2 like input
1335 auto input_rank = input_cv->Rank();
1336 if (input_cv->Rank() == kMinImageRank) {
1337 RETURN_IF_NOT_OK(input_cv->ExpandDim(kMinImageRank));
1338 }
1339 // Get number of channels and image matrix
1340 std::size_t num_of_channels = input_cv->shape()[static_cast<size_t>(kChannelIndexHWC)];
1341 if (num_of_channels != kMinImageChannel && num_of_channels != kDefaultImageChannel) {
1342 RETURN_STATUS_UNEXPECTED("AutoContrast: channel of input image should be 1 or 3, but got: " +
1343 std::to_string(num_of_channels));
1344 }
1345 cv::Mat image = input_cv->mat();
1346 // Separate the image to channels
1347 std::vector<cv::Mat> planes(num_of_channels);
1348 cv::split(image, planes);
1349 cv::Mat b_hist, g_hist, r_hist;
1350 // Establish the number of bins and set variables for histogram
1351 int32_t hist_size = 256;
1352 int32_t channels = 0;
1353 float range[] = {0, 256};
1354 const float *hist_range[] = {range};
1355 bool uniform = true, accumulate = false;
1356 // Set up lookup table for LUT(Look up table algorithm)
1357 std::vector<int32_t> table;
1358 std::vector<cv::Mat> image_result;
1359 for (std::size_t layer = 0; layer < planes.size(); layer++) {
1360 // Reset lookup table
1361 table = std::vector<int32_t>{};
1362 // Calculate Histogram for channel
1363 cv::Mat hist;
1364 cv::calcHist(&planes[layer], 1, &channels, cv::Mat(), hist, 1, &hist_size, hist_range, uniform, accumulate);
1365 hist.convertTo(hist, CV_32SC1);
1366 std::vector<int32_t> hist_vec;
1367 hist.col(0).copyTo(hist_vec);
1368 // Ignore values in ignore
1369 for (const auto &item : ignore) {
1370 hist_vec[item] = 0;
1371 }
1372 int32_t hi = kMaxBitValue;
1373 int32_t lo = 0;
1374 RETURN_IF_NOT_OK(ComputeUpperAndLowerPercentiles(&hist_vec, cutoff, cutoff, &hi, &lo));
1375 if (hi <= lo) {
1376 for (int32_t i = 0; i < 256; i++) {
1377 table.push_back(i);
1378 }
1379 } else {
1380 const float scale = static_cast<float>(kMaxBitValue) / static_cast<float>(hi - lo);
1381 const float offset = static_cast<float>(-1 * lo) * scale;
1382 for (int32_t i = 0; i < 256; i++) {
1383 auto ix = static_cast<int32_t>(static_cast<float>(i) * scale + offset);
1384 ix = std::max(ix, 0);
1385 ix = std::min(ix, kMaxBitValue);
1386 table.push_back(ix);
1387 }
1388 }
1389 cv::Mat result_layer;
1390 cv::LUT(planes[layer], table, result_layer);
1391 image_result.push_back(result_layer);
1392 }
1393 cv::Mat result;
1394 cv::merge(image_result, result);
1395 result.convertTo(result, input_cv->mat().type());
1396 std::shared_ptr<CVTensor> output_cv;
1397 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
1398 (*output) = std::static_pointer_cast<Tensor>(output_cv);
1399 if (input_rank == kMinImageRank) {
1400 (*output)->Squeeze();
1401 }
1402 } catch (const cv::Exception &e) {
1403 RETURN_STATUS_UNEXPECTED("AutoContrast: " + std::string(e.what()));
1404 }
1405 return Status::OK();
1406 }
1407
AdjustSaturation(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float alpha)1408 Status AdjustSaturation(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float alpha) {
1409 try {
1410 RETURN_IF_NOT_OK(ValidateImage(input, "AdjustSaturation", {3, 5, 11}, {3}, {3}));
1411 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1412 cv::Mat input_img = input_cv->mat();
1413 if (!input_cv->mat().data) {
1414 RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustSaturation: load image failed.");
1415 }
1416 std::shared_ptr<CVTensor> output_cv;
1417 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1418 cv::Mat output_img = output_cv->mat();
1419 cv::Mat gray;
1420 cv::cvtColor(input_img, gray, CV_RGB2GRAY);
1421 cv::cvtColor(gray, output_img, CV_GRAY2RGB);
1422 output_img = output_img * (1.0 - alpha) + input_img * alpha;
1423 output_img.copyTo(output_cv->mat());
1424 *output = std::static_pointer_cast<Tensor>(output_cv);
1425 } catch (const cv::Exception &e) {
1426 RETURN_STATUS_UNEXPECTED("AdjustSaturation: " + std::string(e.what()));
1427 }
1428 return Status::OK();
1429 }
1430
AdjustHue(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float hue)1431 Status AdjustHue(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float hue) {
1432 try {
1433 RETURN_IF_NOT_OK(ValidateImage(input, "AdjustHue", {3, 11}, {3}, {3}));
1434 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1435 cv::Mat input_img = input_cv->mat();
1436 if (!input_cv->mat().data) {
1437 RETURN_STATUS_UNEXPECTED("[Internal ERROR] AdjustHue: load image failed.");
1438 }
1439 std::shared_ptr<CVTensor> output_cv;
1440 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1441 cv::Mat output_img;
1442 cv::cvtColor(input_img, output_img, CV_RGB2HSV_FULL);
1443 for (int x = 0; x < output_img.cols; x++) {
1444 for (int y = 0; y < output_img.rows; y++) {
1445 uint8_t cur1 = output_img.at<cv::Vec3b>(cv::Point(x, y))[0];
1446 uint8_t h_hue = 0;
1447 h_hue = static_cast<uint8_t>(hue * kMaxBitValue);
1448 cur1 += h_hue;
1449 output_img.at<cv::Vec3b>(cv::Point(x, y))[0] = cur1;
1450 }
1451 }
1452 cv::cvtColor(output_img, output_cv->mat(), CV_HSV2RGB_FULL);
1453 *output = std::static_pointer_cast<Tensor>(output_cv);
1454 } catch (const cv::Exception &e) {
1455 RETURN_STATUS_UNEXPECTED("AdjustHue: " + std::string(e.what()));
1456 }
1457 return Status::OK();
1458 }
1459
Equalize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1460 Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1461 try {
1462 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1463 if (!input_cv->mat().data) {
1464 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Equalize: load image failed.");
1465 }
1466 if (input_cv->Rank() != kDefaultImageRank && input_cv->Rank() != kMinImageRank) {
1467 RETURN_STATUS_UNEXPECTED("Equalize: image rank should be 2 or 3, but got: " + std::to_string(input_cv->Rank()));
1468 }
1469 // For greyscale images, extend dimension if rank is 2 and reshape output to be of rank 2.
1470 auto input_rank = input_cv->Rank();
1471 if (input_cv->Rank() == kMinImageRank) {
1472 RETURN_IF_NOT_OK(input_cv->ExpandDim(kMinImageRank));
1473 }
1474 // Get number of channels and image matrix
1475 std::size_t num_of_channels = input_cv->shape()[kChannelIndexHWC];
1476 if (num_of_channels != kMinImageChannel && num_of_channels != kDefaultImageChannel) {
1477 RETURN_STATUS_UNEXPECTED("Equalize: channel of input image should be 1 or 3, but got: " +
1478 std::to_string(num_of_channels));
1479 }
1480 cv::Mat image = input_cv->mat();
1481 // Separate the image to channels
1482 std::vector<cv::Mat> planes(num_of_channels);
1483 cv::split(image, planes);
1484 // Equalize each channel separately
1485 std::vector<cv::Mat> image_result;
1486 for (auto &plane : planes) {
1487 cv::Mat channel_result;
1488 cv::equalizeHist(plane, channel_result);
1489 image_result.push_back(channel_result);
1490 }
1491 cv::Mat result;
1492 cv::merge(image_result, result);
1493 std::shared_ptr<CVTensor> output_cv;
1494 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
1495 (*output) = std::static_pointer_cast<Tensor>(output_cv);
1496 if (input_rank == kMinImageRank) {
1497 (*output)->Squeeze();
1498 }
1499 } catch (const cv::Exception &e) {
1500 RETURN_STATUS_UNEXPECTED("Equalize: " + std::string(e.what()));
1501 }
1502 return Status::OK();
1503 }
1504
Invert(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1505 Status Invert(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1506 try {
1507 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1508 cv::Mat input_img = input_cv->mat();
1509 if (!input_cv->mat().data) {
1510 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Invert: load image failed.");
1511 }
1512
1513 std::shared_ptr<CVTensor> output_cv;
1514 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
1515 RETURN_UNEXPECTED_IF_NULL(output_cv);
1516
1517 constexpr auto kMaxPixel = 255.0;
1518 output_cv->mat() = cv::Scalar::all(kMaxPixel) - input_img;
1519 *output = std::static_pointer_cast<Tensor>(output_cv);
1520 } catch (const cv::Exception &e) {
1521 RETURN_STATUS_UNEXPECTED("Invert: " + std::string(e.what()));
1522 }
1523 return Status::OK();
1524 }
1525
Posterize(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,uint8_t bits)1526 Status Posterize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, uint8_t bits) {
1527 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1528 if (!input_cv->mat().data) {
1529 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Posterize: load image failed.");
1530 }
1531 if (input_cv->Rank() != 3 && input_cv->Rank() != 2) {
1532 RETURN_STATUS_UNEXPECTED("Posterize: input image is not in shape of <H,W,C> or <H,W>, but got rank: " +
1533 std::to_string(input_cv->Rank()));
1534 }
1535 uint8_t mask_value = ~((uint8_t)(1 << (8 - bits)) - 1);
1536 std::vector<uint8_t> lut_vector;
1537 for (std::size_t i = 0; i < 256; i++) {
1538 lut_vector.push_back(i & mask_value);
1539 }
1540 cv::Mat in_image = input_cv->mat();
1541
1542 cv::Mat output_img;
1543 CHECK_FAIL_RETURN_UNEXPECTED(in_image.depth() == CV_8U || in_image.depth() == CV_8S,
1544 "Posterize: data type of input image should be int8 or uint8, "
1545 "but got " +
1546 input_cv->type().ToString());
1547 cv::LUT(in_image, lut_vector, output_img);
1548 std::shared_ptr<CVTensor> result_tensor;
1549
1550 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, input_cv->Rank(), &result_tensor));
1551 *output = std::static_pointer_cast<Tensor>(result_tensor);
1552 return Status::OK();
1553 }
1554
ValidateCutOutImage(const std::shared_ptr<Tensor> & input,bool is_hwc,int32_t box_height,int32_t box_width)1555 Status ValidateCutOutImage(const std::shared_ptr<Tensor> &input, bool is_hwc, int32_t box_height, int32_t box_width) {
1556 uint32_t channel_index = is_hwc ? kChannelIndexHWC : kChannelIndexCHW;
1557 uint32_t height_index = is_hwc ? 0 : 1;
1558 uint32_t width_index = is_hwc ? 1 : 2;
1559 std::string right_shape = is_hwc ? "<H,W,C>" : "<C,H,W>";
1560 int64_t image_h = input->shape()[height_index];
1561 int64_t image_w = input->shape()[width_index];
1562
1563 CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() > channel_index, "CutOut: shape is invalid.");
1564
1565 if (input->Rank() != kDefaultImageRank) {
1566 RETURN_STATUS_UNEXPECTED("CutOut: image shape is not " + right_shape +
1567 ", but got rank: " + std::to_string(input->Rank()));
1568 }
1569
1570 if (box_height > image_h || box_width > image_w) {
1571 RETURN_STATUS_UNEXPECTED(
1572 "CutOut: box size is too large for image erase, got box height: " + std::to_string(box_height) +
1573 "box weight: " + std::to_string(box_width) + ", and image height: " + std::to_string(image_h) +
1574 ", image width: " + std::to_string(image_w));
1575 }
1576 return Status::OK();
1577 }
1578
// Return a raw byte pointer to the first element of the tensor's underlying buffer.
// Dispatches on the runtime DataType so the correctly-typed iterator is dereferenced;
// returns nullptr for element types with no numeric buffer (e.g. string/bytes).
uchar *GetPtr(const std::shared_ptr<Tensor> &tensor) {
  switch (tensor->type().value()) {
    case DataType::DE_BOOL:
      return reinterpret_cast<uchar *>(&(*tensor->begin<bool>()));
    case DataType::DE_INT8:
      return reinterpret_cast<uchar *>(&(*tensor->begin<int8_t>()));
    case DataType::DE_UINT8:
      return reinterpret_cast<uchar *>(&(*tensor->begin<uint8_t>()));
    case DataType::DE_INT16:
      return reinterpret_cast<uchar *>(&(*tensor->begin<int16_t>()));
    case DataType::DE_UINT16:
      return reinterpret_cast<uchar *>(&(*tensor->begin<uint16_t>()));
    case DataType::DE_INT32:
      return reinterpret_cast<uchar *>(&(*tensor->begin<int32_t>()));
    case DataType::DE_UINT32:
      return reinterpret_cast<uchar *>(&(*tensor->begin<uint32_t>()));
    case DataType::DE_INT64:
      return reinterpret_cast<uchar *>(&(*tensor->begin<int64_t>()));
    case DataType::DE_UINT64:
      return reinterpret_cast<uchar *>(&(*tensor->begin<uint64_t>()));
    case DataType::DE_FLOAT16:
      return reinterpret_cast<uchar *>(&(*tensor->begin<float16>()));
    case DataType::DE_FLOAT32:
      return reinterpret_cast<uchar *>(&(*tensor->begin<float>()));
    case DataType::DE_FLOAT64:
      return reinterpret_cast<uchar *>(&(*tensor->begin<double>()));
    default:
      // Unsupported element type - callers must handle a nullptr result.
      return nullptr;
  }
}
1609
// Erase `num_patches` rectangular patches of size box_height x box_width from a copy
// of the input image. `bounded` selects CutOut-style placement (whole box inside the
// image) vs random-erasing-style placement (box may be clipped at the borders).
// NOTE(review): `random_color`, `normal_distribution`, and the validated `fill_colors`
// are never read below - every patch is memset to 0. Confirm whether colored/random
// fill was intended here.
Status CutOut(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t box_height,
              int32_t box_width, int32_t num_patches, bool bounded, bool random_color, std::mt19937 *rnd,
              std::vector<uint8_t> fill_colors, bool is_hwc) {
  try {
    // Copy the input so erasing mutates the output tensor, not the caller's input.
    RETURN_IF_NOT_OK(Tensor::CreateFromTensor(input, output));
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(*output);
    RETURN_IF_NOT_OK(ValidateCutOutImage(input_cv, is_hwc, box_height, box_width));
    uint32_t channel_index = is_hwc ? kChannelIndexHWC : kChannelIndexCHW;
    uint32_t height_index = is_hwc ? 0 : 1;
    uint32_t width_index = is_hwc ? 1 : 2;
    uint64_t num_channels = input_cv->shape()[channel_index];
    int64_t image_h = input_cv->shape()[height_index];
    int64_t image_w = input_cv->shape()[width_index];
    uint8_t type_size = input_cv->type().SizeInBytes();
    // for random color
    std::normal_distribution<double> normal_distribution(0, 1);
    // Bounded: start positions keep the whole box inside the image.
    // Unbounded: start may be negative or run past the edge; the box is clipped below.
    std::uniform_int_distribution<int> height_distribution_bound(0, static_cast<int>(image_h) - box_height);
    std::uniform_int_distribution<int> width_distribution_bound(0, static_cast<int>(image_w) - box_width);
    std::uniform_int_distribution<int> height_distribution_unbound(0, static_cast<int>(image_h) + box_height);
    std::uniform_int_distribution<int> width_distribution_unbound(0, static_cast<int>(image_w) + box_width);

    if (fill_colors.empty()) {
      fill_colors = std::vector<uint8_t>(num_channels, 0);
    }
    CHECK_FAIL_RETURN_UNEXPECTED(fill_colors.size() == num_channels,
                                 "Number of fill colors (" + std::to_string(fill_colors.size()) +
                                   ") does not match the number of channels (" + std::to_string(num_channels) + ").");
    // core logic
    // update values based on random erasing or cutout
    for (int32_t i = 0; i < num_patches; i++) {
      // rows in cv mat refers to the height of the cropped box
      // we determine h_start and w_start using two different distributions as erasing is used by two different
      // image augmentations. The bounds are also different in each case.
      int32_t h_start = (bounded) ? height_distribution_bound(*rnd) : (height_distribution_unbound(*rnd) - box_height);
      int32_t w_start = (bounded) ? width_distribution_bound(*rnd) : (width_distribution_unbound(*rnd) - box_width);

      // Clip the box to the image on the far edge.
      int64_t max_width = (w_start + box_width > image_w) ? image_w : w_start + box_width;
      int64_t max_height = (h_start + box_height > image_h) ? image_h : h_start + box_height;
      // check for starting range >= 0, here the start range is checked after for cut out, for random erasing
      // w_start and h_start will never be less than 0.
      h_start = (h_start < 0) ? 0 : h_start;
      w_start = (w_start < 0) ? 0 : w_start;

      if (is_hwc) {
        // HWC layout: each erased row is one contiguous run of (channels * width) elements.
        uchar *buffer = GetPtr(input_cv);
        int64_t num_bytes = type_size * static_cast<int64_t>(num_channels) * (max_width - w_start);
        for (int x = h_start; x < max_height; x++) {
          auto ret = memset_s(buffer + (x * image_w + w_start) * num_channels * type_size, num_bytes, 0, num_bytes);
          if (ret != EOK) {
            RETURN_STATUS_UNEXPECTED("CutOut: memset_s failed for HWC scenario.");
          }
        }
      } else {
        // CHW layout: erase the same rectangle once per channel plane.
        int64_t num_bytes = type_size * (max_width - w_start);
        for (uint64_t c = 0; c < num_channels; c++) {
          uchar *buffer = GetPtr(input_cv) + (type_size * c * image_h * image_w);
          for (int x = h_start; x < max_height; x++) {
            auto ret = memset_s(buffer + (x * image_w + w_start) * type_size, num_bytes, 0, num_bytes);
            if (ret != EOK) {
              RETURN_STATUS_UNEXPECTED("CutOut: memset_s failed for CHW scenario.");
            }
          }
        }
      }
    }

    *output = std::static_pointer_cast<Tensor>(input_cv);
    return Status::OK();
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("CutOut: " + std::string(e.what()));
  }

  return Status::OK();
}
1684
Erase(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t top,int32_t left,int32_t height,int32_t width,const std::vector<float> & value,bool inplace)1685 Status Erase(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t top, int32_t left,
1686 int32_t height, int32_t width, const std::vector<float> &value, bool inplace) {
1687 try {
1688 std::vector<dsize_t> size;
1689 RETURN_IF_NOT_OK(ImageSize(input, &size));
1690 int64_t image_h = size[kHeightIndex];
1691 int64_t image_w = size[kWidthIndex];
1692 if (height > image_h || width > image_w) {
1693 RETURN_STATUS_UNEXPECTED(
1694 "Erase: box size is too large for image erase, got box height: " + std::to_string(height) +
1695 "box weight: " + std::to_string(width) + ", and image height: " + std::to_string(image_h) +
1696 ", image width: " + std::to_string(image_w));
1697 }
1698
1699 std::shared_ptr<CVTensor> input_cv;
1700 if (!inplace) {
1701 RETURN_IF_NOT_OK(Tensor::CreateFromTensor(input, output));
1702 input_cv = CVTensor::AsCVTensor(*output);
1703 } else {
1704 input_cv = CVTensor::AsCVTensor(input);
1705 }
1706 cv::Mat input_img = input_cv->mat();
1707
1708 int32_t h_start = top;
1709 int32_t w_start = left;
1710 h_start = (h_start < 0) ? 0 : h_start;
1711 w_start = (w_start < 0) ? 0 : w_start;
1712
1713 int32_t max_width = (w_start + width > image_w) ? static_cast<int32_t>(image_w) : w_start + width;
1714 int32_t max_height = (h_start + height > image_h) ? static_cast<int32_t>(image_h) : h_start + height;
1715 int32_t true_width = max_width - w_start;
1716 int32_t true_height = max_height - h_start;
1717
1718 float fill_r = value[kRIndex];
1719 float fill_g = value[kRIndex];
1720 float fill_b = value[kRIndex];
1721 const size_t kMaxFillValuesSize = 3;
1722 if (value.size() == kMaxFillValuesSize) {
1723 fill_r = value[kRIndex];
1724 fill_g = value[kGIndex];
1725 fill_b = value[kBIndex];
1726 }
1727
1728 cv::Rect idx = cv::Rect(w_start, h_start, true_width, true_height);
1729 cv::Scalar fill_color = cv::Scalar(fill_r, fill_g, fill_b);
1730 (void)input_img(idx).setTo(fill_color);
1731
1732 if (!inplace) {
1733 *output = std::static_pointer_cast<Tensor>(input_cv);
1734 } else {
1735 std::shared_ptr<CVTensor> output_cv;
1736 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_img, input_cv->Rank(), &output_cv));
1737 *output = std::static_pointer_cast<Tensor>(output_cv);
1738 }
1739
1740 return Status::OK();
1741 } catch (const cv::Exception &e) {
1742 RETURN_STATUS_UNEXPECTED("Erase: " + std::string(e.what()));
1743 }
1744
1745 return Status::OK();
1746 }
1747
Pad(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const int32_t & pad_top,const int32_t & pad_bottom,const int32_t & pad_left,const int32_t & pad_right,const BorderType & border_types,uint8_t fill_r,uint8_t fill_g,uint8_t fill_b)1748 Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top,
1749 const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types,
1750 uint8_t fill_r, uint8_t fill_g, uint8_t fill_b) {
1751 try {
1752 RETURN_IF_NOT_OK(ValidateImage(input, "Pad", {1, 2, 3, 4, 5, 6, 10, 11, 12}, {2, 3}, {1, 3}));
1753
1754 // input image
1755 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1756
1757 if (!input_cv->mat().data) {
1758 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Pad: load image failed.");
1759 }
1760
1761 // get the border type in openCV
1762 auto b_type = GetCVBorderType(border_types);
1763 // output image
1764 cv::Mat out_image;
1765 if (b_type == cv::BORDER_CONSTANT) {
1766 cv::Scalar fill_color = cv::Scalar(fill_r, fill_g, fill_b);
1767 cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type, fill_color);
1768 } else {
1769 cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type);
1770 }
1771 std::shared_ptr<CVTensor> output_cv;
1772 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_image, input_cv->Rank(), &output_cv));
1773 // pad the dimension if shape information is only 2 dimensional, this is grayscale
1774 if (input_cv->Rank() == kDefaultImageRank && input_cv->shape()[kChannelIndexHWC] == kMinImageChannel &&
1775 output_cv->Rank() == kMinImageRank) {
1776 RETURN_IF_NOT_OK(output_cv->ExpandDim(kChannelIndexHWC));
1777 }
1778 *output = std::static_pointer_cast<Tensor>(output_cv);
1779 return Status::OK();
1780 } catch (const cv::Exception &e) {
1781 RETURN_STATUS_UNEXPECTED("Pad: " + std::string(e.what()));
1782 }
1783 }
1784
Perspective(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,const std::vector<std::vector<int32_t>> & start_points,const std::vector<std::vector<int32_t>> & end_points,InterpolationMode interpolation)1785 Status Perspective(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
1786 const std::vector<std::vector<int32_t>> &start_points,
1787 const std::vector<std::vector<int32_t>> &end_points, InterpolationMode interpolation) {
1788 try {
1789 RETURN_IF_NOT_OK(ValidateImage(input, "Perspective", {1, 2, 3, 4, 5, 6, 10, 11, 12}, {2, 3}));
1790 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1791 if (!input_cv->mat().data) {
1792 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Perspective: load image failed.");
1793 }
1794 const int kListSize = 4;
1795 // Get Point
1796 cv::Point2f cv_src_point[kListSize];
1797 cv::Point2f cv_dst_point[kListSize];
1798 for (int i = 0; i < kListSize; i++) {
1799 cv_src_point[i] = cv::Point2f(static_cast<float>(start_points[i][0]), static_cast<float>(start_points[i][1]));
1800 cv_dst_point[i] = cv::Point2f(static_cast<float>(end_points[i][0]), static_cast<float>(end_points[i][1]));
1801 }
1802
1803 // Perspective Operation
1804 std::shared_ptr<CVTensor> output_cv;
1805 cv::Mat M = cv::getPerspectiveTransform(cv_src_point, cv_dst_point, cv::DECOMP_LU);
1806 cv::Mat src_img = input_cv->mat();
1807
1808 cv::Mat dst_img;
1809 cv::warpPerspective(src_img, dst_img, M, src_img.size(), GetCVInterpolationMode(interpolation));
1810 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(dst_img, input_cv->Rank(), &output_cv));
1811 *output = std::static_pointer_cast<Tensor>(output_cv);
1812 return Status::OK();
1813 } catch (const cv::Exception &e) {
1814 RETURN_STATUS_UNEXPECTED("Perspective: " + std::string(e.what()));
1815 }
1816 }
1817
// Add PCA-based color noise to a 3-channel <H,W,C> image in place (on a copy of the
// input). rnd_r/rnd_g/rnd_b are the per-call random coefficients projected through
// the matrix below; the constants presumably encode eigenvalue-scaled eigenvectors
// of an RGB covariance (AlexNet-style lighting) - TODO(review): confirm their source.
Status RandomLighting(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rnd_r, float rnd_g,
                      float rnd_b) {
  try {
    // Copy input first so the per-pixel mutation below does not touch the caller's tensor.
    RETURN_IF_NOT_OK(Tensor::CreateFromTensor(input, output));
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(*output);
    cv::Mat input_img = input_cv->mat();

    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED(
        "RandomLighting: Cannot convert from OpenCV type, unknown "
        "CV type. Currently supported data type: [int8, uint8, int16, uint16, "
        "int32, float16, float32, float64].");
    }

    if (input_cv->Rank() != kDefaultImageRank || input_cv->shape()[kChannelIndexHWC] != kDefaultImageChannel) {
      RETURN_STATUS_UNEXPECTED(
        "RandomLighting: input tensor is not in shape of <H,W,C> or channel is not 3, got rank: " +
        std::to_string(input_cv->Rank()) + ", and channel: " + std::to_string(input_cv->shape()[kChannelIndexHWC]));
    }
    auto input_type = input->type();
    CHECK_FAIL_RETURN_UNEXPECTED(input_type != DataType::DE_UINT32 && input_type != DataType::DE_UINT64 &&
                                   input_type != DataType::DE_INT64 && !input_type.IsString(),
                                 "RandomLighting: invalid tensor type of uint32, int64, uint64, string or bytes.");

    std::vector<std::vector<float>> eig = {{55.46 * -0.5675, 4.794 * 0.7192, 1.148 * 0.4009},
                                           {55.46 * -0.5808, 4.794 * -0.0045, 1.148 * -0.8140},
                                           {55.46 * -0.5836, 4.794 * -0.6948, 1.148 * 0.4203}};

    // Project the random coefficients through the matrix to get one offset per channel.
    float pca_r = eig[0][0] * rnd_r + eig[0][1] * rnd_g + eig[0][2] * rnd_b;
    float pca_g = eig[1][0] * rnd_r + eig[1][1] * rnd_g + eig[1][2] * rnd_b;
    float pca_b = eig[2][0] * rnd_r + eig[2][1] * rnd_g + eig[2][2] * rnd_b;
    for (int row = 0; row < input_img.rows; row++) {
      for (int col = 0; col < input_img.cols; col++) {
        // Read channels 0/1/2, add the per-channel offset, and write back via
        // kRIndex/kGIndex/kBIndex; saturate_cast clamps results into the uchar range.
        auto r = static_cast<float>(input_img.at<cv::Vec3b>(row, col)[0]);
        auto g = static_cast<float>(input_img.at<cv::Vec3b>(row, col)[1]);
        auto b = static_cast<float>(input_img.at<cv::Vec3b>(row, col)[2]);
        input_img.at<cv::Vec3b>(row, col)[kRIndex] = cv::saturate_cast<uchar>(r + pca_r);
        input_img.at<cv::Vec3b>(row, col)[kGIndex] = cv::saturate_cast<uchar>(g + pca_g);
        input_img.at<cv::Vec3b>(row, col)[kBIndex] = cv::saturate_cast<uchar>(b + pca_b);
      }
    }

    *output = std::static_pointer_cast<Tensor>(input_cv);
    return Status::OK();
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("RandomLighting: " + std::string(e.what()));
  }
}
1866
RgbaToRgb(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1867 Status RgbaToRgb(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1868 try {
1869 RETURN_IF_NOT_OK(ValidateImage(input, "RgbaToRgb", {3, 5, 11}));
1870 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1871 if (input_cv->shape().Size() != kDefaultImageChannel || input_cv->shape()[kChannelIndexHWC] != kMaxImageChannel) {
1872 std::string err_msg =
1873 "RgbaToRgb: rank of image is not: " + std::to_string(kDefaultImageChannel) +
1874 ", but got: " + std::to_string(input_cv->shape().Size()) +
1875 ", or channels of image should be 4, but got: " + std::to_string(input_cv->shape()[kChannelIndexHWC]);
1876 RETURN_STATUS_UNEXPECTED(err_msg);
1877 }
1878 TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1], 3});
1879 std::shared_ptr<CVTensor> output_cv;
1880 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
1881 cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGBA2RGB));
1882 *output = std::static_pointer_cast<Tensor>(output_cv);
1883 return Status::OK();
1884 } catch (const cv::Exception &e) {
1885 RETURN_STATUS_UNEXPECTED("RgbaToRgb: " + std::string(e.what()));
1886 }
1887 }
1888
RgbaToBgr(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1889 Status RgbaToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1890 try {
1891 RETURN_IF_NOT_OK(ValidateImage(input, "RgbaToBgr", {3, 5, 11}));
1892 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1893 if (input_cv->shape().Size() != kDefaultImageChannel || input_cv->shape()[kChannelIndexHWC] != kMaxImageChannel) {
1894 std::string err_msg =
1895 "RgbaToBgr: rank of image is not: " + std::to_string(kDefaultImageChannel) +
1896 ", but got: " + std::to_string(input_cv->shape().Size()) +
1897 ", or channels of image should be 4, but got: " + std::to_string(input_cv->shape()[kChannelIndexHWC]);
1898 RETURN_STATUS_UNEXPECTED(err_msg);
1899 }
1900 TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1], 3});
1901 std::shared_ptr<CVTensor> output_cv;
1902 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
1903 cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGBA2BGR));
1904 *output = std::static_pointer_cast<Tensor>(output_cv);
1905 return Status::OK();
1906 } catch (const cv::Exception &e) {
1907 RETURN_STATUS_UNEXPECTED("RgbaToBgr: " + std::string(e.what()));
1908 }
1909 }
1910
RgbToBgr(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1911 Status RgbToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1912 try {
1913 RETURN_IF_NOT_OK(ValidateImage(input, "RgbToBgr", {3, 4, 5, 6, 10, 11, 12}));
1914 auto input_type = input->type();
1915 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1916 if (!input_cv->mat().data) {
1917 RETURN_STATUS_UNEXPECTED("[Internal ERROR] RgbToBgr: load image failed.");
1918 }
1919 if (input_cv->Rank() != kDefaultImageRank || input_cv->shape()[kChannelIndexHWC] != kDefaultImageChannel) {
1920 RETURN_STATUS_UNEXPECTED("RgbToBgr: input tensor is not in shape of <H,W,C> or channel is not 3, got rank: " +
1921 std::to_string(input_cv->Rank()) +
1922 ", and channel: " + std::to_string(input_cv->shape()[2]));
1923 }
1924
1925 cv::Mat image = input_cv->mat().clone();
1926 if (input_type == DataType::DE_FLOAT16 || input_type == DataType::DE_INT16 || input_type == DataType::DE_UINT16) {
1927 for (int i = 0; i < input_cv->mat().rows; ++i) {
1928 auto *p1 = input_cv->mat().ptr<cv::Vec3s>(i);
1929 auto *p2 = image.ptr<cv::Vec3s>(i);
1930 for (int j = 0; j < input_cv->mat().cols; ++j) {
1931 p2[j][kBIndex] = p1[j][kRIndex];
1932 p2[j][kGIndex] = p1[j][kGIndex];
1933 p2[j][kRIndex] = p1[j][kBIndex];
1934 }
1935 }
1936 } else if (input_type == DataType::DE_FLOAT32 || input_type == DataType::DE_INT32) {
1937 for (int i = 0; i < input_cv->mat().rows; ++i) {
1938 auto *p1 = input_cv->mat().ptr<cv::Vec3f>(i);
1939 auto *p2 = image.ptr<cv::Vec3f>(i);
1940 for (int j = 0; j < input_cv->mat().cols; ++j) {
1941 p2[j][kBIndex] = p1[j][kRIndex];
1942 p2[j][kGIndex] = p1[j][kGIndex];
1943 p2[j][kRIndex] = p1[j][kBIndex];
1944 }
1945 }
1946 } else if (input_type == DataType::DE_FLOAT64) {
1947 for (int i = 0; i < input_cv->mat().rows; ++i) {
1948 auto *p1 = input_cv->mat().ptr<cv::Vec3d>(i);
1949 auto *p2 = image.ptr<cv::Vec3d>(i);
1950 for (int j = 0; j < input_cv->mat().cols; ++j) {
1951 p2[j][kBIndex] = p1[j][kRIndex];
1952 p2[j][kGIndex] = p1[j][kGIndex];
1953 p2[j][kRIndex] = p1[j][kBIndex];
1954 }
1955 }
1956 } else {
1957 cv::cvtColor(input_cv->mat(), image, cv::COLOR_RGB2BGR);
1958 }
1959
1960 std::shared_ptr<CVTensor> output_cv;
1961 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(image, input_cv->Rank(), &output_cv));
1962
1963 *output = std::static_pointer_cast<Tensor>(output_cv);
1964 return Status::OK();
1965 } catch (const cv::Exception &e) {
1966 RETURN_STATUS_UNEXPECTED("RgbToBgr: " + std::string(e.what()));
1967 }
1968 }
1969
RgbToGray(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)1970 Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
1971 try {
1972 RETURN_IF_NOT_OK(ValidateImage(input, "RgbToGray", {3, 5, 11}));
1973 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
1974 if (input_cv->Rank() != kDefaultImageRank || input_cv->shape()[kChannelIndexHWC] != kDefaultImageChannel) {
1975 RETURN_STATUS_UNEXPECTED("RgbToGray: image shape is not <H,W,C> or channel is not 3, got rank: " +
1976 std::to_string(input_cv->Rank()) + ", and shape: " + input_cv->shape().ToString());
1977 }
1978 TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1]});
1979 std::shared_ptr<CVTensor> output_cv;
1980 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
1981 cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGB2GRAY));
1982 *output = std::static_pointer_cast<Tensor>(output_cv);
1983 return Status::OK();
1984 } catch (const cv::Exception &e) {
1985 RETURN_STATUS_UNEXPECTED("RgbToGray: " + std::string(e.what()));
1986 }
1987 }
1988
// Read only the JPEG header to obtain the decoded output width and height without
// decompressing the pixel data.
Status GetJpegImageInfo(const std::shared_ptr<Tensor> &input, int *img_width, int *img_height) {
  struct jpeg_decompress_struct cinfo {};
  struct JpegErrorManagerCustom jerr {};
  cinfo.err = jpeg_std_error(&jerr.pub);
  // Route libjpeg fatal errors to the custom handler (presumably it throws
  // std::runtime_error instead of calling exit() - see JpegErrorExitCustom; confirm).
  jerr.pub.error_exit = JpegErrorExitCustom;
  try {
    jpeg_create_decompress(&cinfo);
    if (input->type() == DataType::DE_BYTES) {
      // Bytes tensors expose the payload via the strings buffer plus a length.
      uint32_t len = 0;
      RETURN_IF_NOT_OK(input->GetStringLength(&len));
      JpegSetSource(&cinfo, input->GetStringsBuffer(), len);
    } else {
      JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes());
    }
    (void)jpeg_read_header(&cinfo, TRUE);
    jpeg_calc_output_dimensions(&cinfo);
    RETURN_IF_NOT_OK(CheckJpegExit(&cinfo));
  } catch (std::runtime_error &e) {
    // Release libjpeg state even on decode errors before propagating the failure.
    jpeg_destroy_decompress(&cinfo);
    RETURN_STATUS_UNEXPECTED(e.what());
  }
  *img_height = static_cast<int>(cinfo.output_height);
  *img_width = static_cast<int>(cinfo.output_width);
  jpeg_destroy_decompress(&cinfo);
  return Status::OK();
}
2015
// Build the 2x3 affine transformation matrix (stored flat as 6 floats
// [a, b, tx, c, d, ty]) for a rotation of `degrees` about the image center,
// combined with translation (pixels), uniform scale, and (x, y) shear angles
// in degrees.
Status GetAffineMatrix(const std::shared_ptr<Tensor> &input, std::vector<float_t> *matrix, float_t degrees,
                       const std::vector<float_t> &translation, float_t scale, const std::vector<float_t> &shear) {
  CHECK_FAIL_RETURN_UNEXPECTED(translation.size() >= 2, "AffineOp::Compute translation_ size should >= 2");
  float_t translation_x = translation[0];
  float_t translation_y = translation[1];
  float_t degrees_tmp = 0.0;
  RETURN_IF_NOT_OK(DegreesToRadians(degrees, &degrees_tmp));
  CHECK_FAIL_RETURN_UNEXPECTED(shear.size() >= 2, "AffineOp::Compute shear_ size should >= 2");
  float_t shear_x = shear[0];
  float_t shear_y = shear[1];
  RETURN_IF_NOT_OK(DegreesToRadians(shear_x, &shear_x));
  // The y-shear sign is flipped before conversion — presumably to match the
  // image coordinate convention (y axis pointing down); verify against AffineOp.
  RETURN_IF_NOT_OK(DegreesToRadians(-1 * shear_y, &shear_y));

  // Apply Affine Transformation
  // T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1]
  // C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1]
  // RSS is rotation with scale and shear matrix
  // RSS(a, s, (sx, sy)) =
  //  = R(a) * S(s) * SHy(sy) * SHx(sx)
  //  = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(x)/cos(y) - sin(a)), 0 ]
  //    [ s*sin(a - sy)/cos(sy), s*(-sin(a - sy)*tan(x)/cos(y) + cos(a)), 0 ]
  //    [ 0                    , 0                                      , 1 ]
  //
  // where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears:
  // SHx(s) = [1, -tan(s)]  and  SHy(s) = [1      , 0]
  //          [0, 1      ]              [-tan(s), 1]
  //
  // Thus, the affine matrix is M = T * C * RSS * C^-1

  // image is hwc, rows = shape()[0]; center is ((W-1)/2, (H-1)/2)
  float_t cx = static_cast<float_t>(input->shape()[1] - 1) / 2.0F;
  float_t cy = static_cast<float_t>(input->shape()[0] - 1) / 2.0F;

  // Guard the division below; cos(shear_y) == 0 would make RSS singular.
  CHECK_FAIL_RETURN_UNEXPECTED(cos(shear_y) != 0.0, "AffineOp: cos(shear_y) should not be zero.");

  // Calculate RSS (top two rows; the translation column is filled below)
  *matrix = std::vector<float_t>{
    static_cast<float>(scale * cos(degrees_tmp + shear_y) / cos(shear_y)),
    static_cast<float>(scale * (-1 * cos(degrees_tmp + shear_y) * tan(shear_x) / cos(shear_y) - sin(degrees_tmp))),
    0,
    static_cast<float>(scale * sin(degrees_tmp + shear_y) / cos(shear_y)),
    static_cast<float>(scale * (-1 * sin(degrees_tmp + shear_y) * tan(shear_x) / cos(shear_y) + cos(degrees_tmp))),
    0};
  // Compute T * C * RSS * C^-1: fold the center-keeping and translation terms
  // into the last column of the 2x3 matrix.
  (*matrix)[2] = (1 - (*matrix)[0]) * cx - (*matrix)[1] * cy + translation_x;
  (*matrix)[5] = (1 - (*matrix)[4]) * cy - (*matrix)[3] * cx + translation_y;
  return Status::OK();
}
2065
Affine(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float_t degrees,const std::vector<float_t> & translation,float_t scale,const std::vector<float_t> & shear,InterpolationMode interpolation,const std::vector<uint8_t> & fill_value)2066 Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float_t degrees,
2067 const std::vector<float_t> &translation, float_t scale, const std::vector<float_t> &shear,
2068 InterpolationMode interpolation, const std::vector<uint8_t> &fill_value) {
2069 try {
2070 RETURN_IF_NOT_OK(ValidateImageRank("Affine", input->Rank()));
2071 dsize_t channel = 1;
2072 RETURN_IF_NOT_OK(ImageNumChannels(input, &channel));
2073 CHECK_FAIL_RETURN_UNEXPECTED(channel <= kMaxImageChannel || interpolation != InterpolationMode::kCubic,
2074 "Affine: interpolation can not be CUBIC when image channel is greater than 4.");
2075 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
2076 if (!input_cv->mat().data) {
2077 RETURN_STATUS_UNEXPECTED("[Internal ERROR] Affine: load image failed.");
2078 }
2079
2080 std::vector<float_t> matrix;
2081 RETURN_IF_NOT_OK(GetAffineMatrix(input_cv, &matrix, degrees, translation, scale, shear));
2082 cv::Mat affine_mat(matrix);
2083 affine_mat = affine_mat.reshape(1, {2, 3});
2084
2085 std::shared_ptr<CVTensor> output_cv;
2086 RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
2087 RETURN_UNEXPECTED_IF_NULL(output_cv);
2088 cv::warpAffine(input_cv->mat(), output_cv->mat(), affine_mat, input_cv->mat().size(),
2089 GetCVInterpolationMode(interpolation), cv::BORDER_CONSTANT,
2090 cv::Scalar(fill_value[kRIndex], fill_value[kGIndex], fill_value[kBIndex]));
2091 (*output) = std::static_pointer_cast<Tensor>(output_cv);
2092 return Status::OK();
2093 } catch (const cv::Exception &e) {
2094 RETURN_STATUS_UNEXPECTED("Affine: " + std::string(e.what()));
2095 }
2096 }
2097
GaussianBlur(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,int32_t kernel_x,int32_t kernel_y,float sigma_x,float sigma_y)2098 Status GaussianBlur(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t kernel_x,
2099 int32_t kernel_y, float sigma_x, float sigma_y) {
2100 try {
2101 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
2102 if (input_cv->mat().data == nullptr) {
2103 RETURN_STATUS_UNEXPECTED("[Internal ERROR] GaussianBlur: load image failed.");
2104 }
2105 cv::Mat output_cv_mat;
2106 cv::GaussianBlur(input_cv->mat(), output_cv_mat, cv::Size(kernel_x, kernel_y), static_cast<double>(sigma_x),
2107 static_cast<double>(sigma_y));
2108 std::shared_ptr<CVTensor> output_cv;
2109 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_cv_mat, input_cv->Rank(), &output_cv));
2110 (*output) = std::static_pointer_cast<Tensor>(output_cv);
2111 return Status::OK();
2112 } catch (const cv::Exception &e) {
2113 RETURN_STATUS_UNEXPECTED("GaussianBlur: " + std::string(e.what()));
2114 }
2115 }
2116
ComputePatchSize(const std::shared_ptr<CVTensor> & input_cv,std::shared_ptr<std::pair<int32_t,int32_t>> * patch_size,int32_t num_height,int32_t num_width,SliceMode slice_mode)2117 Status ComputePatchSize(const std::shared_ptr<CVTensor> &input_cv,
2118 std::shared_ptr<std::pair<int32_t, int32_t>> *patch_size, int32_t num_height, int32_t num_width,
2119 SliceMode slice_mode) {
2120 if (input_cv->mat().data == nullptr) {
2121 RETURN_STATUS_UNEXPECTED("[Internal ERROR] SlicePatches: Tensor could not convert to CV Tensor.");
2122 }
2123 RETURN_IF_NOT_OK(ValidateImageRank("Affine", input_cv->Rank()));
2124
2125 cv::Mat in_img = input_cv->mat();
2126 cv::Size s = in_img.size();
2127 if (num_height == 0 || num_height > s.height) {
2128 RETURN_STATUS_UNEXPECTED(
2129 "SlicePatches: The number of patches on height axis equals 0 or is greater than height, got number of patches:" +
2130 std::to_string(num_height));
2131 }
2132 if (num_width == 0 || num_width > s.width) {
2133 RETURN_STATUS_UNEXPECTED(
2134 "SlicePatches: The number of patches on width axis equals 0 or is greater than width, got number of patches:" +
2135 std::to_string(num_width));
2136 }
2137 int32_t patch_h = s.height / num_height;
2138 if (s.height % num_height != 0) {
2139 if (slice_mode == SliceMode::kPad) {
2140 patch_h += 1; // patch_h * num_height - s.height
2141 }
2142 }
2143 int32_t patch_w = s.width / num_width;
2144 if (s.width % num_width != 0) {
2145 if (slice_mode == SliceMode::kPad) {
2146 patch_w += 1; // patch_w * num_width - s.width
2147 }
2148 }
2149 (*patch_size)->first = patch_h;
2150 (*patch_size)->second = patch_w;
2151 return Status::OK();
2152 }
2153
// Slice an image into num_height x num_width patches, appended to *output in
// row-major order. In kPad mode the image is first padded on the right/bottom
// with fill_value so the patches tile it exactly; otherwise remainder
// rows/columns (when the size is not divisible) are not covered by any patch.
Status SlicePatches(const std::shared_ptr<Tensor> &input, std::vector<std::shared_ptr<Tensor>> *output,
                    int32_t num_height, int32_t num_width, SliceMode slice_mode, uint8_t fill_value) {
  // 1 x 1 slicing is the identity: hand back the input tensor itself.
  if (num_height == DEFAULT_NUM_HEIGHT && num_width == DEFAULT_NUM_WIDTH) {
    (*output).push_back(input);
    return Status::OK();
  }

  auto patch_size = std::make_shared<std::pair<int32_t, int32_t>>(0, 0);
  int32_t patch_h = 0;
  int32_t patch_w = 0;

  std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
  // Also validates the image rank and the patch counts against the image size.
  RETURN_IF_NOT_OK(ComputePatchSize(input_cv, &patch_size, num_height, num_width, slice_mode));
  std::tie(patch_h, patch_w) = *patch_size;

  cv::Mat in_img = input_cv->mat();
  cv::Size s = in_img.size();
  try {
    cv::Mat out_img;
    if (slice_mode == SliceMode::kPad) {  // padding on right and bottom directions
      auto padding_h = patch_h * num_height - s.height;
      auto padding_w = patch_w * num_width - s.width;
      out_img = cv::Mat(s.height + padding_h, s.width + padding_w, in_img.type(), cv::Scalar::all(fill_value));
      in_img.copyTo(out_img(cv::Rect(0, 0, s.width, s.height)));
    } else {
      out_img = in_img;
    }
    // Each patch is an ROI view into out_img handed to CreateFromMat
    // (presumably copied into the new tensor — verify it does not alias).
    for (int i = 0; i < num_height; ++i) {
      for (int j = 0; j < num_width; ++j) {
        std::shared_ptr<CVTensor> patch_cv;
        cv::Rect rect(j * patch_w, i * patch_h, patch_w, patch_h);
        cv::Mat patch(out_img(rect));
        RETURN_IF_NOT_OK(CVTensor::CreateFromMat(patch, input_cv->Rank(), &patch_cv));
        (*output).push_back(std::static_pointer_cast<Tensor>(patch_cv));
      }
    }
    return Status::OK();
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("SlicePatches: " + std::string(e.what()));
  }
}
2195
// Solarize: invert (255 - v) every pixel whose value lies inside
// [threshold[0], threshold[1]]; pixels outside that range pass through
// unchanged. With threshold min == max, every pixel >= min is inverted.
Status Solarize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
                const std::vector<float> &threshold) {
  try {
    RETURN_IF_NOT_OK(ValidateImage(input, "Solarize", {1, 2, 3, 4, 5, 6, 11, 12}, {2, 3}, {1, 3}));
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    cv::Mat input_img = input_cv->mat();
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("Solarize: load image failed.");
    }

    std::shared_ptr<CVTensor> mask_mat_tensor;
    std::shared_ptr<CVTensor> output_cv_tensor;
    // The mask starts as a copy of the input image (NOTE(review): assuming
    // CreateFromMat copies the data rather than aliasing input_img — verify).
    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_img, input_cv->Rank(), &mask_mat_tensor));

    RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv_tensor));
    RETURN_UNEXPECTED_IF_NULL(mask_mat_tensor);
    RETURN_UNEXPECTED_IF_NULL(output_cv_tensor);

    // threshold is expected to hold {min, max}; presumably validated upstream.
    auto threshold_min = threshold[0], threshold_max = threshold[1];

    // Zero the mask wherever the pixel lies OUTSIDE the threshold range, so
    // the mask keeps original values only inside the range.
    if (threshold_min == threshold_max) {
      mask_mat_tensor->mat().setTo(0, ~(input_cv->mat() >= threshold_min));
    } else {
      mask_mat_tensor->mat().setTo(0, ~((input_cv->mat() >= threshold_min) & (input_cv->mat() <= threshold_max)));
    }

    // solarize desired portion
    const float max_size = 255.f;
    // In-range pixels become 255 - v; out-of-range positions are 255 here and
    // are overwritten with the original values by the copyTo calls below.
    output_cv_tensor->mat() = cv::Scalar::all(max_size) - mask_mat_tensor->mat();
    input_cv->mat().copyTo(output_cv_tensor->mat(), input_cv->mat() < threshold_min);
    if (threshold_min < threshold_max) {
      input_cv->mat().copyTo(output_cv_tensor->mat(), input_cv->mat() > threshold_max);
    }

    *output = std::static_pointer_cast<Tensor>(output_cv_tensor);
  }

  catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("Solarize: " + std::string(e.what()));
  }
  return Status::OK();
}
2238
// Convert an HWC image (a 2D input is treated as HW with one channel) into a
// CHW float tensor rescaled by 1/kMaxBitValue, then cast to `data_type` if it
// differs from float32.
Status ToTensor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type) {
  try {
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
    if (!input_cv->mat().data) {
      RETURN_STATUS_UNEXPECTED("[Internal ERROR] ToTensor: load image failed.");
    }
    if (input_cv->Rank() == kMinImageRank) {
      // If input tensor is 2D, we assume we have HW dimensions
      RETURN_IF_NOT_OK(input_cv->ExpandDim(kMinImageRank));
    }
    // Guard the shape[kChannelIndexHWC] access below.
    CHECK_FAIL_RETURN_UNEXPECTED(
      input_cv->shape().Size() > kChannelIndexHWC,
      "ToTensor: rank of input data should be greater than: " + std::to_string(kChannelIndexHWC) +
        ", but got:" + std::to_string(input_cv->shape().Size()));
    int num_channels = static_cast<int>(input_cv->shape()[kChannelIndexHWC]);
    if (input_cv->shape().Size() != kDefaultImageRank) {
      RETURN_STATUS_UNEXPECTED("ToTensor: image shape should be <H,W,C>, but got rank: " +
                               std::to_string(input_cv->shape().Size()));
    }

    int height = static_cast<int>(input_cv->shape()[0]);
    int width = static_cast<int>(input_cv->shape()[1]);

    // OpenCv has a bug in extractChannel when the type is float16.
    // To avoid the segfault, we cast to float32 first.
    if (input_cv->type() == DataType(DataType::DE_FLOAT16)) {
      RETURN_IF_NOT_OK(TypeCast(input_cv, output, DataType(DataType::DE_FLOAT32)));
      input_cv = CVTensor::AsCVTensor(*output);
    }

    std::shared_ptr<CVTensor> output_cv;
    // Reshape from HCW to CHW
    RETURN_IF_NOT_OK(
      CVTensor::CreateEmpty(TensorShape{num_channels, height, width}, DataType(DataType::DE_FLOAT32), &output_cv));
    // Rescale tensor by dividing by 255
    const auto kMaxBitValueinFloat = static_cast<float>(kMaxBitValue);
    // Channel i of the HWC input becomes plane i of the CHW output, converted
    // to float32 and scaled in the same convertTo call.
    for (int i = 0; i < num_channels; ++i) {
      cv::Mat mat_t;
      cv::extractChannel(input_cv->mat(), mat_t, i);
      cv::Mat mat;
      RETURN_IF_NOT_OK(output_cv->MatAtIndex({i}, &mat));
      mat_t.convertTo(mat, CV_32F, 1 / kMaxBitValueinFloat, 0);
    }

    // Process tensor output according to desired output data type
    if (data_type != DataType(DataType::DE_FLOAT32)) {
      RETURN_IF_NOT_OK(TypeCast(output_cv, output, data_type));
    } else {
      *output = std::move(output_cv);
    }
    return Status::OK();
  } catch (const cv::Exception &e) {
    RETURN_STATUS_UNEXPECTED("ToTensor: " + std::string(e.what()));
  }
}
2294
2295 // round half to even
Round(float value)2296 float Round(float value) {
2297 const int32_t kEven = 2;
2298 float rnd = round(value);
2299 float rnd_l = floor(value);
2300 float rnd_h = ceil(value);
2301 if (value - rnd_l == kHalf) {
2302 if (common::IsDoubleEqual(fmod(rnd, kEven), 0.0)) {
2303 return rnd;
2304 } else if (value > 0) {
2305 return rnd_l;
2306 } else {
2307 return rnd_h;
2308 }
2309 }
2310 return rnd;
2311 }
2312
Linspace(float start,float end,int n,float scale,float offset,bool round)2313 std::vector<float> Linspace(float start, float end, int n, float scale, float offset, bool round) {
2314 std::vector<float> linear(n);
2315 float step = (n == 1) ? 0 : (end - start) / static_cast<float>(n - 1);
2316 for (size_t i = 0; i < linear.size(); ++i) {
2317 linear[i] = (start + static_cast<float>(i) * step) * scale + offset;
2318 if (round) {
2319 linear[i] = Round(linear[i]);
2320 }
2321 }
2322 return linear;
2323 }
2324
// Dispatch a single auto-augment style operation by name, applying it to the
// input with the given magnitude. Geometric ops use `interpolation` and
// `fill_value`; an unknown op_name returns an error status.
Status ApplyAugment(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::string &op_name,
                    float magnitude, InterpolationMode interpolation, const std::vector<uint8_t> &fill_value) {
  if (op_name == "ShearX") {
    // Convert magnitude (presumably a radian angle) to the degrees AffineOp
    // expects — confirm the magnitude unit against the caller.
    float_t shear = magnitude * 180.F / CV_PI;
    AffineOp affine(0.0, {0, 0}, 1.0, {shear, 0.0}, interpolation, fill_value);
    RETURN_IF_NOT_OK(affine.Compute(input, output));
  } else if (op_name == "ShearY") {
    float_t shear = magnitude * 180.F / CV_PI;
    AffineOp affine(0.0, {0, 0}, 1.0, {0.0, shear}, interpolation, fill_value);
    RETURN_IF_NOT_OK(affine.Compute(input, output));
  } else if (op_name == "TranslateX") {
    float_t translate = magnitude;
    AffineOp affine(0.0, {translate, 0}, 1.0, {0.0, 0.0}, interpolation, fill_value);
    RETURN_IF_NOT_OK(affine.Compute(input, output));
  } else if (op_name == "TranslateY") {
    float_t translate = magnitude;
    AffineOp affine(0.0, {0, translate}, 1.0, {0.0, 0.0}, interpolation, fill_value);
    RETURN_IF_NOT_OK(affine.Compute(input, output));
  } else if (op_name == "Rotate") {
    // NOTE(review): the fill values are passed in (R, B, G) index order here —
    // confirm this matches Rotate's trailing fill parameter order.
    RETURN_IF_NOT_OK(Rotate(input, output, {}, magnitude, interpolation, false, fill_value[kRIndex],
                            fill_value[kBIndex], fill_value[kGIndex]));
  } else if (op_name == "Brightness") {
    // Color ops interpret magnitude as a delta around the identity factor 1.
    RETURN_IF_NOT_OK(AdjustBrightness(input, output, 1 + magnitude));
  } else if (op_name == "Color") {
    RETURN_IF_NOT_OK(AdjustSaturation(input, output, 1 + magnitude));
  } else if (op_name == "Contrast") {
    RETURN_IF_NOT_OK(AdjustContrast(input, output, 1 + magnitude));
  } else if (op_name == "Sharpness") {
    SharpnessOp sharpness(1 + magnitude);
    RETURN_IF_NOT_OK(sharpness.Compute(input, output));
  } else if (op_name == "Posterize") {
    // Magnitude is the number of bits to keep.
    PosterizeOp posterize(static_cast<int>(magnitude));
    RETURN_IF_NOT_OK(posterize.Compute(input, output));
  } else if (op_name == "Solarize") {
    // Degenerate range {m, m}: every pixel >= m is inverted.
    RETURN_IF_NOT_OK(Solarize(input, output, {magnitude, magnitude}));
  } else if (op_name == "AutoContrast") {
    RETURN_IF_NOT_OK(AutoContrast(input, output, 0.0, {}));
  } else if (op_name == "Equalize") {
    RETURN_IF_NOT_OK(Equalize(input, output));
  } else if (op_name == "Identity") {
    // No-op: pass the input through.
    *output = std::static_pointer_cast<Tensor>(input);
  } else if (op_name == "Invert") {
    InvertOp invert;
    RETURN_IF_NOT_OK(invert.Compute(input, output));
  } else {
    RETURN_STATUS_UNEXPECTED("ApplyAugment: the provided operator " + op_name + " is not supported.");
  }
  return Status::OK();
}
2374
// Encode a UINT8 image (HW or HWC with 1 or 3 channels) into a JPEG byte
// stream with the given quality, returning it as a 1-D UINT8 tensor.
Status EncodeJpeg(const std::shared_ptr<Tensor> &image, std::shared_ptr<Tensor> *output, int quality) {
  RETURN_UNEXPECTED_IF_NULL(output);

  std::string err_msg;
  if (image->type() != DataType::DE_UINT8) {
    err_msg = "EncodeJpeg: The type of the image data should be UINT8, but got " + image->type().ToString() + ".";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }

  TensorShape shape = image->shape();
  int rank = static_cast<int>(shape.Rank());
  if (rank < kMinImageRank || rank > kDefaultImageRank) {
    err_msg = "EncodeJpeg: The image has invalid dimensions. It should have two or three dimensions, but got ";
    err_msg += std::to_string(rank) + " dimensions.";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  int channels;
  if (rank == kDefaultImageRank) {
    channels = static_cast<int>(shape[kMinImageRank]);
    if (channels != kMinImageChannel && channels != kDefaultImageChannel) {
      err_msg = "EncodeJpeg: The image has invalid channels. It should have 1 or 3 channels, but got ";
      err_msg += std::to_string(channels) + " channels.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
  } else {
    // A 2-D image is treated as single-channel (grayscale).
    channels = 1;
  }

  if (quality < kMinJpegQuality || quality > kMaxJpegQuality) {
    err_msg = "EncodeJpeg: Invalid quality " + std::to_string(quality) + ", should be in range of [" +
              std::to_string(kMinJpegQuality) + ", " + std::to_string(kMaxJpegQuality) + "].";

    RETURN_STATUS_UNEXPECTED(err_msg);
  }

  // Baseline encoding: progressive mode, Huffman optimization and restart
  // intervals are all explicitly disabled.
  std::vector<int> params = {cv::IMWRITE_JPEG_QUALITY, quality, cv::IMWRITE_JPEG_PROGRESSIVE, 0,
                             cv::IMWRITE_JPEG_OPTIMIZE, 0, cv::IMWRITE_JPEG_RST_INTERVAL, 0};

  std::vector<unsigned char> buffer;
  cv::Mat image_matrix;

  std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(image);
  image_matrix = input_cv->mat();
  if (!image_matrix.data) {
    RETURN_STATUS_UNEXPECTED("EncodeJpeg: Load the image tensor failed.");
  }

  if (channels == kMinImageChannel) {
    CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".JPEG", image_matrix, buffer, params),
                                 "EncodeJpeg: Failed to encode image.");
  } else {
    // The tensor stores RGB while OpenCV's encoder expects BGR channel order.
    cv::Mat image_bgr;
    cv::cvtColor(image_matrix, image_bgr, cv::COLOR_RGB2BGR);
    CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".JPEG", image_bgr, buffer, params),
                                 "EncodeJpeg: Failed to encode image.");
  }

  // Copy the encoded byte stream into a flat UINT8 tensor.
  TensorShape tensor_shape = TensorShape({(long int)buffer.size()});
  RETURN_IF_NOT_OK(Tensor::CreateFromMemory(tensor_shape, DataType(DataType::DE_UINT8), buffer.data(), output));

  return Status::OK();
}
2437
// Encode a UINT8 image (HW or HWC with 1 or 3 channels) into a PNG byte
// stream at the given compression level, returning it as a 1-D UINT8 tensor.
Status EncodePng(const std::shared_ptr<Tensor> &image, std::shared_ptr<Tensor> *output, int compression_level) {
  RETURN_UNEXPECTED_IF_NULL(output);

  std::string err_msg;
  if (image->type() != DataType::DE_UINT8) {
    err_msg = "EncodePng: The type of the image data should be UINT8, but got " + image->type().ToString() + ".";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }

  TensorShape shape = image->shape();
  int rank = static_cast<int>(shape.Rank());
  if (rank < kMinImageRank || rank > kDefaultImageRank) {
    err_msg = "EncodePng: The image has invalid dimensions. It should have two or three dimensions, but got ";
    err_msg += std::to_string(rank) + " dimensions.";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  int channels;
  if (rank == kDefaultImageRank) {
    channels = static_cast<int>(shape[kMinImageRank]);
    if (channels != kMinImageChannel && channels != kDefaultImageChannel) {
      err_msg = "EncodePng: The image has invalid channels. It should have 1 or 3 channels, but got ";
      err_msg += std::to_string(channels) + " channels.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
  } else {
    // A 2-D image is treated as single-channel (grayscale).
    channels = 1;
  }

  if (compression_level < kMinPngCompression || compression_level > kMaxPngCompression) {
    err_msg = "EncodePng: Invalid compression_level " + std::to_string(compression_level) +
              ", should be in range of [" + std::to_string(kMinPngCompression) + ", " +
              std::to_string(kMaxPngCompression) + "].";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }

  // RLE filtering strategy with the caller-selected compression level.
  std::vector<int> params = {cv::IMWRITE_PNG_COMPRESSION, compression_level, cv::IMWRITE_PNG_STRATEGY,
                             cv::IMWRITE_PNG_STRATEGY_RLE};
  std::vector<unsigned char> buffer;
  cv::Mat image_matrix;

  std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(image);
  image_matrix = input_cv->mat();
  if (!image_matrix.data) {
    RETURN_STATUS_UNEXPECTED("EncodePng: Load the image tensor failed.");
  }

  if (channels == kMinImageChannel) {
    CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".PNG", image_matrix, buffer, params),
                                 "EncodePng: Failed to encode image.");
  } else {
    // The tensor stores RGB while OpenCV's encoder expects BGR channel order.
    cv::Mat image_bgr;
    cv::cvtColor(image_matrix, image_bgr, cv::COLOR_RGB2BGR);
    CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".PNG", image_bgr, buffer, params), "EncodePng: Failed to encode image.");
  }

  // Copy the encoded byte stream into a flat UINT8 tensor.
  TensorShape tensor_shape = TensorShape({(long int)buffer.size()});
  RETURN_IF_NOT_OK(Tensor::CreateFromMemory(tensor_shape, DataType(DataType::DE_UINT8), buffer.data(), output));

  return Status::OK();
}
2498
ReadFile(const std::string & filename,std::shared_ptr<Tensor> * output)2499 Status ReadFile(const std::string &filename, std::shared_ptr<Tensor> *output) {
2500 RETURN_UNEXPECTED_IF_NULL(output);
2501
2502 auto realpath = FileUtils::GetRealPath(filename.c_str());
2503 if (!realpath.has_value()) {
2504 RETURN_STATUS_UNEXPECTED("ReadFile: Invalid file path, " + filename + " does not exist.");
2505 }
2506 if (!Path(realpath.value()).IsFile()) {
2507 RETURN_STATUS_UNEXPECTED("ReadFile: Invalid file path, " + filename + " is not a regular file.");
2508 }
2509
2510 RETURN_IF_NOT_OK(Tensor::CreateFromFile(realpath.value(), output));
2511 return Status::OK();
2512 }
2513
ReadImage(const std::string & filename,std::shared_ptr<Tensor> * output,ImageReadMode mode)2514 Status ReadImage(const std::string &filename, std::shared_ptr<Tensor> *output, ImageReadMode mode) {
2515 RETURN_UNEXPECTED_IF_NULL(output);
2516
2517 auto realpath = FileUtils::GetRealPath(filename.c_str());
2518 if (!realpath.has_value()) {
2519 std::string err_msg = "ReadImage: Invalid file path, " + filename + " does not exist.";
2520 RETURN_STATUS_UNEXPECTED(err_msg);
2521 }
2522 if (!Path(realpath.value()).IsFile()) {
2523 RETURN_STATUS_UNEXPECTED("ReadImage: Invalid file path, " + filename + " is not a regular file.");
2524 }
2525
2526 cv::Mat image;
2527 int cv_mode = static_cast<int>(mode) - 1;
2528 image = cv::imread(realpath.value(), cv_mode);
2529 if (image.data == nullptr) {
2530 RETURN_STATUS_UNEXPECTED("ReadImage: Failed to read file " + filename);
2531 }
2532
2533 std::shared_ptr<CVTensor> output_cv;
2534 if (mode == ImageReadMode::kCOLOR || image.channels() > 1) {
2535 cv::Mat image_rgb;
2536 cv::cvtColor(image, image_rgb, cv::COLOR_BGRA2RGB);
2537 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(image_rgb, kDefaultImageRank, &output_cv));
2538 } else {
2539 RETURN_IF_NOT_OK(CVTensor::CreateFromMat(image, kDefaultImageRank, &output_cv));
2540 }
2541 *output = std::static_pointer_cast<Tensor>(output_cv);
2542
2543 return Status::OK();
2544 }
2545
// Write the bytes of a 1-D UINT8 tensor to `filename`, creating the file if
// needed and truncating existing content; the file mode is set to owner
// read/write afterwards.
Status WriteFile(const std::string &filename, const std::shared_ptr<Tensor> &data) {
  std::string err_msg;

  if (data->type() != DataType::DE_UINT8) {
    err_msg = "WriteFile: The type of the elements of data should be UINT8, but got " + data->type().ToString() + ".";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }

  long int data_size = data->Size();
  // Only read when data_size > 0; both writes below are guarded the same way.
  const char *data_buffer;
  if (data_size >= kDeMaxDim || data_size < 0) {
    err_msg = "WriteFile: Invalid data->Size() , should be >= 0 && < " + std::to_string(kDeMaxDim);
    err_msg += " , but got " + std::to_string(data_size) + " for " + filename;
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  if (data_size > 0) {
    // Bytes tensors keep their payload in the strings buffer; numeric tensors
    // expose it through the mutable buffer.
    if (data->type() == DataType::DE_BYTES) {
      data_buffer = (const char *)data->GetStringsBuffer();
    } else {
      data_buffer = (const char *)data->GetMutableBuffer();
    }
    if (data_buffer == nullptr) {
      err_msg = "WriteFile: Invalid data->GetBufferSize() , should not be nullptr.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
    TensorShape shape = data->shape();
    int rank = static_cast<int>(shape.Rank());
    if (rank != kMinImageChannel) {
      err_msg = "WriteFile: The data has invalid dimensions. It should have only one dimension, but got ";
      err_msg += std::to_string(rank) + " dimensions.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
  }

  // Create the file first so GetRealPath below can resolve it.
  Path file(filename);
  if (!file.Exists()) {
    int file_descriptor;
    RETURN_IF_NOT_OK(file.CreateFile(&file_descriptor));
    RETURN_IF_NOT_OK(file.CloseFile(file_descriptor));
  }
  auto realpath = FileUtils::GetRealPath(filename.c_str());
  if (!realpath.has_value()) {
    RETURN_STATUS_UNEXPECTED("WriteFile: Invalid file path, " + filename + " failed to get the real path.");
  }
  if (!Path(realpath.value()).IsFile()) {
    RETURN_STATUS_UNEXPECTED("WriteFile: Invalid file path, " + filename + " is not a regular file.");
  }

  // Truncate and write the payload in binary mode.
  std::ofstream fs(realpath.value().c_str(), std::ios::out | std::ios::trunc | std::ios::binary);
  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "WriteFile: Failed to open the file: " + filename + " for writing.");

  if (data_size > 0) {
    fs.write(data_buffer, data_size);
    if (fs.fail()) {
      err_msg = "WriteFile: Failed to write the file " + filename;
      fs.close();
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
  }
  fs.close();
  // Restrict permissions to owner read/write.
  ChangeFileMode(realpath.value(), S_IRUSR | S_IWUSR);
  return Status::OK();
}
2609
// Encode a UINT8 image (HW or HWC with 1 or 3 channels) as JPEG with the
// given quality and write the result to `filename` (created if missing, file
// mode set to owner read/write).
Status WriteJpeg(const std::string &filename, const std::shared_ptr<Tensor> &image, int quality) {
  std::string err_msg;

  if (image->type() != DataType::DE_UINT8) {
    err_msg = "WriteJpeg: The type of the elements of image should be UINT8, but got " + image->type().ToString() + ".";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  TensorShape shape = image->shape();
  int rank = static_cast<int>(shape.Rank());
  if (rank < kMinImageRank || rank > kDefaultImageRank) {
    err_msg = "WriteJpeg: The image has invalid dimensions. It should have two or three dimensions, but got ";
    err_msg += std::to_string(rank) + " dimensions.";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  int channels;
  if (rank == kDefaultImageRank) {
    channels = static_cast<int>(shape[kMinImageRank]);
    if (channels != kMinImageChannel && channels != kDefaultImageChannel) {
      err_msg = "WriteJpeg: The image has invalid channels. It should have 1 or 3 channels, but got ";
      err_msg += std::to_string(channels) + " channels.";
      RETURN_STATUS_UNEXPECTED(err_msg);
    }
  } else {
    // A 2-D image is treated as single-channel (grayscale).
    channels = 1;
  }

  if (quality < kMinJpegQuality || quality > kMaxJpegQuality) {
    err_msg = "WriteJpeg: Invalid quality " + std::to_string(quality) + ", should be in range of [" +
              std::to_string(kMinJpegQuality) + ", " + std::to_string(kMaxJpegQuality) + "].";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }

  // Baseline encoding: progressive mode, Huffman optimization and restart
  // intervals are all explicitly disabled.
  std::vector<int> params = {cv::IMWRITE_JPEG_QUALITY, quality, cv::IMWRITE_JPEG_PROGRESSIVE, 0,
                             cv::IMWRITE_JPEG_OPTIMIZE, 0, cv::IMWRITE_JPEG_RST_INTERVAL, 0};

  std::vector<unsigned char> buffer;
  cv::Mat image_matrix;

  std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(image);
  image_matrix = input_cv->mat();
  if (!image_matrix.data) {
    RETURN_STATUS_UNEXPECTED("WriteJpeg: Load the image tensor failed.");
  }

  if (channels == kMinImageChannel) {
    CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".JPEG", image_matrix, buffer, params),
                                 "WriteJpeg: Failed to encode image.");
  } else {
    // The tensor stores RGB while OpenCV's encoder expects BGR channel order.
    cv::Mat image_bgr;
    cv::cvtColor(image_matrix, image_bgr, cv::COLOR_RGB2BGR);
    CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".JPEG", image_bgr, buffer, params),
                                 "WriteJpeg: Failed to encode image.");
  }

  // Create the file first so GetRealPath below can resolve it.
  Path file(filename);
  if (!file.Exists()) {
    int file_descriptor;
    RETURN_IF_NOT_OK(file.CreateFile(&file_descriptor));
    RETURN_IF_NOT_OK(file.CloseFile(file_descriptor));
  }
  auto realpath = FileUtils::GetRealPath(filename.c_str());
  if (!realpath.has_value()) {
    RETURN_STATUS_UNEXPECTED("WriteJpeg: Invalid file path, " + filename + " failed to get the real path.");
  }
  if (!Path(realpath.value()).IsFile()) {
    RETURN_STATUS_UNEXPECTED("WriteJpeg: Invalid file path, " + filename + " is not a regular file.");
  }

  // Truncate and write the encoded stream in binary mode.
  std::ofstream fs(realpath.value().c_str(), std::ios::out | std::ios::trunc | std::ios::binary);
  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "WriteJpeg: Failed to open the file " + filename + " for writing.");

  fs.write((const char *)buffer.data(), (long int)buffer.size());
  if (fs.fail()) {
    fs.close();
    RETURN_STATUS_UNEXPECTED("WriteJpeg: Failed to write the file " + filename);
  }
  fs.close();
  // Restrict permissions to owner read/write.
  ChangeFileMode(realpath.value(), S_IRUSR | S_IWUSR);
  return Status::OK();
}
2690
WritePng(const std::string & filename,const std::shared_ptr<Tensor> & image,int compression_level)2691 Status WritePng(const std::string &filename, const std::shared_ptr<Tensor> &image, int compression_level) {
2692 std::string err_msg;
2693
2694 if (image->type() != DataType::DE_UINT8) {
2695 err_msg = "WritePng: The type of the elements of image should be UINT8, but got " + image->type().ToString() + ".";
2696 RETURN_STATUS_UNEXPECTED(err_msg);
2697 }
2698 TensorShape shape = image->shape();
2699 int rank = static_cast<int>(shape.Rank());
2700 if (rank < kMinImageRank || rank > kDefaultImageRank) {
2701 err_msg = "WritePng: The image has invalid dimensions. It should have two or three dimensions, but got ";
2702 err_msg += std::to_string(rank) + " dimensions.";
2703 RETURN_STATUS_UNEXPECTED(err_msg);
2704 }
2705 int channels;
2706 if (rank == kDefaultImageRank) {
2707 channels = static_cast<int>(shape[kMinImageRank]);
2708 if (channels != kMinImageChannel && channels != kDefaultImageChannel) {
2709 err_msg = "WritePng: The image has invalid channels. It should have 1 or 3 channels, but got ";
2710 err_msg += std::to_string(channels) + " channels.";
2711 RETURN_STATUS_UNEXPECTED(err_msg);
2712 }
2713 } else {
2714 channels = 1;
2715 }
2716
2717 if (compression_level < kMinPngCompression || compression_level > kMaxPngCompression) {
2718 err_msg = "WritePng: Invalid compression_level " + std::to_string(compression_level) + ", should be in range of [" +
2719 std::to_string(kMinPngCompression) + ", " + std::to_string(kMaxPngCompression) + "].";
2720 RETURN_STATUS_UNEXPECTED(err_msg);
2721 }
2722
2723 std::vector<int> params = {cv::IMWRITE_PNG_COMPRESSION, compression_level, cv::IMWRITE_PNG_STRATEGY,
2724 cv::IMWRITE_PNG_STRATEGY_RLE};
2725 std::vector<unsigned char> buffer;
2726 cv::Mat image_matrix;
2727
2728 std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(image);
2729 image_matrix = input_cv->mat();
2730 if (!image_matrix.data) {
2731 RETURN_STATUS_UNEXPECTED("WritePng: Load the image tensor failed.");
2732 }
2733
2734 if (channels == kMinImageChannel) {
2735 CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".PNG", image_matrix, buffer, params),
2736 "WritePng: Failed to encode image.");
2737 } else {
2738 cv::Mat image_bgr;
2739 cv::cvtColor(image_matrix, image_bgr, cv::COLOR_RGB2BGR);
2740 CHECK_FAIL_RETURN_UNEXPECTED(cv::imencode(".PNG", image_bgr, buffer, params), "WritePng: Failed to encode image.");
2741 }
2742
2743 Path file(filename);
2744 if (!file.Exists()) {
2745 int file_descriptor;
2746 RETURN_IF_NOT_OK(file.CreateFile(&file_descriptor));
2747 RETURN_IF_NOT_OK(file.CloseFile(file_descriptor));
2748 }
2749 auto realpath = FileUtils::GetRealPath(filename.c_str());
2750 if (!realpath.has_value()) {
2751 RETURN_STATUS_UNEXPECTED("WritePng: Invalid file path, " + filename + " failed to get the real path.");
2752 }
2753 struct stat sb {};
2754 stat(realpath.value().c_str(), &sb);
2755 if (S_ISREG(sb.st_mode) == 0) {
2756 RETURN_STATUS_UNEXPECTED("WritePng: Invalid file path, " + filename + " is not a regular file.");
2757 }
2758
2759 std::ofstream fs(realpath.value().c_str(), std::ios::out | std::ios::trunc | std::ios::binary);
2760 CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "WritePng: Failed to open the file " + filename + " for writing.");
2761
2762 fs.write((const char *)buffer.data(), (long int)buffer.size());
2763 if (fs.fail()) {
2764 fs.close();
2765 RETURN_STATUS_UNEXPECTED("WritePng: Failed to write the file " + filename);
2766 }
2767 fs.close();
2768 ChangeFileMode(realpath.value(), S_IRUSR | S_IWUSR);
2769 return Status::OK();
2770 }
2771
// support list
// Magic numbers of image formats the decoder supports, matched against the
// leading bytes of the raw file buffer (the JPEG/PNG magics are declared
// earlier in this file).
const unsigned char kBmpMagic[] = "\x42\x4D";  // "BM"
constexpr dsize_t kBmpMagicLen = 2;
const unsigned char kTiffMagic1[] = "\x4D\x4D";  // "MM": big-endian TIFF byte-order mark
const unsigned char kTiffMagic2[] = "\x49\x49";  // "II": little-endian TIFF byte-order mark
constexpr dsize_t kTiffMagicLen = 2;
2778
DumpImageAndAppendStatus(const std::shared_ptr<Tensor> & image,const Status & status)2779 Status DumpImageAndAppendStatus(const std::shared_ptr<Tensor> &image, const Status &status) {
2780 Status local_status = status;
2781 std::string file_name = "./abnormal_image.";
2782 std::string file_suffix;
2783 std::string error_info = local_status.GetErrDescription();
2784
2785 uint32_t image_length = 0;
2786 uchar *image_ptr = nullptr;
2787 if (image->type() == DataType::DE_BYTES) {
2788 RETURN_IF_NOT_OK(image->GetStringLength(&image_length));
2789 image_ptr = image->GetStringsBuffer();
2790 } else {
2791 image_length = image->SizeInBytes();
2792 image_ptr = image->GetMutableBuffer();
2793 }
2794
2795 if (image_length == 0) {
2796 return local_status;
2797 }
2798
2799 if (memcmp(image_ptr, kJpegMagic, kJpegMagicLen) == 0) { // support
2800 file_suffix = "jpg";
2801 } else if (memcmp(image_ptr, kPngMagic, kPngMagicLen) == 0) { // support
2802 file_suffix = "png";
2803 } else if (memcmp(image_ptr, kBmpMagic, kBmpMagicLen) == 0) { // support
2804 file_suffix = "bmp";
2805 } else if (memcmp(image_ptr, kTiffMagic1, kTiffMagicLen) == 0 || // support
2806 memcmp(image_ptr, kTiffMagic2, kTiffMagicLen) == 0) {
2807 file_suffix = "tif";
2808 } else {
2809 file_suffix = "exception";
2810 error_info += " Unknown image type.";
2811 }
2812
2813 auto ret = WriteFile(file_name + file_suffix, image);
2814 if (ret == Status::OK()) {
2815 error_info += " Dump the abnormal image to [" + (file_name + file_suffix) +
2816 "]. You can check this image first through the image viewer. If you find that " +
2817 "the image is abnormal, delete it from the dataset and re-run.";
2818 }
2819 local_status.SetErrDescription(error_info);
2820 return local_status;
2821 }
2822
// unsupported list
// Magic numbers of formats the decoder does NOT support. NOTE(review): the
// WebP check in CheckUnsupportedImage compares 4 bytes starting at offset 7 of
// the RIFF container, i.e. the high byte of the little-endian chunk size
// followed by "WEB" — this matches only files whose size field's top byte is 0
// (under ~16 MiB); confirm this offset is intentional.
const unsigned char kGifMagic[] = "\x47\x49\x46";  // "GIF"
constexpr dsize_t kGifMagicLen = 3;
const unsigned char kWebpMagic[] = "\x00\x57\x45\x42";  // "\0WEB"
constexpr dsize_t kWebpMagicLen = 4;
2828
CheckUnsupportedImage(const std::shared_ptr<Tensor> & image)2829 Status CheckUnsupportedImage(const std::shared_ptr<Tensor> &image) {
2830 bool unsupport_flag = false;
2831
2832 std::string file_name = "./unsupported_image.";
2833 std::string file_suffix;
2834 if (image->SizeInBytes() == 0) {
2835 RETURN_STATUS_UNEXPECTED("Image file size is 0.");
2836 }
2837
2838 uchar *image_ptr = nullptr;
2839 if (image->type() == DataType::DE_BYTES) {
2840 image_ptr = image->GetStringsBuffer();
2841 } else {
2842 image_ptr = image->GetMutableBuffer();
2843 }
2844
2845 if (memcmp(image_ptr, kGifMagic, kGifMagicLen) == 0) { // unsupported
2846 file_suffix = "gif";
2847 unsupport_flag = true;
2848 } else if (memcmp(image_ptr + 7, kWebpMagic, kWebpMagicLen) == 0) { // unsupported: skip the 7 bytes
2849 file_suffix = "webp";
2850 unsupport_flag = true;
2851 }
2852
2853 if (unsupport_flag) {
2854 auto ret = WriteFile(file_name + file_suffix, image);
2855 if (ret == Status::OK()) {
2856 RETURN_STATUS_UNEXPECTED("Unsupported image type [" + file_suffix + "] and dump the image to [" +
2857 (file_name + file_suffix) + "]. Please delete it from the dataset and re-run.");
2858 } else {
2859 ret.SetErrDescription("Unsupported image type [" + file_suffix + "], but dump the image failed. " +
2860 "Error info: " + ret.GetErrDescription());
2861 return ret;
2862 }
2863 }
2864 return Status::OK();
2865 }
2866 } // namespace dataset
2867 } // namespace mindspore
2868