• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_
18 
19 #include <setjmp.h>
20 
21 #include <memory>
22 #include <random>
23 #include <string>
24 #include <utility>
25 #include <vector>
26 #if defined(_WIN32) || defined(_WIN64)
27 #undef HAVE_STDDEF_H
28 #undef HAVE_STDLIB_H
29 #elif __APPLE__
30 #include <sys/param.h>
31 #include <sys/mount.h>
32 #endif
33 #include "./jpeglib.h"
34 #include "./jerror.h"
35 #include <opencv2/imgproc/imgproc.hpp>
36 #include "minddata/dataset/core/tensor.h"
37 #include "minddata/dataset/kernels/tensor_op.h"
38 #include "minddata/dataset/util/status.h"
39 
40 #define CHANNEL_INDEX 2           // images are HWC, so index 2 of the shape holds the number of channels
41 #define DEFAULT_IMAGE_CHANNELS 3  // images have 3 channels in general
42 #define DEFAULT_IMAGE_RANK 3      // images are rank 3 (HWC) in general
43 #define MAX_BIT_VALUE 255         // max bit value after decode is 255
44 #define MIN_IMAGE_CHANNELS 1      // image ops support a minimum of 1 channel
45 #define MAX_IMAGE_CHANNELS 4      // image ops support a maximum of 4 channels
46 #define MIN_IMAGE_DIMENSION 2     // images are at least 2 dimensional
47 namespace mindspore {
48 namespace dataset {
49 void JpegErrorExitCustom(j_common_ptr cinfo);  // NOTE(review): presumably the libjpeg error_exit hook - confirm
50 
51 struct JpegErrorManagerCustom {
52   // libjpeg error manager ("public" fields), declared as the first member
53   struct jpeg_error_mgr pub;
54   // jump buffer used to return to the caller (see <setjmp.h>)
55   jmp_buf setjmp_buffer;
56 };
57 
58 /// \brief Returns, as an int, the interpolation mode in OpenCV format
59 /// \param[in] mode Interpolation mode in DE format
60 int GetCVInterpolationMode(InterpolationMode mode);
61 
62 /// \brief Returns the OpenCV equivalent of the border type used for padding.
63 /// \param[in] type Border type in DE format
64 /// \return The OpenCV border type as an int (this function does not return a Status)
65 int GetCVBorderType(BorderType type);
66 
67 /// \brief Returns the check result of tensor rank and tensor shape
68 /// \param[in] tensor: The input tensor to check
69 /// \param[in] channel: The channel index of tensor shape.
70 /// \return true if channel of tensor shape is 1 or 3.
71 bool CheckTensorShape(const std::shared_ptr<Tensor> &tensor, const int &channel);
72 
73 /// \brief Returns flipped image
74 /// \param input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCV compatible type, see CVTensor.
75 /// \param flip_code: 1 for Horizontal (around y-axis), 0 for Vertical (around x-axis), -1 for both
76 /// \note Documented as flipping in place, yet a separate output tensor is taken - TODO confirm.
77 Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int flip_code);
78 
79 /// \brief Returns Horizontally flipped image
80 /// \param input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCV compatible type, see CVTensor.
81 /// \note Documented as flipping in place, yet a separate output tensor is taken - TODO confirm.
82 Status HorizontalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output);
83 
84 /// \brief Returns Vertically flipped image
85 /// \param input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCV compatible type, see CVTensor.
86 /// \note Documented as flipping in place, yet a separate output tensor is taken - TODO confirm.
87 Status VerticalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output);
88 
89 /// \brief Returns Resized image.
90 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCV compatible type, see CVTensor.
91 /// \param output_height: height of output
92 /// \param output_width: width of output
93 /// \param fx: horizontal scale (defaults to 0.0)
94 /// \param fy: vertical scale (defaults to 0.0)
95 /// \param mode: the interpolation mode (defaults to InterpolationMode::kLinear)
96 /// \param output: Resized image of shape <output_height,output_width,C> or <output_height,output_width>
97 ///                and same type as input
98 Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height,
99               int32_t output_width, double fx = 0.0, double fy = 0.0,
100               InterpolationMode mode = InterpolationMode::kLinear);
101 
102 /// \brief Returns Decoded image
103 /// Supported images:
104 ///  BMP JPEG JPG PNG TIFF
105 /// as supported by OpenCV; if users need more image analysis capabilities, please compile OpenCV accordingly.
106 /// \param input: CVTensor containing the not yet decoded image as 1D bytes
107 /// \param output: Decoded image Tensor of shape <H,W,C> and type DE_UINT8. Pixel order is RGB
108 Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
109 
110 Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);  // NOTE(review): presumably the OpenCV-based decode - confirm
111 
112 bool IsNonEmptyJPEG(const std::shared_ptr<Tensor> &input);  // NOTE(review): presumably true for non-empty JPEG data - confirm
113 
114 bool IsNonEmptyPNG(const std::shared_ptr<Tensor> &input);  // NOTE(review): presumably true for non-empty PNG data - confirm
115 
116 void JpegSetSource(j_decompress_ptr c_info, const void *data, int64_t data_size);  // NOTE(review): presumably sets data as the decompress source - confirm
117 
118 Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x = 0, int y = 0,
119                          int w = 0, int h = 0);  // NOTE(review): meaning of the all-zero default box - TODO confirm
120 
121 /// \brief Returns Rescaled image
122 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCV compatible type, see CVTensor.
123 /// \param rescale: rescale parameter (NOTE(review): presumably the multiplicative factor - confirm)
124 /// \param shift: shift parameter (NOTE(review): presumably the additive offset - confirm)
125 /// \param output: Rescaled image Tensor of same input shape and type DE_FLOAT32
126 Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rescale, float shift);
127 
128 /// \brief Returns the cropped region of interest (ROI) of an image
129 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCV compatible type, see CVTensor.
130 /// \param x: starting horizontal position of ROI
131 /// \param y: starting vertical position of ROI
132 /// \param w: width of the ROI
133 /// \param h: height of the ROI
134 /// \param output: Cropped image Tensor of shape <h,w,C> or <h,w> and same type as input.
135 Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h);
136 
137 /// \brief Changes the color space of the image.
138 /// \param input: The input image.
139 /// \param output: The output image.
140 /// \param convert_mode: The mode of image channel conversion (a ConvertMode value).
141 Status ConvertColor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, ConvertMode convert_mode);
142 
143 /// \brief Changes the layout of the image from HWC to CHW (the channel axis moves to the front)
144 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCV compatible type, see CVTensor.
145 /// \param output: Tensor of shape <C,H,W> or <H,W> and same type as input.
146 Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output);
147 
148 /// \brief Masks the given part of the input image with another image (sub_mat)
149 /// \param[in] sub_mat The image we want to mask with
150 /// \param[in,out] input The pointer to the image we want to mask; it is masked in place
151 /// \param[in] x The horizontal coordinate of left side of crop box
152 /// \param[in] y The vertical coordinate of the top side of crop box
153 /// \param[in] width The width of the mask box
154 /// \param[in] height The height of the mask box
155 /// \param[in] image_format The format of the image (CHW or HWC)
156 /// \note The masked image is returned through input itself; there is no separate output parameter
157 /// \return Status ok/error
158 Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Tensor> *input, int x, int y, int width,
159                       int height, ImageFormat image_format);
160 
161 /// \brief Copies a value from a source tensor into a destination tensor
162 /// \note This is meant for images and therefore only works if tensor is uint8 or float32
163 /// \param[in] source_tensor The tensor we take the value from
164 /// \param[in,out] dest_tensor The pointer to the tensor we want to copy the value to
165 /// \param[in] source_indx Index of the value in the source tensor
166 /// \param[in] dest_indx Index of the value in the destination tensor
167 /// \note The value is copied into dest_tensor, which is how the result is returned
168 /// \return Status ok/error
169 Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared_ptr<Tensor> *dest_tensor,
170                        const std::vector<int64_t> &source_indx, const std::vector<int64_t> &dest_indx);
171 
172 /// \brief Swaps the red and blue channels (RGB <-> BGR)
173 /// \param input: Tensor of shape <H,W,3> and any OpenCV compatible type, see CVTensor.
174 /// \param output: Swapped image of same shape and type
175 Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output);
176 
177 /// \brief Crops and resizes the image
178 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCV compatible type, see CVTensor.
179 /// \param x: horizontal start point
180 /// \param y: vertical start point
181 /// \param crop_height: height of the cropped ROI
182 /// \param crop_width: width of the cropped ROI
183 /// \param target_height: height of the final resized image
184 /// \param target_width: width of the final resized image
185 /// \param mode: the interpolation used in resize operation
186 /// \param output: Tensor of shape <target_height,target_width,C> or <target_height,target_width>
187 ///     and same type as input
188 Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y,
189                      int crop_height, int crop_width, int target_height, int target_width, InterpolationMode mode);
190 
191 /// \brief Returns rotated image; interpolation, expand and fill_r/fill_g/fill_b are optional (see the defaults)
192 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCV compatible type, see CVTensor.
193 /// \param center: rotation center
194 /// \param degree: degree to rotate
195 /// \param expand: if reshape is necessary (defaults to false)
196 /// \param output: rotated image of same type as input.
197 Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> center,
198               float degree, InterpolationMode interpolation = InterpolationMode::kNearestNeighbour, bool expand = false,
199               uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0);
200 
201 /// \brief Returns Normalized image
202 /// \param input: Tensor of shape <H,W,C> in RGB order and any OpenCV compatible type, see CVTensor.
203 /// \param mean: vector of floats (not a Tensor) holding the mean of each channel in RGB order
204 /// \param std:  vector of floats (not a Tensor) holding the std of each channel in RGB order
205 /// \param output: Normalized image Tensor of same input shape and type DE_FLOAT32
206 Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean,
207                  std::vector<float> std);
208 
209 /// \brief Returns Normalized and padded image
210 /// \param input: Tensor of shape <H,W,C> in RGB order and any OpenCV compatible type, see CVTensor.
211 /// \param mean: Tensor of shape <3> and type DE_FLOAT32 which are mean of each channel in RGB order
212 /// \param std:  Tensor of shape <3> and type DE_FLOAT32 which are std of each channel in RGB order
213 /// \param dtype: output dtype, given as a string
214 /// \param output: Normalized image Tensor padded with an extra channel, returned as a Tensor of type dtype
215 Status NormalizePad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
216                     const std::shared_ptr<Tensor> &mean, const std::shared_ptr<Tensor> &std, const std::string &dtype);
217 
218 /// \brief Returns image with adjusted brightness.
219 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCV compatible type, see CVTensor.
220 /// \param alpha: Alpha value to adjust brightness by. Should be a positive number.
221 ///               If the user passes a single value in Python, the range is [1 - value, 1 + value].
222 ///               The output is the original image multiplied by alpha. 0 gives a black image, 1 gives the
223 ///               original image while 2 increases the brightness by a factor of 2.
224 /// \param output: Adjusted image of same shape and type.
225 Status AdjustBrightness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha);
226 
227 /// \brief Returns image with adjusted contrast.
228 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCV compatible type, see CVTensor.
229 /// \param alpha: Alpha value to adjust contrast by. Should be a positive number.
230 ///               If the user passes a single value in Python, the range is [1 - value, 1 + value].
231 ///               0 gives a solid gray image, 1 gives the original image while 2 increases
232 ///               the contrast by a factor of 2.
233 /// \param output: Adjusted image of same shape and type.
234 Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha);
235 
236 /// \brief Returns image with contrast maximized.
237 /// \param input: Tensor of shape <H,W,3>/<H,W,1>/<H,W> in RGB/Grayscale and any OpenCV compatible type, see CVTensor.
238 /// \param output: Tensor that receives the contrast-maximized image.
239 /// \param cutoff: Percentage of pixels cut from both histogram ends (high ones become 255, low ones become 0).
240 /// \param ignore: Pixel values to be ignored in the algorithm.
241 Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &cutoff,
242                     const std::vector<uint32_t> &ignore);
243 
244 /// \brief Returns image with gamma correction.
245 /// \param[in] input: Tensor of shape <H,W,3>/<H,W,1>/<H,W> in RGB/Grayscale and any OpenCV compatible type,
246 ///     see CVTensor.
247 /// \param[in] gamma: Non-negative real number, same as gamma in the equation. gamma larger than 1 makes the
248 ///     shadows darker, while gamma smaller than 1 makes dark regions lighter.
249 /// \param[in] gain: The constant multiplier.
250 /// \param[out] output: Adjusted image of same shape and type.
251 Status AdjustGamma(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &gamma,
252                    const float &gain);
253 
254 /// \brief Returns image with adjusted saturation.
255 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCV compatible type, see CVTensor.
256 /// \param alpha: Alpha value to adjust saturation by. Should be a positive number.
257 ///               If the user passes a single value in Python, the range is [1 - value, 1 + value].
258 ///               0 will give a black and white image, 1 will give the original image while
259 ///               2 will enhance the saturation by a factor of 2.
260 /// \param output: Adjusted image of same shape and type.
261 Status AdjustSaturation(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha);
262 
263 /// \brief Returns image with adjusted hue.
264 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCV compatible type, see CVTensor.
265 /// \param hue: Hue value to adjust by, should be within range [-0.5, 0.5]. 0.5 and -0.5 will reverse the hue channel
266 ///             completely.
267 ///             If the user passes a single value in Python, the range is [-value, value].
268 /// \param output: Adjusted image of same shape and type.
269 Status AdjustHue(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &hue);
270 
271 /// \brief Returns image with equalized histogram.
272 /// \param[in] input: Tensor of shape <H,W,3>/<H,W,1>/<H,W> in RGB/Grayscale and
273 ///                   any OpenCV compatible type, see CVTensor.
274 /// \param[out] output: Equalized image of same shape and type.
275 Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
276 
277 /// \brief Masks out a random section from the image with set dimension
278 /// \param input: input Tensor
279 /// \param output: cutOut Tensor
280 /// \param box_height: height of the cropped box
281 /// \param box_width: width of the cropped box
282 /// \param num_patches: number of boxes to cut out from the image
283 /// \param bounded: boolean flag to toggle between random erasing and cutout
284 /// \param random_color: whether or not random fill value should be used
285 /// \param rnd: pointer to a std::mt19937 generator (NOTE(review): presumably the source of randomness - confirm)
286 /// \param fill_r,fill_g,fill_b: red, green and blue fill values for erase (each defaults to 0).
287 /// \return Status code
288 Status Erase(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t box_height,
289              int32_t box_width, int32_t num_patches, bool bounded, bool random_color, std::mt19937 *rnd,
290              uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0);
291 
292 /// \brief Pads the input image and puts the padded image in the output
293 /// \param input: input Tensor
294 /// \param output: padded Tensor
295 /// \param pad_top: amount of padding added at the top
296 /// \param pad_bottom: amount of padding added at the bottom
297 /// \param pad_left: amount of padding added on the left
298 /// \param pad_right: amount of padding added on the right
299 /// \param border_types: the interpolation to be done in the border
300 /// \param fill_r: red fill value for pad (defaults to 0)
301 /// \param fill_g: green fill value for pad (defaults to 0)
302 /// \param fill_b: blue fill value for pad (defaults to 0).
303 Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top,
304            const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types,
305            uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0);
306 
307 /// \brief Converts a 4 channel RGBA image to RGB
308 /// \param[in] input The input image
309 /// \param[out] output The output image
310 /// \return Status code
311 Status RgbaToRgb(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
312 
313 /// \brief Converts a 4 channel RGBA image to BGR
314 /// \param[in] input The input image
315 /// \param[out] output The output image
316 /// \return Status code
317 Status RgbaToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
318 
319 /// \brief Converts a 3 channel RGB image to BGR
320 /// \param[in] input The input image
321 /// \param[out] output The output image
322 /// \return Status code
323 Status RgbToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
324 
325 /// \brief Converts a 3 channel RGB image to GRAY
326 /// \param[in] input The input image
327 /// \param[out] output The output image
328 /// \return Status code
329 Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
330 
331 /// \brief Gets the width and height of a jpeg image
332 /// \param[in] input: CVTensor containing the not yet decoded image as 1D bytes
333 /// \param[out] img_width: the jpeg image width
334 /// \param[out] img_height: the jpeg image height
335 Status GetJpegImageInfo(const std::shared_ptr<Tensor> &input, int *img_width, int *img_height);
336 
337 /// \brief Geometrically transforms the input image
338 /// \param[in] input Input Tensor
339 /// \param[out] output Transformed Tensor
340 /// \param[in] mat The transformation matrix, passed as a flat vector of floats
341 /// \param[in] interpolation The interpolation mode
342 /// \param[in] fill_r Red fill value for pad (defaults to 0)
343 /// \param[in] fill_g Green fill value for pad (defaults to 0)
344 /// \param[in] fill_b Blue fill value for pad (defaults to 0)
345 Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::vector<float_t> &mat,
346               InterpolationMode interpolation, uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0);
347 
348 /// \brief Filters the input image with a Gaussian kernel
349 /// \param[in] input Input Tensor
350 /// \param[out] output Filtered Tensor
351 /// \param[in] kernel_size_x Width of the Gaussian kernel
352 /// \param[in] kernel_size_y Height of the Gaussian kernel
353 /// \param[in] sigma_x Gaussian kernel standard deviation along the width
354 /// \param[in] sigma_y Gaussian kernel standard deviation along the height
355 Status GaussianBlur(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t kernel_size_x,
356                     int32_t kernel_size_y, float sigma_x, float sigma_y);
357 
358 /// \brief Slices a tensor into multiple patches.
359 /// \param[in] input Input Tensor
360 /// \param[out] output Vector of output Tensors, one per patch
361 /// \param[in] num_height Number of patches in vertical direction.
362 /// \param[in] num_width Number of patches in horizontal direction.
363 /// \param[in] slice_mode Mode that selects between padding and dropping.
364 /// \param[in] fill_value The value of filled pixel in right and bottom border when padding.
365 Status SlicePatches(const std::shared_ptr<Tensor> &input, std::vector<std::shared_ptr<Tensor>> *output,
366                     int32_t num_height, int32_t num_width, SliceMode slice_mode, uint8_t fill_value);
367 
368 /// \brief Computes patch height and width.
369 /// \param[in] input_cv Input CVTensor
370 /// \param[out] patch_size Size of patch, stored as a pair of int32_t values
371 /// \param[in] num_height Number of patches in vertical direction.
372 /// \param[in] num_width Number of patches in horizontal direction.
373 /// \param[in] slice_mode Mode that selects between padding and dropping.
374 Status ComputePatchSize(const std::shared_ptr<CVTensor> &input_cv,
375                         std::shared_ptr<std::pair<int32_t, int32_t>> *patch_size, int32_t num_height, int32_t num_width,
376                         SliceMode slice_mode);
377 
378 /// \brief Validates the image rank.
379 /// \param[in] op_name Operator name.
380 /// \param[in] rank The rank of the input image shape.
381 Status ValidateImageRank(const std::string &op_name, int32_t rank);
382 }  // namespace dataset
383 }  // namespace mindspore
384 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_
385