1 /** 2 * Copyright 2019-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_ 18 19 #include <setjmp.h> 20 21 #include <memory> 22 #include <random> 23 #include <string> 24 #include <utility> 25 #include <vector> 26 #if defined(_WIN32) || defined(_WIN64) 27 #undef HAVE_STDDEF_H 28 #undef HAVE_STDLIB_H 29 #elif __APPLE__ 30 #include <sys/param.h> 31 #include <sys/mount.h> 32 #endif 33 #include "./jpeglib.h" 34 #include "./jerror.h" 35 #include <opencv2/imgproc/imgproc.hpp> 36 #include "minddata/dataset/core/tensor.h" 37 #include "minddata/dataset/kernels/tensor_op.h" 38 #include "minddata/dataset/util/status.h" 39 40 #define CHANNEL_INDEX 2 // images are hwc, so index 2 represents number of channels 41 #define DEFAULT_IMAGE_CHANNELS 3 // images are 3 channels in general 42 #define DEFAULT_IMAGE_RANK 3 // images are hwc channels in general 43 #define MAX_BIT_VALUE 255 // max bit value after decode is 256 44 #define MIN_IMAGE_CHANNELS 1 // image ops support minimum of 1 channel 45 #define MAX_IMAGE_CHANNELS 4 // image ops support maximum of 4 channel 46 #define MIN_IMAGE_DIMENSION 2 // images are at least 2 dimensional 47 namespace mindspore { 48 namespace dataset { 49 void JpegErrorExitCustom(j_common_ptr cinfo); 50 51 struct JpegErrorManagerCustom { 52 // "public" fields 53 struct jpeg_error_mgr pub; 54 // for return to caller 55 jmp_buf setjmp_buffer; 56 }; 57 58 /// \brief Returns the interpolation mode in openCV format 59 /// \param[in] mode Interpolation mode in DE format 60 int GetCVInterpolationMode(InterpolationMode mode); 61 62 /// \brief Returns the openCV equivalent of the border type used for padding. 63 /// \param type 64 /// \return Status code 65 int GetCVBorderType(BorderType type); 66 67 /// \brief Returns the check result of tensor rank and tensor shape 68 /// \param[in] tensor: The input tensor need to check 69 /// \param[in] channel: The channel index of tensor shape. 70 /// \param[out] return true if channel of tensor shape is 1 or 3. 71 bool CheckTensorShape(const std::shared_ptr<Tensor> &tensor, const int &channel); 72 73 /// \brief Returns flipped image 74 /// \param[in] input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 75 /// \param flip_code: 1 for Horizontal (around y-axis), 0 for Vertical (around x-axis), -1 for both 76 /// The flipping happens in place. 77 Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int flip_code); 78 79 /// \brief Returns Horizontally flipped image 80 /// \param input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 81 /// The flipping happens in place. 82 Status HorizontalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output); 83 84 /// \brief Returns Vertically flipped image 85 /// \param input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 86 /// \note The flipping happens in place. 87 Status VerticalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output); 88 89 /// \brief Returns Resized image. 90 /// \param input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 91 /// \param output_height: height of output 92 /// \param output_width: width of output 93 /// \param fx: horizontal scale 94 /// \param fy: vertical scale 95 /// \param InterpolationMode: the interpolation mode 96 /// \param output: Resized image of shape <outputHeight,outputWidth,C> or <outputHeight,outputWidth> 97 /// and same type as input 98 Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height, 99 int32_t output_width, double fx = 0.0, double fy = 0.0, 100 InterpolationMode mode = InterpolationMode::kLinear); 101 102 /// \brief Returns Decoded image 103 /// Supported images: 104 /// BMP JPEG JPG PNG TIFF 105 /// supported by opencv, if user need more image analysis capabilities, please compile opencv particularlly. 106 /// \param input: CVTensor containing the not decoded image 1D bytes 107 /// \param output: Decoded image Tensor of shape <H,W,C> and type DE_UINT8. Pixel order is RGB 108 Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 109 110 Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 111 112 bool IsNonEmptyJPEG(const std::shared_ptr<Tensor> &input); 113 114 bool IsNonEmptyPNG(const std::shared_ptr<Tensor> &input); 115 116 void JpegSetSource(j_decompress_ptr c_info, const void *data, int64_t data_size); 117 118 Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x = 0, int y = 0, 119 int w = 0, int h = 0); 120 121 /// \brief Returns Rescaled image 122 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 123 /// \param rescale: rescale parameter 124 /// \param shift: shift parameter 125 /// \param output: Rescaled image Tensor of same input shape and type DE_FLOAT32 126 Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rescale, float shift); 127 128 /// \brief Returns cropped ROI of an image 129 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 130 /// \param x: starting horizontal position of ROI 131 /// \param y: starting vertical position of ROI 132 /// \param w: width of the ROI 133 /// \param h: height of the ROI 134 /// \param output: Cropped image Tensor of shape <h,w,C> or <h,w> and same input type. 135 Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h); 136 137 /// \brief Change the color space of the image. 138 /// \param input: The input image. 139 /// \param output: The output image. 140 /// \param convert_mode: The mode of image channel conversion. 141 Status ConvertColor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, ConvertMode convert_mode); 142 143 /// \brief Swaps the channels in the image, i.e. converts HWC to CHW 144 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 145 /// \param output: Tensor of shape <C,H,W> or <H,W> and same input type. 146 Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output); 147 148 /// \brief Masks the given part of the input image with a another image (sub_mat) 149 /// \param[in] sub_mat The image we want to mask with 150 /// \param[in] input The pointer to the image we want to mask 151 /// \param[in] x The horizontal coordinate of left side of crop box 152 /// \param[in] y The vertical coordinate of the top side of crop box 153 /// \param[in] width The width of the mask box 154 /// \param[in] height The height of the mask box 155 /// \param[in] image_format The format of the image (CHW or HWC) 156 /// \param[out] input Masks the input image in-place and returns it 157 /// @return Status ok/error 158 Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Tensor> *input, int x, int y, int width, 159 int height, ImageFormat image_format); 160 161 /// \brief Copies a value from a source tensor into a destination tensor 162 /// \note This is meant for images and therefore only works if tensor is uint8 or float32 163 /// \param[in] source_tensor The tensor we take the value from 164 /// \param[in] dest_tensor The pointer to the tensor we want to copy the value to 165 /// \param[in] source_indx index of the value in the source tensor 166 /// \param[in] dest_indx index of the value in the destination tensor 167 /// \param[out] dest_tensor Copies the value to the given dest_tensor and returns it 168 /// @return Status ok/error 169 Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared_ptr<Tensor> *dest_tensor, 170 const std::vector<int64_t> &source_indx, const std::vector<int64_t> &dest_indx); 171 172 /// \brief Swap the red and blue pixels (RGB <-> BGR) 173 /// \param input: Tensor of shape <H,W,3> and any OpenCv compatible type, see CVTensor. 174 /// \param output: Swapped image of same shape and type 175 Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output); 176 177 /// \brief Crops and resizes the image 178 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 179 /// \param x: horizontal start point 180 /// \param y: vertical start point 181 /// \param crop_height: height of the cropped ROI 182 /// \param crop_width: width of the cropped ROI 183 /// \param target_width: width of the final resized image 184 /// \param target_height: height of the final resized image 185 /// \param InterpolationMode: the interpolation used in resize operation 186 /// \param output: Tensor of shape <targetHeight,targetWidth,C> or <targetHeight,targetWidth> 187 /// and same type as input 188 Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, 189 int crop_height, int crop_width, int target_height, int target_width, InterpolationMode mode); 190 191 /// \brief Returns rotated image 192 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 193 /// \param center: rotation center 194 /// \param degree: degree to rotate 195 /// \param expand: if reshape is necessary 196 /// \param output: rotated image of same input type. 197 Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> center, 198 float degree, InterpolationMode interpolation = InterpolationMode::kNearestNeighbour, bool expand = false, 199 uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0); 200 201 /// \brief Returns Normalized image 202 /// \param input: Tensor of shape <H,W,C> in RGB order and any OpenCv compatible type, see CVTensor. 203 /// \param mean: Tensor of shape <3> and type DE_FLOAT32 which are mean of each channel in RGB order 204 /// \param std: Tensor of shape <3> and type DE_FLOAT32 which are std of each channel in RGB order 205 /// \param output: Normalized image Tensor of same input shape and type DE_FLOAT32 206 Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean, 207 std::vector<float> std); 208 209 /// \brief Returns Normalized and paded image 210 /// \param input: Tensor of shape <H,W,C> in RGB order and any OpenCv compatible type, see CVTensor. 211 /// \param mean: Tensor of shape <3> and type DE_FLOAT32 which are mean of each channel in RGB order 212 /// \param std: Tensor of shape <3> and type DE_FLOAT32 which are std of each channel in RGB order 213 /// \param dtype: output dtype 214 /// \param output: Normalized image Tensor and pad an extra channel, return a dtype Tensor 215 Status NormalizePad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, 216 const std::shared_ptr<Tensor> &mean, const std::shared_ptr<Tensor> &std, const std::string &dtype); 217 218 /// \brief Returns image with adjusted brightness. 219 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCv compatible type, see CVTensor. 220 /// \param alpha: Alpha value to adjust brightness by. Should be a positive number. 221 /// If user input one value in python, the range is [1 - value, 1 + value]. 222 /// This will output original image multiplied by alpha. 0 gives a black image, 1 gives the 223 /// original image while 2 increases the brightness by a factor of 2. 224 /// \param output: Adjusted image of same shape and type. 225 Status AdjustBrightness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha); 226 227 /// \brief Returns image with adjusted contrast. 228 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCv compatible type, see CVTensor. 229 /// \param alpha: Alpha value to adjust contrast by. Should be a positive number. 230 /// If user input one value in python, the range is [1 - value, 1 + value]. 231 /// 0 gives a solid gray image, 1 gives the original image while 2 increases 232 /// the contrast by a factor of 2. 233 /// \param output: Adjusted image of same shape and type. 234 Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha); 235 236 /// \brief Returns image with contrast maximized. 237 /// \param input: Tensor of shape <H,W,3>/<H,W,1>/<H,W> in RGB/Grayscale and any OpenCv compatible type, see CVTensor. 238 /// \param cutoff: Cutoff percentage of how many pixels are to be removed (high pixels change to 255 and low change 239 /// to 0) from the high and low ends of the histogram. 240 /// \param ignore: Pixel values to be ignored in the algorithm. 241 Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &cutoff, 242 const std::vector<uint32_t> &ignore); 243 244 /// \brief Returns image with gamma correction. 245 /// \param[in] input: Tensor of shape <H,W,3>/<H,W,1>/<H,W> in RGB/Grayscale and any OpenCV compatible type, 246 /// see CVTensor. 247 /// \param[in] gamma: Non negative real number, same as gamma in the equation. gamma larger than 1 make the shadows 248 /// darker, while gamma smaller than 1 make dark regions lighter. 249 /// \param[in] gain: The constant multiplier. 250 /// \param[out] output: Adjusted image of same shape and type. 251 Status AdjustGamma(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &gamma, 252 const float &gain); 253 254 /// \brief Returns image with adjusted saturation. 255 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCv compatible type, see CVTensor. 256 /// \param alpha: Alpha value to adjust saturation by. Should be a positive number. 257 /// If user input one value in python, the range is [1 - value, 1 + value]. 258 /// 0 will give a black and white image, 1 will give the original image while 259 /// 2 will enhance the saturation by a factor of 2. 260 /// \param output: Adjusted image of same shape and type. 261 Status AdjustSaturation(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha); 262 263 /// \brief Returns image with adjusted hue. 264 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCv compatible type, see CVTensor. 265 /// \param hue: Hue value to adjust by, should be within range [-0.5, 0.5]. 0.5 and - 0.5 will reverse the hue channel 266 /// completely. 267 /// If user input one value in python, the range is [-value, value]. 268 /// \param output: Adjusted image of same shape and type. 269 Status AdjustHue(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &hue); 270 271 /// \brief Returns image with equalized histogram. 272 /// \param[in] input: Tensor of shape <H,W,3>/<H,W,1>/<H,W> in RGB/Grayscale and 273 /// any OpenCv compatible type, see CVTensor. 274 /// \param[out] output: Equalized image of same shape and type. 275 Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 276 277 /// \brief Masks out a random section from the image with set dimension 278 /// \param input: input Tensor 279 /// \param output: cutOut Tensor 280 /// \param box_height: height of the cropped box 281 /// \param box_width: width of the cropped box 282 /// \param num_patches: number of boxes to cut out from the image 283 /// \param bounded: boolean flag to toggle between random erasing and cutout 284 /// \param random_color: whether or not random fill value should be used 285 /// \param fill_r: red fill value for erase 286 /// \param fill_g: green fill value for erase 287 /// \param fill_b: blue fill value for erase. 288 Status Erase(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t box_height, 289 int32_t box_width, int32_t num_patches, bool bounded, bool random_color, std::mt19937 *rnd, 290 uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0); 291 292 /// \brief Pads the input image and puts the padded image in the output 293 /// \param input: input Tensor 294 /// \param output: padded Tensor 295 /// \param pad_top: amount of padding done in top 296 /// \param pad_bottom: amount of padding done in bottom 297 /// \param pad_left: amount of padding done in left 298 /// \param pad_right: amount of padding done in right 299 /// \param border_types: the interpolation to be done in the border 300 /// \param fill_r: red fill value for pad 301 /// \param fill_g: green fill value for pad 302 /// \param fill_b: blue fill value for pad. 303 Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top, 304 const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types, 305 uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0); 306 307 /// \brief Take in a 4 channel image in RBGA to RGB 308 /// \param[in] input The input image 309 /// \param[out] output The output image 310 /// \return Status code 311 Status RgbaToRgb(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 312 313 /// \brief Take in a 4 channel image in RBGA to BGR 314 /// \param[in] input The input image 315 /// \param[out] output The output image 316 /// \return Status code 317 Status RgbaToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 318 319 /// \brief Take in a 3 channel image in RBG to BGR 320 /// \param[in] input The input image 321 /// \param[out] output The output image 322 /// \return Status code 323 Status RgbToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 324 325 /// \brief Take in a 3 channel image in RBG to GRAY 326 /// \param[in] input The input image 327 /// \param[out] output The output image 328 /// \return Status code 329 Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 330 331 /// \brief Get jpeg image width and height 332 /// \param input: CVTensor containing the not decoded image 1D bytes 333 /// \param img_width: the jpeg image width 334 /// \param img_height: the jpeg image height 335 Status GetJpegImageInfo(const std::shared_ptr<Tensor> &input, int *img_width, int *img_height); 336 337 /// \brief Geometrically transform the input image 338 /// \param[in] input Input Tensor 339 /// \param[out] output Transformed Tensor 340 /// \param[in] mat The transformation matrix 341 /// \param[in] interpolation The interpolation mode 342 /// \param[in] fill_r Red fill value for pad 343 /// \param[in] fill_g Green fill value for pad 344 /// \param[in] fill_b Blue fill value for pad 345 Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::vector<float_t> &mat, 346 InterpolationMode interpolation, uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0); 347 348 /// \brief Filter the input image with a Gaussian kernel 349 /// \param[in] input Input Tensor 350 /// \param[out] output Transformed Tensor 351 /// \param[in] kernel_size_x Gaussian kernel size of width 352 /// \param[in] kernel_size_y Gaussian kernel size of height 353 /// \param[in] sigma_x Gaussian kernel standard deviation of width 354 /// \param[in] sigma_y Gaussian kernel standard deviation of height 355 Status GaussianBlur(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t kernel_size_x, 356 int32_t kernel_size_y, float sigma_x, float sigma_y); 357 358 /// \brief Slice tensor to multiple patches. 359 /// \param[in] input Input Tensor 360 /// \param[out] output Vector of Output Tensor 361 /// \param[in] num_height Number of patches in vertical direction. 362 /// \param[in] num_width Number of patches in horizontal direction. 363 /// \param[in] slice_mode Mode represents padding or drop. 364 /// \param[in] fill_value The value of filled pixel in right and bottom border when padding. 365 Status SlicePatches(const std::shared_ptr<Tensor> &input, std::vector<std::shared_ptr<Tensor>> *output, 366 int32_t num_height, int32_t num_width, SliceMode slice_mode, uint8_t fill_value); 367 368 /// \brief Compute patch height and width. 369 /// \param[in] input Input CVTensor 370 /// \param[out] patch_size Size of patch 371 /// \param[in] num_height Number of patches in vertical direction. 372 /// \param[in] num_width Number of patches in horizontal direction. 373 /// \param[in] slice_mode Mode represents padding or drop. 374 Status ComputePatchSize(const std::shared_ptr<CVTensor> &input_cv, 375 std::shared_ptr<std::pair<int32_t, int32_t>> *patch_size, int32_t num_height, int32_t num_width, 376 SliceMode slice_mode); 377 378 /// \brief Validate image rank. 379 /// \param[in] op_name operator name. 380 /// \param[in] rank refers to the rank of input image shape. 381 Status ValidateImageRank(const std::string &op_name, int32_t rank); 382 } // namespace dataset 383 } // namespace mindspore 384 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_ 385