1 /** 2 * Copyright 2020-2023 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_ 18 19 #if defined(_WIN32) || defined(_WIN64) 20 #undef HAVE_STDDEF_H 21 #undef HAVE_STDLIB_H 22 #elif __APPLE__ 23 #include <sys/mount.h> 24 #include <sys/param.h> 25 #endif 26 27 #include <csetjmp> 28 #include <memory> 29 #include <random> 30 #include <set> 31 #include <string> 32 #include <utility> 33 #include <vector> 34 35 #include "jpeglib.h" 36 #include "jerror.h" 37 #include <opencv2/imgproc/imgproc.hpp> 38 39 #include "minddata/dataset/core/tensor.h" 40 #include "minddata/dataset/kernels/tensor_op.h" 41 #include "minddata/dataset/util/status.h" 42 #include "minddata/dataset/util/validators.h" 43 44 namespace mindspore { 45 namespace dataset { 46 constexpr dsize_t kChannelIndexHWC = 2; // images are hwc, so index 2 represents number of channels 47 constexpr dsize_t kChannelIndexCHW = 0; // images are chw, so index 0 represents number of channels 48 constexpr dsize_t kMinImageRank = 2; // images have at least 2 dimensions 49 constexpr dsize_t kDefaultImageRank = 3; // images are hwc channels in general 50 constexpr int32_t kMaxBitValue = 255; // max bit value after decode is 256 51 constexpr dsize_t kMinImageChannel = 1; // image ops support minimum of 1 channel 52 constexpr dsize_t kDefaultImageChannel = 3; // images are 3 channels in general 53 constexpr dsize_t kMaxImageChannel = 4; // image ops support maximum of 4 channel 54 constexpr float kHalf = 0.5; // to get the half of a value 55 constexpr dsize_t kRIndex = 0; // index of red channel in RGB format 56 constexpr dsize_t kGIndex = 1; // index of green channel in RGB format 57 constexpr dsize_t kBIndex = 2; // index of blue channel in RGB format 58 constexpr dsize_t kHeightIndex = 0; // index of height of HWC images 59 constexpr dsize_t kWidthIndex = 1; // index of width of HWC images 60 constexpr dsize_t kMinJpegQuality = 1; // the minimum quality for JPEG 61 constexpr dsize_t kMaxJpegQuality = 100; // the maximum quality for JPEG 62 constexpr dsize_t kMinPngCompression = 0; // the minimum compression level for PNG 63 constexpr dsize_t kMaxPngCompression = 9; // the maximum compression level for PNG 64 constexpr dsize_t kChannelIndexNHWC = 3; 65 constexpr dsize_t kNHWCImageRank = 4; 66 constexpr dsize_t kWidthIndexNHWC = 2; 67 constexpr dsize_t kHeightIndexNHWC = 1; 68 constexpr dsize_t kWidthIndexNCHW = 3; 69 constexpr dsize_t kHeightIndexNCHW = 2; 70 71 void JpegErrorExitCustom(j_common_ptr cinfo); 72 73 struct JpegErrorManagerCustom { 74 // "public" fields 75 struct jpeg_error_mgr pub; 76 // for return to caller 77 jmp_buf setjmp_buffer; 78 }; 79 80 /// \brief Returns the interpolation mode in openCV format 81 /// \param[in] mode Interpolation mode in DE format 82 int GetCVInterpolationMode(InterpolationMode mode); 83 84 /// \brief Returns the openCV equivalent of the border type used for padding. 85 /// \param type 86 /// \return Status code 87 int GetCVBorderType(BorderType type); 88 89 /// \brief Get the number of input image channels. 90 /// \param[in] image Tensor of the image. 91 /// \param[out] channels Channels of the image. 92 /// \return The status code. 93 Status ImageNumChannels(const std::shared_ptr<Tensor> &image, dsize_t *channels); 94 95 /// \brief Get the size of input image. 96 /// \param[in] image Tensor of the image. 97 /// \param[out] size Size of the image as [height, width]. 98 /// \return The status code. 99 Status ImageSize(const std::shared_ptr<Tensor> &image, std::vector<dsize_t> *size); 100 101 /// \brief Validate image Dtype, rank and channel. 102 /// \param[in] image Image tensor to be validated. 103 /// \param[in] op_name operator name. 104 /// \param[in] valid_dtype Valid date type of the image tensor. Default: {}, means not to check date type. 105 /// \param[in] valid_rank Valid dimension of the image tensor. Default: {}, means not to check dimension. 106 /// \param[in] valid_channel Valid channel of the image tensor. Default: {}, means not to check channel. 107 Status ValidateImage(const std::shared_ptr<Tensor> &image, const std::string &op_name, 108 const std::set<uint8_t> &valid_dtype = {}, const std::set<dsize_t> &valid_rank = {}, 109 const std::set<dsize_t> &valid_channel = {}); 110 111 /// \brief Validate image dtype. 112 /// \param[in] op_name operator name. 113 /// \param[in] dtype Date type of the image tensor. 114 Status ValidateImageDtype(const std::string &op_name, DataType dtype); 115 116 /// \brief Validate image rank. 117 /// \param[in] op_name operator name. 118 /// \param[in] rank refers to the rank of input image shape. 119 Status ValidateImageRank(const std::string &op_name, int32_t rank); 120 121 /// \brief Returns the check result of tensor rank and tensor shape 122 /// \param[in] tensor: The input tensor need to check 123 /// \param[in] channel: The channel index of tensor shape. 124 /// \param[out] return true if channel of tensor shape is 1 or 3. 125 bool CheckTensorShape(const std::shared_ptr<Tensor> &tensor, const int &channel); 126 127 /// \brief Returns flipped image 128 /// \param[in] input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 129 /// \param flip_code: 1 for Horizontal (around y-axis), 0 for Vertical (around x-axis), -1 for both 130 /// The flipping happens in place. 131 Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int flip_code); 132 133 /// \brief Returns Horizontally flipped image 134 /// \param input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 135 /// The flipping happens in place. 136 Status HorizontalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output); 137 138 /// \brief Returns Vertically flipped image 139 /// \param input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 140 /// \note The flipping happens in place. 141 Status VerticalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output); 142 143 /// \brief Returns Resized image. 144 /// \param input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 145 /// \param output_height: height of output 146 /// \param output_width: width of output 147 /// \param fx: horizontal scale 148 /// \param fy: vertical scale 149 /// \param InterpolationMode: the interpolation mode 150 /// \param output: Resized image of shape <outputHeight,outputWidth,C> or <outputHeight,outputWidth> 151 /// and same type as input 152 Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height, 153 int32_t output_width, double fx = 0.0, double fy = 0.0, 154 InterpolationMode mode = InterpolationMode::kLinear); 155 156 /// \brief Returns Decoded image 157 /// Supported images: 158 /// BMP JPEG JPG PNG TIFF 159 /// supported by opencv, if user need more image analysis capabilities, please compile opencv particularlly. 160 /// \param input: CVTensor containing the not decoded image 1D bytes 161 /// \param output: Decoded image Tensor of shape <H,W,C> and type DE_UINT8. Pixel order is RGB 162 Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 163 164 Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 165 166 DATASET_API bool IsNonEmptyJPEG(const std::shared_ptr<Tensor> &input); 167 168 bool IsNonEmptyPNG(const std::shared_ptr<Tensor> &input); 169 170 void JpegSetSource(j_decompress_ptr c_info, const void *data, int64_t data_size); 171 172 Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x = 0, int y = 0, 173 int w = 0, int h = 0); 174 175 /// \brief Returns Rescaled image 176 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 177 /// \param rescale: rescale parameter 178 /// \param shift: shift parameter 179 /// \param output: Rescaled image Tensor of same input shape and type DE_FLOAT32 180 Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rescale, float shift); 181 182 /// \brief Returns cropped ROI of an image 183 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 184 /// \param x: starting horizontal position of ROI 185 /// \param y: starting vertical position of ROI 186 /// \param w: width of the ROI 187 /// \param h: height of the ROI 188 /// \param output: Cropped image Tensor of shape <h,w,C> or <h,w> and same input type. 189 Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h); 190 191 /// \brief Change the color space of the image. 192 /// \param input: The input image. 193 /// \param output: The output image. 194 /// \param convert_mode: The mode of image channel conversion. 195 Status ConvertColor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, ConvertMode convert_mode); 196 197 /// \brief Swaps the channels in the image, i.e. converts HWC to CHW 198 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 199 /// \param output: Tensor of shape <C,H,W> or <H,W> and same input type. 200 Status HwcToChw(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 201 202 /// \brief Masks the given part of the input image with a another image (sub_mat) 203 /// \param[in] sub_mat The image we want to mask with 204 /// \param[in] input The pointer to the image we want to mask 205 /// \param[in] x The horizontal coordinate of left side of crop box 206 /// \param[in] y The vertical coordinate of the top side of crop box 207 /// \param[in] width The width of the mask box 208 /// \param[in] height The height of the mask box 209 /// \param[in] image_format The format of the image (CHW or HWC) 210 /// \param[out] input Masks the input image in-place and returns it 211 /// @return Status ok/error 212 Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Tensor> *input, int x, int y, int width, 213 int height, ImageFormat image_format); 214 215 /// \brief Copies a value from a source tensor into a destination tensor 216 /// \note This is meant for images and therefore only works if tensor is uint8 or float32 217 /// \param[in] source_tensor The tensor we take the value from 218 /// \param[in] dest_tensor The pointer to the tensor we want to copy the value to 219 /// \param[in] source_indx index of the value in the source tensor 220 /// \param[in] dest_indx index of the value in the destination tensor 221 /// \param[out] dest_tensor Copies the value to the given dest_tensor and returns it 222 /// @return Status ok/error 223 Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared_ptr<Tensor> *dest_tensor, 224 const std::vector<int64_t> &source_indx, const std::vector<int64_t> &dest_indx); 225 226 /// \brief Swap the red and blue pixels (RGB <-> BGR) 227 /// \param input: Tensor of shape <H,W,3> and any OpenCv compatible type, see CVTensor. 228 /// \param output: Swapped image of same shape and type 229 Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output); 230 231 /// \brief Crops and resizes the image 232 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 233 /// \param x: horizontal start point 234 /// \param y: vertical start point 235 /// \param crop_height: height of the cropped ROI 236 /// \param crop_width: width of the cropped ROI 237 /// \param target_width: width of the final resized image 238 /// \param target_height: height of the final resized image 239 /// \param InterpolationMode: the interpolation used in resize operation 240 /// \param output: Tensor of shape <targetHeight,targetWidth,C> or <targetHeight,targetWidth> 241 /// and same type as input 242 Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, 243 int crop_height, int crop_width, int target_height, int target_width, InterpolationMode mode); 244 245 /// \brief Returns rotated image 246 /// \param input: Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor. 247 /// \param center: rotation center 248 /// \param degree: degree to rotate 249 /// \param expand: if reshape is necessary 250 /// \param output: rotated image of same input type. 251 Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> center, 252 float degree, InterpolationMode interpolation = InterpolationMode::kNearestNeighbour, bool expand = false, 253 uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0); 254 255 /// \brief Returns Normalized image 256 /// \param input: Tensor of shape <H,W,C> in RGB order and any OpenCv compatible type, see CVTensor. 257 /// \param mean: Tensor of shape <3> and type DE_FLOAT32 which are mean of each channel in RGB order 258 /// \param std: Tensor of shape <3> and type DE_FLOAT32 which are std of each channel in RGB order 259 /// \param is_hwc: Check if input is HWC/CHW format 260 /// \param output: Normalized image Tensor of same input shape and type DE_FLOAT32 261 Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean, 262 std::vector<float> std, bool is_hwc); 263 264 /// \brief Returns Normalized and padded image 265 /// \param input: Tensor of shape <H,W,C> in RGB order and any OpenCv compatible type, see CVTensor. 266 /// \param mean: vector of float values which are mean of each channel 267 /// \param std: vector of float values which are std of each channel 268 /// \param dtype: output dtype 269 /// \param is_hwc: Check if input is HWC/CHW format 270 /// \param output: Normalized image Tensor and pad an extra channel, return a dtype Tensor 271 Status NormalizePad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<float> mean, 272 std::vector<float> std, const std::string &dtype, bool is_hwc); 273 274 /// \brief Returns image with adjusted brightness. 275 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCv compatible type, see CVTensor. 276 /// \param alpha: Alpha value to adjust brightness by. Should be a positive number. 277 /// If user input one value in python, the range is [1 - value, 1 + value]. 278 /// This will output original image multiplied by alpha. 0 gives a black image, 1 gives the 279 /// original image while 2 increases the brightness by a factor of 2. 280 /// \param output: Adjusted image of same shape and type. 281 Status AdjustBrightness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float alpha); 282 283 /// \brief Returns image with adjusted contrast. 284 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCv compatible type, see CVTensor. 285 /// \param alpha: Alpha value to adjust contrast by. Should be a positive number. 286 /// If user input one value in python, the range is [1 - value, 1 + value]. 287 /// 0 gives a solid gray image, 1 gives the original image while 2 increases 288 /// the contrast by a factor of 2. 289 /// \param output: Adjusted image of same shape and type. 290 Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float alpha); 291 292 /// \brief Returns image with contrast maximized. 293 /// \param input: Tensor of shape <H,W,3>/<H,W,1>/<H,W> in RGB/Grayscale and any OpenCv compatible type, see CVTensor. 294 /// \param cutoff: Cutoff percentage of how many pixels are to be removed (high pixels change to 255 and low change 295 /// to 0) from the high and low ends of the histogram. 296 /// \param ignore: Pixel values to be ignored in the algorithm. 297 Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float cutoff, 298 const std::vector<uint32_t> &ignore); 299 300 /// \brief Returns image with gamma correction. 301 /// \param[in] input: Tensor of shape <H,W,3>/<H,W,1>/<H,W> in RGB/Grayscale and any OpenCV compatible type, 302 /// see CVTensor. 303 /// \param[in] gamma: Non negative real number, same as gamma in the equation. gamma larger than 1 make the shadows 304 /// darker, while gamma smaller than 1 make dark regions lighter. 305 /// \param[in] gain: The constant multiplier. 306 /// \param[out] output: Adjusted image of same shape and type. 307 Status AdjustGamma(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float gamma, float gain); 308 309 /// \brief Returns image with adjusted saturation. 310 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCv compatible type, see CVTensor. 311 /// \param alpha: Alpha value to adjust saturation by. Should be a positive number. 312 /// If user input one value in python, the range is [1 - value, 1 + value]. 313 /// 0 will give a black and white image, 1 will give the original image while 314 /// 2 will enhance the saturation by a factor of 2. 315 /// \param output: Adjusted image of same shape and type. 316 Status AdjustSaturation(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float alpha); 317 318 /// \brief Adjust the sharpness of the input image. 319 /// \param[in] input: Tensor of input image. 320 /// \param[out] output: Tensor of output image. 321 /// \param[in] alpha: How much to adjust the sharpness. 322 /// \return Status code. 323 Status AdjustSharpness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float alpha); 324 325 /// \brief Returns image with adjusted hue. 326 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCv compatible type, see CVTensor. 327 /// \param hue: Hue value to adjust by, should be within range [-0.5, 0.5]. 0.5 and - 0.5 will reverse the hue channel 328 /// completely. 329 /// If user input one value in python, the range is [-value, value]. 330 /// \param output: Adjusted image of same shape and type. 331 Status AdjustHue(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float hue); 332 333 /// \brief Returns image with equalized histogram. 334 /// \param[in] input: Tensor of shape <H,W,3>/<H,W,1>/<H,W> in RGB/Grayscale and 335 /// any OpenCv compatible type, see CVTensor. 336 /// \param[out] output: Equalized image of same shape and type. 337 Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 338 339 /// \brief Masks out a random section from the image with set dimension 340 /// \param input: input Tensor 341 /// \param output: cutOut Tensor 342 /// \param box_height: height of the cropped box 343 /// \param box_width: width of the cropped box 344 /// \param num_patches: number of boxes to cut out from the image 345 /// \param bounded: boolean flag to toggle between random erasing and cutout 346 /// \param random_color: whether or not random fill value should be used 347 /// \param fill_colors: vector of color fill values for erase 348 /// \param is_hwc: Check if input is HWC/CHW format 349 Status CutOut(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t box_height, 350 int32_t box_width, int32_t num_patches, bool bounded, bool random_color, std::mt19937 *rnd, 351 std::vector<uint8_t> fill_colors = {}, bool is_hwc = true); 352 353 /// \brief Erase the input image with given value 354 /// \param input: input Tensor 355 /// \param output: erase Tensor 356 /// \param top: top of the cropped box 357 /// \param left: left of the cropped box 358 /// \param height: height of the cropped box 359 /// \param width: width of the cropped box 360 /// \param value: fill value for erase 361 /// \param inplace: whether to apply erasing inplace 362 Status Erase(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t top, int32_t left, 363 int32_t height, int32_t width, const std::vector<float> &value, bool inplace); 364 365 /// \brief Invert the colors of the input image. 366 /// \param[in] input: Tensor of input image. 367 /// \param[out] output: Tensor of output image. 368 /// \return Status code. 369 Status Invert(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 370 371 /// \brief Pads the input image and puts the padded image in the output 372 /// \param input: input Tensor 373 /// \param output: padded Tensor 374 /// \param pad_top: amount of padding done in top 375 /// \param pad_bottom: amount of padding done in bottom 376 /// \param pad_left: amount of padding done in left 377 /// \param pad_right: amount of padding done in right 378 /// \param border_types: the interpolation to be done in the border 379 /// \param fill_r: red fill value for pad 380 /// \param fill_g: green fill value for pad 381 /// \param fill_b: blue fill value for pad. 382 Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top, 383 const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types, 384 uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0); 385 386 /// \brief Posterize the input image by reducing the number of bits for ecah color channel. 387 /// \param[in] input: Tensor of input image. 388 /// \param[out] output: Tensor of output image. 389 /// \param[in] bits: The number of bits to keep for each channel. 390 /// \return Status code. 391 Status Posterize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, uint8_t bits); 392 393 /// \brief Add AlexNet-style PCA-based noise to an image. 394 /// \param[in] input The input image. 395 /// \param[out] output The output image. 396 /// \param[in] rnd_r Random weight for red channel. 397 /// \param[in] rnd_g Random weight for green channel. 398 /// \param[in] rnd_b Random weight for blue channel. 399 /// \return Status code. 400 Status RandomLighting(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rnd_r, float rnd_g, 401 float rnd_b); 402 403 /// \brief Take in a 4 channel image in RBGA to RGB 404 /// \param[in] input The input image 405 /// \param[out] output The output image 406 /// \return Status code 407 Status RgbaToRgb(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 408 409 /// \brief Take in a 4 channel image in RBGA to BGR 410 /// \param[in] input The input image 411 /// \param[out] output The output image 412 /// \return Status code 413 Status RgbaToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 414 415 /// \brief Take in a 3 channel image in RBG to BGR 416 /// \param[in] input The input image 417 /// \param[out] output The output image 418 /// \return Status code 419 Status RgbToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 420 421 /// \brief Take in a 3 channel image in RBG to GRAY 422 /// \param[in] input The input image 423 /// \param[out] output The output image 424 /// \return Status code 425 Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); 426 427 /// \brief Get jpeg image width and height 428 /// \param input: CVTensor containing the not decoded image 1D bytes 429 /// \param img_width: the jpeg image width 430 /// \param img_height: the jpeg image height 431 Status GetJpegImageInfo(const std::shared_ptr<Tensor> &input, int *img_width, int *img_height); 432 433 /// \brief Get an affine matrix that applies affine transformation 434 /// \param[in] input Input Tensor 435 /// \param[in] matrix The transformation matrix 436 /// \param[in] degrees Range of the rotation degrees 437 /// \param[in] translation The horizontal and vertical translations 438 /// \param[in] scale The scaling factor 439 /// \param[in] shear The shear angle 440 Status GetAffineMatrix(const std::shared_ptr<Tensor> &input, std::vector<float_t> *matrix, float_t degrees, 441 const std::vector<float_t> &translation, float_t scale, const std::vector<float_t> &shear); 442 443 /// \brief Geometrically transform the input image 444 /// \param[in] input Input Tensor 445 /// \param[out] output Transformed Tensor 446 /// \param[in] degrees Range of the rotation degrees 447 /// \param[in] translation The horizontal and vertical translations 448 /// \param[in] scale The scaling factor 449 /// \param[in] shear The shear angle 450 /// \param[in] interpolation The interpolation mode 451 /// \param[in] fill_value Fill value for pad 452 Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float_t degrees, 453 const std::vector<float_t> &translation, float_t scale, const std::vector<float_t> &shear, 454 InterpolationMode interpolation, const std::vector<uint8_t> &fill_value); 455 456 /// \brief Filter the input image with a Gaussian kernel 457 /// \param[in] input Input Tensor 458 /// \param[out] output Transformed Tensor 459 /// \param[in] kernel_size_x Gaussian kernel size of width 460 /// \param[in] kernel_size_y Gaussian kernel size of height 461 /// \param[in] sigma_x Gaussian kernel standard deviation of width 462 /// \param[in] sigma_y Gaussian kernel standard deviation of height 463 Status GaussianBlur(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t kernel_size_x, 464 int32_t kernel_size_y, float sigma_x, float sigma_y); 465 466 /// \brief Apply perspective transformation on input image. 467 /// \param[in] input Input Tensor. 468 /// \param[out] output Transformed Tensor. 469 /// \param[in] start_points List containing four lists of two integers corresponding to four 470 /// corners [top-left, top-right, bottom-right, bottom-left] of the original image. 471 /// \param[in] end_points List containing four lists of two integers corresponding to four 472 /// corners [top-left, top-right, bottom-right, bottom-left] of the transformed image. 473 /// \param[in] interpolation Method of interpolation. 474 Status Perspective(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, 475 const std::vector<std::vector<int32_t>> &start_points, 476 const std::vector<std::vector<int32_t>> &end_points, InterpolationMode interpolation); 477 478 /// \brief Slice tensor to multiple patches. 479 /// \param[in] input Input Tensor 480 /// \param[out] output Vector of Output Tensor 481 /// \param[in] num_height Number of patches in vertical direction. 482 /// \param[in] num_width Number of patches in horizontal direction. 483 /// \param[in] slice_mode Mode represents padding or drop. 484 /// \param[in] fill_value The value of filled pixel in right and bottom border when padding. 485 Status SlicePatches(const std::shared_ptr<Tensor> &input, std::vector<std::shared_ptr<Tensor>> *output, 486 int32_t num_height, int32_t num_width, SliceMode slice_mode, uint8_t fill_value); 487 488 /// \brief Solarize the image by inverting all pixel values within the threshold. 489 /// \param[in] input Input Tensor 490 /// \param[out] output Output Tensor 491 /// \param[in] threshold Pixel value range to be inverted. 492 Status Solarize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, 493 const std::vector<float> &threshold); 494 495 /// \brief Compute patch height and width. 496 /// \param[in] input Input CVTensor 497 /// \param[out] patch_size Size of patch 498 /// \param[in] num_height Number of patches in vertical direction. 499 /// \param[in] num_width Number of patches in horizontal direction. 500 /// \param[in] slice_mode Mode represents padding or drop. 501 Status ComputePatchSize(const std::shared_ptr<CVTensor> &input_cv, 502 std::shared_ptr<std::pair<int32_t, int32_t>> *patch_size, int32_t num_height, int32_t num_width, 503 SliceMode slice_mode); 504 505 /// \brief Rescale and convert HWC to CHW format. 506 /// \param[in] input The input image 507 /// \param[in] data_type The output data type 508 /// \param[out] output The output image 509 /// \return Status code 510 Status ToTensor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type); 511 512 /// \brief Generate a vector that contains n numbers between start and end with evenly interval. 513 /// \param[in] start Start number. 514 /// \param[in] end End number. 515 /// \param[in] n Count of numbers. 516 /// \param[in] scale Zoom scale. 517 /// \param[in] offset Bias. 518 /// \param[in] round Round input to the nearest integer. 519 std::vector<float> Linspace(float start, float end, int32_t n, float scale = 1.0, float offset = 0, bool round = false); 520 521 /// \brief Round input to the nearest integer. Note that this function implements the "round half to even" to break 522 /// ties when a number is equidistant from two integers. 523 /// \param[in] value Input value. 524 float Round(float value); 525 526 /// \brief Perform the selected augment. 527 /// \param[in] input The input tensor. 528 /// \param[in] output The output tensor. 529 /// \param[in] op_name The selected op. 530 /// \param[in] magnitude The magnitude value. 531 /// \param[in] interpolation Possible options for interpolation method. 532 /// \param[in] fill_value Values used to fill. 533 Status ApplyAugment(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::string &op_name, 534 float magnitude, InterpolationMode interpolation, const std::vector<uint8_t> &fill_value); 535 536 /// \brief Encode the image as JPEG data. 537 /// \param[in] image The image to be encoded. 538 /// \param[out] output The Tensor data. 539 /// \param[in] quality The quality for the output tensor, in range of [1, 100]. Default: 75. 540 /// \return The status code. 541 Status EncodeJpeg(const std::shared_ptr<Tensor> &image, std::shared_ptr<Tensor> *output, int quality = 75); 542 543 /// \brief Encode the image as PNG data. 544 /// \param[in] image The image to be encoded. 545 /// \param[out] output The Tensor data. 546 /// \param[in] compression_level The compression_level for encoding, in range of [0, 9]. Default: 6. 547 /// \return The status code. 548 Status EncodePng(const std::shared_ptr<Tensor> &image, std::shared_ptr<Tensor> *output, int compression_level = 6); 549 550 /// \brief Reads a file in binary mode. 551 /// \param[in] filename The path to the file to be read. 552 /// \param[out] output The binary data. 553 /// \return The status code. 554 Status ReadFile(const std::string &filename, std::shared_ptr<Tensor> *output); 555 556 /// \brief Reads a image file and decode it into one or three channels data. 557 /// \param[in] filename The path to the image file to be read. 558 /// \param[out] output Output Tensor. 559 /// \param[in] mode The read mode used for optionally converting the image, can be one of 560 /// [ImageReadMode::kUNCHANGED, ImageReadMode::kGRAYSCALE, ImageReadMode::kCOLOR]. Default: 561 /// ImageReadMode::kUNCHANGED. 562 /// - ImageReadMode::kUNCHANGED, remain the output in the original format. 563 /// - ImageReadMode::kGRAYSCALE, convert the output into one channel grayscale data. 564 /// - ImageReadMode::kCOLOR, convert the output into three channels RGB color data. 565 /// \return The status code. 566 Status ReadImage(const std::string &filename, std::shared_ptr<Tensor> *output, 567 ImageReadMode mode = ImageReadMode::kUNCHANGED); 568 569 /// \brief Write the one dimension uint8 data into a file using binary mode. 570 /// \param[in] filename The path to the file to be written. 571 /// \param[in] data The tensor data. 572 /// \return The status code. 573 Status WriteFile(const std::string &filename, const std::shared_ptr<Tensor> &data); 574 575 /// \brief Write the image data into a JPEG file. 576 /// \param[in] filename The path to the file to be written. 577 /// \param[in] image The data tensor. 578 /// \param[in] quality The quality for JPEG file, in range of [1, 100]. Default: 75. 579 /// \return Status code. 580 Status WriteJpeg(const std::string &filename, const std::shared_ptr<Tensor> &image, int quality = 75); 581 582 /// \brief Write the image into a PNG file. 583 /// \param[in] filename The path to the file to be written. 584 /// \param[in] image The data tensor. 585 /// \param[in] compression_level The compression level for PNG file, in range of [0, 9]. Default: 6. 586 /// \return Status code. 587 Status WritePng(const std::string &filename, const std::shared_ptr<Tensor> &image, int compression_level = 6); 588 589 /// \brief Dump the abnormal image to disk and facilitate user to check it. 590 /// \param[in] image The data Tensor. 591 /// \param[in] status The previous error status which is needed to append more info. 592 /// \return Status code. 593 Status DumpImageAndAppendStatus(const std::shared_ptr<Tensor> &image, const Status &status); 594 595 /// \brief Check the unsupported image and dump it to disk. 596 /// \param[in] image The data Tensor. 597 /// \return Status code. 598 Status CheckUnsupportedImage(const std::shared_ptr<Tensor> &image); 599 } // namespace dataset 600 } // namespace mindspore 601 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_ 602