/**
 * Copyright 2020-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_LITE_IMAGE_UTILS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_LITE_IMAGE_UTILS_H_

#include <csetjmp>
#include <memory>
#include <random>
#include <set>
#include <string>
#include <vector>

// NOTE(review): presumably these macros are undefined on Windows to avoid redefinition conflicts with the
// configuration header pulled in by jpeglib.h -- confirm against the libjpeg build configuration in use.
#if defined(_WIN32) || defined(_WIN64)
#undef HAVE_STDDEF_H
#undef HAVE_STDLIB_H
#endif

#include "jpeglib.h"
#include "jerror.h"

#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/kernels/image/lite_cv/image_process.h"
#include "minddata/dataset/kernels/tensor_op.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/util/validators.h"

namespace mindspore {
namespace dataset {
constexpr dsize_t kChannelIndexHWC = 2;      // images are hwc, so index 2 represents number of channels
constexpr dsize_t kChannelIndexCHW = 0;      // images are chw, so index 0 represents number of channels
constexpr int32_t kMaxBitValue = 255;        // max bit value after decode is 255
constexpr dsize_t kMinImageChannel = 1;      // image ops support minimum of 1 channel
constexpr dsize_t kDefaultImageChannel = 3;  // images are 3 channels in general
constexpr dsize_t kMaxImageChannel = 4;      // image ops support maximum of 4 channels
constexpr float kHalf = 0.5;                 // to get the half of a value
constexpr dsize_t kMinJpegQuality = 1;       // the minimum quality for JPEG
constexpr dsize_t kMaxJpegQuality = 100;     // the maximum quality for JPEG
constexpr dsize_t kDefaultImageRank = 3;     // images are of rank 3 (hwc) in general
constexpr dsize_t kMinImageRank = 2;         // images have at least 2 dimensions
constexpr int32_t kMaxPixelValue = 255;      // the maximum pixel value
constexpr dsize_t kHeightIndex = 0;          // index of height of HWC images
constexpr dsize_t kWidthIndex = 1;           // index of width of HWC images
constexpr dsize_t kRIndex = 0;               // index of red channel in RGB format
constexpr dsize_t kGIndex = 1;               // index of green channel in RGB format
constexpr dsize_t kBIndex = 2;               // index of blue channel in RGB format

/// \brief Custom error-exit callback for libjpeg.
/// \note NOTE(review): presumably installed as the `error_exit` handler of a JpegErrorManagerCustom and returns
///     control to the caller through its `setjmp_buffer` -- confirm against the implementation.
/// \param[in] cinfo The libjpeg common object that reported the error
void JpegErrorExitCustom(j_common_ptr cinfo);

/// \brief libjpeg error manager extended with a jump buffer for returning to the caller.
struct JpegErrorManagerCustom {
  // "public" fields
  struct jpeg_error_mgr pub;
  // for return to caller
  jmp_buf setjmp_buffer;
};

#if defined(ENABLE_MINDDATA_PYTHON)
/// \brief Check whether the input holds a non-empty PNG image.
/// \note NOTE(review): the exact criteria are defined in the implementation, not visible here -- confirm.
/// \param[in] input Tensor to be checked
/// \return True if the check passes, false otherwise
bool IsNonEmptyPNG(const std::shared_ptr<Tensor> &input);

/// \brief Returns Rescaled image
/// \param[in] input Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor.
/// \param[out] output Rescaled image Tensor of same input shape and type DE_FLOAT32
/// \param[in] rescale rescale parameter
/// \param[in] shift shift parameter
Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rescale, float shift);

/// \brief Swap the red and blue pixels (RGB <-> BGR)
/// \param[in] input Tensor of shape <H,W,3> and any OpenCv compatible type, see CVTensor.
/// \param[out] output Swapped image of same shape and type
Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output);
#endif

/// \brief Check whether the input holds a non-empty JPEG image.
/// \note NOTE(review): the exact criteria are defined in the implementation, not visible here -- confirm.
/// \param[in] input Tensor to be checked
/// \return True if the check passes, false otherwise
bool IsNonEmptyJPEG(const std::shared_ptr<Tensor> &input);

/// \brief Set the data source of a libjpeg decompress object.
/// \note NOTE(review): presumably makes c_info read from the in-memory buffer of data_size bytes starting at
///     data -- confirm against the implementation.
/// \param[in] c_info The libjpeg decompress object
/// \param[in] data Pointer to the source data
/// \param[in] data_size Size of the source data
void JpegSetSource(j_decompress_ptr c_info, const void *data, int64_t data_size);

/// \brief Decode a JPEG image with an optional crop region.
/// \note NOTE(review): presumably (x, y, w, h) describe the ROI as in Crop; the meaning of the all-zero defaults
///     is not visible from this header -- confirm against the implementation.
/// \param[in] input Tensor containing the not decoded image
/// \param[out] output Decoded image Tensor
/// \param[in] x Starting horizontal position of ROI. Default: 0
/// \param[in] y Starting vertical position of ROI. Default: 0
/// \param[in] w Width of the ROI. Default: 0
/// \param[in] h Height of the ROI. Default: 0
Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x = 0, int y = 0,
                         int w = 0, int h = 0);

/// \brief Returns cropped ROI of an image
/// \param[in] input Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor.
/// \param[out] output Cropped image Tensor of shape <h,w,C> or <h,w> and same input type.
/// \param[in] x Starting horizontal position of ROI
/// \param[in] y Starting vertical position of ROI
/// \param[in] w Width of the ROI
/// \param[in] h Height of the ROI
Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h);

/// \brief Returns Decoded image
/// Supported images:
///  BMP JPEG JPG PNG TIFF
/// supported by opencv, if users need more image analysis capabilities, please compile opencv particularly.
/// \param[in] input CVTensor containing the not decoded image 1D bytes
/// \param[out] output Decoded image Tensor of shape <H,W,C> and type DE_UINT8. Pixel order is RGB
Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);

/// \brief Get jpeg image width and height
/// \param[in] input CVTensor containing the not decoded image 1D bytes
/// \param[out] img_width The jpeg image width
/// \param[out] img_height The jpeg image height
Status GetJpegImageInfo(const std::shared_ptr<Tensor> &input, int *img_width, int *img_height);

/// \brief Returns Normalized image
/// \param[in] input Tensor of shape <H,W,C> in RGB order and any OpenCv compatible type, see CVTensor.
/// \param[out] output Normalized image Tensor of same input shape and type DE_FLOAT32
/// \param[in] vec_mean Vector of float values which are mean of each channel in RGB order
/// \param[in] vec_std Vector of float values which are std of each channel in RGB order
Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
                 const std::vector<float> &vec_mean, const std::vector<float> &vec_std);

/// \brief Returns Resized image.
/// \param[in] input Input Tensor
/// \param[out] output Resized image of shape <outputHeight,outputWidth,C> or <outputHeight,outputWidth>
///                and same type as input
/// \param[in] output_height Height of output
/// \param[in] output_width Width of output
/// \param[in] fx Horizontal scale. Default: 0.0
/// \param[in] fy Vertical scale. Default: 0.0
/// \param[in] mode The interpolation mode. Default: InterpolationMode::kLinear
Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height,
              int32_t output_width, double fx = 0.0, double fy = 0.0,
              InterpolationMode mode = InterpolationMode::kLinear);

/// \brief Returns Resized image.
/// \param[in] inputs input TensorRow
/// \param[in] height Height of output
/// \param[in] width Width of output
/// \param[in] img_orientation Angle method of image rotation
/// \param[out] outputs Resized image of shape <height,width,C> and same type as input
Status ResizePreserve(const TensorRow &inputs, int32_t height, int32_t width, int32_t img_orientation,
                      TensorRow *outputs);

/// \brief Convert a 3 channel image from RGB to BGR
/// \param[in] input The input image
/// \param[out] output The output image
/// \return Status code
Status RgbToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);

/// \brief Convert a 3 channel image from RGB to GRAY
/// \param[in] input The input image
/// \param[out] output The output image
/// \return Status code
Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);

/// \brief Pads the input image and puts the padded image in the output
/// \param[in] input input Tensor
/// \param[out] output padded Tensor
/// \param[in] pad_top Amount of padding done in top
/// \param[in] pad_bottom Amount of padding done in bottom
/// \param[in] pad_left Amount of padding done in left
/// \param[in] pad_right Amount of padding done in right
/// \param[in] border_types The type of border to pad with, see BorderType
/// \param[in] fill_r Red fill value for pad. Default: 0
/// \param[in] fill_g Green fill value for pad. Default: 0
/// \param[in] fill_b Blue fill value for pad. Default: 0
Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top,
           const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types,
           uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0);

/// \brief Rotate the input image by orientation
/// \param[in] input Input Tensor
/// \param[out] output Rotated Tensor
/// \param[in] orientation The orientation of EXIF
Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, uint64_t orientation);

/// \brief Get an affine matrix that applies affine transformation
/// \param[in] input Input Tensor
/// \param[out] matrix The transformation matrix
/// \param[in] degrees The rotation degrees
/// \param[in] translation The horizontal and vertical translations
/// \param[in] scale The scaling factor
/// \param[in] shear The shear angle
Status GetAffineMatrix(const std::shared_ptr<Tensor> &input, std::vector<float_t> *matrix, float_t degrees,
                       const std::vector<float_t> &translation, float_t scale, const std::vector<float_t> &shear);

/// \brief Geometrically transform the input image
/// \param[in] input Input Tensor
/// \param[out] output Transformed Tensor
/// \param[in] degrees The rotation degrees
/// \param[in] translation The horizontal and vertical translations
/// \param[in] scale The scaling factor
/// \param[in] shear The shear angle
/// \param[in] interpolation The interpolation mode
/// \param[in] fill_value Fill value for pad
Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float_t degrees,
              const std::vector<float_t> &translation, float_t scale, const std::vector<float_t> &shear,
              InterpolationMode interpolation, const std::vector<uint8_t> &fill_value);

/// \brief Filter the input image with a Gaussian kernel
/// \param[in] input Input Tensor
/// \param[out] output Transformed Tensor
/// \param[in] kernel_size_x Gaussian kernel size of width
/// \param[in] kernel_size_y Gaussian kernel size of height
/// \param[in] sigma_x Gaussian kernel standard deviation of width
/// \param[in] sigma_y Gaussian kernel standard deviation of height
Status GaussianBlur(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t kernel_size_x,
                    int32_t kernel_size_y, float sigma_x, float sigma_y);

/// \brief Get the size of input image.
/// \param[in] image Tensor of the image.
/// \param[out] size Size of the image as [height, width].
/// \return The status code.
Status ImageSize(const std::shared_ptr<Tensor> &image, std::vector<dsize_t> *size);

/// \brief Validate image Dtype, rank and channel.
/// \param[in] image Image tensor to be validated.
/// \param[in] op_name operator name.
/// \param[in] valid_dtype Valid data type of the image tensor. Default: {}, means not to check data type.
/// \param[in] valid_rank Valid dimension of the image tensor. Default: {}, means not to check dimension.
/// \param[in] valid_channel Valid channel of the image tensor. Default: {}, means not to check channel.
Status ValidateImage(const std::shared_ptr<Tensor> &image, const std::string &op_name,
                     const std::set<uint8_t> &valid_dtype = {}, const std::set<dsize_t> &valid_rank = {},
                     const std::set<dsize_t> &valid_channel = {});

/// \brief Validate image rank.
/// \param[in] op_name operator name.
/// \param[in] rank refers to the rank of input image shape.
Status ValidateImageRank(const std::string &op_name, int32_t rank);

/// \brief Swaps the channels in the image, i.e. converts HWC to CHW
/// \param[in] input Tensor of shape <H,W,C> or <H,W> and any OpenCv compatible type, see CVTensor.
/// \param[out] output Tensor of shape <C,H,W> or <H,W> and same input type.
Status HwcToChw(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_LITE_IMAGE_UTILS_H_