• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_LITE_IMAGE_UTILS_H_
18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_LITE_IMAGE_UTILS_H_
19 
20 #include <csetjmp>
21 #include <memory>
22 #include <random>
23 #include <set>
24 #include <string>
25 #include <vector>
26 
27 #if defined(_WIN32) || defined(_WIN64)
28 #undef HAVE_STDDEF_H
29 #undef HAVE_STDLIB_H
30 #endif
31 
32 #include "jpeglib.h"
33 #include "jerror.h"
34 
35 #include "minddata/dataset/core/tensor.h"
36 #include "minddata/dataset/kernels/image/lite_cv/image_process.h"
37 #include "minddata/dataset/kernels/tensor_op.h"
38 #include "minddata/dataset/util/status.h"
39 #include "minddata/dataset/util/validators.h"
40 
41 namespace mindspore {
42 namespace dataset {
43 constexpr dsize_t kChannelIndexHWC = 2;      // images are hwc, so index 2 represents number of channels
44 constexpr dsize_t kChannelIndexCHW = 0;      // images are chw, so index 0 represents number of channels
45 constexpr int32_t kMaxBitValue = 255;        // max bit value after decode is 255
46 constexpr dsize_t kMinImageChannel = 1;      // image ops support minimum of 1 channel
47 constexpr dsize_t kDefaultImageChannel = 3;  // images are 3 channels in general
48 constexpr dsize_t kMaxImageChannel = 4;      // image ops support maximum of 4 channels
49 constexpr float kHalf = 0.5;                 // to get the half of a value
50 constexpr dsize_t kMinJpegQuality = 1;       // the minimum quality for JPEG
51 constexpr dsize_t kMaxJpegQuality = 100;     // the maximum quality for JPEG
52 constexpr dsize_t kDefaultImageRank = 3;     // images are rank 3 (hwc) in general
53 constexpr dsize_t kMinImageRank = 2;         // images have at least 2 dimensions
54 constexpr int32_t kMaxPixelValue = 255;      // presumably max 8-bit pixel value, same as kMaxBitValue (confirm)
55 constexpr dsize_t kHeightIndex = 0;  // index of height of HWC images
56 constexpr dsize_t kWidthIndex = 1;   // index of width of HWC images
57 constexpr dsize_t kRIndex = 0;       // index of red channel in RGB format
58 constexpr dsize_t kGIndex = 1;       // index of green channel in RGB format
59 constexpr dsize_t kBIndex = 2;       // index of blue channel in RGB format
60 
61 void JpegErrorExitCustom(j_common_ptr cinfo);  // NOTE(review): presumably a libjpeg error_exit handler - confirm
62 
63 struct JpegErrorManagerCustom {
64   // libjpeg error manager ("public" fields)
65   struct jpeg_error_mgr pub;
66   // jump buffer used to return control to the caller
67   jmp_buf setjmp_buffer;
68 };
69 
70 #if defined(ENABLE_MINDDATA_PYTHON)
71 bool IsNonEmptyPNG(const std::shared_ptr<Tensor> &input);  // NOTE(review): presumably checks for PNG data - confirm
72 
73 /// \brief Returns rescaled image
74 /// \param[in] input Tensor of shape <H,W,C> or <H,W> and any OpenCV compatible type, see CVTensor.
75 /// \param[out] output Rescaled image Tensor of same input shape and type DE_FLOAT32
76 /// \param[in] rescale rescale parameter
77 /// \param[in] shift shift parameter
78 Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rescale, float shift);
79 
80 /// \brief Swap the red and blue pixels (RGB <-> BGR)
81 /// \param[in] input Tensor of shape <H,W,3> and any OpenCV compatible type, see CVTensor.
82 /// \param[out] output Swapped image of same shape and type
83 Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output);
84 #endif
85 
86 bool IsNonEmptyJPEG(const std::shared_ptr<Tensor> &input);  // NOTE(review): presumably checks for JPEG data - confirm
87 
88 void JpegSetSource(j_decompress_ptr c_info, const void *data, int64_t data_size);  // data_size: presumably bytes - confirm
89 
90 Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x = 0, int y = 0,
91                          int w = 0, int h = 0);  // x, y, w, h default to 0; presumably the crop ROI (cf. Crop) - confirm
92 
93 /// \brief Returns the cropped ROI of an image
94 /// \param[in] input Tensor of shape <H,W,C> or <H,W> and any OpenCV compatible type, see CVTensor.
95 /// \param[in] x Starting horizontal position of ROI
96 /// \param[in] y Starting vertical position of ROI
97 /// \param[in] w Width of the ROI
98 /// \param[in] h Height of the ROI
99 /// \param[out] output Cropped image Tensor of shape <h,w,C> or <h,w> and same input type.
100 Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h);
101 
102 /// \brief Returns decoded image
103 /// Supported images:
104 ///  BMP JPEG JPG PNG TIFF
105 /// supported by opencv; if users need more image analysis capabilities, please compile opencv separately.
106 /// \param[in] input CVTensor containing the not decoded image 1D bytes
107 /// \param[out] output Decoded image Tensor of shape <H,W,C> and type DE_UINT8. Pixel order is RGB
108 Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
109 
110 /// \brief Get jpeg image width and height
111 /// \param[in] input CVTensor containing the not decoded image 1D bytes
112 /// \param[out] img_width The jpeg image width
113 /// \param[out] img_height The jpeg image height
114 Status GetJpegImageInfo(const std::shared_ptr<Tensor> &input, int *img_width, int *img_height);
115 
116 /// \brief Returns normalized image
117 /// \param[in] input Tensor of shape <H,W,C> in RGB order and any OpenCV compatible type, see CVTensor.
118 /// \param[in] vec_mean Vector of floats holding the mean of each channel in RGB order
119 /// \param[in] vec_std Vector of floats holding the std of each channel in RGB order
120 /// \param[out] output Normalized image Tensor of same input shape and type DE_FLOAT32
121 Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
122                  const std::vector<float> &vec_mean, const std::vector<float> &vec_std);
123 
124 /// \brief Returns resized image.
125 /// \param[in] input Input image Tensor
126 /// \param[in] output_height Height of output
127 /// \param[in] output_width Width of output
128 /// \param[in] fx Horizontal scale (default 0.0)
129 /// \param[in] fy Vertical scale (default 0.0)
130 /// \param[in] mode The interpolation mode (default InterpolationMode::kLinear)
131 /// \param[out] output Resized image of shape <output_height,output_width,C> or <output_height,output_width>
132 ///                and same type as input
133 Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height,
134               int32_t output_width, double fx = 0.0, double fy = 0.0,
135               InterpolationMode mode = InterpolationMode::kLinear);
136 
137 /// \brief Returns resized image(s) for a TensorRow input.
138 /// \param[in] inputs input TensorRow
139 /// \param[in] height Height of output
140 /// \param[in] width Width of output
141 /// \param[in] img_orientation Angle method of image rotation
142 /// \param[out] outputs Resized image of shape <height,width,C> and same type as input
143 Status ResizePreserve(const TensorRow &inputs, int32_t height, int32_t width, int32_t img_orientation,
144                       TensorRow *outputs);
145 
146 /// \brief Convert a 3 channel image from RGB to BGR
147 /// \param[in] input The input image
148 /// \param[out] output The output image
149 /// \return Status code
150 Status RgbToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
151 
152 /// \brief Convert a 3 channel image from RGB to GRAY
153 /// \param[in] input The input image
154 /// \param[out] output The output image
155 /// \return Status code
156 Status RgbToGray(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
157 
158 /// \brief Pads the input image and puts the padded image in the output
159 /// \param[in] input input Tensor
160 /// \param[out] output padded Tensor
161 /// \param[in] pad_top Amount of padding done in top
162 /// \param[in] pad_bottom Amount of padding done in bottom
163 /// \param[in] pad_left Amount of padding done in left
164 /// \param[in] pad_right Amount of padding done in right
165 /// \param[in] border_types The interpolation to be done in the border
166 /// \param[in] fill_r Red fill value for pad (default 0)
167 /// \param[in] fill_g Green fill value for pad (default 0)
168 /// \param[in] fill_b Blue fill value for pad (default 0)
169 Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top,
170            const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types,
171            uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0);
172 
173 /// \brief Rotate the input image according to the given orientation
174 /// \param[in] input Input Tensor
175 /// \param[out] output Rotated Tensor
176 /// \param[in] orientation The EXIF orientation value
177 Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, uint64_t orientation);
178 
179 /// \brief Get an affine matrix that applies affine transformation
180 /// \param[in] input Input Tensor
181 /// \param[out] matrix The transformation matrix
182 /// \param[in] degrees The rotation degrees (a single value)
183 /// \param[in] translation The horizontal and vertical translations
184 /// \param[in] scale The scaling factor
185 /// \param[in] shear The shear angle
186 Status GetAffineMatrix(const std::shared_ptr<Tensor> &input, std::vector<float_t> *matrix, float_t degrees,
187                        const std::vector<float_t> &translation, float_t scale, const std::vector<float_t> &shear);
188 
189 /// \brief Geometrically transform the input image
190 /// \param[in] input Input Tensor
191 /// \param[out] output Transformed Tensor
192 /// \param[in] degrees The rotation degrees (a single value)
193 /// \param[in] translation The horizontal and vertical translations
194 /// \param[in] scale The scaling factor
195 /// \param[in] shear The shear angle
196 /// \param[in] interpolation The interpolation mode
197 /// \param[in] fill_value Fill value for pad
198 Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float_t degrees,
199               const std::vector<float_t> &translation, float_t scale, const std::vector<float_t> &shear,
200               InterpolationMode interpolation, const std::vector<uint8_t> &fill_value);
201 
202 /// \brief Filter the input image with a Gaussian kernel
203 /// \param[in] input Input Tensor
204 /// \param[out] output Blurred Tensor
205 /// \param[in] kernel_size_x Gaussian kernel size of width
206 /// \param[in] kernel_size_y Gaussian kernel size of height
207 /// \param[in] sigma_x Gaussian kernel standard deviation of width
208 /// \param[in] sigma_y Gaussian kernel standard deviation of height
209 Status GaussianBlur(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t kernel_size_x,
210                     int32_t kernel_size_y, float sigma_x, float sigma_y);
211 
212 /// \brief Get the size of the input image.
213 /// \param[in] image Tensor of the image.
214 /// \param[out] size Size of the image as [height, width].
215 /// \return The status code.
216 Status ImageSize(const std::shared_ptr<Tensor> &image, std::vector<dsize_t> *size);
217 
218 /// \brief Validate image dtype, rank and channel.
219 /// \param[in] image Image tensor to be validated.
220 /// \param[in] op_name operator name.
221 /// \param[in] valid_dtype Valid data type of the image tensor. Default: {}, means not to check data type.
222 /// \param[in] valid_rank Valid dimension of the image tensor. Default: {}, means not to check dimension.
223 /// \param[in] valid_channel Valid channel of the image tensor. Default: {}, means not to check channel.
224 Status ValidateImage(const std::shared_ptr<Tensor> &image, const std::string &op_name,
225                      const std::set<uint8_t> &valid_dtype = {}, const std::set<dsize_t> &valid_rank = {},
226                      const std::set<dsize_t> &valid_channel = {});
227 
228 /// \brief Validate image rank.
229 /// \param[in] op_name operator name.
230 /// \param[in] rank The rank of the input image shape.
231 Status ValidateImageRank(const std::string &op_name, int32_t rank);
232 
233 /// \brief Swaps the channels in the image, i.e. converts HWC to CHW
234 /// \param[in] input Tensor of shape <H,W,C> or <H,W> and any OpenCV compatible type, see CVTensor.
235 /// \param[out] output Tensor of shape <C,H,W> or <H,W> and same input type.
236 Status HwcToChw(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
237 }  // namespace dataset
238 }  // namespace mindspore
239 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_LITE_IMAGE_UTILS_H_
240