• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef IMAGE_PROCESS_H_
18 #define IMAGE_PROCESS_H_
19 
20 #include <math.h>
21 #include <vector>
22 #include <algorithm>
23 #include <iostream>
24 
25 #include "lite_cv/lite_mat.h"
26 
27 namespace mindspore {
28 namespace dataset {
29 
30 #define CV_PI 3.1415926535897932384626433832795  // Value of pi.
31 #define IM_TOOL_EXIF_ORIENTATION_0_DEG 1  // Orientation codes 1-8; presumably the EXIF orientation tag values - TODO confirm.
32 #define IM_TOOL_EXIF_ORIENTATION_0_DEG_MIRROR 2
33 #define IM_TOOL_EXIF_ORIENTATION_180_DEG 3
34 #define IM_TOOL_EXIF_ORIENTATION_180_DEG_MIRROR 4
35 #define IM_TOOL_EXIF_ORIENTATION_90_DEG_MIRROR 5
36 #define IM_TOOL_EXIF_ORIENTATION_90_DEG 6
37 #define IM_TOOL_EXIF_ORIENTATION_270_DEG_MIRROR 7
38 #define IM_TOOL_EXIF_ORIENTATION_270_DEG 8
39 #define NUM_OF_RGB_CHANNELS 9  // NOTE(review): 9 is unexpected for an RGB channel count - confirm intent.
40 #define IM_TOOL_DATA_TYPE_FLOAT (1)  // Data type code: float.
41 #define IM_TOOL_DATA_TYPE_UINT8 (2)  // Data type code: uint8.
42 #define IM_TOOL_RETURN_STATUS_SUCCESS (0)  // Return status code: success.
43 #define IM_TOOL_RETURN_STATUS_INVALID_INPUT (1)  // Return status code: invalid input.
44 #define IM_TOOL_RETURN_STATUS_FAILED (2)  // Return status code: failed.
45 
/// \brief Round a floating-point value to the nearest integer (halves are rounded away from zero) and
///     clamp the result to the int16_t range [-32768, 32767].
/// \note The argument is parenthesized in the expansion, so expressions containing low-precedence operators
///     (e.g. `cond ? a : b`) are rounded as a whole.
/// \note X is evaluated twice; do not pass an expression with side effects.
/// \note The clamp is applied after the conversion to int, so X must already be representable as an int.
/// \note The expansion ends with a semicolon (kept for compatibility with existing call sites), so the macro
///     can only be used as the tail of a statement, not inside a larger expression.
#define INT16_CAST(X)   \
  static_cast<int16_t>( \
    ::std::min(::std::max(static_cast<int>((X) + ((X) >= 0.f ? 0.5f : -0.5f)), -32768), 32767));
48 
49 enum PaddBorderType {  // Border handling modes, passed as pad_type / borderType to the image functions in this header.
50   PADD_BORDER_CONSTANT = 0,                     /**< Fills the border with constant values. */
51   PADD_BORDER_REPLICATE = 1,                    /**< Fills the border with replicate mode. */
52   PADD_BORDER_REFLECT_101 = 4,                  /**< Fills the border with reflect 101 mode. */
53   PADD_BORDER_DEFAULT = PADD_BORDER_REFLECT_101 /**< Default pad mode, use reflect 101 mode. */
54 };
55 
56 struct BoxesConfig {  // Configuration passed to GetDefaultBoxes() and ConvertBoxes().
57  public:  // Redundant for a struct (members are public by default).
58   std::vector<size_t> img_shape;  // presumably the input image shape - TODO confirm element order
59   std::vector<int> num_default;  // presumably the number of default boxes per feature map - TODO confirm
60   std::vector<int> feature_size;  // presumably the size of each feature map - TODO confirm
61   float min_scale;  // no default member initializer; callers must set it
62   float max_scale;  // no default member initializer; callers must set it
63   std::vector<std::vector<float>> aspect_rations;  // NOTE(review): presumably "aspect_ratios" misspelled; kept for API compatibility
64   std::vector<int> steps;  // NOTE(review): meaning not visible in this header - confirm against GetDefaultBoxes()
65   std::vector<float> prior_scaling;  // NOTE(review): meaning not visible in this header - confirm against ConvertBoxes()
66 };
67 
68 /// \brief Resize an image using the bilinear algorithm. Currently only the uint8 data type is supported,
69 ///          and the supported channel counts are 3 and 1.
70 /// \param[in] src Input image data.
71 /// \param[out] dst Output image data.
72 /// \param[in] dst_w The width of the output image.
73 /// \param[in] dst_h The height of the output image.
74 bool ResizeBilinear(const LiteMat &src, LiteMat &dst, int dst_w, int dst_h);
75 
76 /// \brief Init a LiteMat from pixel data. The currently supported conversions are RGBA to RGB and RGBA to BGR.
77 /// \note The length of the data buffer must match the product of w and h.
78 /// \param[in] data Input image data.
79 /// \param[in] pixel_type The pixel type (an LPixelType value).
80 /// \param[in] data_type The data type (an LDataType value).
81 /// \param[in] w The width of the output image.
82 /// \param[in] h The height of the output image.
83 /// \param[out] m LiteMat used to store the image data.
84 bool InitFromPixel(const unsigned char *data, LPixelType pixel_type, LDataType data_type, int w, int h, LiteMat &m);
85 
86 /// \brief Convert the data type of an image. The currently supported conversion is uint8 to float.
87 /// \param[in] src Input image data.
88 /// \param[out] dst Output image data.
89 /// \param[in] scale Scale factor for the pixel values (default: 1.0).
90 bool ConvertTo(const LiteMat &src, LiteMat &dst, double scale = 1.0);
91 
92 /// \brief Crop an image. The supported channel counts are 3 and 1.
93 /// \param[in] src Input image data.
94 /// \param[out] dst Output image data.
95 /// \param[in] x The x coordinate of the starting point of the crop region.
96 /// \param[in] y The y coordinate of the starting point of the crop region.
97 /// \param[in] w The width of the crop region.
98 /// \param[in] h The height of the crop region.
99 bool Crop(const LiteMat &src, LiteMat &dst, int x, int y, int w, int h);
100 
101 /// \brief Normalize an image. Currently the supported data type is float.
102 /// \param[in] src Input image data.
103 /// \param[out] dst Output image data.
104 /// \param[in] mean Mean of the data set.
105 /// \param[in] std Norm (presumably the standard deviation - TODO confirm) of the data set.
106 bool SubStractMeanNormalize(const LiteMat &src, LiteMat &dst, const std::vector<float> &mean,
107                             const std::vector<float> &std);
108 
109 /// \brief Pad an image. The supported channel counts are 3 and 1.
110 /// \param[in] src Input image data.
111 /// \param[out] dst Output image data.
112 /// \param[in] top The padding length at the top.
113 /// \param[in] bottom The padding length at the bottom.
114 /// \param[in] left The padding length on the left.
115 /// \param[in] right The padding length on the right.
116 /// \param[in] pad_type The type of padding (a PaddBorderType value).
117 /// \param[in] fill_b_or_gray Fill value for the B channel or the GRAY channel (default: 0).
118 /// \param[in] fill_g Fill value for the G channel (default: 0).
119 /// \param[in] fill_r Fill value for the R channel (default: 0).
120 bool Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int right, PaddBorderType pad_type,
121          uint8_t fill_b_or_gray = 0, uint8_t fill_g = 0, uint8_t fill_r = 0);
122 
123 /// \brief Extract a single image channel by index.
124 /// \param[in] src Input image data.
125 /// \param[out] dst Output image data.
126 /// \param[in] col The index of the channel to extract.
127 bool ExtractChannel(LiteMat &src, LiteMat &dst, int col);
128 
129 /// \brief Split the channels of an image into single-channel images.
130 /// \param[in] src Input image data.
131 /// \param[out] mv Output single-channel data.
132 bool Split(const LiteMat &src, std::vector<LiteMat> &mv);
133 
134 /// \brief Create a multi-channel image out of several single-channel arrays.
135 /// \param[in] mv Input single-channel data.
136 /// \param[out] dst Output image data.
137 bool Merge(const std::vector<LiteMat> &mv, LiteMat &dst);
138 
139 /// \brief Apply an affine transformation to a 1-channel image.
140 /// \param[in] src Input image data.
141 /// \param[out] out_img Output image data.
142 /// \param[in] M Affine transformation matrix (array of 6 doubles).
143 /// \param[in] dsize The size of the output image.
144 /// \param[in] borderValue The pixel value used for filling (1-channel uint8 value).
145 bool Affine(LiteMat &src, LiteMat &out_img, const double M[6], std::vector<size_t> dsize, UINT8_C1 borderValue);
146 
147 /// \brief Apply an affine transformation to a 3-channel image.
148 /// \param[in] src Input image data.
149 /// \param[out] out_img Output image data.
150 /// \param[in] M Affine transformation matrix (array of 6 doubles).
151 /// \param[in] dsize The size of the output image.
152 /// \param[in] borderValue The pixel value used for filling (3-channel uint8 value).
153 bool Affine(LiteMat &src, LiteMat &out_img, const double M[6], std::vector<size_t> dsize, UINT8_C3 borderValue);
154 
155 /// \brief Get the default anchor boxes for Faster R-CNN, SSD, YOLO etc.
156 /// \param[in] config A BoxesConfig object (passed by value).
157 std::vector<std::vector<float>> GetDefaultBoxes(const BoxesConfig config);
158 
159 /// \brief Convert the prediction boxes to the actual boxes of (y, x, h, w).
160 /// \param[in,out] boxes Boxes to convert; presumably overwritten in place, since the function returns void.
161 /// \param[in] default_boxes Default boxes.
162 /// \param[in] config A BoxesConfig object (passed by value).
163 void ConvertBoxes(std::vector<std::vector<float>> &boxes, const std::vector<std::vector<float>> &default_boxes,
164                   const BoxesConfig config);
165 
166 /// \brief Apply Non-Maximum Suppression (NMS).
167 /// \param[in] all_boxes All input boxes.
168 /// \param[in] all_scores Scores of all boxes after they have been run through the network.
169 /// \param[in] thres IOU threshold value.
170 /// \param[in] max_boxes Maximum number of output boxes.
171 std::vector<int> ApplyNms(const std::vector<std::vector<float>> &all_boxes, std::vector<float> &all_scores, float thres,
172                           int max_boxes);
173 
174 /// \brief Apply an affine warp to an image using bilinear interpolation.
175 /// \param[in] src Input image data.
176 /// \param[out] dst Output image data.
177 /// \param[in] M Transformation matrix.
178 /// \param[in] dst_w The width of the output image.
179 /// \param[in] dst_h The height of the output image.
180 /// \param[in] borderType Edge processing type.
181 /// \param[in] borderValue Boundary fill value.
182 bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int dst_w, int dst_h,
183                         PaddBorderType borderType, std::vector<uint8_t> &borderValue);
184 
185 /// \brief Apply a perspective warp to an image using bilinear interpolation.
186 /// \param[in] src Input image data.
187 /// \param[out] dst Output image data.
188 /// \param[in] M Transformation matrix.
189 /// \param[in] dst_w The width of the output image.
190 /// \param[in] dst_h The height of the output image.
191 /// \param[in] borderType Edge processing type.
192 /// \param[in] borderValue Boundary fill value.
193 bool WarpPerspectiveBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int dst_w, int dst_h,
194                              PaddBorderType borderType, std::vector<uint8_t> &borderValue);
195 
196 /// \brief Compute a 2D rotation matrix.
197 /// \param[in] x The x coordinate of the rotation point.
198 /// \param[in] y The y coordinate of the rotation point.
199 /// \param[in] angle Rotation angle.
200 /// \param[in] scale Scaling ratio.
201 /// \param[out] M Output transformation matrix.
202 bool GetRotationMatrix2D(float x, float y, double angle, double scale, LiteMat &M);
203 
204 /// \brief Compute the perspective transformation matrix from input and output points.
205 /// \param[in] src_point Input coordinate points.
206 /// \param[in] dst_point Output coordinate points.
207 /// \param[out] M Output matrix.
208 bool GetPerspectiveTransform(std::vector<Point> src_point, std::vector<Point> dst_point, LiteMat &M);
209 
210 /// \brief Compute the affine transformation matrix from input and output points.
211 /// \param[in] src_point Input coordinate points.
212 /// \param[in] dst_point Output coordinate points.
213 /// \param[out] M Output matrix.
214 bool GetAffineTransform(std::vector<Point> src_point, std::vector<Point> dst_point, LiteMat &M);
215 
216 /// \brief Transpose a matrix.
217 /// \param[in] src Input matrix.
218 /// \param[out] dst Output matrix.
219 bool Transpose(const LiteMat &src, LiteMat &dst);
220 
221 /// \brief Filter the image with a Gaussian kernel.
222 /// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 is supported now.
223 /// \param[out] dst LiteMat image after processing.
224 /// \param[in] ksize The size of the Gaussian kernel. It should be a vector of size 2 as {kernel_x, kernel_y}, both
225 ///     values of which should be positive and odd.
226 /// \param[in] sigmaX The Gaussian kernel standard deviation of width. It should be a positive value.
227 /// \param[in] sigmaY The Gaussian kernel standard deviation of height (default=0.f). It should be a positive value;
228 ///     otherwise the value of sigmaX is used.
229 /// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT).
230 bool GaussianBlur(const LiteMat &src, LiteMat &dst, const std::vector<int> &ksize, double sigmaX, double sigmaY = 0.f,
231                   PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT);
232 
233 /// \brief Detect edges in an image.
234 /// \param[in] src LiteMat image to be processed. Only single channel LiteMat of type UINT8 is supported now.
235 /// \param[out] dst LiteMat image after processing.
236 /// \param[in] low_thresh The lower bound of the edge. Pixels with a value below it will not be considered a boundary.
237 ///     It should be a nonnegative value.
238 /// \param[in] high_thresh The higher bound of the edge. Pixels with a value over it will always be
239 ///     considered a boundary. It should be a nonnegative value and no less than low_thresh.
240 /// \param[in] ksize The size of the Sobel kernel (default=3). It can only be 3, 5 or 7.
241 /// \param[in] L2gradient Whether to use L2 distance while calculating the gradient (default=false).
242 bool Canny(const LiteMat &src, LiteMat &dst, double low_thresh, double high_thresh, int ksize = 3,
243            bool L2gradient = false);
244 
245 /// \brief Apply a 2D convolution over the image.
246 /// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 and FLOAT32 is supported now.
247 /// \param[in] kernel LiteMat 2D convolution kernel. Only LiteMat of type FLOAT32 is supported now.
248 /// \param[out] dst LiteMat image after processing.
249 /// \param[in] dst_type Output data type of dst.
250 /// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT).
251 bool Conv2D(const LiteMat &src, const LiteMat &kernel, LiteMat &dst, LDataType dst_type,
252             PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT);
253 
254 /// \brief Apply a separable linear convolution over the image.
255 /// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 and FLOAT32 is supported now.
256 /// \param[in] kx LiteMat 1D convolution kernel. Only LiteMat of type FLOAT32 is supported now.
257 /// \param[in] ky LiteMat 1D convolution kernel. Only LiteMat of type FLOAT32 is supported now.
258 /// \param[out] dst LiteMat image after processing.
259 /// \param[in] dst_type Output data type of dst.
260 /// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT).
261 bool ConvRowCol(const LiteMat &src, const LiteMat &kx, const LiteMat &ky, LiteMat &dst, LDataType dst_type,
262                 PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT);
263 
264 /// \brief Filter the image with a Sobel kernel.
265 /// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 is supported now.
266 /// \param[out] dst LiteMat image after processing.
267 /// \param[in] flag_x Order of the derivative x. It should be a nonnegative value and must not be 0 at the same
268 ///     time as flag_y.
269 /// \param[in] flag_y Order of the derivative y. It should be a nonnegative value and must not be 0
270 ///     at the same time as flag_x.
271 /// \param[in] ksize The size of the Sobel kernel (default=3). It can only be 1, 3, 5 or 7.
272 /// \param[in] scale The scale factor for the computed derivative values (default=1.0).
273 /// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT).
274 bool Sobel(const LiteMat &src, LiteMat &dst, int flag_x, int flag_y, int ksize = 3, double scale = 1.0,
275            PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT);
276 
277 /// \brief Convert an RGB image or color image to a BGR image.
278 /// \param[in] src Input image data.
279 /// \param[in] data_type The data type (an LDataType value).
280 /// \param[in] w The width of the output image.
281 /// \param[in] h The height of the output image.
282 /// \param[out] mat Output image data.
283 bool ConvertRgbToBgr(const LiteMat &src, const LDataType &data_type, int w, int h, LiteMat &mat);
284 
285 /// \brief Convert an RGB image or color image to a grayscale image.
286 /// \param[in] src Input image data.
287 /// \param[in] data_type The data type (an LDataType value).
288 /// \param[in] w The width of the output image.
289 /// \param[in] h The height of the output image.
290 /// \param[out] mat Output image data.
291 bool ConvertRgbToGray(const LiteMat &src, LDataType data_type, int w, int h, LiteMat &mat);
292 
293 /// \brief Resize an image while preserving its aspect ratio (AR), with filler.
294 /// \param[in] src Input image data.
295 /// \param[out] dst Output image data.
296 /// \param[in] h The height of the output image.
297 /// \param[in] w The width of the output image.
298 /// \param[in] ratioShiftWShiftH Pointer to a float[3] that records the ratio, width shift, and height shift.
299 /// \param[in] invM Pointer to a float[2][3]; fixed direction array.
300 /// \param[in] img_orientation Way of export direction.
301 bool ResizePreserveARWithFiller(LiteMat &src, LiteMat &dst, int h, int w, float (*ratioShiftWShiftH)[3],
302                                 float (*invM)[2][3], int img_orientation);
303 
304 }  // namespace dataset
305 }  // namespace mindspore
306 #endif  // IMAGE_PROCESS_H_
307