• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef IMAGE_PROCESS_H_
18 #define IMAGE_PROCESS_H_
19 
20 #include <algorithm>
21 #include <cmath>
22 #include <iostream>
23 #include <limits>
24 #include <vector>
25 
26 #include "lite_cv/lite_mat.h"
27 
28 namespace mindspore {
29 namespace dataset {
/// Value of pi.
#define CV_PI 3.1415926535897932384626433832795

/// EXIF orientation codes (1-8). The names give the rotation in degrees and whether the image is mirrored.
/// NOTE(review): the values appear to follow the standard EXIF "Orientation" tag numbering; confirm against callers.
#define IM_TOOL_EXIF_ORIENTATION_0_DEG 1
#define IM_TOOL_EXIF_ORIENTATION_0_DEG_MIRROR 2
#define IM_TOOL_EXIF_ORIENTATION_180_DEG 3
#define IM_TOOL_EXIF_ORIENTATION_180_DEG_MIRROR 4
#define IM_TOOL_EXIF_ORIENTATION_90_DEG_MIRROR 5
#define IM_TOOL_EXIF_ORIENTATION_90_DEG 6
#define IM_TOOL_EXIF_ORIENTATION_270_DEG_MIRROR 7
#define IM_TOOL_EXIF_ORIENTATION_270_DEG 8

/// NOTE(review): the value is 9 although the name suggests the 3 channels of an RGB image; verify against the
/// code that uses this macro before changing it.
#define NUM_OF_RGB_CHANNELS 9

/// Data type selectors.
#define IM_TOOL_DATA_TYPE_FLOAT (1)
#define IM_TOOL_DATA_TYPE_UINT8 (2)

/// Return status codes.
#define IM_TOOL_RETURN_STATUS_SUCCESS (0)
#define IM_TOOL_RETURN_STATUS_INVALID_INPUT (1)
#define IM_TOOL_RETURN_STATUS_FAILED (2)
45 
/// \brief Round a floating-point expression to the nearest integer (halves rounded away from zero) and clamp the
///     result to the int16_t range [-32768, 32767].
/// \note The argument is parenthesized so that low-precedence expressions (e.g. `c ? a : b`) expand correctly.
/// \note X is evaluated twice, so do not pass expressions with side effects.
/// \note The expansion deliberately keeps its trailing semicolon for compatibility with existing call sites;
///     as a result the macro can only be used as the last part of a statement, not inside a larger expression.
#define INT16_CAST(X)                                                                                       \
  static_cast<int16_t>(                                                                                     \
    ::std::min(::std::max(static_cast<int>((X) + ((X) >= 0.f ? 0.5f : -0.5f)), -32768), 32767));
48 
/// \brief Border handling modes used when padding or filtering an image.
enum PaddBorderType {
  /// Fills the border with constant values.
  PADD_BORDER_CONSTANT = 0,
  /// Fills the border with replicate mode.
  PADD_BORDER_REPLICATE = 1,
  /// Fills the border with reflect 101 mode.
  PADD_BORDER_REFLECT_101 = 4,
  /// Default pad mode, use reflect 101 mode.
  PADD_BORDER_DEFAULT = PADD_BORDER_REFLECT_101
};
55 
/// \brief Configuration for default (anchor) box generation and box decoding.
/// \note The scalar members are value-initialized so that a default-constructed BoxesConfig never exposes
///     indeterminate floats. Aggregate initialization still works (C++14 and later).
/// \note The meaning of the individual fields is inferred from their names; confirm against GetDefaultBoxes and
///     ConvertBoxes before relying on these descriptions.
struct BoxesConfig {
  std::vector<size_t> img_shape;                   ///< Image shape (presumably height and width).
  std::vector<int> num_default;                    ///< Presumably the number of default boxes per feature map.
  std::vector<int> feature_size;                   ///< Presumably the size of each feature map.
  float min_scale = 0.0f;                          ///< Minimum box scale.
  float max_scale = 0.0f;                          ///< Maximum box scale.
  std::vector<std::vector<float>> aspect_rations;  ///< Aspect ratios (the misspelled name is part of the API).
  std::vector<int> steps;                          ///< Presumably the step (stride) of each feature map.
  std::vector<float> prior_scaling;                ///< Scaling factors; ConvertBoxes takes this config.
};
67 
/// \brief Resize an image using the bilinear algorithm. Currently only the uint8 data type is supported, and the
///     image must have 1 or 3 channels.
/// \param[in] src Input image data.
/// \param[out] dst Output image data.
/// \param[in] dst_w The width of the output image.
/// \param[in] dst_h The height of the output image.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_dst;
///
///     /* Resize to (256, 256, 3) */
///     ResizeBilinear(lite_mat_src, lite_mat_dst, 256, 256);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API ResizeBilinear(const LiteMat &src, LiteMat &dst, int dst_w, int dst_h);
86 
/// \brief Initialize a LiteMat from raw pixel data. The conversions documented as currently supported are
///     RGBA to RGB and RGBA to BGR (see pixel_type for the accepted values).
/// \note The length of the buffer pointed to by data must correspond to w multiplied by h.
/// \param[in] data Input image data.
/// \param[in] pixel_type The type of pixel (refer to enum LPixelType).
///   - LPixelType.BGR, pixel in BGR type.
///   - LPixelType.RGB, pixel in RGB type.
///   - LPixelType.RGBA, pixel in RGBA type.
///   - LPixelType.RGBA2GRAY, convert image from RGBA to GRAY.
///   - LPixelType.RGBA2BGR, convert image from RGBA to BGR.
///   - LPixelType.RGBA2RGB, convert image from RGBA to RGB.
///   - LPixelType.NV212BGR, convert image from NV21 to BGR.
///   - LPixelType.NV122BGR, convert image from NV12 to BGR.
/// \param[in] data_type The type of data (refer to LDataType class).
/// \param[in] w The width of the output image.
/// \param[in] h The height of the output image.
/// \param[out] m LiteMat used to store the image data.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_dst;
///     InitFromPixel(p_rgb, LPixelType::RGB, LDataType::UINT8, width, height, lite_mat_dst);
///
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API InitFromPixel(const unsigned char *data, LPixelType pixel_type, LDataType data_type, int w, int h,
                               LiteMat &m);
114 
/// \brief Convert the data type of an image. The only conversion currently supported is uint8 to float.
/// \param[in] src Input image data.
/// \param[out] dst Output image data.
/// \param[in] scale Scale applied to the pixel values (default: 1.0).
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src;
///     InitFromPixel(p_rgb, LPixelType::RGB, LDataType::UINT8, width, height, lite_mat_src);
///
///     LiteMat lite_mat_dst;
///     ConvertTo(lite_mat_src, lite_mat_dst);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API ConvertTo(const LiteMat &src, LiteMat &dst, double scale = 1.0);
131 
/// \brief Crop an image. Images with 1 or 3 channels are supported.
/// \param[in] src Input image data.
/// \param[out] dst Output image data.
/// \param[in] x The x coordinate of the starting point of the cropped region.
/// \param[in] y The y coordinate of the starting point of the cropped region.
/// \param[in] w The width of the cropped region.
/// \param[in] h The height of the cropped region.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_dst;
///
///     /* Crop to (32, 32, 3) */
///     Crop(lite_mat_src, lite_mat_dst, 0, 0, 32, 32);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API Crop(const LiteMat &src, LiteMat &dst, int x, int y, int w, int h);
151 
/// \brief Normalize an image. Currently the only supported data type is float.
/// \param[in] src Input image data.
/// \param[out] dst Output image data.
/// \param[in] mean Mean of the data set.
/// \param[in] std Norm of the data set (presumably the standard deviation, judging by the parameter name).
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_src2;
///     ConvertTo(lite_mat_src, lite_mat_src2);
///     LiteMat lite_mat_dst;
///
///     /* Normalize */
///     std::vector<float> means = {0.485, 0.456, 0.406};
///     std::vector<float> stds = {0.229, 0.224, 0.225};
///     SubStractMeanNormalize(lite_mat_src2, lite_mat_dst, means, stds);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API SubStractMeanNormalize(const LiteMat &src, LiteMat &dst, const std::vector<float> &mean,
                                        const std::vector<float> &std);
174 
/// \brief Pad an image. Images with 1 or 3 channels are supported.
/// \param[in] src Input image data.
/// \param[out] dst Output image data.
/// \param[in] top The length of the top padding.
/// \param[in] bottom The length of the bottom padding.
/// \param[in] left The length of the left padding.
/// \param[in] right The length of the right padding.
/// \param[in] pad_type The type of pad.
///   - PaddBorderType.PADD_BORDER_CONSTANT, fills the border with constant values.
///   - PaddBorderType.PADD_BORDER_REPLICATE, fills the border with replicate mode.
///   - PaddBorderType.PADD_BORDER_REFLECT_101, fills the border with reflect 101 mode.
///   - PaddBorderType.PADD_BORDER_DEFAULT, default pad mode, use reflect 101 mode.
/// \param[in] fill_b_or_gray Fill value for the B channel, or for the GRAY channel (default: 0).
/// \param[in] fill_g Fill value for the G channel (default: 0).
/// \param[in] fill_r Fill value for the R channel (default: 0).
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::FLOAT32);
///     LiteMat lite_mat_dst;
///
///     /* Pad image with 4 pixels */
///     Pad(lite_mat_src, lite_mat_dst, 4, 4, 4, 4, PaddBorderType::PADD_BORDER_CONSTANT);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int right,
                     PaddBorderType pad_type, uint8_t fill_b_or_gray = 0, uint8_t fill_g = 0, uint8_t fill_r = 0);
203 
/// \brief Extract a single channel of an image by its index.
/// \param[in] src Input image data.
/// \param[out] dst Output image data.
/// \param[in] col The index of the channel to extract (the example below uses 0 for the first channel).
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_dst;
///
///     /* Extract the first channel of image */
///     ExtractChannel(lite_mat_src, lite_mat_dst, 0);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API ExtractChannel(LiteMat &src, LiteMat &dst, int col);
220 
/// \brief Split an image into its individual channels.
/// \param[in] src Input image data.
/// \param[out] mv Vector of LiteMat that receives all channels.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8);
///     std::vector<LiteMat> lite_mat_dst;
///
///     /* Extract all channels of image */
///     Split(lite_mat_src, lite_mat_dst);
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API Split(const LiteMat &src, std::vector<LiteMat> &mv);
235 
/// \brief Create a multi-channel image out of several single-channel arrays.
/// \param[in] mv Single-channel inputs to be merged.
/// \param[out] dst Output image data.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8);
///     std::vector<LiteMat> lite_mat_dst;
///
///     /* Extract all channels of image */
///     Split(lite_mat_src, lite_mat_dst);
///
///     /* Merge all channels to an image */
///     LiteMat lite_mat_dst2;
///     Merge(lite_mat_dst, lite_mat_dst2);
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API Merge(const std::vector<LiteMat> &mv, LiteMat &dst);
254 
/// \brief Apply an affine transformation to a 1-channel image.
/// \param[in] src Input image data.
/// \param[out] out_img Output image data.
/// \param[in] M Affine transformation matrix, given as an array of 6 doubles.
/// \param[in] dsize The size of the output image (the example below passes {width, height}).
/// \param[in] borderValue The pixel value used for filling after the image is transformed.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height) */
///     LiteMat lite_mat_src(width, height, (void *)p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_src2;
///     ConvertRgbToGray(lite_mat_src, LDataType::UINT8, width, height, lite_mat_src2);
///
///     /* Define Affine matrix and apply */
///     LiteMat lite_mat_dst;
///     double M[6] = {1, 0, 0,
///                    0, 1, 0};
///     Affine(lite_mat_src2, lite_mat_dst, M, {width, height}, UINT8_C1(0));
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API Affine(LiteMat &src, LiteMat &out_img, const double M[6], std::vector<size_t> dsize,
                        UINT8_C1 borderValue);
278 
/// \brief Apply an affine transformation to a 3-channel image.
/// \param[in] src Input image data.
/// \param[out] out_img Output image data.
/// \param[in] M Affine transformation matrix, given as an array of 6 doubles.
/// \param[in] dsize The size of the output image (the example below passes {width, height}).
/// \param[in] borderValue The pixel value used for filling after the image is transformed.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_dst;
///
///     /* Define Affine matrix and apply */
///     double M[6] = {1, 0, 20,
///                    0, 1, 20};
///     Affine(lite_mat_src, lite_mat_dst, M, {width, height}, UINT8_C3(0, 0, 0));
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API Affine(LiteMat &src, LiteMat &out_img, const double M[6], std::vector<size_t> dsize,
                        UINT8_C3 borderValue);
300 
/// \brief Get default anchor boxes for Faster R-CNN, SSD, YOLO etc.
/// \param[in] config A BoxesConfig structure describing how the boxes are generated.
/// \return The generated default boxes, each stored as a vector of floats.
std::vector<std::vector<float>> DATASET_API GetDefaultBoxes(const BoxesConfig &config);
304 
/// \brief Convert the prediction boxes to the actual boxes of (y, x, h, w).
/// \param[in,out] boxes Boxes to convert. Presumably overwritten with the actual-size boxes, since this is the
///     only non-const argument and the function returns nothing (confirm against the implementation).
/// \param[in] default_boxes Default boxes.
/// \param[in] config A BoxesConfig structure.
void DATASET_API ConvertBoxes(std::vector<std::vector<float>> &boxes,
                              const std::vector<std::vector<float>> &default_boxes, const BoxesConfig &config);
311 
/// \brief Apply Non-Maximum Suppression.
/// \param[in] all_boxes All input boxes.
/// \param[in] all_scores Scores of all boxes after they have been run through the network.
///     NOTE(review): passed by non-const reference, so the implementation may modify it; confirm.
/// \param[in] thres IOU threshold.
/// \param[in] max_boxes Maximum number of output boxes.
/// \par Example
/// \code
///     /* Apply NMS on bboxes */
///     std::vector<std::vector<float>> all_boxes = {{1, 1, 2, 2}, {3, 3, 4, 4}, {5, 5, 6, 6}, {5, 5, 6, 6}};
///     std::vector<float> all_scores = {0.6, 0.5, 0.4, 0.9};
///     std::vector<int> keep = ApplyNms(all_boxes, all_scores, 0.5, 10);
/// \endcode
/// \return Remaining bounding boxes (as a vector of int, presumably indices into all_boxes).
std::vector<int> DATASET_API ApplyNms(const std::vector<std::vector<float>> &all_boxes, std::vector<float> &all_scores,
                                      float thres, int max_boxes);
327 
/// \brief Apply an affine transformation to an image using linear interpolation.
/// \param[in] src Input image data.
/// \param[out] dst Output image data.
/// \param[in] M Transformation matrix.
/// \param[in] dst_w The width of the output image.
/// \param[in] dst_h The height of the output image.
/// \param[in] borderType Edge processing type.
///   - PaddBorderType.PADD_BORDER_CONSTANT, fills the border with constant values.
///   - PaddBorderType.PADD_BORDER_REPLICATE, fills the border with replicate mode.
///   - PaddBorderType.PADD_BORDER_REFLECT_101, fills the border with reflect 101 mode.
///   - PaddBorderType.PADD_BORDER_DEFAULT, default pad mode, use reflect 101 mode.
/// \param[in] borderValue Boundary fill value.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_dst;
///
///     /* Define Affine matrix and apply */
///     double M[6] = {1, 0, 20,
///                    0, 1, 20};
///     LiteMat Matrix(3, 2, M, LDataType::DOUBLE);
///     std::vector<uint8_t> border_value = {0, 0, 0};
///     WarpAffineBilinear(lite_mat_src, lite_mat_dst, Matrix, width, height,
///                        PaddBorderType::PADD_BORDER_CONSTANT, border_value);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int dst_w, int dst_h,
                                    PaddBorderType borderType, std::vector<uint8_t> &borderValue);
358 
/// \brief Apply a perspective transformation to an image using linear interpolation.
/// \param[in] src Input image data.
/// \param[out] dst Output image data.
/// \param[in] M Transformation matrix (the example below obtains it from GetPerspectiveTransform).
/// \param[in] dst_w The width of the output image.
/// \param[in] dst_h The height of the output image.
/// \param[in] borderType Edge processing type.
///   - PaddBorderType.PADD_BORDER_CONSTANT, fills the border with constant values.
///   - PaddBorderType.PADD_BORDER_REPLICATE, fills the border with replicate mode.
///   - PaddBorderType.PADD_BORDER_REFLECT_101, fills the border with reflect 101 mode.
///   - PaddBorderType.PADD_BORDER_DEFAULT, default pad mode, use reflect 101 mode.
/// \param[in] borderValue Boundary fill value.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_dst;
///
///     /* Get Perspective matrix and apply */
///     std::vector<Point> src = {Point(165, 270), Point(835, 270), Point(360, 125), Point(615, 125)};
///     std::vector<Point> dst = {Point(165, 270), Point(835, 270), Point(100, 100), Point(500, 30)};
///     LiteMat M;
///     GetPerspectiveTransform(src, dst, M);
///     std::vector<uint8_t> border_value = {0, 0, 0};
///     WarpPerspectiveBilinear(lite_mat_src, lite_mat_dst, M, width, height,
///                             PaddBorderType::PADD_BORDER_CONSTANT, border_value);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API WarpPerspectiveBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int dst_w, int dst_h,
                                         PaddBorderType borderType, std::vector<uint8_t> &borderValue);
390 
/// \brief Compute a 2D rotation matrix.
/// \param[in] x The x-axis value of the rotation point.
/// \param[in] y The y-axis value of the rotation point.
/// \param[in] angle Rotation angle.
/// \param[in] scale Scaling ratio.
/// \param[out] M Output transformation matrix.
/// \par Example
/// \code
///     /* Get Rotation matrix */
///     double angle = 60.0;
///     double scale = 0.5;
///     LiteMat M;
///     GetRotationMatrix2D(1.0f, 2.0f, angle, scale, M);
///     std::cout << M.width_ << " " << M.height_ << " " << M.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API GetRotationMatrix2D(float x, float y, double angle, double scale, LiteMat &M);
408 
/// \brief Compute a perspective transformation matrix from pairs of points.
/// \param[in] src_point Input coordinate points (the example below passes four points).
/// \param[in] dst_point Output coordinate points (the example below passes four points).
/// \param[out] M Output matrix.
/// \par Example
/// \code
///     /* Get Perspective matrix */
///     std::vector<Point> src = {Point(165, 270), Point(835, 270), Point(360, 125), Point(615, 125)};
///     std::vector<Point> dst = {Point(165, 270), Point(835, 270), Point(100, 100), Point(500, 30)};
///     LiteMat M;
///     GetPerspectiveTransform(src, dst, M);
///     std::cout << M.width_ << " " << M.height_ << " " << M.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API GetPerspectiveTransform(std::vector<Point> src_point, std::vector<Point> dst_point, LiteMat &M);
424 
/// \brief Compute an affine transformation matrix from pairs of points.
/// \param[in] src_point Input coordinate points (the example below passes three points).
/// \param[in] dst_point Output coordinate points (the example below passes three points).
/// \param[out] M Output matrix.
/// \par Example
/// \code
///     /* Get Affine matrix */
///     std::vector<Point> src = {Point(50, 50), Point(200, 50), Point(50, 200)};
///     std::vector<Point> dst = {Point(40, 40), Point(100, 40), Point(50, 90)};
///     LiteMat M;
///     GetAffineTransform(src, dst, M);
///     std::cout << M.width_ << " " << M.height_ << " " << M.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API GetAffineTransform(std::vector<Point> src_point, std::vector<Point> dst_point, LiteMat &M);
440 
/// \brief Transpose a matrix.
/// \param[in] src Input matrix.
/// \param[out] dst Output matrix.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_src2;
///     ConvertTo(lite_mat_src, lite_mat_src2);
///     LiteMat lite_mat_dst;
///
///     /* Transpose image */
///     Transpose(lite_mat_src2, lite_mat_dst);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API Transpose(const LiteMat &src, LiteMat &dst);
458 
/// \brief Filter the image with a Gaussian kernel.
/// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 is supported now.
/// \param[out] dst LiteMat image after processing.
/// \param[in] ksize The size of the Gaussian kernel. It should be a vector of size 2 as {kernel_x, kernel_y}, and
///     both values should be positive and odd.
/// \param[in] sigmaX The Gaussian kernel standard deviation of width. It should be a positive value.
/// \param[in] sigmaY The Gaussian kernel standard deviation of height (default=0.f). It should be a positive value;
///     otherwise the value of sigmaX is used.
/// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT).
///   - PaddBorderType.PADD_BORDER_CONSTANT, fills the border with constant values.
///   - PaddBorderType.PADD_BORDER_REPLICATE, fills the border with replicate mode.
///   - PaddBorderType.PADD_BORDER_REFLECT_101, fills the border with reflect 101 mode.
///   - PaddBorderType.PADD_BORDER_DEFAULT, default pad mode, use reflect 101 mode.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_dst;
///
///     /* Blur image */
///     GaussianBlur(lite_mat_src, lite_mat_dst, {3, 5}, 3, 3);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API GaussianBlur(const LiteMat &src, LiteMat &dst, const std::vector<int> &ksize, double sigmaX,
                              double sigmaY = 0.f, PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT);
485 
/// \brief Detect edges in an image.
/// \param[in] src LiteMat image to be processed. Only single channel LiteMat of type UINT8 is supported now.
/// \param[out] dst LiteMat image after processing.
/// \param[in] low_thresh The lower bound of the edge. A pixel with a value below it will not be considered as a
///     boundary. It should be a nonnegative value.
/// \param[in] high_thresh The higher bound of the edge. A pixel with a value over it will always be considered as
///     a boundary. It should be a nonnegative value and no less than low_thresh.
/// \param[in] ksize The size of Sobel kernel (default=3). It can only be 3, 5 or 7.
/// \param[in] L2gradient Whether to use L2 distance while calculating gradient (default=false).
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src;
///     InitFromPixel(p_rgb, LPixelType::RGB, LDataType::UINT8, width, height, lite_mat_src);
///     LiteMat lite_mat_src2;
///     ConvertRgbToGray(lite_mat_src, LDataType::UINT8, width, height, lite_mat_src2);
///
///     LiteMat lite_mat_dst;
///     Canny(lite_mat_src2, lite_mat_dst, 200, 300, 5);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API Canny(const LiteMat &src, LiteMat &dst, double low_thresh, double high_thresh, int ksize = 3,
                       bool L2gradient = false);
510 
/// \brief Apply a 2D convolution over the image.
/// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 and FLOAT32 is supported now.
/// \param[in] kernel LiteMat 2D convolution kernel. Only LiteMat of type FLOAT32 is supported now.
/// \param[out] dst LiteMat image after processing.
/// \param[in] dst_type Output data type of dst.
/// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT).
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_dst;
///
///     LiteMat kernel;
///     kernel.Init(3, 3, 1, LDataType::FLOAT32);
///     float *kernel_ptr = kernel;
///     for (int i = 0; i < 9; i++) {
///         kernel_ptr[i] = i % 2;
///     }
///     Conv2D(lite_mat_src, kernel, lite_mat_dst, LDataType::UINT8);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API Conv2D(const LiteMat &src, const LiteMat &kernel, LiteMat &dst, LDataType dst_type,
                        PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT);
535 
/// \brief Apply a separable linear convolution over the image.
/// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 and FLOAT32 is supported now.
/// \param[in] kx LiteMat 1D convolution kernel. Only LiteMat of type FLOAT32 is supported now.
/// \param[in] ky LiteMat 1D convolution kernel. Only LiteMat of type FLOAT32 is supported now.
/// \param[out] dst LiteMat image after processing.
/// \param[in] dst_type Output data type of dst.
/// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT).
/// \return A bool status; presumably true on success, like the other functions in this header (TODO confirm).
bool DATASET_API ConvRowCol(const LiteMat &src, const LiteMat &kx, const LiteMat &ky, LiteMat &dst, LDataType dst_type,
                            PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT);
545 
/// \brief Filter the image with a Sobel kernel.
/// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 is supported now.
/// \param[out] dst LiteMat image after processing.
/// \param[in] flag_x Order of the derivative x. It should be a nonnegative value and cannot be 0 at the same
///     time as flag_y.
/// \param[in] flag_y Order of the derivative y. It should be a nonnegative value and cannot be 0 at the same
///     time as flag_x.
/// \param[in] ksize The size of Sobel kernel (default=3). It can only be 1, 3, 5 or 7.
/// \param[in] scale The scale factor for the computed derivative values (default=1.0).
/// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT).
///   - PaddBorderType.PADD_BORDER_CONSTANT, fills the border with constant values.
///   - PaddBorderType.PADD_BORDER_REPLICATE, fills the border with replicate mode.
///   - PaddBorderType.PADD_BORDER_REFLECT_101, fills the border with reflect 101 mode.
///   - PaddBorderType.PADD_BORDER_DEFAULT, default pad mode, use reflect 101 mode.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src;
///     InitFromPixel(p_rgb, LPixelType::RGB, LDataType::UINT8, width, height, lite_mat_src);
///     LiteMat lite_mat_src2;
///     ConvertRgbToGray(lite_mat_src, LDataType::UINT8, width, height, lite_mat_src2);
///
///     LiteMat lite_mat_dst;
///     Sobel(lite_mat_src2, lite_mat_dst, 1, 0, 3, 1, PaddBorderType::PADD_BORDER_REPLICATE);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API Sobel(const LiteMat &src, LiteMat &dst, int flag_x, int flag_y, int ksize = 3, double scale = 1.0,
                       PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT);
575 
/// \brief Convert an RGB image or color image to a BGR image.
/// \param[in] src Input image data.
/// \param[in] data_type The type of data (refer to LDataType class).
/// \param[in] w The width of the output image.
/// \param[in] h The height of the output image.
/// \param[out] mat Output image data.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src;
///     lite_mat_src.Init(width, height, channel, p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_dst;
///
///     ConvertRgbToBgr(lite_mat_src, LDataType::UINT8, width, height, lite_mat_dst);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API ConvertRgbToBgr(const LiteMat &src, const LDataType &data_type, int w, int h, LiteMat &mat);
594 
/// \brief Convert an RGB image or color image to a grayscale image.
/// \param[in] src Input image data.
/// \param[in] data_type The type of data (refer to LDataType class).
/// \param[in] w The width of the output image.
/// \param[in] h The height of the output image.
/// \param[out] mat Output image data.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src;
///     lite_mat_src.Init(width, height, channel, p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_dst;
///
///     ConvertRgbToGray(lite_mat_src, LDataType::UINT8, width, height, lite_mat_dst);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API ConvertRgbToGray(const LiteMat &src, LDataType data_type, int w, int h, LiteMat &mat);
613 
/// \brief Resize an image while preserving its aspect ratio (AR), using a filler for the remaining area.
/// \param[in] src Input image data.
/// \param[out] dst Output image data.
/// \param[in] h The height of the output image.
/// \param[in] w The width of the output image.
/// \param[in] ratioShiftWShiftH Pointer to a 3-element array that records the ratio, width shift, and height shift.
///     NOTE(review): passed as a pointer to a non-const array, so it is presumably written by the function; confirm.
/// \param[in] invM Pointer to a 2x3 array described as the "fixed direction array".
///     NOTE(review): also a pointer to non-const data, presumably written by the function; confirm.
/// \param[in] img_orientation Way of export direction.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src;
///     lite_mat_src.Init(width, height, channel, p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_dst;
///
///     float ratioShiftWShiftH[3] = {0};
///     float invM[2][3] = {{0, 0, 0}, {0, 0, 0}};
///     int h = 1000;
///     int w = 1000;
///     ResizePreserveARWithFiller(lite_mat_src, lite_mat_dst, h, w, &ratioShiftWShiftH, &invM, 0);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API ResizePreserveARWithFiller(LiteMat &src, LiteMat &dst, int h, int w, float (*ratioShiftWShiftH)[3],
                                            float (*invM)[2][3], int img_orientation);
639 
/// \brief Transpose the input image from shape (H, W, C) to shape (C, H, W).
/// \param[in] src Input image data.
/// \param[out] dst Output image data.
/// \par Example
/// \code
///     /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */
///     LiteMat lite_mat_src;
///     lite_mat_src.Init(width, height, channel, p_rgb, LDataType::UINT8);
///     LiteMat lite_mat_dst;
///
///     HWC2CHW(lite_mat_src, lite_mat_dst);
///     std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl;
/// \endcode
/// \return Return true if the transform succeeds.
bool DATASET_API HWC2CHW(LiteMat &src, LiteMat &dst);
655 }  // namespace dataset
656 }  // namespace mindspore
657 #endif  // IMAGE_PROCESS_H_
658