1 /** 2 * Copyright 2020-2023 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef IMAGE_PROCESS_H_ 18 #define IMAGE_PROCESS_H_ 19 20 #include <algorithm> 21 #include <cmath> 22 #include <iostream> 23 #include <limits> 24 #include <vector> 25 26 #include "lite_cv/lite_mat.h" 27 28 namespace mindspore { 29 namespace dataset { 30 #define CV_PI 3.1415926535897932384626433832795 31 #define IM_TOOL_EXIF_ORIENTATION_0_DEG 1 32 #define IM_TOOL_EXIF_ORIENTATION_0_DEG_MIRROR 2 33 #define IM_TOOL_EXIF_ORIENTATION_180_DEG 3 34 #define IM_TOOL_EXIF_ORIENTATION_180_DEG_MIRROR 4 35 #define IM_TOOL_EXIF_ORIENTATION_90_DEG_MIRROR 5 36 #define IM_TOOL_EXIF_ORIENTATION_90_DEG 6 37 #define IM_TOOL_EXIF_ORIENTATION_270_DEG_MIRROR 7 38 #define IM_TOOL_EXIF_ORIENTATION_270_DEG 8 39 #define NUM_OF_RGB_CHANNELS 9 40 #define IM_TOOL_DATA_TYPE_FLOAT (1) 41 #define IM_TOOL_DATA_TYPE_UINT8 (2) 42 #define IM_TOOL_RETURN_STATUS_SUCCESS (0) 43 #define IM_TOOL_RETURN_STATUS_INVALID_INPUT (1) 44 #define IM_TOOL_RETURN_STATUS_FAILED (2) 45 46 #define INT16_CAST(X) \ 47 static_cast<int16_t>(::std::min(::std::max(static_cast<int>(X + (X >= 0.f ? 0.5f : -0.5f)), -32768), 32767)); 48 49 enum PaddBorderType { 50 PADD_BORDER_CONSTANT = 0, /**< Fills the border with constant values. */ 51 PADD_BORDER_REPLICATE = 1, /**< Fills the border with replicate mode. */ 52 PADD_BORDER_REFLECT_101 = 4, /**< Fills the border with reflect 101 mode. */ 53 PADD_BORDER_DEFAULT = PADD_BORDER_REFLECT_101 /**< Default pad mode, use reflect 101 mode. */ 54 }; 55 56 struct BoxesConfig { 57 public: 58 std::vector<size_t> img_shape; 59 std::vector<int> num_default; 60 std::vector<int> feature_size; 61 float min_scale; 62 float max_scale; 63 std::vector<std::vector<float>> aspect_rations; 64 std::vector<int> steps; 65 std::vector<float> prior_scaling; 66 }; 67 68 /// \brief resizing image by bilinear algorithm, the data type of currently only supports is uint8, 69 /// the channel of currently supports is 3 and 1. 70 /// \param[in] src Input image data. 71 /// \param[in] dst Output image data. 72 /// \param[in] dst_w The width of the output image. 73 /// \param[in] dst_h The length of the output image. 74 /// \par Example 75 /// \code 76 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 77 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8); 78 /// LiteMat lite_mat_dst; 79 /// 80 /// /* Resize to (256, 256, 3) */ 81 /// ResizeBilinear(lite_mat_src, lite_mat_dst, 256, 256); 82 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 83 /// \endcode 84 /// \return Return true if transform successfully. 85 bool DATASET_API ResizeBilinear(const LiteMat &src, LiteMat &dst, int dst_w, int dst_h); 86 87 /// \brief Init Lite Mat from pixel, the conversion of currently supports is rbgaTorgb and rgbaTobgr. 88 /// \note The length of the pointer must be the same as that of the multiplication of w and h. 89 /// \param[in] data Input image data. 90 /// \param[in] pixel_type The type of pixel (refer to enum LPixelType). 91 /// - LPixelType.BGR, pixel in BGR type. 92 /// - LPixelType.RGB, pixel in RGB type. 93 /// - LPixelType.RGBA, pixel in RGBA type. 94 /// - LPixelType.RGBA2GRAY, convert image from RGBA to GRAY. 95 /// - LPixelType.RGBA2BGR, convert image from RGBA to BGR. 96 /// - LPixelType.RGBA2RGB, convert image from RGBA to RGB. 97 /// - LPixelType.NV212BGR, convert image from NV21 to BGR. 98 /// - LPixelType.NV122BGR, convert image from NV12 to BGR. 99 /// \param[in] data_type The type of data (refer to LDataType class). 100 /// \param[in] w The width of the output image. 101 /// \param[in] h The length of the output image. 102 /// \param[in] m Used to store image data. 103 /// \par Example 104 /// \code 105 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 106 /// LiteMat lite_mat_dst; 107 /// InitFromPixel(p_rgb, LPixelType::RGB, LDataType::UINT8, width, height, lite_mat_dst); 108 /// 109 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 110 /// \endcode 111 /// \return Return true if transform successfully. 112 bool DATASET_API InitFromPixel(const unsigned char *data, LPixelType pixel_type, LDataType data_type, int w, int h, 113 LiteMat &m); 114 115 /// \brief convert the data type, the conversion of currently supports is uint8 to float. 116 /// \param[in] src Input image data. 117 /// \param[in] dst Output image data. 118 /// \param[in] scale Scale pixel value(default:1.0). 119 /// \par Example 120 /// \code 121 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 122 /// LiteMat lite_mat_src; 123 /// InitFromPixel(p_rgb, LPixelType::RGB, LDataType::UINT8, width, height, lite_mat_dst); 124 /// 125 /// LiteMat lite_mat_dst; 126 /// ConvertTo(lite_mat_src, lite_mat_dst); 127 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 128 /// \endcode 129 /// \return Return true if transform successfully. 130 bool DATASET_API ConvertTo(const LiteMat &src, LiteMat &dst, double scale = 1.0); 131 132 /// \brief crop image, the channel supports is 3 and 1. 133 /// \param[in] src Input image data. 134 /// \param[in] dst Output image data. 135 /// \param[in] x The x coordinate value of the starting point of the screenshot. 136 /// \param[in] y The y coordinate value of the starting point of the screenshot. 137 /// \param[in] w The width of the screenshot. 138 /// \param[in] h The height of the screenshot. 139 /// \par Example 140 /// \code 141 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 142 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8); 143 /// LiteMat lite_mat_dst; 144 /// 145 /// /* Crop to (32, 32, 3) */ 146 /// Crop(lite_mat_src, lite_mat_dst, 0, 0, 32, 32); 147 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 148 /// \endcode 149 /// \return Return true if transform successfully. 150 bool DATASET_API Crop(const LiteMat &src, LiteMat &dst, int x, int y, int w, int h); 151 152 /// \brief normalize image, currently the supports data type is float. 153 /// \param[in] src Input image data. 154 /// \param[in] dst Output image data. 155 /// \param[in] mean Mean of the data set. 156 /// \param[in] std Norm of the data set. 157 /// \par Example 158 /// \code 159 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 160 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8); 161 /// LiteMat lite_mat_src2; 162 /// ConvertTo(lite_mat_src, lite_mat_src2); 163 /// LiteMat lite_mat_dst; 164 /// 165 /// /* Normalize */ 166 /// std::vector<float> means = {0.485, 0.456, 0.406}; 167 /// std::vector<float> stds = {0.229, 0.224, 0.225}; 168 /// SubStractMeanNormalize(lite_mat_src2, lite_mat_dst, means, stds); 169 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 170 /// \endcode 171 /// \return Return true if transform successfully. 172 bool DATASET_API SubStractMeanNormalize(const LiteMat &src, LiteMat &dst, const std::vector<float> &mean, 173 const std::vector<float> &std); 174 175 /// \brief padd image, the channel supports is 3 and 1. 176 /// \param[in] src Input image data. 177 /// \param[in] dst Output image data. 178 /// \param[in] top The length of top. 179 /// \param[in] bottom The length of bottom. 180 /// \param[in] left The length of left. 181 /// \param[in] right he length of right. 182 /// \param[in] pad_type The type of pad. 183 /// - PaddBorderType.PADD_BORDER_CONSTANT, fills the border with constant values. 184 /// - PaddBorderType.PADD_BORDER_REPLICATE, fills the border with replicate mode. 185 /// - PaddBorderType.PADD_BORDER_REFLECT_101, fills the border with reflect 101 mode. 186 /// - PaddBorderType.PADD_BORDER_DEFAULT, default pad mode, use reflect 101 mode. 187 /// \param[in] fill_b_or_gray B or GRAY. 188 /// \param[in] fill_g G. 189 /// \param[in] fill_r R. 190 /// \par Example 191 /// \code 192 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 193 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::FLOAT32); 194 /// LiteMat lite_mat_dst; 195 /// 196 /// /* Pad image with 4 pixels */ 197 /// Pad(lite_mat_src, lite_mat_dst, 4, 4, 4, 4, PaddBorderType::PADD_BORDER_CONSTANT); 198 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 199 /// \endcode 200 /// \return Return true if transform successfully. 201 bool DATASET_API Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int right, 202 PaddBorderType pad_type, uint8_t fill_b_or_gray = 0, uint8_t fill_g = 0, uint8_t fill_r = 0); 203 204 /// \brief Extract image channel by index. 205 /// \param[in] src Input image data. 206 /// \param[in] dst Output image data. 207 /// \param[in] col The serial number of the channel. 208 /// \par Example 209 /// \code 210 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 211 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8); 212 /// LiteMat lite_mat_dst; 213 /// 214 /// /* Extract the first channel of image */ 215 /// ExtractChannel(lite_mat_src, lite_mat_dst, 0); 216 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 217 /// \endcode 218 /// \return Return true if transform successfully. 219 bool DATASET_API ExtractChannel(LiteMat &src, LiteMat &dst, int col); 220 221 /// \brief Split image channels. 222 /// \param[in] src Input image data. 223 /// \param[in] mv Vector of LiteMat containing all channels. 224 /// \par Example 225 /// \code 226 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 227 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8); 228 /// std::vector<LiteMat> lite_mat_dst; 229 /// 230 /// /* Extract all channels of image */ 231 /// Split(lite_mat_src, lite_mat_dst); 232 /// \endcode 233 /// \return Return true if transform successfully. 234 bool DATASET_API Split(const LiteMat &src, std::vector<LiteMat> &mv); 235 236 /// \brief Create a multi-channel image out of several single-channel arrays. 237 /// \param[in] mv Single channel data. 238 /// \param[in] dst Output image data. 239 /// \par Example 240 /// \code 241 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 242 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8); 243 /// std::vector<LiteMat> lite_mat_dst; 244 /// 245 /// /* Extract all channels of image */ 246 /// Split(lite_mat_src, lite_mat_dst); 247 /// 248 /// /* Merge all channels to an image */ 249 /// LiteMat lite_mat_dst2; 250 /// Merge(lite_mat_dst, lite_mat_dst2); 251 /// \endcode 252 /// \return Return true if transform successfully. 253 bool DATASET_API Merge(const std::vector<LiteMat> &mv, LiteMat &dst); 254 255 /// \brief Apply affine transformation for 1 channel image. 256 /// \param[in] src Input image data. 257 /// \param[in] out_img Output image data. 258 /// \param[in] M[6] Affine transformation matrix. 259 /// \param[in] dsize The size of the output image. 260 /// \param[in] borderValue The pixel value is used for filing after the image is captured. 261 /// \par Example 262 /// \code 263 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height) */ 264 /// LiteMat lite_mat_src(width, height, (void *)p_rgb, LDataType::UINT8); 265 /// LiteMat lite_mat_src2; 266 /// ConvertRgbToGray(lite_mat_src, LDataType::UINT8, width, height, lite_mat_src2); 267 /// 268 /// /* Define Affine matrix and apply */ 269 /// LiteMat lite_mat_dst; 270 /// double M[6] = {1, 0, 0, 271 /// 0, 1, 0}; 272 /// Affine(lite_mat_src2, lite_mat_dst, M, {width, height}, UINT8_C1(0)); 273 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 274 /// \endcode 275 /// \return Return true if transform successfully. 276 bool DATASET_API Affine(LiteMat &src, LiteMat &out_img, const double M[6], std::vector<size_t> dsize, 277 UINT8_C1 borderValue); 278 279 /// \brief Apply affine transformation for 3 channel image. 280 /// \param[in] src Input image data. 281 /// \param[in] out_img Output image data. 282 /// \param[in] M[6] Affine transformation matrix. 283 /// \param[in] dsize The size of the output image. 284 /// \param[in] borderValue The pixel value is used for filing after the image is captured. 285 /// \par Example 286 /// \code 287 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 288 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8); 289 /// LiteMat lite_mat_dst; 290 /// 291 /// /* Define Affine matrix and apply */ 292 /// double M[6] = {1, 0, 20, 293 /// 0, 1, 20}; 294 /// Affine(lite_mat_src, lite_mat_dst, M, {image.cols, image.rows}, UINT8_C3(0, 0, 0)); 295 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 296 /// \endcode 297 /// \return Return true if transform successfully. 298 bool DATASET_API Affine(LiteMat &src, LiteMat &out_img, const double M[6], std::vector<size_t> dsize, 299 UINT8_C3 borderValue); 300 301 /// \brief Get default anchor boxes for Faster R-CNN, SSD, YOLO etc. 302 /// \param[in] config Objects of BoxesConfig structure. 303 std::vector<std::vector<float>> DATASET_API GetDefaultBoxes(const BoxesConfig &config); 304 305 /// \brief Convert the prediction boxes to the actual boxes of (y, x, h, w). 306 /// \param[in] boxes Actual size box. 307 /// \param[in] default_boxes Default box. 308 /// \param[in] config Objects of BoxesConfig structure. 309 void DATASET_API ConvertBoxes(std::vector<std::vector<float>> &boxes, 310 const std::vector<std::vector<float>> &default_boxes, const BoxesConfig &config); 311 312 /// \brief Apply Non-Maximum Suppression. 313 /// \param[in] all_boxes All input boxes. 314 /// \param[in] all_scores Score after all boxes are executed through the network. 315 /// \param[in] thres Pre-value of IOU. 316 /// \param[in] max_boxes Maximum value of output box. 317 /// \par Example 318 /// \code 319 /// /* Apply NMS on bboxes */ 320 /// std::vector<std::vector<float>> all_boxes = {{1, 1, 2, 2}, {3, 3, 4, 4}, {5, 5, 6, 6}, {5, 5, 6, 6}}; 321 /// std::vector<float> all_scores = {0.6, 0.5, 0.4, 0.9}; 322 /// std::vector<int> keep = ApplyNms(all_boxes, all_scores, 0.5, 10); 323 /// \endcode 324 /// \return Remaining bounding boxes. 325 std::vector<int> DATASET_API ApplyNms(const std::vector<std::vector<float>> &all_boxes, std::vector<float> &all_scores, 326 float thres, int max_boxes); 327 328 /// \brief affine image by linear. 329 /// \param[in] src Input image data. 330 /// \param[in] dst Output image data. 331 /// \param[in] M Transformation matrix 332 /// \param[in] dst_w The width of the output image. 333 /// \param[in] dst_h The height of the output image. 334 /// \param[in] borderType Edge processing type. 335 /// - PaddBorderType.PADD_BORDER_CONSTANT, fills the border with constant values. 336 /// - PaddBorderType.PADD_BORDER_REPLICATE, fills the border with replicate mode. 337 /// - PaddBorderType.PADD_BORDER_REFLECT_101, fills the border with reflect 101 mode. 338 /// - PaddBorderType.PADD_BORDER_DEFAULT, default pad mode, use reflect 101 mode. 339 /// \param[in] borderValue Boundary fill value. 340 /// \par Example 341 /// \code 342 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 343 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8); 344 /// LiteMat lite_mat_dst; 345 /// 346 /// /* Define Affine matrix and apply */ 347 /// double M[6] = {1, 0, 20, 348 /// 0, 1, 20}; 349 /// LiteMat Matrix(3, 2, M, LDataType::DOUBLE); 350 /// std::vector<uint8_t> border_value = {0, 0, 0}; 351 /// WarpAffineBilinear(lite_mat_src, lite_mat_dst, Matrix, width, height, 352 /// PaddBorderType::PADD_BORDER_CONSTANT, border_value); 353 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 354 /// \endcode 355 /// \return Return true if transform successfully. 356 bool DATASET_API WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int dst_w, int dst_h, 357 PaddBorderType borderType, std::vector<uint8_t> &borderValue); 358 359 /// \brief affine image by linear. 360 /// \param[in] src Input image data. 361 /// \param[in] dst Output image data. 362 /// \param[in] M Transformation matrix 363 /// \param[in] dst_w The width of the output image. 364 /// \param[in] dst_h The height of the output image. 365 /// \param[in] borderType Edge processing type. 366 /// - PaddBorderType.PADD_BORDER_CONSTANT, fills the border with constant values. 367 /// - PaddBorderType.PADD_BORDER_REPLICATE, fills the border with replicate mode. 368 /// - PaddBorderType.PADD_BORDER_REFLECT_101, fills the border with reflect 101 mode. 369 /// - PaddBorderType.PADD_BORDER_DEFAULT, default pad mode, use reflect 101 mode. 370 /// \param[in] borderValue Boundary fill value. 371 /// \par Example 372 /// \code 373 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 374 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8); 375 /// LiteMat lite_mat_dst; 376 /// 377 /// /* Get Perspective matrix and apply */ 378 /// std::vector<Point> src = {Point(165, 270), Point(835, 270), Point(360, 125), Point(615, 125)}; 379 /// std::vector<Point> dst = {Point(165, 270), Point(835, 270), Point(100, 100), Point(500, 30)}; 380 /// LiteMat M; 381 /// GetPerspectiveTransform(src, dst, M); 382 /// std::vector<uint8_t> border_value = {0, 0, 0}; 383 /// WarpPerspectiveBilinear(lite_mat_src, lite_mat_dst, M, width, height, 384 /// PaddBorderType::PADD_BORDER_CONSTANT, border_value); 385 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 386 /// \endcode 387 /// \return Return true if transform successfully. 388 bool DATASET_API WarpPerspectiveBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int dst_w, int dst_h, 389 PaddBorderType borderType, std::vector<uint8_t> &borderValue); 390 391 /// \brief Matrix rotation. 392 /// \param[in] x The value of the x-axis of the coordinate rotation point. 393 /// \param[in] y The value of the y-axis of the coordinate rotation point. 394 /// \param[in] angle Rotation angle. 395 /// \param[in] scale Scaling ratio. 396 /// \param[in] M Output transformation matrix. 397 /// \par Example 398 /// \code 399 /// /* Get Rotation matrix */ 400 /// double angle = 60.0; 401 /// double scale = 0.5; 402 /// LiteMat M; 403 /// GetRotationMatrix2D(1.0f, 2.0f, angle, scale, M); 404 /// std::cout << M.width_ << " " << M.height_ << " " << M.channel_ << std::endl; 405 /// \endcode 406 /// \return Return true if transform successfully. 407 bool DATASET_API GetRotationMatrix2D(float x, float y, double angle, double scale, LiteMat &M); 408 409 /// \brief Perspective transformation. 410 /// \param[in] src_point Input coordinate point. 411 /// \param[in] dst_point Output coordinate point. 412 /// \param[in] M Output matrix. 413 /// \par Example 414 /// \code 415 /// /* Get Perspective matrix */ 416 /// std::vector<Point> src = {Point(165, 270), Point(835, 270), Point(360, 125), Point(615, 125)}; 417 /// std::vector<Point> dst = {Point(165, 270), Point(835, 270), Point(100, 100), Point(500, 30)}; 418 /// LiteMat M; 419 /// GetPerspectiveTransform(src, dst, M); 420 /// std::cout << M.width_ << " " << M.height_ << " " << M.channel_ << std::endl; 421 /// \endcode 422 /// \return Return true if transform successfully. 423 bool DATASET_API GetPerspectiveTransform(std::vector<Point> src_point, std::vector<Point> dst_point, LiteMat &M); 424 425 /// \brief Affine transformation. 426 /// \param[in] src_point Input coordinate point. 427 /// \param[in] dst_point Output coordinate point. 428 /// \param[in] M Output matrix. 429 /// \par Example 430 /// \code 431 /// /* Get Affine matrix */ 432 /// std::vector<Point> src = {Point(50, 50), Point(200, 50), Point(50, 200)}; 433 /// std::vector<Point> dst = {Point(40, 40), Point(100, 40), Point(50, 90)}; 434 /// LiteMat M; 435 /// GetAffineTransform(src, dst, M); 436 /// std::cout << M.width_ << " " << M.height_ << " " << M.channel_ << std::endl; 437 /// \endcode 438 /// \return Return true if transform successfully. 439 bool DATASET_API GetAffineTransform(std::vector<Point> src_point, std::vector<Point> dst_point, LiteMat &M); 440 441 /// \brief Matrix transpose. 442 /// \param[in] src Input matrix. 443 /// \param[in] dst Output matrix. 444 /// \par Example 445 /// \code 446 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 447 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8); 448 /// LiteMat lite_mat_src2; 449 /// ConvertTo(lite_mat_src, lite_mat_src2); 450 /// LiteMat lite_mat_dst; 451 /// 452 /// /* Transpose image */ 453 /// Transpose(lite_mat_src2, lite_mat_dst); 454 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 455 /// \endcode 456 /// \return Return true if transform successfully. 457 bool DATASET_API Transpose(const LiteMat &src, LiteMat &dst); 458 459 /// \brief Filter the image by a Gaussian kernel 460 /// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 is supported now. 461 /// \param[in] dst LiteMat image after processing. 462 /// \param[in] ksize The size of Gaussian kernel. It should be a vector of size 2 as {kernel_x, kernel_y}, both value of 463 /// which should be positive and odd. 464 /// \param[in] sigmaX The Gaussian kernel standard deviation of width. It should be a positive value. 465 /// \param[in] sigmaY The Gaussian kernel standard deviation of height (default=0.f). It should be a positive value, 466 /// or will use the value of sigmaX. 467 /// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT). 468 /// - PaddBorderType.PADD_BORDER_CONSTANT, fills the border with constant values. 469 /// - PaddBorderType.PADD_BORDER_REPLICATE, fills the border with replicate mode. 470 /// - PaddBorderType.PADD_BORDER_REFLECT_101, fills the border with reflect 101 mode. 471 /// - PaddBorderType.PADD_BORDER_DEFAULT, default pad mode, use reflect 101 mode. 472 /// \par Example 473 /// \code 474 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 475 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8); 476 /// LiteMat lite_mat_dst; 477 /// 478 /// /* Blur image */ 479 /// GaussianBlur(lite_mat_src, lite_mat_dst, {3, 5}, 3, 3); 480 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 481 /// \endcode 482 /// \return Return true if transform successfully. 483 bool DATASET_API GaussianBlur(const LiteMat &src, LiteMat &dst, const std::vector<int> &ksize, double sigmaX, 484 double sigmaY = 0.f, PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT); 485 486 /// \brief Detect edges in an image 487 /// \param[in] src LiteMat image to be processed. Only single channel LiteMat of type UINT8 is supported now. 488 /// \param[in] dst LiteMat image after processing. 489 /// \param[in] low_thresh The lower bound of the edge. Pixel with value below it will not be considered as a boundary. 490 /// It should be a nonnegative value. 491 //// \param[in] high_thresh The higher bound of the edge. Pixel with value over it will 492 /// be absolutely considered as a boundary. It should be a nonnegative value and no less than low_thresh. 493 /// \param[in] ksize The size of Sobel kernel (default=3). It can only be 3, 5 or 7. 494 /// \param[in] L2gradient Whether to use L2 distance while calculating gradient (default=false). 495 /// \par Example 496 /// \code 497 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 498 /// LiteMat lite_mat_src; 499 /// InitFromPixel(p_rgb, LPixelType::RGB, LDataType::UINT8, width, height, lite_mat_src); 500 /// LiteMat lite_mat_src2; 501 /// ConvertRgbToGray(lite_mat_src, LDataType::UINT8, image.cols, image.rows, lite_mat_src2); 502 /// 503 /// LiteMat lite_mat_dst; 504 /// Canny(lite_mat_src2, lite_mat_dst, 200, 300, 5); 505 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 506 /// \endcode 507 /// \return Return true if transform successfully. 508 bool DATASET_API Canny(const LiteMat &src, LiteMat &dst, double low_thresh, double high_thresh, int ksize = 3, 509 bool L2gradient = false); 510 511 /// \brief Apply a 2D convolution over the image. 512 /// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 and FLOAT32 is supported now. 513 /// \param[in] kernel LiteMat 2D convolution kernel. Only LiteMat of type FLOAT32 is supported now. 514 /// \param[in] dst LiteMat image after processing. 515 /// \param[in] dst_type Output data type of dst. 516 /// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT). 517 /// \par Example 518 /// \code 519 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 520 /// LiteMat lite_mat_src(width, height, channel, (void *)p_rgb, LDataType::UINT8); 521 /// LiteMat lite_mat_dst; 522 /// 523 /// LiteMat kernel; 524 /// kernel.Init(3, 3, 1, LDataType::FLOAT32); 525 /// float *kernel_ptr = kernel; 526 /// for (int i = 0; i < 9; i++) { 527 /// kernel_ptr[i] = i % 2; 528 /// } 529 /// Conv2D(lite_mat_src, kernel, lite_mat_dst, LDataType::UINT8); 530 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 531 /// \endcode 532 /// \return Return true if transform successfully. 533 bool DATASET_API Conv2D(const LiteMat &src, const LiteMat &kernel, LiteMat &dst, LDataType dst_type, 534 PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT); 535 536 /// \brief Applies a separable linear convolution over the image 537 /// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 and FLOAT32 is supported now. 538 /// \param[in] kx LiteMat 1D convolution kernel. Only LiteMat of type FLOAT32 is supported now. 539 /// \param[in] ky LiteMat 1D convolution kernel. Only LiteMat of type FLOAT32 is supported now. 540 /// \param[in] dst LiteMat image after processing. 541 /// \param[in] dst_type Output data type of dst. 542 /// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT). 543 bool DATASET_API ConvRowCol(const LiteMat &src, const LiteMat &kx, const LiteMat &ky, LiteMat &dst, LDataType dst_type, 544 PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT); 545 546 /// \brief Filter the image by a Sobel kernel 547 /// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 is supported now. 548 /// \param[in] dst LiteMat image after processing. 549 /// \param[in] flag_x Order of the derivative x. It should be a nonnegative value and can not be equal to 0 at the same 550 /// time with flag_y. 551 /// \param[in] flag_y Order of the derivative y. It should be a nonnegative value and can not be equal 552 /// to 0 at the same time with flag_x. 553 /// \param[in] ksize The size of Sobel kernel (default=3). It can only be 1, 3, 5 or 7. 554 /// \param[in] scale The scale factor for the computed derivative values (default=1.0). 555 /// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT). 556 /// - PaddBorderType.PADD_BORDER_CONSTANT, fills the border with constant values. 557 /// - PaddBorderType.PADD_BORDER_REPLICATE, fills the border with replicate mode. 558 /// - PaddBorderType.PADD_BORDER_REFLECT_101, fills the border with reflect 101 mode. 559 /// - PaddBorderType.PADD_BORDER_DEFAULT, default pad mode, use reflect 101 mode. 560 /// \par Example 561 /// \code 562 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 563 /// LiteMat lite_mat_src; 564 /// InitFromPixel(p_rgb, LPixelType::RGB, LDataType::UINT8, width, height, lite_mat_src); 565 /// LiteMat lite_mat_src2; 566 /// ConvertRgbToGray(lite_mat_src, LDataType::UINT8, image.cols, image.rows, lite_mat_src2); 567 /// 568 /// LiteMat lite_mat_dst; 569 /// Sobel(lite_mat_src2, lite_mat_dst, 1, 0, 3, 1, PaddBorderType::PADD_BORDER_REPLICATE); 570 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 571 /// \endcode 572 /// \return Return true if transform successfully. 573 bool DATASET_API Sobel(const LiteMat &src, LiteMat &dst, int flag_x, int flag_y, int ksize = 3, double scale = 1.0, 574 PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT); 575 576 /// \brief Convert RGB image or color image to BGR image. 577 /// \param[in] src Input image data. 578 /// \param[in] data_type The type of data (refer to LDataType class). 579 /// \param[in] w The width of output image. 580 /// \param[in] h The height of output image. 581 /// \param[in] mat Output image data. 582 /// \par Example 583 /// \code 584 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 585 /// LiteMat lite_mat_src; 586 /// lite_mat_src.Init(width, height, channel, p_rgb, LDataType::UINT8); 587 /// LiteMat lite_mat_dst; 588 /// 589 /// ConvertRgbToBgr(lite_mat_src, LDataType::UINT8, width, height, lite_mat_dst); 590 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 591 /// \endcode 592 /// \return Return true if transform successfully. 593 bool DATASET_API ConvertRgbToBgr(const LiteMat &src, const LDataType &data_type, int w, int h, LiteMat &mat); 594 595 /// \brief Convert RGB image or color image to grayscale image. 596 /// \param[in] src Input image data. 597 /// \param[in] data_type The type of data (refer to LDataType class). 598 /// \param[in] w The width of output image. 599 /// \param[in] h The height of output image. 600 /// \param[in] mat Output image data. 601 /// \par Example 602 /// \code 603 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 604 /// LiteMat lite_mat_src; 605 /// lite_mat_src.Init(width, height, channel, p_rgb, LDataType::UINT8); 606 /// LiteMat lite_mat_dst; 607 /// 608 /// ConvertRgbToGray(lite_mat_src, LDataType::UINT8, width, height, lite_mat_dst); 609 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 610 /// \endcode 611 /// \return Return true if transform successfully. 612 bool DATASET_API ConvertRgbToGray(const LiteMat &src, LDataType data_type, int w, int h, LiteMat &mat); 613 614 /// \brief Resize preserve AR with filler. 615 /// \param[in] src Input image data. 616 /// \param[in] dst Output image data. 617 /// \param[in] h The height of output image. 618 /// \param[in] w The width of output image. 619 /// \param[in] ratioShiftWShiftH Array that records the ratio, width shift, and height shift. 620 /// \param[in] invM Fixed direction array. 621 /// \param[in] img_orientation Way of export direction. 622 /// \par Example 623 /// \code 624 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 625 /// LiteMat lite_mat_src; 626 /// lite_mat_src.Init(width, height, channel, p_rgb, LDataType::UINT8); 627 /// LiteMat lite_mat_dst; 628 /// 629 /// float ratioShiftWShiftH[3] = {0}; 630 /// float invM[2][3] = {{0, 0, 0}, {0, 0, 0}}; 631 /// int h = 1000; 632 /// int w = 1000; 633 /// ResizePreserveARWithFiller(lite_mat_src, lite_mat_dst, h, w, &ratioShiftWShiftH, &invM, 0); 634 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 635 /// \endcode 636 /// \return Return true if transform successfully. 637 bool DATASET_API ResizePreserveARWithFiller(LiteMat &src, LiteMat &dst, int h, int w, float (*ratioShiftWShiftH)[3], 638 float (*invM)[2][3], int img_orientation); 639 640 /// \brief Transpose the input image; shape (H, W, C) to shape (C, H, W). 641 /// \param[in] src Input image data. 642 /// \param[in] dst Output image data. 643 /// \par Example 644 /// \code 645 /// /* Assume p_rgb is a pointer that points to an image with shape (width, height, channel) */ 646 /// LiteMat lite_mat_src; 647 /// lite_mat_src.Init(width, height, channel, p_rgb, LDataType::UINT8); 648 /// LiteMat lite_mat_dst; 649 /// 650 /// HWC2CHW(lite_mat_src, lite_mat_dst); 651 /// std::cout << lite_mat_dst.width_ << " " << lite_mat_dst.height_ << " " << lite_mat_dst.channel_ << std::endl; 652 /// \endcode 653 /// \return Return true if transform successfully. 654 bool DATASET_API HWC2CHW(LiteMat &src, LiteMat &dst); 655 } // namespace dataset 656 } // namespace mindspore 657 #endif // IMAGE_PROCESS_H_ 658