1 /** 2 * Copyright 2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef IMAGE_PROCESS_H_ 18 #define IMAGE_PROCESS_H_ 19 20 #include <math.h> 21 #include <vector> 22 #include <algorithm> 23 #include <iostream> 24 25 #include "lite_cv/lite_mat.h" 26 27 namespace mindspore { 28 namespace dataset { 29 30 #define CV_PI 3.1415926535897932384626433832795 31 #define IM_TOOL_EXIF_ORIENTATION_0_DEG 1 32 #define IM_TOOL_EXIF_ORIENTATION_0_DEG_MIRROR 2 33 #define IM_TOOL_EXIF_ORIENTATION_180_DEG 3 34 #define IM_TOOL_EXIF_ORIENTATION_180_DEG_MIRROR 4 35 #define IM_TOOL_EXIF_ORIENTATION_90_DEG_MIRROR 5 36 #define IM_TOOL_EXIF_ORIENTATION_90_DEG 6 37 #define IM_TOOL_EXIF_ORIENTATION_270_DEG_MIRROR 7 38 #define IM_TOOL_EXIF_ORIENTATION_270_DEG 8 39 #define NUM_OF_RGB_CHANNELS 9 40 #define IM_TOOL_DATA_TYPE_FLOAT (1) 41 #define IM_TOOL_DATA_TYPE_UINT8 (2) 42 #define IM_TOOL_RETURN_STATUS_SUCCESS (0) 43 #define IM_TOOL_RETURN_STATUS_INVALID_INPUT (1) 44 #define IM_TOOL_RETURN_STATUS_FAILED (2) 45 46 #define INT16_CAST(X) \ 47 static_cast<int16_t>(::std::min(::std::max(static_cast<int>(X + (X >= 0.f ? 0.5f : -0.5f)), -32768), 32767)); 48 49 enum PaddBorderType { 50 PADD_BORDER_CONSTANT = 0, /**< Fills the border with constant values. */ 51 PADD_BORDER_REPLICATE = 1, /**< Fills the border with replicate mode. */ 52 PADD_BORDER_REFLECT_101 = 4, /**< Fills the border with reflect 101 mode. */ 53 PADD_BORDER_DEFAULT = PADD_BORDER_REFLECT_101 /**< Default pad mode, use reflect 101 mode. */ 54 }; 55 56 struct BoxesConfig { 57 public: 58 std::vector<size_t> img_shape; 59 std::vector<int> num_default; 60 std::vector<int> feature_size; 61 float min_scale; 62 float max_scale; 63 std::vector<std::vector<float>> aspect_rations; 64 std::vector<int> steps; 65 std::vector<float> prior_scaling; 66 }; 67 68 /// \brief resizing image by bilinear algorithm, the data type of currently only supports is uint8, 69 /// the channel of currently supports is 3 and 1. 70 /// \param[in] src Input image data. 71 /// \param[in] dst Output image data. 72 /// \param[in] dst_w The width of the output image. 73 /// \param[in] dst_h The length of the output image. 74 bool ResizeBilinear(const LiteMat &src, LiteMat &dst, int dst_w, int dst_h); 75 76 /// \brief Init Lite Mat from pixel, the conversion of currently supports is rbgaTorgb and rgbaTobgr. 77 /// \note The length of the pointer must be the same as that of the multiplication of w and h. 78 /// \param[in] data Input image data. 79 /// \param[in] pixel_type The type of pixel_type. 80 /// \param[in] data_type The type of data_type. 81 /// \param[in] w The width of the output image. 82 /// \param[in] h The length of the output image. 83 /// \param[in] m Used to store image data. 84 bool InitFromPixel(const unsigned char *data, LPixelType pixel_type, LDataType data_type, int w, int h, LiteMat &m); 85 86 /// \brief convert the data type, the conversion of currently supports is uint8 to float. 87 /// \param[in] src Input image data. 88 /// \param[in] dst Output image data. 89 /// \param[in] scale Scale pixel value(default:1.0). 90 bool ConvertTo(const LiteMat &src, LiteMat &dst, double scale = 1.0); 91 92 /// \brief crop image, the channel supports is 3 and 1. 93 /// \param[in] src Input image data. 94 /// \param[in] dst Output image data. 95 /// \param[in] x The x coordinate value of the starting point of the screenshot. 96 /// \param[in] y The y coordinate value of the starting point of the screenshot. 97 /// \param[in] w The width of the screenshot. 98 /// \param[in] h The height of the screenshot. 99 bool Crop(const LiteMat &src, LiteMat &dst, int x, int y, int w, int h); 100 101 /// \brief normalize image, currently the supports data type is float. 102 /// \param[in] src Input image data. 103 /// \param[in] dst Output image data. 104 /// \param[in] mean Mean of the data set. 105 /// \param[in] std Norm of the data set. 106 bool SubStractMeanNormalize(const LiteMat &src, LiteMat &dst, const std::vector<float> &mean, 107 const std::vector<float> &std); 108 109 /// \brief padd image, the channel supports is 3 and 1. 110 /// \param[in] src Input image data. 111 /// \param[in] dst Output image data. 112 /// \param[in] top The length of top. 113 /// \param[in] bottom The length of bottom. 114 /// \param[in] left The length of left. 115 /// \param[in] right he length of right. 116 /// \param[in] pad_type The type of pad. 117 /// \param[in] fill_b_or_gray B or GRAY. 118 /// \param[in] fill_g G. 119 /// \param[in] fill_r R. 120 bool Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int right, PaddBorderType pad_type, 121 uint8_t fill_b_or_gray = 0, uint8_t fill_g = 0, uint8_t fill_r = 0); 122 123 /// \brief Extract image channel by index. 124 /// \param[in] src Input image data. 125 /// \param[in] dst Output image data. 126 /// \param[in] col The serial number of the channel. 127 bool ExtractChannel(LiteMat &src, LiteMat &dst, int col); 128 129 /// \brief Split image channels to single channel. 130 /// \param[in] src Input image data. 131 /// \param[in] mv Single channel data. 132 bool Split(const LiteMat &src, std::vector<LiteMat> &mv); 133 134 /// \brief Create a multi-channel image out of several single-channel arrays. 135 /// \param[in] mv Single channel data. 136 /// \param[in] dst Output image data. 137 bool Merge(const std::vector<LiteMat> &mv, LiteMat &dst); 138 139 /// \brief Apply affine transformation for 1 channel image. 140 /// \param[in] src Input image data. 141 /// \param[in] out_img Output image data. 142 /// \param[in] M[6] Affine transformation matrix. 143 /// \param[in] dsize The size of the output image. 144 /// \param[in] borderValue The pixel value is used for filing after the image is captured. 145 bool Affine(LiteMat &src, LiteMat &out_img, const double M[6], std::vector<size_t> dsize, UINT8_C1 borderValue); 146 147 /// \brief Apply affine transformation for 3 channel image. 148 /// \param[in] src Input image data. 149 /// \param[in] out_img Output image data. 150 /// \param[in] M[6] Affine transformation matrix. 151 /// \param[in] dsize The size of the output image. 152 /// \param[in] borderValue The pixel value is used for filing after the image is captured. 153 bool Affine(LiteMat &src, LiteMat &out_img, const double M[6], std::vector<size_t> dsize, UINT8_C3 borderValue); 154 155 /// \brief Get default anchor boxes for Faster R-CNN, SSD, YOLO etc. 156 /// \param[in] config Objects of BoxesConfig structure. 157 std::vector<std::vector<float>> GetDefaultBoxes(const BoxesConfig config); 158 159 /// \brief Convert the prediction boxes to the actual boxes of (y, x, h, w). 160 /// \param[in] boxes Actual size box. 161 /// \param[in] default_boxes Default box. 162 /// \param[in] config Objects of BoxesConfig structure. 163 void ConvertBoxes(std::vector<std::vector<float>> &boxes, const std::vector<std::vector<float>> &default_boxes, 164 const BoxesConfig config); 165 166 /// \brief Apply Non-Maximum Suppression. 167 /// \param[in] all_boxes All input boxes. 168 /// \param[in] all_scores Score after all boxes are executed through the network. 169 /// \param[in] thres Pre-value of IOU. 170 /// \param[in] max_boxes Maximum value of output box. 171 std::vector<int> ApplyNms(const std::vector<std::vector<float>> &all_boxes, std::vector<float> &all_scores, float thres, 172 int max_boxes); 173 174 /// \brief affine image by linear. 175 /// \param[in] src Input image data. 176 /// \param[in] dst Output image data. 177 /// \param[in] M Transformation matrix 178 /// \param[in] dst_w The width of the output image. 179 /// \param[in] dst_h The height of the output image. 180 /// \param[in] borderType Edge processing type. 181 /// \param[in] borderValue Boundary fill value. 182 bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int dst_w, int dst_h, 183 PaddBorderType borderType, std::vector<uint8_t> &borderValue); 184 185 /// \brief affine image by linear. 186 /// \param[in] src Input image data. 187 /// \param[in] dst Output image data. 188 /// \param[in] M Transformation matrix 189 /// \param[in] dst_w The width of the output image. 190 /// \param[in] dst_h The height of the output image. 191 /// \param[in] borderType Edge processing type. 192 /// \param[in] borderValue Boundary fill value. 193 bool WarpPerspectiveBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int dst_w, int dst_h, 194 PaddBorderType borderType, std::vector<uint8_t> &borderValue); 195 196 /// \brief Matrix rotation. 197 /// \param[in] x The value of the x-axis of the coordinate rotation point. 198 /// \param[in] y The value of the y-axis of the coordinate rotation point. 199 /// \param[in] angle Rotation angle. 200 /// \param[in] scale Scaling ratio. 201 /// \param[in] M Output transformation matrix. 202 bool GetRotationMatrix2D(float x, float y, double angle, double scale, LiteMat &M); 203 204 /// \brief Perspective transformation. 205 /// \param[in] src_point Input coordinate point. 206 /// \param[in] dst_point Output coordinate point. 207 /// \param[in] M Output matrix. 208 bool GetPerspectiveTransform(std::vector<Point> src_point, std::vector<Point> dst_point, LiteMat &M); 209 210 /// \brief Affine transformation. 211 /// \param[in] src_point Input coordinate point. 212 /// \param[in] dst_point Output coordinate point. 213 /// \param[in] M Output matrix. 214 bool GetAffineTransform(std::vector<Point> src_point, std::vector<Point> dst_point, LiteMat &M); 215 216 /// \brief Matrix transpose. 217 /// \param[in] src Input matrix. 218 /// \param[in] dst Output matrix. 219 bool Transpose(const LiteMat &src, LiteMat &dst); 220 221 /// \brief Filter the image by a Gaussian kernel 222 /// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 is supported now. 223 /// \param[in] dst LiteMat image after processing. 224 /// \param[in] ksize The size of Gaussian kernel. It should be a vector of size 2 as {kernel_x, kernel_y}, both value of 225 /// which should be positive and odd. 226 /// \param[in] sigmaX The Gaussian kernel standard deviation of width. It should be a positive value. 227 /// \param[in] sigmaY The Gaussian kernel standard deviation of height (default=0.f). It should be a positive value, 228 /// or will use the value of sigmaX. 229 /// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT). 230 bool GaussianBlur(const LiteMat &src, LiteMat &dst, const std::vector<int> &ksize, double sigmaX, double sigmaY = 0.f, 231 PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT); 232 233 /// \brief Detect edges in an image 234 /// \param[in] src LiteMat image to be processed. Only single channel LiteMat of type UINT8 is supported now. 235 /// \param[in] dst LiteMat image after processing. 236 /// \param[in] low_thresh The lower bound of the edge. Pixel with value below it will not be considered as a boundary. 237 /// It should be a nonnegative value. 238 //// \param[in] high_thresh The higher bound of the edge. Pixel with value over it will 239 /// be absolutely considered as a boundary. It should be a nonnegative value and no less than low_thresh. 240 /// \param[in] ksize The size of Sobel kernel (default=3). It can only be 3, 5 or 7. 241 /// \param[in] L2gradient Whether to use L2 distance while calculating gradient (default=false). 242 bool Canny(const LiteMat &src, LiteMat &dst, double low_thresh, double high_thresh, int ksize = 3, 243 bool L2gradient = false); 244 245 /// \brief Apply a 2D convolution over the image. 246 /// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 and FLOAT32 is supported now. 247 /// \param[in] kernel LiteMat 2D convolution kernel. Only LiteMat of type FLOAT32 is supported now. 248 /// \param[in] dst LiteMat image after processing. 249 /// \param[in] dst_type Output data type of dst. 250 /// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT). 251 bool Conv2D(const LiteMat &src, const LiteMat &kernel, LiteMat &dst, LDataType dst_type, 252 PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT); 253 254 /// \brief Applies a separable linear convolution over the image 255 /// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 and FLOAT32 is supported now. 256 /// \param[in] kx LiteMat 1D convolution kernel. Only LiteMat of type FLOAT32 is supported now. 257 /// \param[in] ky LiteMat 1D convolution kernel. Only LiteMat of type FLOAT32 is supported now. 258 /// \param[in] dst LiteMat image after processing. 259 /// \param[in] dst_type Output data type of dst. 260 /// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT). 261 bool ConvRowCol(const LiteMat &src, const LiteMat &kx, const LiteMat &ky, LiteMat &dst, LDataType dst_type, 262 PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT); 263 264 /// \brief Filter the image by a Sobel kernel 265 /// \param[in] src LiteMat image to be processed. Only LiteMat of type UINT8 is supported now. 266 /// \param[in] dst LiteMat image after processing. 267 /// \param[in] flag_x Order of the derivative x. It should be a nonnegative value and can not be equal to 0 at the same 268 /// time with flag_y. 269 /// \param[in] flag_y Order of the derivative y. It should be a nonnegative value and can not be equal 270 /// to 0 at the same time with flag_x. 271 /// \param[in] ksize The size of Sobel kernel (default=3). It can only be 1, 3, 5 or 7. 272 /// \param[in] scale The scale factor for the computed derivative values (default=1.0). 273 /// \param[in] pad_type The padding type used while filtering (default=PaddBorderType::PADD_BORDER_DEFAULT). 274 bool Sobel(const LiteMat &src, LiteMat &dst, int flag_x, int flag_y, int ksize = 3, double scale = 1.0, 275 PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT); 276 277 /// \brief Convert RGB image or color image to BGR image. 278 /// \param[in] src Input image data. 279 /// \param[in] data_type The type of data_type. 280 /// \param[in] w The width of output image. 281 /// \param[in] h The height of output image. 282 /// \param[in] mat Output image data. 283 bool ConvertRgbToBgr(const LiteMat &src, const LDataType &data_type, int w, int h, LiteMat &mat); 284 285 /// \brief Convert RGB image or color image to grayscale image. 286 /// \param[in] src Input image data. 287 /// \param[in] data_type The type of data_type. 288 /// \param[in] w The width of output image. 289 /// \param[in] h The height of output image. 290 /// \param[in] mat Output image data. 291 bool ConvertRgbToGray(const LiteMat &src, LDataType data_type, int w, int h, LiteMat &mat); 292 293 /// \brief Resize preserve AR with filler. 294 /// \param[in] src Input image data. 295 /// \param[in] dst Output image data. 296 /// \param[in] h The height of output image. 297 /// \param[in] w The width of output image. 298 /// \param[in] ratioShiftWShiftH Array that records the ratio, width shift, and height shift. 299 /// \param[in] invM Fixed direction array. 300 /// \param[in] img_orientation Way of export direction. 301 bool ResizePreserveARWithFiller(LiteMat &src, LiteMat &dst, int h, int w, float (*ratioShiftWShiftH)[3], 302 float (*invM)[2][3], int img_orientation); 303 304 } // namespace dataset 305 } // namespace mindspore 306 #endif // IMAGE_PROCESS_H_ 307