1 /** 2 * Copyright 2020-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_LITE_H_ 18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_LITE_H_ 19 20 #include <map> 21 #include <memory> 22 #include <string> 23 #include <utility> 24 #include <vector> 25 #include "include/api/status.h" 26 #include "include/dataset/constants.h" 27 #include "include/dataset/transforms.h" 28 29 namespace mindspore { 30 namespace dataset { 31 32 // Transform operations for performing computer vision. 33 namespace vision { 34 35 // Forward Declarations 36 class RotateOperation; 37 38 /// \brief Apply affine transform on the input image. 39 class Affine final : public TensorTransform { 40 public: 41 /// \brief Constructor. 42 /// \param[in] degrees The degrees to rotate the image. 43 /// \param[in] translation The values representing vertical and horizontal translation (default = {0.0, 0.0}). 44 /// The first value represents the x axis translation while the second represents the y axis translation. 45 /// \param[in] scale The scaling factor for the image (default = 0.0). 46 /// \param[in] shear A float vector of size 2, representing the shear degrees (default = {0.0, 0.0}). 47 /// \param[in] interpolation An enum for the mode of interpolation. 48 /// - InterpolationMode::kLinear, Interpolation method is blinear interpolation (Only supports this mode in Lite). 49 /// - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation. 50 /// - InterpolationMode::kCubic, Interpolation method is bicubic interpolation. 51 /// - InterpolationMode::kArea, Interpolation method is pixel area interpolation. 52 /// \param[in] fill_value A vector representing the value to fill the area outside the transformation 53 /// in the output image. If 1 value is provided, it is used for all RGB channels. 54 /// If 3 values are provided, it is used to fill R, G, B channels respectively. 55 explicit Affine(float_t degrees, const std::vector<float> &translation = {0.0, 0.0}, float scale = 0.0, 56 const std::vector<float> &shear = {0.0, 0.0}, 57 InterpolationMode interpolation = InterpolationMode::kNearestNeighbour, 58 const std::vector<uint8_t> &fill_value = {0, 0, 0}); 59 60 /// \brief Destructor. 61 ~Affine() = default; 62 63 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 64 /// \return Shared pointer to TensorOperation object. 65 std::shared_ptr<TensorOperation> Parse() override; 66 67 private: 68 struct Data; 69 std::shared_ptr<Data> data_; 70 }; 71 72 /// \brief Crop the input image at the center to the given size. 73 class CenterCrop final : public TensorTransform { 74 public: 75 /// \brief Constructor. 76 /// \param[in] size A vector representing the output size of the cropped image. 77 /// If the size is a single value, a squared crop of size (size, size) is returned. 78 /// If the size has 2 values, it should be (height, width). 79 explicit CenterCrop(std::vector<int32_t> size); 80 81 /// \brief Destructor. 82 ~CenterCrop() = default; 83 84 protected: 85 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 86 /// \return Shared pointer to TensorOperation object. 87 std::shared_ptr<TensorOperation> Parse() override; 88 89 std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override; 90 91 private: 92 struct Data; 93 std::shared_ptr<Data> data_; 94 }; 95 96 /// \brief Crop an image based on location and crop size. 97 class Crop final : public TensorTransform { 98 public: 99 /// \brief Constructor. 100 /// \param[in] coordinates Starting location of crop. Must be a vector of two values, in the form of {x_coor, y_coor}. 101 /// \param[in] size Size of the cropped area. 102 /// If the size is a single value, a squared crop of size (size, size) is returned. 103 /// If the size has 2 values, it should be (height, width). 104 Crop(std::vector<int32_t> coordinates, std::vector<int32_t> size); 105 106 /// \brief Destructor. 107 ~Crop() = default; 108 109 protected: 110 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 111 /// \return Shared pointer to TensorOperation object. 112 std::shared_ptr<TensorOperation> Parse() override; 113 114 private: 115 struct Data; 116 std::shared_ptr<Data> data_; 117 }; 118 119 /// \brief Decode the input image in RGB mode. 120 class Decode final : public TensorTransform { 121 public: 122 /// \brief Constructor. 123 /// \param[in] rgb A boolean indicating whether to decode the image in RGB mode or not. 124 explicit Decode(bool rgb = true); 125 126 /// \brief Destructor. 127 ~Decode() = default; 128 129 protected: 130 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 131 /// \return Shared pointer to TensorOperation object. 132 std::shared_ptr<TensorOperation> Parse() override; 133 134 std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override; 135 136 private: 137 struct Data; 138 std::shared_ptr<Data> data_; 139 }; 140 141 /// \brief Blur the input image with the specified Gaussian kernel. 142 class GaussianBlur final : public TensorTransform { 143 public: 144 /// \brief Constructor. 145 /// \param[in] kernel_size A vector of Gaussian kernel size for width and height. The value must be positive and odd. 146 /// \param[in] sigma A vector of Gaussian kernel standard deviation sigma for width and height. The values must be 147 /// positive. Using default value 0 means to calculate the sigma according to the kernel size. 148 GaussianBlur(const std::vector<int32_t> &kernel_size, const std::vector<float> &sigma = {0., 0.}); 149 150 /// \brief Destructor. 151 ~GaussianBlur() = default; 152 153 protected: 154 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 155 /// \return Shared pointer to TensorOperation object. 156 std::shared_ptr<TensorOperation> Parse() override; 157 158 private: 159 struct Data; 160 std::shared_ptr<Data> data_; 161 }; 162 163 /// \brief Normalize the input image with respect to mean and standard deviation. 164 class Normalize final : public TensorTransform { 165 public: 166 /// \brief Constructor. 167 /// \param[in] mean A vector of mean values for each channel, with respect to channel order. 168 /// The mean values must be in range [0.0, 255.0]. 169 /// \param[in] std A vector of standard deviations for each channel, with respect to channel order. 170 /// The standard deviation values must be in range (0.0, 255.0]. 171 Normalize(std::vector<float> mean, std::vector<float> std); 172 173 /// \brief Destructor. 174 ~Normalize() = default; 175 176 protected: 177 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 178 /// \return Shared pointer to TensorOperation object. 179 std::shared_ptr<TensorOperation> Parse() override; 180 181 std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override; 182 183 private: 184 struct Data; 185 std::shared_ptr<Data> data_; 186 }; 187 188 /// \brief Apply a Random Affine transformation on the input image in RGB or Greyscale mode. 189 class RandomAffine final : public TensorTransform { 190 public: 191 /// \brief Constructor. 192 /// \param[in] degrees A float vector of size 2, representing the starting and ending degree. 193 /// \param[in] translate_range A float vector of size 2 or 4, representing percentages of translation on x and y axes. 194 /// If the size is 2, (min_dx, max_dx, 0, 0). 195 /// If the size is 4, (min_dx, max_dx, min_dy, max_dy), 196 /// all values are in range [-1, 1]. 197 /// \param[in] scale_range A float vector of size 2, representing the starting and ending scales in the range. 198 /// \param[in] shear_ranges A float vector of size 2 or 4, representing the starting and ending shear degrees 199 /// vertically and horizontally. 200 /// If the size is 2, (min_shear_x, max_shear_x, 0, 0), 201 /// if the size is 4, (min_shear_x, max_shear_x, min_shear_y, max_shear_y). 202 /// \param[in] interpolation An enum for the mode of interpolation. 203 /// - InterpolationMode::kLinear, Interpolation method is blinear interpolation (Only supports this mode in Lite). 204 /// - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation. 205 /// - InterpolationMode::kCubic, Interpolation method is bicubic interpolation. 206 /// - InterpolationMode::kArea, Interpolation method is pixel area interpolation. 207 /// \param[in] fill_value A vector representing the value to fill the area outside the transform 208 /// in the output image. If 1 value is provided, it is used for all RGB channels. 209 /// If 3 values are provided, it is used to fill R, G and B channels respectively. 210 explicit RandomAffine(const std::vector<float_t> °rees, 211 const std::vector<float_t> &translate_range = {0.0, 0.0, 0.0, 0.0}, 212 const std::vector<float_t> &scale_range = {1.0, 1.0}, 213 const std::vector<float_t> &shear_ranges = {0.0, 0.0, 0.0, 0.0}, 214 InterpolationMode interpolation = InterpolationMode::kNearestNeighbour, 215 const std::vector<uint8_t> &fill_value = {0, 0, 0}); 216 217 /// \brief Destructor. 218 ~RandomAffine() = default; 219 220 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 221 /// \return Shared pointer to TensorOperation object. 222 std::shared_ptr<TensorOperation> Parse() override; 223 224 private: 225 struct Data; 226 std::shared_ptr<Data> data_; 227 }; 228 229 /// \brief Resize the input image to the given size. 230 class Resize final : public TensorTransform { 231 public: 232 /// \brief Constructor. 233 /// \param[in] size A vector representing the output size of the resized image. 234 /// If the size is a single value, the image will be resized to this value with 235 /// the same image aspect ratio. If the size has 2 values, it should be (height, width). 236 /// \param[in] interpolation An enum for the mode of interpolation. 237 /// - InterpolationMode::kLinear, Interpolation method is blinear interpolation (Only supports this mode in Lite). 238 /// - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation. 239 /// - InterpolationMode::kCubic, Interpolation method is bicubic interpolation. 240 /// - InterpolationMode::kArea, Interpolation method is pixel area interpolation. 241 /// - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow. 242 explicit Resize(std::vector<int32_t> size, InterpolationMode interpolation = InterpolationMode::kLinear); 243 244 /// \brief Destructor. 245 ~Resize() = default; 246 247 protected: 248 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 249 /// \return Shared pointer to TensorOperation object. 250 std::shared_ptr<TensorOperation> Parse() override; 251 252 std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override; 253 254 private: 255 struct Data; 256 std::shared_ptr<Data> data_; 257 }; 258 259 /// \brief Keep the original picture ratio and fills the rest. 260 class ResizePreserveAR final : public TensorTransform { 261 public: 262 /// \brief Constructor. 263 /// \param[in] height The height of image output value after resizing. 264 /// \param[in] width The width of image output value after resizing. 265 /// \param[in] img_orientation optional rotation angle. 266 /// - img_orientation = 1, Rotate 0 degree. 267 /// - img_orientation = 2, Rotate 0 degree and apply horizontal flip. 268 /// - img_orientation = 3, Rotate 180 degree. 269 /// - img_orientation = 4, Rotate 180 degree and apply horizontal flip. 270 /// - img_orientation = 5, Rotate 90 degree and apply horizontal flip. 271 /// - img_orientation = 6, Rotate 90 degree. 272 /// - img_orientation = 7, Rotate 270 degree and apply horizontal flip. 273 /// - img_orientation = 8, Rotate 270 degree. 274 ResizePreserveAR(int32_t height, int32_t width, int32_t img_orientation = 0); 275 276 /// \brief Destructor. 277 ~ResizePreserveAR() = default; 278 279 protected: 280 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 281 /// \return Shared pointer to TensorOperation object. 282 std::shared_ptr<TensorOperation> Parse() override; 283 284 private: 285 struct Data; 286 std::shared_ptr<Data> data_; 287 }; 288 289 /// \brief RGB2BGR TensorTransform. 290 /// \notes Convert the format of input image from RGB to BGR. 291 class RGB2BGR final : public TensorTransform { 292 public: 293 /// \brief Constructor. 294 RGB2BGR() = default; 295 296 /// \brief Destructor. 297 ~RGB2BGR() = default; 298 299 protected: 300 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 301 /// \return Shared pointer to TensorOperation object. 302 std::shared_ptr<TensorOperation> Parse() override; 303 }; 304 305 /// \brief RGB2GRAY TensorTransform. 306 /// \note Convert RGB image or color image to grayscale image. 307 /// \brief Convert a RGB image or color image to a grayscale one. 308 class RGB2GRAY final : public TensorTransform { 309 public: 310 /// \brief Constructor. 311 RGB2GRAY() = default; 312 313 /// \brief Destructor. 314 ~RGB2GRAY() = default; 315 316 protected: 317 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 318 /// \return Shared pointer to TensorOperation object. 319 std::shared_ptr<TensorOperation> Parse() override; 320 }; 321 322 /// \brief Rotate the input image according to parameters. 323 class Rotate final : public TensorTransform { 324 public: 325 /// \brief Constructor. 326 /// \note This api is only used in Lite, the interpolation mode is bilinear. 327 /// \param[in] angle_id The fix rotation angle. 328 /// - FixRotationAngle::k0Degree = 1, Rotate 0 degree. 329 /// - FixRotationAngle::k0DegreeAndMirror = 2, Rotate 0 degree and apply horizontal flip. 330 /// - FixRotationAngle::k180Degree = 3, Rotate 180 degree. 331 /// - FixRotationAngle::k180DegreeAndMirror = 4, Rotate 180 degree and apply horizontal flip. 332 /// - FixRotationAngle::k90DegreeAndMirror = 5, Rotate 90 degree and apply horizontal flip. 333 /// - FixRotationAngle::k90Degree = 6, Rotate 90 degree. 334 /// - FixRotationAngle::k270DegreeAndMirror = 7, Rotate 270 degree and apply horizontal flip. 335 /// - FixRotationAngle::k270Degree = 8, Rotate 270 degree. 336 explicit Rotate(FixRotationAngle angle_id = FixRotationAngle::k0Degree); 337 338 /// \brief Constructor. 339 /// \param[in] degrees A float value, representing the rotation degrees. 340 /// \param[in] resample An enum for the mode of interpolation. 341 /// - InterpolationMode::kLinear, Interpolation method is blinear interpolation. 342 /// - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation. 343 /// - InterpolationMode::kCubic, Interpolation method is bicubic interpolation. 344 /// - InterpolationMode::kArea, Interpolation method is pixel area interpolation. 345 /// \param[in] expand A boolean representing whether the image is expanded after rotation. 346 /// \param[in] center A float vector of size 2 or empty, representing the x and y center of rotation 347 /// or the center of the image. 348 /// \param[in] fill_value A vector representing the value to fill the area outside the transform 349 /// in the output image. If 1 value is provided, it is used for all RGB channels. 350 /// If 3 values are provided, it is used to fill R, G, B channels respectively. 351 Rotate(float degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false, 352 std::vector<float> center = {}, std::vector<uint8_t> fill_value = {0, 0, 0}); 353 354 /// \brief Destructor. 355 ~Rotate() = default; 356 357 protected: 358 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 359 /// \return Shared pointer to TensorOperation object. 360 std::shared_ptr<TensorOperation> Parse() override; 361 362 private: 363 std::shared_ptr<RotateOperation> op_; 364 struct Data; 365 std::shared_ptr<Data> data_; 366 }; 367 368 } // namespace vision 369 } // namespace dataset 370 } // namespace mindspore 371 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_LITE_H_ 372