1 /** 2 * Copyright 2020-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_ 18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_ 19 20 #include <map> 21 #include <memory> 22 #include <string> 23 #include <utility> 24 #include <vector> 25 26 #include "include/api/dual_abi_helper.h" 27 #include "include/api/status.h" 28 #include "include/dataset/constants.h" 29 #include "include/dataset/transforms.h" 30 #include "include/dataset/vision_lite.h" 31 32 namespace mindspore { 33 namespace dataset { 34 35 class TensorOperation; 36 37 // Transform operations for performing computer vision. 38 namespace vision { 39 40 /// \brief AdjustGamma TensorTransform. 41 /// \notes Apply gamma correction on input image. 42 class AdjustGamma final : public TensorTransform { 43 public: 44 /// \brief Constructor. 45 /// \param[in] gamma Non negative real number, which makes the output image pixel value 46 /// exponential in relation to the input image pixel value. 47 /// \param[in] gain The constant multiplier. 48 explicit AdjustGamma(float gamma, float gain = 1); 49 50 /// \brief Destructor. 51 ~AdjustGamma() = default; 52 53 protected: 54 /// \brief Function to convert TensorTransform object into a TensorOperation object. 55 /// \return Shared pointer to TensorOperation object. 56 std::shared_ptr<TensorOperation> Parse() override; 57 58 private: 59 struct Data; 60 std::shared_ptr<Data> data_; 61 }; 62 63 /// \brief Apply automatic contrast on the input image. 64 class AutoContrast final : public TensorTransform { 65 public: 66 /// \brief Constructor. 67 /// \param[in] cutoff Percent of pixels to cut off from the histogram, the valid range of cutoff value is 0 to 50. 68 /// \param[in] ignore Pixel values to ignore. 69 explicit AutoContrast(float cutoff = 0.0, std::vector<uint32_t> ignore = {}); 70 71 /// \brief Destructor. 72 ~AutoContrast() = default; 73 74 protected: 75 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 76 /// \return Shared pointer to TensorOperation object. 77 std::shared_ptr<TensorOperation> Parse() override; 78 79 private: 80 struct Data; 81 std::shared_ptr<Data> data_; 82 }; 83 84 /// \brief BoundingBoxAugment TensorTransform. 85 /// \note Apply a given image transform on a random selection of bounding box regions of a given image. 86 class BoundingBoxAugment final : public TensorTransform { 87 public: 88 /// \brief Constructor. 89 /// \param[in] transform Raw pointer to the TensorTransform operation. 90 /// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3). 91 explicit BoundingBoxAugment(TensorTransform *transform, float ratio = 0.3); 92 93 /// \brief Constructor. 94 /// \param[in] transform Smart pointer to the TensorTransform operation. 95 /// \param[in] ratio Ratio of bounding boxes where augmentation is applied to. Range: [0, 1] (default=0.3). 96 explicit BoundingBoxAugment(const std::shared_ptr<TensorTransform> &transform, float ratio = 0.3); 97 98 /// \brief Constructor. 99 /// \param[in] transform Object pointer to the TensorTransform operation. 100 /// \param[in] ratio Ratio of bounding boxes where augmentation is applied to. Range: [0, 1] (default=0.3). 101 explicit BoundingBoxAugment(const std::reference_wrapper<TensorTransform> transform, float ratio = 0.3); 102 103 /// \brief Destructor. 104 ~BoundingBoxAugment() = default; 105 106 protected: 107 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 108 /// \return Shared pointer to TensorOperation object. 109 std::shared_ptr<TensorOperation> Parse() override; 110 111 private: 112 struct Data; 113 std::shared_ptr<Data> data_; 114 }; 115 116 /// \brief Change the color space of the image. 117 class ConvertColor final : public TensorTransform { 118 public: 119 /// \brief Constructor. 120 /// \param[in] convert_mode The mode of image channel conversion. 121 explicit ConvertColor(ConvertMode convert_mode); 122 123 /// \brief Destructor. 124 ~ConvertColor() = default; 125 126 protected: 127 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 128 /// \return Shared pointer to TensorOperation object. 129 std::shared_ptr<TensorOperation> Parse() override; 130 131 private: 132 struct Data; 133 std::shared_ptr<Data> data_; 134 }; 135 136 /// \brief Mask a random section of each image with the corresponding part of another randomly 137 /// selected image in that batch. 138 class CutMixBatch final : public TensorTransform { 139 public: 140 /// \brief Constructor. 141 /// \param[in] image_batch_format The format of the batch. 142 /// \param[in] alpha The hyperparameter of beta distribution (default = 1.0). 143 /// \param[in] prob The probability by which CutMix is applied to each image (default = 1.0). 144 explicit CutMixBatch(ImageBatchFormat image_batch_format, float alpha = 1.0, float prob = 1.0); 145 146 /// \brief Destructor. 147 ~CutMixBatch() = default; 148 149 protected: 150 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 151 /// \return Shared pointer to TensorOperation object. 152 std::shared_ptr<TensorOperation> Parse() override; 153 154 private: 155 struct Data; 156 std::shared_ptr<Data> data_; 157 }; 158 159 /// \brief Randomly cut (mask) out a given number of square patches from the input image. 160 class CutOut final : public TensorTransform { 161 public: 162 /// \brief Constructor. 163 /// \param[in] length Integer representing the side length of each square patch. 164 /// \param[in] num_patches Integer representing the number of patches to be cut out of an image. 165 explicit CutOut(int32_t length, int32_t num_patches = 1); 166 167 /// \brief Destructor. 168 ~CutOut() = default; 169 170 protected: 171 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 172 /// \return Shared pointer to TensorOperation object. 173 std::shared_ptr<TensorOperation> Parse() override; 174 175 private: 176 struct Data; 177 std::shared_ptr<Data> data_; 178 }; 179 180 /// \brief Apply histogram equalization on the input image. 181 class Equalize final : public TensorTransform { 182 public: 183 /// \brief Constructor. 184 Equalize(); 185 186 /// \brief Destructor. 187 ~Equalize() = default; 188 189 protected: 190 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 191 /// \return Shared pointer to TensorOperation object. 192 std::shared_ptr<TensorOperation> Parse() override; 193 }; 194 195 /// \brief Flip the input image horizontally. 196 class HorizontalFlip final : public TensorTransform { 197 public: 198 /// \brief Constructor. 199 HorizontalFlip(); 200 201 /// \brief Destructor. 202 ~HorizontalFlip() = default; 203 204 protected: 205 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 206 /// \return Shared pointer to TensorOperation object. 207 std::shared_ptr<TensorOperation> Parse() override; 208 }; 209 210 /// \brief Transpose the input image; shape (H, W, C) to shape (C, H, W). 211 class HWC2CHW final : public TensorTransform { 212 public: 213 /// \brief Constructor. 214 HWC2CHW(); 215 216 /// \brief Destructor. 217 ~HWC2CHW() = default; 218 219 protected: 220 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 221 /// \return Shared pointer to TensorOperation object. 222 std::shared_ptr<TensorOperation> Parse() override; 223 }; 224 225 /// \brief Apply invert on the input image in RGB mode. 226 class Invert final : public TensorTransform { 227 public: 228 /// \brief Constructor. 229 Invert(); 230 231 /// \brief Destructor. 232 ~Invert() = default; 233 234 protected: 235 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 236 /// \return Shared pointer to TensorOperation object. 237 std::shared_ptr<TensorOperation> Parse() override; 238 }; 239 240 /// \brief Apply MixUp transformation on an input batch of images and labels. The labels must be in 241 /// one-hot format and Batch must be called before calling this function. 242 class MixUpBatch final : public TensorTransform { 243 public: 244 /// \brief Constructor. 245 /// \param[in] alpha hyperparameter of beta distribution (default = 1.0). 246 explicit MixUpBatch(float alpha = 1); 247 248 /// \brief Destructor. 249 ~MixUpBatch() = default; 250 251 protected: 252 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 253 /// \return Shared pointer to TensorOperation object. 254 std::shared_ptr<TensorOperation> Parse() override; 255 256 private: 257 struct Data; 258 std::shared_ptr<Data> data_; 259 }; 260 261 /// \brief Normalize the input image with respect to mean and standard deviation and pads an extra 262 /// channel with value zero. 263 class NormalizePad final : public TensorTransform { 264 public: 265 /// \brief Constructor. 266 /// \param[in] mean A vector of mean values for each channel, with respect to channel order. 267 /// The mean values must be in range [0.0, 255.0]. 268 /// \param[in] std A vector of standard deviations for each channel, with respect to channel order. 269 /// The standard deviation values must be in range (0.0, 255.0]. 270 /// \param[in] dtype The output datatype of Tensor. 271 /// The standard deviation values must be "float32" or "float16"(default = "float32"). 272 NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::string &dtype = "float32") NormalizePad(mean,std,StringToChar (dtype))273 : NormalizePad(mean, std, StringToChar(dtype)) {} 274 275 NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::vector<char> &dtype); 276 277 /// \brief Destructor. 278 ~NormalizePad() = default; 279 280 protected: 281 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 282 /// \return Shared pointer to TensorOperation object. 283 std::shared_ptr<TensorOperation> Parse() override; 284 285 private: 286 struct Data; 287 std::shared_ptr<Data> data_; 288 }; 289 290 /// \brief Pad the image according to padding parameters. 291 class Pad final : public TensorTransform { 292 public: 293 /// \brief Constructor. 294 /// \param[in] padding A vector representing the number of pixels to pad the image. 295 /// If the vector has one value, it pads all sides of the image with that value. 296 /// If the vector has two values, it pads left and top with the first and 297 /// right and bottom with the second value. 298 /// If the vector has four values, it pads left, top, right, and bottom with 299 /// those values respectively. 300 /// \param[in] fill_value A vector representing the pixel intensity of the borders. Only valid if the 301 /// padding_mode is BorderType.kConstant. If 1 value is provided, it is used for all RGB channels. 302 /// If 3 values are provided, it is used to fill R, G, B channels respectively. 303 /// \param[in] padding_mode The method of padding (default=BorderType.kConstant). 304 /// Can be any of 305 /// [BorderType.kConstant, BorderType.kEdge, BorderType.kReflect, BorderType.kSymmetric] 306 /// - BorderType.kConstant, means it fills the border with constant values 307 /// - BorderType.kEdge, means it pads with the last value on the edge 308 /// - BorderType.kReflect, means it reflects the values on the edge omitting the last value of edge 309 /// - BorderType.kSymmetric, means it reflects the values on the edge repeating the last value of edge 310 explicit Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0}, 311 BorderType padding_mode = BorderType::kConstant); 312 313 /// \brief Destructor. 314 ~Pad() = default; 315 316 protected: 317 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 318 /// \return Shared pointer to TensorOperation object. 319 std::shared_ptr<TensorOperation> Parse() override; 320 321 private: 322 struct Data; 323 std::shared_ptr<Data> data_; 324 }; 325 326 /// \brief Blend an image with its grayscale version with random weights 327 /// t and 1 - t generated from a given range. If the range is trivial 328 /// then the weights are determinate and t equals to the bound of the interval. 329 class RandomColor final : public TensorTransform { 330 public: 331 /// \brief Constructor. 332 /// \param[in] t_lb Lower bound random weights. 333 /// \param[in] t_ub Upper bound random weights. 334 RandomColor(float t_lb, float t_ub); 335 336 /// \brief Destructor. 337 ~RandomColor() = default; 338 339 protected: 340 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 341 /// \return Shared pointer to TensorOperation object. 342 std::shared_ptr<TensorOperation> Parse() override; 343 344 private: 345 struct Data; 346 std::shared_ptr<Data> data_; 347 }; 348 349 /// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image. 350 class RandomColorAdjust final : public TensorTransform { 351 public: 352 /// \brief Constructor. 353 /// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values 354 /// if it is a vector of two values it needs to be in the form of [min, max] (Default={1, 1}). 355 /// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values 356 /// if it is a vector of two values, it needs to be in the form of [min, max] (Default={1, 1}). 357 /// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values 358 /// if it is a vector of two values, it needs to be in the form of [min, max] (Default={1, 1}). 359 /// \param[in] hue Hue adjustment factor. Must be a vector of one or two values 360 /// if it is a vector of two values, it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5 361 /// (Default={0, 0}). 362 explicit RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0}, 363 std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0}); 364 365 /// \brief Destructor. 366 ~RandomColorAdjust() = default; 367 368 protected: 369 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 370 /// \return Shared pointer to TensorOperation object. 371 std::shared_ptr<TensorOperation> Parse() override; 372 373 private: 374 struct Data; 375 std::shared_ptr<Data> data_; 376 }; 377 378 /// \brief Crop the input image at a random location. 379 class RandomCrop final : public TensorTransform { 380 public: 381 /// \brief Constructor. 382 /// \param[in] size A vector representing the output size of the cropped image. 383 /// If the size is a single value, a squared crop of size (size, size) is returned. 384 /// If the size has 2 values, it should be (height, width). 385 /// \param[in] padding A vector representing the number of pixels to pad the image. 386 /// If the vector has one value, it pads all sides of the image with that value. 387 /// If the vector has two values, it pads left and top with the first and 388 /// right and bottom with the second value. 389 /// If the vector has four values, it pads left, top, right, and bottom with 390 /// those values respectively. 391 /// \param[in] pad_if_needed A boolean indicating that whether to pad the image 392 /// if either side is smaller than the given output size. 393 /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is 394 /// BorderType.kConstant. If 1 value is provided, it is used for all RGB channels. 395 /// If 3 values are provided, it is used to fill R, G, B channels respectively. 396 /// \param[in] padding_mode The method of padding (default=BorderType::kConstant).It can be any of 397 /// [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric]. 398 /// - BorderType::kConstant, Fill the border with constant values. 399 /// - BorderType::kEdge, Fill the border with the last value on the edge. 400 /// - BorderType::kReflect, Reflect the values on the edge omitting the last value of edge. 401 /// - BorderType::kSymmetric, Reflect the values on the edge repeating the last value of edge. 402 /// \note If the input image is more than one, then make sure that the image size is the same. 403 explicit RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0}, 404 bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0}, 405 BorderType padding_mode = BorderType::kConstant); 406 407 /// \brief Destructor. 408 ~RandomCrop() = default; 409 410 protected: 411 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 412 /// \return Shared pointer to TensorOperation object. 413 std::shared_ptr<TensorOperation> Parse() override; 414 415 private: 416 struct Data; 417 std::shared_ptr<Data> data_; 418 }; 419 420 /// \brief Equivalent to RandomResizedCrop TensorTransform, but crop the image before decoding. 421 class RandomCropDecodeResize final : public TensorTransform { 422 public: 423 /// \brief Constructor. 424 /// \param[in] size A vector representing the output size of the cropped image. 425 /// If the size is a single value, a squared crop of size (size, size) is returned. 426 /// If the size has 2 values, it should be (height, width). 427 /// \param[in] scale Range [min, max) of respective size of the 428 /// original size to be cropped (default=(0.08, 1.0)). 429 /// \param[in] ratio Range [min, max) of aspect ratio to be 430 /// cropped (default=(3. / 4., 4. / 3.)). 431 /// \param[in] interpolation An enum for the mode of interpolation. 432 /// - InterpolationMode::kLinear, Interpolation method is blinear interpolation. 433 /// - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation. 434 /// - InterpolationMode::kCubic, Interpolation method is bicubic interpolation. 435 /// - InterpolationMode::kArea, Interpolation method is pixel area interpolation. 436 /// - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow. 437 /// \param[in] max_attempts The maximum number of attempts to propose a valid crop_area (default=10). 438 /// If exceeded, fall back to use center_crop instead. 439 explicit RandomCropDecodeResize(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0}, 440 std::vector<float> ratio = {3. / 4, 4. / 3}, 441 InterpolationMode interpolation = InterpolationMode::kLinear, 442 int32_t max_attempts = 10); 443 444 /// \brief Destructor. 445 ~RandomCropDecodeResize() = default; 446 447 protected: 448 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 449 /// \return Shared pointer to TensorOperation object. 450 std::shared_ptr<TensorOperation> Parse() override; 451 452 private: 453 struct Data; 454 std::shared_ptr<Data> data_; 455 }; 456 457 /// \brief Crop the input image at a random location and adjust bounding boxes accordingly. 458 /// If the cropped area is out of bbox, the returned bbox will be empty. 459 class RandomCropWithBBox final : public TensorTransform { 460 public: 461 /// \brief Constructor. 462 /// \param[in] size A vector representing the output size of the cropped image. 463 /// If the size is a single value, a squared crop of size (size, size) is returned. 464 /// If the size has 2 values, it should be (height, width). 465 /// \param[in] padding A vector representing the number of pixels to pad the image 466 /// If the vector has one value, it pads all sides of the image with that value. 467 /// If the vector has two values, it pads left and top with the first and 468 /// right and bottom with the second value. 469 /// If the vector has four values, it pads left, top, right, and bottom with 470 /// those values respectively. 471 /// \param[in] pad_if_needed A boolean indicating that whether to pad the image 472 /// if either side is smaller than the given output size. 473 /// \param[in] fill_value A vector representing the pixel intensity of the borders. Only valid 474 /// if the padding_mode is BorderType.kConstant. If 1 value is provided, it is used for all 475 /// RGB channels. If 3 values are provided, it is used to fill R, G, B channels respectively. 476 /// \param[in] padding_mode The method of padding (default=BorderType::kConstant).It can be any of 477 /// [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric]. 478 /// - BorderType::kConstant, Fill the border with constant values. 479 /// - BorderType::kEdge, Fill the border with the last value on the edge. 480 /// - BorderType::kReflect, Reflect the values on the edge omitting the last value of edge. 481 /// - BorderType::kSymmetric, Reflect the values on the edge repeating the last value of edge. 482 explicit RandomCropWithBBox(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0}, 483 bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0}, 484 BorderType padding_mode = BorderType::kConstant); 485 486 /// \brief Destructor. 487 ~RandomCropWithBBox() = default; 488 489 protected: 490 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 491 /// \return Shared pointer to TensorOperation object. 492 std::shared_ptr<TensorOperation> Parse() override; 493 494 private: 495 struct Data; 496 std::shared_ptr<Data> data_; 497 }; 498 499 /// \brief Randomly flip the input image horizontally with a given probability. 500 class RandomHorizontalFlip final : public TensorTransform { 501 public: 502 /// \brief Constructor. 503 /// \param[in] prob A float representing the probability of flip. 504 explicit RandomHorizontalFlip(float prob = 0.5); 505 506 /// \brief Destructor. 507 ~RandomHorizontalFlip() = default; 508 509 protected: 510 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 511 /// \return Shared pointer to TensorOperation object. 512 std::shared_ptr<TensorOperation> Parse() override; 513 514 private: 515 struct Data; 516 std::shared_ptr<Data> data_; 517 }; 518 519 /// \brief Randomly flip the input image horizontally with a given probability and adjust bounding boxes accordingly. 520 class RandomHorizontalFlipWithBBox final : public TensorTransform { 521 public: 522 /// \brief Constructor. 523 /// \param[in] prob A float representing the probability of flip. 524 explicit RandomHorizontalFlipWithBBox(float prob = 0.5); 525 526 /// \brief Destructor. 527 ~RandomHorizontalFlipWithBBox() = default; 528 529 protected: 530 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 531 /// \return Shared pointer to TensorOperation object. 532 std::shared_ptr<TensorOperation> Parse() override; 533 534 private: 535 struct Data; 536 std::shared_ptr<Data> data_; 537 }; 538 539 /// \brief Reduce the number of bits for each color channel randomly. 540 class RandomPosterize final : public TensorTransform { 541 public: 542 /// \brief Constructor. 543 /// \param[in] bit_range Range of random posterize to compress image. 544 /// uint8_t vector representing the minimum and maximum bit in range of [1,8] (Default={4, 8}). 545 explicit RandomPosterize(const std::vector<uint8_t> &bit_range = {4, 8}); 546 547 /// \brief Destructor. 548 ~RandomPosterize() = default; 549 550 protected: 551 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 552 /// \return Shared pointer to TensorOperation object. 553 std::shared_ptr<TensorOperation> Parse() override; 554 555 private: 556 struct Data; 557 std::shared_ptr<Data> data_; 558 }; 559 560 /// \brief Resize the input image using a randomly selected interpolation mode. 561 class RandomResize final : public TensorTransform { 562 public: 563 /// \brief Constructor. 564 /// \param[in] size A vector representing the output size of the resized image. 565 /// If the size is a single value, the smaller edge of the image will be resized to this value with 566 /// the same image aspect ratio. If the size has 2 values, it should be (height, width). 567 explicit RandomResize(std::vector<int32_t> size); 568 569 /// \brief Destructor. 570 ~RandomResize() = default; 571 572 protected: 573 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 574 /// \return Shared pointer to TensorOperation object. 575 std::shared_ptr<TensorOperation> Parse() override; 576 577 private: 578 struct Data; 579 std::shared_ptr<Data> data_; 580 }; 581 582 /// \brief Resize the input image using a randomly selected interpolation mode and adjust 583 /// bounding boxes accordingly. 584 class RandomResizeWithBBox final : public TensorTransform { 585 public: 586 /// \brief Constructor. 587 /// \param[in] size A vector representing the output size of the resized image. 588 /// If the size is a single value, the smaller edge of the image will be resized to this value with 589 /// the same image aspect ratio. If the size has 2 values, it should be (height, width). 590 explicit RandomResizeWithBBox(std::vector<int32_t> size); 591 592 /// \brief Destructor. 593 ~RandomResizeWithBBox() = default; 594 595 protected: 596 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 597 /// \return Shared pointer to TensorOperation object. 598 std::shared_ptr<TensorOperation> Parse() override; 599 600 private: 601 struct Data; 602 std::shared_ptr<Data> data_; 603 }; 604 605 /// \brief Crop the input image to a random size and aspect ratio. 606 class RandomResizedCrop final : public TensorTransform { 607 public: 608 /// \brief Constructor. 609 /// \param[in] size A vector representing the output size of the cropped image. 610 /// If the size is a single value, a squared crop of size (size, size) is returned. 611 /// If the size has 2 values, it should be (height, width). 612 /// \param[in] scale Range [min, max) of respective size of the original 613 /// size to be cropped (default=(0.08, 1.0)). 614 /// \param[in] ratio Range [min, max) of aspect ratio to be cropped 615 /// (default=(3. / 4., 4. / 3.)). 616 /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear). 617 /// - InterpolationMode::kLinear, Interpolation method is blinear interpolation. 618 /// - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation. 619 /// - InterpolationMode::kCubic, Interpolation method is bicubic interpolation. 620 /// - InterpolationMode::kArea, Interpolation method is pixel area interpolation. 621 /// - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow. 622 /// \param[in] max_attempts The maximum number of attempts to propose a valid. 623 /// crop_area (default=10). If exceeded, fall back to use center_crop instead. 624 /// \note If the input image is more than one, then make sure that the image size is the same. 625 explicit RandomResizedCrop(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0}, 626 std::vector<float> ratio = {3. / 4., 4. / 3.}, 627 InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10); 628 629 /// \brief Destructor. 630 ~RandomResizedCrop() = default; 631 632 protected: 633 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 634 /// \return Shared pointer to TensorOperation object. 635 std::shared_ptr<TensorOperation> Parse() override; 636 637 private: 638 struct Data; 639 std::shared_ptr<Data> data_; 640 }; 641 642 /// \brief Crop the input image to a random size and aspect ratio. 643 /// If cropped area is out of bbox, the return bbox will be empty. 644 class RandomResizedCropWithBBox final : public TensorTransform { 645 public: 646 /// \brief Constructor. 647 /// \param[in] size A vector representing the output size of the cropped image. 648 /// If the size is a single value, a squared crop of size (size, size) is returned. 649 /// If the size has 2 values, it should be (height, width). 650 /// \param[in] scale Range [min, max) of respective size of the original 651 /// size to be cropped (default=(0.08, 1.0)). 652 /// \param[in] ratio Range [min, max) of aspect ratio to be cropped 653 /// (default=(3. / 4., 4. / 3.)). 654 /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear). 655 /// - InterpolationMode::kLinear, Interpolation method is blinear interpolation. 656 /// - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation. 657 /// - InterpolationMode::kCubic, Interpolation method is bicubic interpolation. 658 /// - InterpolationMode::kArea, Interpolation method is pixel area interpolation. 659 /// - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow. 660 /// \param[in] max_attempts The maximum number of attempts to propose a valid 661 /// crop_area (default=10). If exceeded, fall back to use center_crop instead. 662 RandomResizedCropWithBBox(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0}, 663 std::vector<float> ratio = {3. / 4., 4. / 3.}, 664 InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10); 665 666 /// \brief Destructor. 667 ~RandomResizedCropWithBBox() = default; 668 669 protected: 670 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 671 /// \return Shared pointer to TensorOperation object. 672 std::shared_ptr<TensorOperation> Parse() override; 673 674 private: 675 struct Data; 676 std::shared_ptr<Data> data_; 677 }; 678 679 /// \brief Rotate the image according to parameters. 680 class RandomRotation final : public TensorTransform { 681 public: 682 /// \brief Constructor. 683 /// \param[in] degrees A float vector of size 2, representing the starting and ending degrees. 684 /// \param[in] resample An enum for the mode of interpolation. 685 /// - InterpolationMode::kLinear, Interpolation method is blinear interpolation. 686 /// - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation. 687 /// - InterpolationMode::kCubic, Interpolation method is bicubic interpolation. 688 /// - InterpolationMode::kArea, Interpolation method is pixel area interpolation. 689 /// - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow. 690 /// \param[in] expand A boolean representing whether the image is expanded after rotation. 691 /// \param[in] center A float vector of size 2 or empty, representing the x and y center of rotation 692 /// or the center of the image. 693 /// \param[in] fill_value A vector representing the value to fill the area outside the transform 694 /// in the output image. If 1 value is provided, it is used for all RGB channels. 695 /// If 3 values are provided, it is used to fill R, G, B channels respectively. 696 RandomRotation(std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, 697 bool expand = false, std::vector<float> center = {}, std::vector<uint8_t> fill_value = {0, 0, 0}); 698 699 /// \brief Destructor. 700 ~RandomRotation() = default; 701 702 protected: 703 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 704 /// \return Shared pointer to TensorOperation object. 705 std::shared_ptr<TensorOperation> Parse() override; 706 707 private: 708 struct Data; 709 std::shared_ptr<Data> data_; 710 }; 711 712 /// \brief Choose a random sub-policy from a list to be applied on the input image. A sub-policy is a list of tuples 713 /// (operation, prob), where operation is a TensorTransform operation and prob is the probability that this 714 /// operation will be applied. Once a sub-policy is selected, each operation within the sub-policy with be 715 /// applied in sequence according to its probability. 716 class RandomSelectSubpolicy final : public TensorTransform { 717 public: 718 /// \brief Constructor. 719 /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are raw pointers. 720 explicit RandomSelectSubpolicy(const std::vector<std::vector<std::pair<TensorTransform *, double>>> &policy); 721 722 /// \brief Constructor. 723 /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are shared pointers. 724 explicit RandomSelectSubpolicy( 725 const std::vector<std::vector<std::pair<std::shared_ptr<TensorTransform>, double>>> &policy); 726 727 /// \brief Constructor. 728 /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are object pointers. 729 explicit RandomSelectSubpolicy( 730 const std::vector<std::vector<std::pair<std::reference_wrapper<TensorTransform>, double>>> &policy); 731 732 /// \brief Destructor. 733 ~RandomSelectSubpolicy() = default; 734 735 protected: 736 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 737 /// \return Shared pointer to TensorOperation object. 738 std::shared_ptr<TensorOperation> Parse() override; 739 740 private: 741 struct Data; 742 std::shared_ptr<Data> data_; 743 }; 744 745 /// \brief Adjust the sharpness of the input image by a fixed or random degree. 746 class RandomSharpness final : public TensorTransform { 747 public: 748 /// \brief Constructor. 749 /// \param[in] degrees A float vector of size 2, representing the range of random sharpness 750 /// adjustment degrees. It should be in (min, max) format. If min=max, then it is a 751 /// single fixed magnitude operation (default = (0.1, 1.9)). 752 explicit RandomSharpness(std::vector<float> degrees = {0.1, 1.9}); 753 754 /// \brief Destructor. 755 ~RandomSharpness() = default; 756 757 protected: 758 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 759 /// \return Shared pointer to TensorOperation object. 760 std::shared_ptr<TensorOperation> Parse() override; 761 762 private: 763 struct Data; 764 std::shared_ptr<Data> data_; 765 }; 766 767 /// \brief Invert pixels randomly within a specified range. 768 class RandomSolarize final : public TensorTransform { 769 public: 770 /// \brief Constructor. 771 /// \param[in] threshold A vector with two elements specifying the pixel range to invert. 772 /// Threshold values should always be in (min, max) format. 773 /// If min=max, it will to invert all pixels above min(max). 774 explicit RandomSolarize(std::vector<uint8_t> threshold = {0, 255}); 775 776 /// \brief Destructor. 777 ~RandomSolarize() = default; 778 779 protected: 780 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 781 /// \return Shared pointer to TensorOperation object. 782 std::shared_ptr<TensorOperation> Parse() override; 783 784 private: 785 struct Data; 786 std::shared_ptr<Data> data_; 787 }; 788 789 /// \brief Randomly flip the input image vertically with a given probability. 790 class RandomVerticalFlip final : public TensorTransform { 791 public: 792 /// \brief Constructor. 793 /// \param[in] prob A float representing the probability of flip. 794 explicit RandomVerticalFlip(float prob = 0.5); 795 796 /// \brief Destructor. 797 ~RandomVerticalFlip() = default; 798 799 protected: 800 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 801 /// \return Shared pointer to TensorOperation object. 802 std::shared_ptr<TensorOperation> Parse() override; 803 804 private: 805 struct Data; 806 std::shared_ptr<Data> data_; 807 }; 808 809 /// \brief Randomly flip the input image vertically with a given probability and adjust bounding boxes accordingly. 810 class RandomVerticalFlipWithBBox final : public TensorTransform { 811 public: 812 /// \brief Constructor. 813 /// \param[in] prob A float representing the probability of flip. 814 explicit RandomVerticalFlipWithBBox(float prob = 0.5); 815 816 /// \brief Destructor. 817 ~RandomVerticalFlipWithBBox() = default; 818 819 protected: 820 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 821 /// \return Shared pointer to TensorOperation object. 822 std::shared_ptr<TensorOperation> Parse() override; 823 824 private: 825 struct Data; 826 std::shared_ptr<Data> data_; 827 }; 828 829 /// \brief Rescale the pixel value of input image. 830 class Rescale final : public TensorTransform { 831 public: 832 /// \brief Constructor. 833 /// \param[in] rescale Rescale factor. 834 /// \param[in] shift Shift factor. 835 Rescale(float rescale, float shift); 836 837 /// \brief Destructor. 838 ~Rescale() = default; 839 840 protected: 841 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 842 /// \return Shared pointer to TensorOperation object. 843 std::shared_ptr<TensorOperation> Parse() override; 844 845 private: 846 struct Data; 847 std::shared_ptr<Data> data_; 848 }; 849 850 /// \brief Resize the input image to the given size and adjust bounding boxes accordingly. 851 class ResizeWithBBox final : public TensorTransform { 852 public: 853 /// \brief Constructor. 854 /// \param[in] size The output size of the resized image. 855 /// If the size is an integer, smaller edge of the image will be resized to this value with the same image aspect 856 /// ratio. If the size is a sequence of length 2, it should be (height, width). 857 /// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear). 858 /// - InterpolationMode::kLinear, Interpolation method is blinear interpolation. 859 /// - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation. 860 /// - InterpolationMode::kCubic, Interpolation method is bicubic interpolation. 861 /// - InterpolationMode::kArea, Interpolation method is pixel area interpolation. 862 /// - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow. 863 explicit ResizeWithBBox(std::vector<int32_t> size, InterpolationMode interpolation = InterpolationMode::kLinear); 864 865 /// \brief Destructor. 866 ~ResizeWithBBox() = default; 867 868 protected: 869 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 870 /// \return Shared pointer to TensorOperation object. 871 std::shared_ptr<TensorOperation> Parse() override; 872 873 private: 874 struct Data; 875 std::shared_ptr<Data> data_; 876 }; 877 878 /// \brief Change the format of input tensor from 4-channel RGBA to 3-channel BGR. 879 class RGBA2BGR final : public TensorTransform { 880 public: 881 /// \brief Constructor. 882 RGBA2BGR(); 883 884 /// \brief Destructor. 885 ~RGBA2BGR() = default; 886 887 protected: 888 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 889 /// \return Shared pointer to TensorOperation object. 890 std::shared_ptr<TensorOperation> Parse() override; 891 }; 892 893 /// \brief Change the input 4 channel RGBA tensor to 3 channel RGB. 894 class RGBA2RGB final : public TensorTransform { 895 public: 896 /// \brief Constructor. 897 RGBA2RGB(); 898 899 /// \brief Destructor. 900 ~RGBA2RGB() = default; 901 902 protected: 903 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 904 /// \return Shared pointer to TensorOperation object. 905 std::shared_ptr<TensorOperation> Parse() override; 906 }; 907 908 /// \note Slice the tensor to multiple patches in horizontal and vertical directions. 909 class SlicePatches final : public TensorTransform { 910 public: 911 /// \brief Constructor. 912 /// \param[in] num_height The number of patches in vertical direction (default=1). 913 /// \param[in] num_width The number of patches in horizontal direction (default=1). 914 /// \param[in] slice_mode An enum for the mode of slice (default=SliceMode::kPad). 915 /// \param[in] fill_value A value representing the pixel to fill the padding area in right and 916 /// bottom border if slice_mode is kPad. Then padded tensor could be just sliced to multiple patches (default=0). 917 /// \note The usage scenerio is suitable to tensor with large height and width. The tensor will keep the same 918 /// if set both num_height and num_width to 1. And the number of output tensors is equal to num_height*num_width. 919 SlicePatches(int32_t num_height = 1, int32_t num_width = 1, SliceMode slice_mode = SliceMode::kPad, 920 uint8_t fill_value = 0); 921 922 /// \brief Destructor. 923 ~SlicePatches() = default; 924 925 protected: 926 /// \brief Function to convert TensorTransform object into a TensorOperation object. 927 /// \return Shared pointer to TensorOperation object. 928 std::shared_ptr<TensorOperation> Parse() override; 929 930 private: 931 struct Data; 932 std::shared_ptr<Data> data_; 933 }; 934 935 /// \brief Decode, randomly crop and resize a JPEG image using the simulation algorithm of 936 /// Ascend series chip DVPP module. The application scenario is consistent with SoftDvppDecodeResizeJpeg. 937 /// The input image size should be in range [32*32, 8192*8192]. 938 /// The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16]. 939 /// Only images with an even resolution can be output. The output of odd resolution is not supported. 940 class SoftDvppDecodeRandomCropResizeJpeg final : public TensorTransform { 941 public: 942 /// \brief Constructor. 943 /// \param[in] size A vector representing the output size of the resized image. 944 /// If the size is a single value, smaller edge of the image will be resized to this value with 945 /// the same image aspect ratio. If the size has 2 values, it should be (height, width). 946 /// \param[in] scale Range [min, max) of respective size of the original 947 /// size to be cropped (default=(0.08, 1.0)). 948 /// \param[in] ratio Range [min, max) of aspect ratio to be cropped 949 /// (default=(3. / 4., 4. / 3.)). 950 /// \param[in] max_attempts The maximum number of attempts to propose a valid 951 /// crop_area (default=10). If exceeded, fall back to use center_crop instead. 952 SoftDvppDecodeRandomCropResizeJpeg(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0}, 953 std::vector<float> ratio = {3. / 4., 4. / 3.}, int32_t max_attempts = 10); 954 955 /// \brief Destructor. 956 ~SoftDvppDecodeRandomCropResizeJpeg() = default; 957 958 protected: 959 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 960 /// \return Shared pointer to TensorOperation object. 961 std::shared_ptr<TensorOperation> Parse() override; 962 963 private: 964 struct Data; 965 std::shared_ptr<Data> data_; 966 }; 967 968 /// \brief Decode and resize a JPEG image using the simulation algorithm of Ascend series 969 /// chip DVPP module. It is recommended to use this algorithm in the following scenarios: 970 /// When training, the DVPP of the Ascend chip is not used, 971 /// and the DVPP of the Ascend chip is used during inference, 972 /// and the accuracy of inference is lower than the accuracy of training; 973 /// and the input image size should be in range [32*32, 8192*8192]. 974 /// The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16]. 975 /// Only images with an even resolution can be output. The output of odd resolution is not supported. 976 class SoftDvppDecodeResizeJpeg final : public TensorTransform { 977 public: 978 /// \brief Constructor. 979 /// \param[in] size A vector representing the output size of the resized image. 980 /// If the size is a single value, smaller edge of the image will be resized to this value with 981 /// the same image aspect ratio. If the size has 2 values, it should be (height, width). 982 explicit SoftDvppDecodeResizeJpeg(std::vector<int32_t> size); 983 984 /// \brief Destructor. 985 ~SoftDvppDecodeResizeJpeg() = default; 986 987 protected: 988 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 989 /// \return Shared pointer to TensorOperation object. 990 std::shared_ptr<TensorOperation> Parse() override; 991 992 private: 993 struct Data; 994 std::shared_ptr<Data> data_; 995 }; 996 997 /// \brief Swap the red and blue channels of the input image. 998 class SwapRedBlue final : public TensorTransform { 999 public: 1000 /// \brief Constructor. 1001 SwapRedBlue(); 1002 1003 /// \brief Destructor. 1004 ~SwapRedBlue() = default; 1005 1006 protected: 1007 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 1008 /// \return Shared pointer to TensorOperation object. 1009 std::shared_ptr<TensorOperation> Parse() override; 1010 }; 1011 1012 /// \brief Randomly perform transformations, as selected from input transform list, on the input tensor. 1013 class UniformAugment final : public TensorTransform { 1014 public: 1015 /// \brief Constructor. 1016 /// \param[in] transforms Raw pointer to vector of TensorTransform operations. 1017 /// \param[in] num_ops An integer representing the number of operations to be selected and applied. 1018 explicit UniformAugment(const std::vector<TensorTransform *> &transforms, int32_t num_ops = 2); 1019 1020 /// \brief Constructor. 1021 /// \param[in] transforms Smart pointer to vector of TensorTransform operations. 1022 /// \param[in] num_ops An integer representing the number of operations to be selected and applied. 1023 explicit UniformAugment(const std::vector<std::shared_ptr<TensorTransform>> &transforms, int32_t num_ops = 2); 1024 1025 /// \brief Constructor. 1026 /// \param[in] transforms Object pointer to vector of TensorTransform operations. 1027 /// \param[in] num_ops An integer representing the number of operations to be selected and applied. 1028 explicit UniformAugment(const std::vector<std::reference_wrapper<TensorTransform>> &transforms, int32_t num_ops = 2); 1029 1030 /// \brief Destructor. 1031 ~UniformAugment() = default; 1032 1033 protected: 1034 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 1035 /// \return Shared pointer to TensorOperation object. 1036 std::shared_ptr<TensorOperation> Parse() override; 1037 1038 private: 1039 struct Data; 1040 std::shared_ptr<Data> data_; 1041 }; 1042 1043 /// \brief Flip the input image vertically. 1044 class VerticalFlip final : public TensorTransform { 1045 public: 1046 /// \brief Constructor. 1047 VerticalFlip(); 1048 1049 /// \brief Destructor. 1050 ~VerticalFlip() = default; 1051 1052 protected: 1053 /// \brief The function to convert a TensorTransform object into a TensorOperation object. 1054 /// \return Shared pointer to TensorOperation object. 1055 std::shared_ptr<TensorOperation> Parse() override; 1056 }; 1057 1058 } // namespace vision 1059 } // namespace dataset 1060 } // namespace mindspore 1061 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_ 1062