• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_
18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_
19 
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include <utility>
24 #include <vector>
25 
26 #include "include/api/dual_abi_helper.h"
27 #include "include/api/status.h"
28 #include "include/dataset/constants.h"
29 #include "include/dataset/transforms.h"
30 #include "include/dataset/vision_lite.h"
31 
32 namespace mindspore {
33 namespace dataset {
34 
35 class TensorOperation;
36 
37 // Transform operations for performing computer vision.
38 namespace vision {
39 
40 /// \brief AdjustGamma TensorTransform.
41 /// \note Apply gamma correction on the input image.
42 class AdjustGamma final : public TensorTransform {
43  public:
44   /// \brief Constructor.
45   /// \param[in] gamma Non-negative real number, which makes the output image pixel value
46   ///     exponential in relation to the input image pixel value.
47   /// \param[in] gain The constant multiplier (default = 1).
48   explicit AdjustGamma(float gamma, float gain = 1);
49 
50   /// \brief Destructor (defaulted).
51   ~AdjustGamma() = default;
52 
53  protected:
54   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
55   /// \return Shared pointer to TensorOperation object.
56   std::shared_ptr<TensorOperation> Parse() override;
57 
58  private:
59   struct Data;
60   std::shared_ptr<Data> data_;
61 };
62 
63 /// \brief Apply automatic contrast on the input image.
64 class AutoContrast final : public TensorTransform {
65  public:
66   /// \brief Constructor.
67   /// \param[in] cutoff Percent of pixels to cut off from the histogram; the valid range is 0 to 50 (default = 0.0).
68   /// \param[in] ignore Pixel values to ignore (default = {}).
69   explicit AutoContrast(float cutoff = 0.0, std::vector<uint32_t> ignore = {});
70 
71   /// \brief Destructor (defaulted).
72   ~AutoContrast() = default;
73 
74  protected:
75   /// \brief Convert this TensorTransform object into a TensorOperation object.
76   /// \return Shared pointer to TensorOperation object.
77   std::shared_ptr<TensorOperation> Parse() override;
78 
79  private:
80   struct Data;
81   std::shared_ptr<Data> data_;
82 };
83 
84 /// \brief BoundingBoxAugment TensorTransform.
85 /// \note Apply a given image transform on a random selection of bounding box regions of a given image.
86 class BoundingBoxAugment final : public TensorTransform {
87  public:
88   /// \brief Constructor taking the transform as a raw pointer.
89   /// \param[in] transform Raw pointer to the TensorTransform operation.
90   /// \param[in] ratio Ratio of bounding boxes where augmentation is applied to. Range: [0, 1] (default=0.3).
91   explicit BoundingBoxAugment(TensorTransform *transform, float ratio = 0.3);
92 
93   /// \brief Constructor taking the transform as a shared pointer.
94   /// \param[in] transform Smart pointer to the TensorTransform operation.
95   /// \param[in] ratio Ratio of bounding boxes where augmentation is applied to. Range: [0, 1] (default=0.3).
96   explicit BoundingBoxAugment(const std::shared_ptr<TensorTransform> &transform, float ratio = 0.3);
97 
98   /// \brief Constructor taking the transform as a reference wrapper.
99   /// \param[in] transform Reference wrapper around the TensorTransform operation.
100   /// \param[in] ratio Ratio of bounding boxes where augmentation is applied to. Range: [0, 1] (default=0.3).
101   explicit BoundingBoxAugment(const std::reference_wrapper<TensorTransform> transform, float ratio = 0.3);
102 
103   /// \brief Destructor (defaulted).
104   ~BoundingBoxAugment() = default;
105 
106  protected:
107   /// \brief Convert this TensorTransform object into a TensorOperation object.
108   /// \return Shared pointer to TensorOperation object.
109   std::shared_ptr<TensorOperation> Parse() override;
110 
111  private:
112   struct Data;
113   std::shared_ptr<Data> data_;
114 };
115 
116 /// \brief Change the color space of the image.
117 class ConvertColor final : public TensorTransform {
118  public:
119   /// \brief Constructor.
120   /// \param[in] convert_mode The mode of image channel conversion (a ConvertMode value).
121   explicit ConvertColor(ConvertMode convert_mode);
122 
123   /// \brief Destructor (defaulted).
124   ~ConvertColor() = default;
125 
126  protected:
127   /// \brief Convert this TensorTransform object into a TensorOperation object.
128   /// \return Shared pointer to TensorOperation object.
129   std::shared_ptr<TensorOperation> Parse() override;
130 
131  private:
132   struct Data;
133   std::shared_ptr<Data> data_;
134 };
135 
136 /// \brief Mask a random section of each image with the corresponding part of another randomly
137 ///     selected image in that batch.
138 class CutMixBatch final : public TensorTransform {
139  public:
140   /// \brief Constructor.
141   /// \param[in] image_batch_format The format of the batch (an ImageBatchFormat value).
142   /// \param[in] alpha The hyperparameter of the beta distribution (default = 1.0).
143   /// \param[in] prob The probability by which CutMix is applied to each image (default = 1.0).
144   explicit CutMixBatch(ImageBatchFormat image_batch_format, float alpha = 1.0, float prob = 1.0);
145 
146   /// \brief Destructor (defaulted).
147   ~CutMixBatch() = default;
148 
149  protected:
150   /// \brief Convert this TensorTransform object into a TensorOperation object.
151   /// \return Shared pointer to TensorOperation object.
152   std::shared_ptr<TensorOperation> Parse() override;
153 
154  private:
155   struct Data;
156   std::shared_ptr<Data> data_;
157 };
158 
159 /// \brief Randomly cut (mask) out a given number of square patches from the input image.
160 class CutOut final : public TensorTransform {
161  public:
162   /// \brief Constructor.
163   /// \param[in] length Integer representing the side length of each square patch.
164   /// \param[in] num_patches Integer representing the number of patches to be cut out of an image (default = 1).
165   explicit CutOut(int32_t length, int32_t num_patches = 1);
166 
167   /// \brief Destructor (defaulted).
168   ~CutOut() = default;
169 
170  protected:
171   /// \brief Convert this TensorTransform object into a TensorOperation object.
172   /// \return Shared pointer to TensorOperation object.
173   std::shared_ptr<TensorOperation> Parse() override;
174 
175  private:
176   struct Data;
177   std::shared_ptr<Data> data_;
178 };
179 
180 /// \brief Apply histogram equalization on the input image.
181 class Equalize final : public TensorTransform {
182  public:
183   /// \brief Constructor; takes no parameters.
184   Equalize();
185 
186   /// \brief Destructor (defaulted).
187   ~Equalize() = default;
188 
189  protected:
190   /// \brief Convert this TensorTransform object into a TensorOperation object.
191   /// \return Shared pointer to TensorOperation object.
192   std::shared_ptr<TensorOperation> Parse() override;
193 };
194 
195 /// \brief Flip the input image horizontally.
196 class HorizontalFlip final : public TensorTransform {
197  public:
198   /// \brief Constructor; takes no parameters.
199   HorizontalFlip();
200 
201   /// \brief Destructor (defaulted).
202   ~HorizontalFlip() = default;
203 
204  protected:
205   /// \brief Convert this TensorTransform object into a TensorOperation object.
206   /// \return Shared pointer to TensorOperation object.
207   std::shared_ptr<TensorOperation> Parse() override;
208 };
209 
210 /// \brief Transpose the input image from shape (H, W, C) to shape (C, H, W).
211 class HWC2CHW final : public TensorTransform {
212  public:
213   /// \brief Constructor; takes no parameters.
214   HWC2CHW();
215 
216   /// \brief Destructor (defaulted).
217   ~HWC2CHW() = default;
218 
219  protected:
220   /// \brief Convert this TensorTransform object into a TensorOperation object.
221   /// \return Shared pointer to TensorOperation object.
222   std::shared_ptr<TensorOperation> Parse() override;
223 };
224 
225 /// \brief Apply invert on the input image in RGB mode.
226 class Invert final : public TensorTransform {
227  public:
228   /// \brief Constructor; takes no parameters.
229   Invert();
230 
231   /// \brief Destructor (defaulted).
232   ~Invert() = default;
233 
234  protected:
235   /// \brief Convert this TensorTransform object into a TensorOperation object.
236   /// \return Shared pointer to TensorOperation object.
237   std::shared_ptr<TensorOperation> Parse() override;
238 };
239 
240 /// \brief Apply MixUp transformation on an input batch of images and labels. The labels must be in
241 ///     one-hot format and Batch must be called before calling this function.
242 class MixUpBatch final : public TensorTransform {
243  public:
244   /// \brief Constructor.
245   /// \param[in] alpha The hyperparameter of the beta distribution (default = 1.0).
246   explicit MixUpBatch(float alpha = 1);
247 
248   /// \brief Destructor (defaulted).
249   ~MixUpBatch() = default;
250 
251  protected:
252   /// \brief Convert this TensorTransform object into a TensorOperation object.
253   /// \return Shared pointer to TensorOperation object.
254   std::shared_ptr<TensorOperation> Parse() override;
255 
256  private:
257   struct Data;
258   std::shared_ptr<Data> data_;
259 };
260 
261 /// \brief Normalize the input image with respect to mean and standard deviation and pad an extra
262 ///     channel with value zero.
263 class NormalizePad final : public TensorTransform {
264  public:
265   /// \brief Constructor taking dtype as a std::string; delegates to the std::vector<char> overload.
266   /// \param[in] mean A vector of mean values for each channel, with respect to channel order.
267   ///     The mean values must be in range [0.0, 255.0].
268   /// \param[in] std A vector of standard deviations for each channel, with respect to channel order.
269   ///     The standard deviation values must be in range (0.0, 255.0].
270   /// \param[in] dtype The output datatype of Tensor.
271   ///     The value must be "float32" or "float16" (default = "float32").
272   NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::string &dtype = "float32")
273       : NormalizePad(mean, std, StringToChar(dtype)) {}
274   /// \brief Constructor taking dtype as a char vector; same parameters as the std::string overload above.
275   NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::vector<char> &dtype);
276 
277   /// \brief Destructor (defaulted).
278   ~NormalizePad() = default;
279 
280  protected:
281   /// \brief Convert this TensorTransform object into a TensorOperation object.
282   /// \return Shared pointer to TensorOperation object.
283   std::shared_ptr<TensorOperation> Parse() override;
284 
285  private:
286   struct Data;
287   std::shared_ptr<Data> data_;
288 };
289 
290 /// \brief Pad the image according to padding parameters.
291 class Pad final : public TensorTransform {
292  public:
293   /// \brief Constructor.
294   /// \param[in] padding A vector representing the number of pixels to pad the image.
295   ///    If the vector has one value, it pads all sides of the image with that value.
296   ///    If the vector has two values, it pads left and top with the first and
297   ///    right and bottom with the second value.
298   ///    If the vector has four values, it pads left, top, right, and bottom with
299   ///    those values respectively.
300   /// \param[in] fill_value A vector representing the pixel intensity of the borders. Only valid if the
301   ///    padding_mode is BorderType::kConstant. If 1 value is provided, it is used for all RGB channels.
302   ///    If 3 values are provided, they are used to fill R, G, B channels respectively (default={0}).
303   /// \param[in] padding_mode The method of padding (default=BorderType::kConstant).
304   ///    Can be any of
305   ///    [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric]
306   ///    - BorderType::kConstant, means it fills the border with constant values
307   ///    - BorderType::kEdge, means it pads with the last value on the edge
308   ///    - BorderType::kReflect, means it reflects the values on the edge omitting the last value of edge
309   ///    - BorderType::kSymmetric, means it reflects the values on the edge repeating the last value of edge
310   explicit Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
311                BorderType padding_mode = BorderType::kConstant);
312 
313   /// \brief Destructor (defaulted).
314   ~Pad() = default;
315 
316  protected:
317   /// \brief Convert this TensorTransform object into a TensorOperation object.
318   /// \return Shared pointer to TensorOperation object.
319   std::shared_ptr<TensorOperation> Parse() override;
320 
321  private:
322   struct Data;
323   std::shared_ptr<Data> data_;
324 };
325 
326 /// \brief Blend an image with its grayscale version with random weights
327 ///        t and 1 - t generated from a given range. If the range is trivial
328 ///        then the weights are determinate and t equals the bound of the interval.
329 class RandomColor final : public TensorTransform {
330  public:
331   /// \brief Constructor.
332   /// \param[in] t_lb Lower bound of the random weights.
333   /// \param[in] t_ub Upper bound of the random weights.
334   RandomColor(float t_lb, float t_ub);
335 
336   /// \brief Destructor (defaulted).
337   ~RandomColor() = default;
338 
339  protected:
340   /// \brief Convert this TensorTransform object into a TensorOperation object.
341   /// \return Shared pointer to TensorOperation object.
342   std::shared_ptr<TensorOperation> Parse() override;
343 
344  private:
345   struct Data;
346   std::shared_ptr<Data> data_;
347 };
348 
349 /// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image.
350 class RandomColorAdjust final : public TensorTransform {
351  public:
352   /// \brief Constructor.
353   /// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values;
354   ///     if it is a vector of two values, it needs to be in the form of [min, max] (Default={1, 1}).
355   /// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values;
356   ///     if it is a vector of two values, it needs to be in the form of [min, max] (Default={1, 1}).
357   /// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values;
358   ///     if it is a vector of two values, it needs to be in the form of [min, max] (Default={1, 1}).
359   /// \param[in] hue Hue adjustment factor. Must be a vector of one or two values;
360   ///     if it is a vector of two values, it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5
361   ///     (Default={0, 0}).
362   explicit RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0},
363                              std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0});
364 
365   /// \brief Destructor (defaulted).
366   ~RandomColorAdjust() = default;
367 
368  protected:
369   /// \brief Convert this TensorTransform object into a TensorOperation object.
370   /// \return Shared pointer to TensorOperation object.
371   std::shared_ptr<TensorOperation> Parse() override;
372 
373  private:
374   struct Data;
375   std::shared_ptr<Data> data_;
376 };
377 
378 /// \brief Crop the input image at a random location.
379 class RandomCrop final : public TensorTransform {
380  public:
381   /// \brief Constructor.
382   /// \param[in] size A vector representing the output size of the cropped image.
383   ///     If the size is a single value, a squared crop of size (size, size) is returned.
384   ///     If the size has 2 values, it should be (height, width).
385   /// \param[in] padding A vector representing the number of pixels to pad the image (default={0, 0, 0, 0}).
386   ///    If the vector has one value, it pads all sides of the image with that value.
387   ///    If the vector has two values, it pads left and top with the first and
388   ///    right and bottom with the second value.
389   ///    If the vector has four values, it pads left, top, right, and bottom with
390   ///    those values respectively.
391   /// \param[in] pad_if_needed A boolean indicating whether to pad the image
392   ///    if either side is smaller than the given output size (default=false).
393   /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
394   ///     BorderType::kConstant. If 1 value is provided, it is used for all RGB channels.
395   ///     If 3 values are provided, they are used to fill R, G, B channels respectively (default={0, 0, 0}).
396   /// \param[in] padding_mode The method of padding (default=BorderType::kConstant). It can be any of
397   ///     [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric].
398   ///   - BorderType::kConstant, Fill the border with constant values.
399   ///   - BorderType::kEdge, Fill the border with the last value on the edge.
400   ///   - BorderType::kReflect, Reflect the values on the edge omitting the last value of edge.
401   ///   - BorderType::kSymmetric, Reflect the values on the edge repeating the last value of edge.
402   /// \note If more than one input image is given, make sure that all of them have the same size.
403   explicit RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
404                       bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
405                       BorderType padding_mode = BorderType::kConstant);
406 
407   /// \brief Destructor (defaulted).
408   ~RandomCrop() = default;
409 
410  protected:
411   /// \brief Convert this TensorTransform object into a TensorOperation object.
412   /// \return Shared pointer to TensorOperation object.
413   std::shared_ptr<TensorOperation> Parse() override;
414 
415  private:
416   struct Data;
417   std::shared_ptr<Data> data_;
418 };
419 
420 /// \brief Equivalent to RandomResizedCrop TensorTransform, but crop the image before decoding.
421 class RandomCropDecodeResize final : public TensorTransform {
422  public:
423   /// \brief Constructor.
424   /// \param[in] size A vector representing the output size of the cropped image.
425   ///               If the size is a single value, a squared crop of size (size, size) is returned.
426   ///               If the size has 2 values, it should be (height, width).
427   /// \param[in] scale Range [min, max) of respective size of the
428   ///               original size to be cropped (default=(0.08, 1.0)).
429   /// \param[in] ratio Range [min, max) of aspect ratio to be
430   ///               cropped (default=(3. / 4., 4. / 3.)).
431   /// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
432   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
433   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
434   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
435   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
436   ///   - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
437   /// \param[in] max_attempts The maximum number of attempts to propose a valid crop_area (default=10).
438   ///               If exceeded, fall back to use center_crop instead.
439   explicit RandomCropDecodeResize(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
440                                   std::vector<float> ratio = {3. / 4, 4. / 3},
441                                   InterpolationMode interpolation = InterpolationMode::kLinear,
442                                   int32_t max_attempts = 10);
443 
444   /// \brief Destructor (defaulted).
445   ~RandomCropDecodeResize() = default;
446 
447  protected:
448   /// \brief Convert this TensorTransform object into a TensorOperation object.
449   /// \return Shared pointer to TensorOperation object.
450   std::shared_ptr<TensorOperation> Parse() override;
451 
452  private:
453   struct Data;
454   std::shared_ptr<Data> data_;
455 };
456 
457 /// \brief Crop the input image at a random location and adjust bounding boxes accordingly.
458 ///        If the cropped area is out of bbox, the returned bbox will be empty.
459 class RandomCropWithBBox final : public TensorTransform {
460  public:
461   /// \brief Constructor.
462   /// \param[in] size A vector representing the output size of the cropped image.
463   ///     If the size is a single value, a squared crop of size (size, size) is returned.
464   ///     If the size has 2 values, it should be (height, width).
465   /// \param[in] padding A vector representing the number of pixels to pad the image (default={0, 0, 0, 0}).
466   ///    If the vector has one value, it pads all sides of the image with that value.
467   ///    If the vector has two values, it pads left and top with the first and
468   ///    right and bottom with the second value.
469   ///    If the vector has four values, it pads left, top, right, and bottom with
470   ///    those values respectively.
471   /// \param[in] pad_if_needed A boolean indicating whether to pad the image
472   ///    if either side is smaller than the given output size (default=false).
473   /// \param[in] fill_value A vector representing the pixel intensity of the borders. Only valid
474   ///    if the padding_mode is BorderType::kConstant. If 1 value is provided, it is used for all
475   ///    RGB channels. If 3 values are provided, they are used to fill R, G, B channels respectively.
476   /// \param[in] padding_mode The method of padding (default=BorderType::kConstant). It can be any of
477   ///     [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric].
478   ///   - BorderType::kConstant, Fill the border with constant values.
479   ///   - BorderType::kEdge, Fill the border with the last value on the edge.
480   ///   - BorderType::kReflect, Reflect the values on the edge omitting the last value of edge.
481   ///   - BorderType::kSymmetric, Reflect the values on the edge repeating the last value of edge.
482   explicit RandomCropWithBBox(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
483                               bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
484                               BorderType padding_mode = BorderType::kConstant);
485 
486   /// \brief Destructor (defaulted).
487   ~RandomCropWithBBox() = default;
488 
489  protected:
490   /// \brief Convert this TensorTransform object into a TensorOperation object.
491   /// \return Shared pointer to TensorOperation object.
492   std::shared_ptr<TensorOperation> Parse() override;
493 
494  private:
495   struct Data;
496   std::shared_ptr<Data> data_;
497 };
498 
499 /// \brief Randomly flip the input image horizontally with a given probability.
500 class RandomHorizontalFlip final : public TensorTransform {
501  public:
502   /// \brief Constructor.
503   /// \param[in] prob A float representing the probability of flip (default = 0.5).
504   explicit RandomHorizontalFlip(float prob = 0.5);
505 
506   /// \brief Destructor (defaulted).
507   ~RandomHorizontalFlip() = default;
508 
509  protected:
510   /// \brief Convert this TensorTransform object into a TensorOperation object.
511   /// \return Shared pointer to TensorOperation object.
512   std::shared_ptr<TensorOperation> Parse() override;
513 
514  private:
515   struct Data;
516   std::shared_ptr<Data> data_;
517 };
518 
519 /// \brief Randomly flip the input image horizontally with a given probability and adjust bounding boxes accordingly.
520 class RandomHorizontalFlipWithBBox final : public TensorTransform {
521  public:
522   /// \brief Constructor.
523   /// \param[in] prob A float representing the probability of flip (default = 0.5).
524   explicit RandomHorizontalFlipWithBBox(float prob = 0.5);
525 
526   /// \brief Destructor (defaulted).
527   ~RandomHorizontalFlipWithBBox() = default;
528 
529  protected:
530   /// \brief Convert this TensorTransform object into a TensorOperation object.
531   /// \return Shared pointer to TensorOperation object.
532   std::shared_ptr<TensorOperation> Parse() override;
533 
534  private:
535   struct Data;
536   std::shared_ptr<Data> data_;
537 };
538 
539 /// \brief Reduce the number of bits for each color channel randomly.
540 class RandomPosterize final : public TensorTransform {
541  public:
542   /// \brief Constructor.
543   /// \param[in] bit_range Range of random posterize to compress image.
544   ///     A uint8_t vector representing the minimum and maximum bit in range of [1,8] (Default={4, 8}).
545   explicit RandomPosterize(const std::vector<uint8_t> &bit_range = {4, 8});
546 
547   /// \brief Destructor (defaulted).
548   ~RandomPosterize() = default;
549 
550  protected:
551   /// \brief Convert this TensorTransform object into a TensorOperation object.
552   /// \return Shared pointer to TensorOperation object.
553   std::shared_ptr<TensorOperation> Parse() override;
554 
555  private:
556   struct Data;
557   std::shared_ptr<Data> data_;
558 };
559 
560 /// \brief Resize the input image using a randomly selected interpolation mode.
561 class RandomResize final : public TensorTransform {
562  public:
563   /// \brief Constructor.
564   /// \param[in] size A vector representing the output size of the resized image.
565   ///     If the size is a single value, the smaller edge of the image will be resized to this value,
566   ///     keeping the image aspect ratio. If the size has 2 values, it should be (height, width).
567   explicit RandomResize(std::vector<int32_t> size);
568 
569   /// \brief Destructor (defaulted).
570   ~RandomResize() = default;
571 
572  protected:
573   /// \brief Convert this TensorTransform object into a TensorOperation object.
574   /// \return Shared pointer to TensorOperation object.
575   std::shared_ptr<TensorOperation> Parse() override;
576 
577  private:
578   struct Data;
579   std::shared_ptr<Data> data_;
580 };
581 
582 /// \brief Resize the input image using a randomly selected interpolation mode and adjust
583 ///     bounding boxes accordingly.
584 class RandomResizeWithBBox final : public TensorTransform {
585  public:
586   /// \brief Constructor.
587   /// \param[in] size A vector representing the output size of the resized image.
588   ///     If the size is a single value, the smaller edge of the image will be resized to this value,
589   ///     keeping the image aspect ratio. If the size has 2 values, it should be (height, width).
590   explicit RandomResizeWithBBox(std::vector<int32_t> size);
591 
592   /// \brief Destructor (defaulted).
593   ~RandomResizeWithBBox() = default;
594 
595  protected:
596   /// \brief Convert this TensorTransform object into a TensorOperation object.
597   /// \return Shared pointer to TensorOperation object.
598   std::shared_ptr<TensorOperation> Parse() override;
599 
600  private:
601   struct Data;
602   std::shared_ptr<Data> data_;
603 };
604 
605 /// \brief Crop the input image to a random size and aspect ratio.
606 class RandomResizedCrop final : public TensorTransform {
607  public:
608   /// \brief Constructor.
609   /// \param[in] size A vector representing the output size of the cropped image.
610   ///     If the size is a single value, a squared crop of size (size, size) is returned.
611   ///     If the size has 2 values, it should be (height, width).
612   /// \param[in] scale Range [min, max) of respective size of the original
613   ///     size to be cropped (default=(0.08, 1.0)).
614   /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
615   ///     (default=(3. / 4., 4. / 3.)).
616   /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear).
617   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
618   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
619   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
620   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
621   ///   - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
622   /// \param[in] max_attempts The maximum number of attempts to propose a valid
623   ///     crop_area (default=10). If exceeded, fall back to use center_crop instead.
624   /// \note If more than one input image is given, make sure that all of them have the same size.
625   explicit RandomResizedCrop(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
626                              std::vector<float> ratio = {3. / 4., 4. / 3.},
627                              InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
628 
629   /// \brief Destructor (defaulted).
630   ~RandomResizedCrop() = default;
631 
632  protected:
633   /// \brief Convert this TensorTransform object into a TensorOperation object.
634   /// \return Shared pointer to TensorOperation object.
635   std::shared_ptr<TensorOperation> Parse() override;
636 
637  private:
638   struct Data;
639   std::shared_ptr<Data> data_;
640 };
641 
642 /// \brief Crop the input image to a random size and aspect ratio.
643 ///        If the cropped area is out of bbox, the returned bbox will be empty.
644 class RandomResizedCropWithBBox final : public TensorTransform {
645  public:
646   /// \brief Constructor. NOTE(review): not declared explicit, unlike RandomResizedCrop's constructor; confirm intent.
647   /// \param[in] size A vector representing the output size of the cropped image.
648   ///     If the size is a single value, a squared crop of size (size, size) is returned.
649   ///     If the size has 2 values, it should be (height, width).
650   /// \param[in] scale Range [min, max) of respective size of the original
651   ///     size to be cropped (default=(0.08, 1.0)).
652   /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
653   ///     (default=(3. / 4., 4. / 3.)).
654   /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear).
655   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
656   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
657   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
658   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
659   ///   - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
660   /// \param[in] max_attempts The maximum number of attempts to propose a valid
661   ///     crop_area (default=10). If exceeded, fall back to use center_crop instead.
662   RandomResizedCropWithBBox(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
663                             std::vector<float> ratio = {3. / 4., 4. / 3.},
664                             InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
665 
666   /// \brief Destructor (defaulted).
667   ~RandomResizedCropWithBBox() = default;
668 
669  protected:
670   /// \brief Convert this TensorTransform object into a TensorOperation object.
671   /// \return Shared pointer to TensorOperation object.
672   std::shared_ptr<TensorOperation> Parse() override;
673 
674  private:
675   struct Data;
676   std::shared_ptr<Data> data_;
677 };
678 
679 /// \brief Rotate the image according to parameters.
680 class RandomRotation final : public TensorTransform {
681  public:
682   /// \brief Constructor.
683   /// \param[in] degrees A float vector of size 2, representing the starting and ending degrees.
684   /// \param[in] resample An enum for the mode of interpolation (default=InterpolationMode::kNearestNeighbour).
685   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
686   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
687   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
688   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
689   ///   - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
690   /// \param[in] expand A boolean representing whether the image is expanded after rotation (default=false).
691   /// \param[in] center A float vector of size 2 or empty, representing the x and y center of rotation
692   ///     or the center of the image (default={}).
693   /// \param[in] fill_value A vector representing the value to fill the area outside the transform
694   ///    in the output image. If 1 value is provided, it is used for all RGB channels.
695   ///    If 3 values are provided, they are used to fill R, G, B channels respectively (default={0, 0, 0}).
696   RandomRotation(std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour,
697                  bool expand = false, std::vector<float> center = {}, std::vector<uint8_t> fill_value = {0, 0, 0});
698 
699   /// \brief Destructor.
700   ~RandomRotation() = default;
701 
702  protected:
703   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
704   /// \return Shared pointer to the TensorOperation object.
705   std::shared_ptr<TensorOperation> Parse() override;
706 
707  private:
708   struct Data;
709   std::shared_ptr<Data> data_;
710 };
711 
712 /// \brief Choose a random sub-policy from a list to be applied on the input image. A sub-policy is a list of tuples
713 ///     (operation, prob), where operation is a TensorTransform operation and prob is the probability that this
714 ///     operation will be applied. Once a sub-policy is selected, each operation within the sub-policy will be
715 ///     applied in sequence according to its probability.
716 class RandomSelectSubpolicy final : public TensorTransform {
717  public:
718   /// \brief Constructor.
719   /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are raw pointers.
720   explicit RandomSelectSubpolicy(const std::vector<std::vector<std::pair<TensorTransform *, double>>> &policy);
721 
722   /// \brief Constructor.
723   /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are shared pointers.
724   explicit RandomSelectSubpolicy(
725     const std::vector<std::vector<std::pair<std::shared_ptr<TensorTransform>, double>>> &policy);
726 
727   /// \brief Constructor.
728   /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are reference wrappers.
729   explicit RandomSelectSubpolicy(
730     const std::vector<std::vector<std::pair<std::reference_wrapper<TensorTransform>, double>>> &policy);
731 
732   /// \brief Destructor.
733   ~RandomSelectSubpolicy() = default;
734 
735  protected:
736   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
737   /// \return Shared pointer to the TensorOperation object.
738   std::shared_ptr<TensorOperation> Parse() override;
739 
740  private:
741   struct Data;
742   std::shared_ptr<Data> data_;
743 };
744 
745 /// \brief Adjust the sharpness of the input image by a fixed or random degree.
746 class RandomSharpness final : public TensorTransform {
747  public:
748   /// \brief Constructor.
749   /// \param[in] degrees A float vector of size 2, representing the range of random sharpness
750   ///     adjustment degrees. It should be in (min, max) format. If min=max, then it is a
751   ///     single fixed-magnitude operation (default=(0.1, 1.9)).
752   explicit RandomSharpness(std::vector<float> degrees = {0.1, 1.9});
753 
754   /// \brief Destructor.
755   ~RandomSharpness() = default;
756 
757  protected:
758   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
759   /// \return Shared pointer to the TensorOperation object.
760   std::shared_ptr<TensorOperation> Parse() override;
761 
762  private:
763   struct Data;
764   std::shared_ptr<Data> data_;
765 };
766 
767 /// \brief Invert pixels randomly within a specified range.
768 class RandomSolarize final : public TensorTransform {
769  public:
770   /// \brief Constructor.
771   /// \param[in] threshold A vector with two elements specifying the pixel range to invert.
772   ///     Threshold values should always be in (min, max) format (default=(0, 255)).
773   ///     If min=max, all pixels above min(max) will be inverted.
774   explicit RandomSolarize(std::vector<uint8_t> threshold = {0, 255});
775 
776   /// \brief Destructor.
777   ~RandomSolarize() = default;
778 
779  protected:
780   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
781   /// \return Shared pointer to the TensorOperation object.
782   std::shared_ptr<TensorOperation> Parse() override;
783 
784  private:
785   struct Data;
786   std::shared_ptr<Data> data_;
787 };
788 
789 /// \brief Randomly flip the input image vertically with a given probability.
790 class RandomVerticalFlip final : public TensorTransform {
791  public:
792   /// \brief Constructor.
793   /// \param[in] prob A float representing the probability of flip (default=0.5).
794   explicit RandomVerticalFlip(float prob = 0.5);
795 
796   /// \brief Destructor.
797   ~RandomVerticalFlip() = default;
798 
799  protected:
800   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
801   /// \return Shared pointer to the TensorOperation object.
802   std::shared_ptr<TensorOperation> Parse() override;
803 
804  private:
805   struct Data;
806   std::shared_ptr<Data> data_;
807 };
808 
809 /// \brief Randomly flip the input image vertically with a given probability and adjust bounding boxes accordingly.
810 class RandomVerticalFlipWithBBox final : public TensorTransform {
811  public:
812   /// \brief Constructor.
813   /// \param[in] prob A float representing the probability of flip (default=0.5).
814   explicit RandomVerticalFlipWithBBox(float prob = 0.5);
815 
816   /// \brief Destructor.
817   ~RandomVerticalFlipWithBBox() = default;
818 
819  protected:
820   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
821   /// \return Shared pointer to the TensorOperation object.
822   std::shared_ptr<TensorOperation> Parse() override;
823 
824  private:
825   struct Data;
826   std::shared_ptr<Data> data_;
827 };
828 
829 /// \brief Rescale the pixel values of the input image using a rescale factor and a shift factor.
830 class Rescale final : public TensorTransform {
831  public:
832   /// \brief Constructor. Both arguments are required (no default values).
833   /// \param[in] rescale Rescale factor.
834   /// \param[in] shift Shift factor.
835   Rescale(float rescale, float shift);
836 
837   /// \brief Destructor.
838   ~Rescale() = default;
839 
840  protected:
841   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
842   /// \return Shared pointer to the TensorOperation object.
843   std::shared_ptr<TensorOperation> Parse() override;
844 
845  private:
846   struct Data;
847   std::shared_ptr<Data> data_;
848 };
849 
850 /// \brief Resize the input image to the given size and adjust bounding boxes accordingly.
851 class ResizeWithBBox final : public TensorTransform {
852  public:
853   /// \brief Constructor.
854   /// \param[in] size A vector representing the output size of the resized image.
855   ///     If the size is a single value, smaller edge of the image will be resized to this value with the same image
856   ///     aspect ratio. If the size has 2 values, it should be (height, width).
857   /// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
858   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
859   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
860   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
861   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
862   ///   - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
863   explicit ResizeWithBBox(std::vector<int32_t> size, InterpolationMode interpolation = InterpolationMode::kLinear);
864 
865   /// \brief Destructor.
866   ~ResizeWithBBox() = default;
867 
868  protected:
869   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
870   /// \return Shared pointer to the TensorOperation object.
871   std::shared_ptr<TensorOperation> Parse() override;
872 
873  private:
874   struct Data;
875   std::shared_ptr<Data> data_;
876 };
877 
878 /// \brief Change the format of the input tensor from 4-channel RGBA to 3-channel BGR.
879 class RGBA2BGR final : public TensorTransform {
880  public:
881   /// \brief Constructor (takes no arguments).
882   RGBA2BGR();
883 
884   /// \brief Destructor.
885   ~RGBA2BGR() = default;
886 
887  protected:
888   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
889   /// \return Shared pointer to the TensorOperation object.
890   std::shared_ptr<TensorOperation> Parse() override;
891 };
892 
893 /// \brief Change the format of the input tensor from 4-channel RGBA to 3-channel RGB.
894 class RGBA2RGB final : public TensorTransform {
895  public:
896   /// \brief Constructor (takes no arguments).
897   RGBA2RGB();
898 
899   /// \brief Destructor.
900   ~RGBA2RGB() = default;
901 
902  protected:
903   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
904   /// \return Shared pointer to the TensorOperation object.
905   std::shared_ptr<TensorOperation> Parse() override;
906 };
907 
908 /// \brief Slice the tensor to multiple patches in horizontal and vertical directions.
909 class SlicePatches final : public TensorTransform {
910  public:
911   /// \brief Constructor.
912   /// \param[in] num_height The number of patches in vertical direction (default=1).
913   /// \param[in] num_width The number of patches in horizontal direction (default=1).
914   /// \param[in] slice_mode An enum for the mode of slice (default=SliceMode::kPad).
915   /// \param[in] fill_value A value representing the pixel to fill the padding area in the right and
916   ///     bottom borders if slice_mode is kPad, so that the padded tensor can be sliced to multiple patches (default=0).
917   /// \note This operation is suited to tensors with large height and width. The tensor is kept unchanged
918   ///     if both num_height and num_width are set to 1. The number of output tensors is equal to num_height*num_width.
919   SlicePatches(int32_t num_height = 1, int32_t num_width = 1, SliceMode slice_mode = SliceMode::kPad,
920                uint8_t fill_value = 0);
921 
922   /// \brief Destructor.
923   ~SlicePatches() = default;
924 
925  protected:
926   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
927   /// \return Shared pointer to the TensorOperation object.
928   std::shared_ptr<TensorOperation> Parse() override;
929 
930  private:
931   struct Data;
932   std::shared_ptr<Data> data_;
933 };
934 
935 /// \brief Decode, randomly crop and resize a JPEG image using the simulation algorithm of the
936 ///     Ascend series chip DVPP module. The application scenario is consistent with SoftDvppDecodeResizeJpeg.
937 ///     The input image size should be in range [32*32, 8192*8192].
938 ///     The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
939 ///     Only images with an even resolution can be output. Output with an odd resolution is not supported.
940 class SoftDvppDecodeRandomCropResizeJpeg final : public TensorTransform {
941  public:
942   /// \brief Constructor.
943   /// \param[in] size A vector representing the output size of the resized image.
944   ///     If the size is a single value, smaller edge of the image will be resized to this value with
945   ///     the same image aspect ratio. If the size has 2 values, it should be (height, width).
946   /// \param[in] scale Range [min, max) of respective size of the original
947   ///     size to be cropped (default=(0.08, 1.0)).
948   /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
949   ///     (default=(3. / 4., 4. / 3.)).
950   /// \param[in] max_attempts The maximum number of attempts to propose a valid
951   ///     crop_area (default=10). If exceeded, fall back to using center_crop instead.
952   SoftDvppDecodeRandomCropResizeJpeg(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
953                                      std::vector<float> ratio = {3. / 4., 4. / 3.}, int32_t max_attempts = 10);
954 
955   /// \brief Destructor.
956   ~SoftDvppDecodeRandomCropResizeJpeg() = default;
957 
958  protected:
959   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
960   /// \return Shared pointer to the TensorOperation object.
961   std::shared_ptr<TensorOperation> Parse() override;
962 
963  private:
964   struct Data;
965   std::shared_ptr<Data> data_;
966 };
967 
968 /// \brief Decode and resize a JPEG image using the simulation algorithm of the Ascend series
969 ///     chip DVPP module. It is recommended to use this algorithm in the following scenario:
970 ///     the DVPP of the Ascend chip is not used during training,
971 ///     but the DVPP of the Ascend chip is used during inference,
972 ///     and the accuracy of inference is lower than the accuracy of training.
973 ///     The input image size should be in range [32*32, 8192*8192].
974 ///     The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
975 ///     Only images with an even resolution can be output. Output with an odd resolution is not supported.
976 class SoftDvppDecodeResizeJpeg final : public TensorTransform {
977  public:
978   /// \brief Constructor.
979   /// \param[in] size A vector representing the output size of the resized image.
980   ///     If the size is a single value, smaller edge of the image will be resized to this value with
981   ///     the same image aspect ratio. If the size has 2 values, it should be (height, width).
982   explicit SoftDvppDecodeResizeJpeg(std::vector<int32_t> size);
983 
984   /// \brief Destructor.
985   ~SoftDvppDecodeResizeJpeg() = default;
986 
987  protected:
988   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
989   /// \return Shared pointer to the TensorOperation object.
990   std::shared_ptr<TensorOperation> Parse() override;
991 
992  private:
993   struct Data;
994   std::shared_ptr<Data> data_;
995 };
996 
997 /// \brief Swap the red and blue channels of the input image.
998 class SwapRedBlue final : public TensorTransform {
999  public:
1000   /// \brief Constructor (takes no arguments).
1001   SwapRedBlue();
1002 
1003   /// \brief Destructor.
1004   ~SwapRedBlue() = default;
1005 
1006  protected:
1007   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1008   /// \return Shared pointer to the TensorOperation object.
1009   std::shared_ptr<TensorOperation> Parse() override;
1010 };
1011 
1012 /// \brief Randomly perform transformations, as selected from the input transform list, on the input tensor.
1013 class UniformAugment final : public TensorTransform {
1014  public:
1015   /// \brief Constructor.
1016   /// \param[in] transforms Vector of raw pointers to TensorTransform operations.
1017   /// \param[in] num_ops An integer representing the number of operations to be selected and applied (default=2).
1018   explicit UniformAugment(const std::vector<TensorTransform *> &transforms, int32_t num_ops = 2);
1019 
1020   /// \brief Constructor.
1021   /// \param[in] transforms Vector of shared pointers to TensorTransform operations.
1022   /// \param[in] num_ops An integer representing the number of operations to be selected and applied (default=2).
1023   explicit UniformAugment(const std::vector<std::shared_ptr<TensorTransform>> &transforms, int32_t num_ops = 2);
1024 
1025   /// \brief Constructor.
1026   /// \param[in] transforms Vector of reference wrappers of TensorTransform operations.
1027   /// \param[in] num_ops An integer representing the number of operations to be selected and applied (default=2).
1028   explicit UniformAugment(const std::vector<std::reference_wrapper<TensorTransform>> &transforms, int32_t num_ops = 2);
1029 
1030   /// \brief Destructor.
1031   ~UniformAugment() = default;
1032 
1033  protected:
1034   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1035   /// \return Shared pointer to the TensorOperation object.
1036   std::shared_ptr<TensorOperation> Parse() override;
1037 
1038  private:
1039   struct Data;
1040   std::shared_ptr<Data> data_;
1041 };
1042 
1043 /// \brief Flip the input image vertically.
1044 class VerticalFlip final : public TensorTransform {
1045  public:
1046   /// \brief Constructor (takes no arguments).
1047   VerticalFlip();
1048 
1049   /// \brief Destructor.
1050   ~VerticalFlip() = default;
1051 
1052  protected:
1053   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1054   /// \return Shared pointer to the TensorOperation object.
1055   std::shared_ptr<TensorOperation> Parse() override;
1056 };
1057 
1058 }  // namespace vision
1059 }  // namespace dataset
1060 }  // namespace mindspore
1061 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_
1062