/**
 * Copyright 2020-2024 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_
18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_
19 
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
#include "include/dataset/vision_lite.h"
32 
33 namespace mindspore {
34 namespace dataset {
35 class TensorOperation;
36 
37 // Transform operations for performing computer vision.
38 namespace vision {
39 /// \brief Apply brightness adjustment on input image.
40 class DATASET_API AdjustBrightness final : public TensorTransform {
41  public:
42   /// \brief Constructor.
43   /// \param[in] brightness_factor Adjusts image brightness, non negative real number.
44   /// \par Example
45   /// \code
46   ///     /* Define operations */
47   ///     auto decode_op = vision::Decode();
48   ///     auto adjust_brightness_op = vision::AdjustBrightness(2.0);
49   ///
50   ///     /* dataset is an instance of Dataset object */
51   ///     dataset = dataset->Map({decode_op, adjust_brightness_op},  // operations
52   ///                            {"image"});                         // input columns
53   /// \endcode
54   explicit AdjustBrightness(float brightness_factor);
55 
56   /// \brief Destructor.
57   ~AdjustBrightness() override = default;
58 
59  protected:
60   /// \brief Function to convert TensorTransform object into a TensorOperation object.
61   /// \return Shared pointer to TensorOperation object.
62   std::shared_ptr<TensorOperation> Parse() override;
63 
64  private:
65   struct Data;
66   std::shared_ptr<Data> data_;
67 };
68 
69 /// \brief Apply contrast adjustment on input image.
70 class DATASET_API AdjustContrast final : public TensorTransform {
71  public:
72   /// \brief Constructor.
73   /// \param[in] contrast_factor Adjusts image contrast, non negative real number.
74   /// \par Example
75   /// \code
76   ///     /* Define operations */
77   ///     auto decode_op = vision::Decode();
78   ///     auto adjust_contrast_op = vision::AdjustContrast(10.0);
79   ///
80   ///     /* dataset is an instance of Dataset object */
81   ///     dataset = dataset->Map({decode_op, adjust_contrast_op},  // operations
82   ///                            {"image"});                       // input columns
83   /// \endcode
84   explicit AdjustContrast(float contrast_factor);
85 
86   /// \brief Destructor.
87   ~AdjustContrast() override = default;
88 
89  protected:
90   /// \brief Function to convert TensorTransform object into a TensorOperation object.
91   /// \return Shared pointer to TensorOperation object.
92   std::shared_ptr<TensorOperation> Parse() override;
93 
94  private:
95   struct Data;
96   std::shared_ptr<Data> data_;
97 };
98 
99 /// \brief AdjustGamma TensorTransform.
100 /// \note Apply gamma correction on input image.
101 class DATASET_API AdjustGamma final : public TensorTransform {
102  public:
103   /// \brief Constructor.
104   /// \param[in] gamma Non negative real number, which makes the output image pixel value
105   ///     exponential in relation to the input image pixel value.
106   /// \param[in] gain The constant multiplier. Default: 1.0.
107   /// \par Example
108   /// \code
109   ///     /* Define operations */
110   ///     auto decode_op = vision::Decode();
111   ///     auto adjust_gamma_op = vision::AdjustGamma(10.0);
112   ///
113   ///     /* dataset is an instance of Dataset object */
114   ///     dataset = dataset->Map({decode_op, adjust_gamma_op},  // operations
115   ///                            {"image"});                   // input columns
116   /// \endcode
117   explicit AdjustGamma(float gamma, float gain = 1.0);
118 
119   /// \brief Destructor.
120   ~AdjustGamma() override = default;
121 
122  protected:
123   /// \brief Function to convert TensorTransform object into a TensorOperation object.
124   /// \return Shared pointer to TensorOperation object.
125   std::shared_ptr<TensorOperation> Parse() override;
126 
127  private:
128   struct Data;
129   std::shared_ptr<Data> data_;
130 };
131 
132 /// \note Apply hue adjustment on input image.
133 class DATASET_API AdjustHue final : public TensorTransform {
134  public:
135   /// \brief Constructor.
136   /// \param[in] hue_factor How much to shift the hue channel, must be in the interval [-0.5, 0.5].
137   /// \par Example
138   /// \code
139   ///     /* Define operations */
140   ///     auto decode_op = vision::Decode();
141   ///     auto adjust_hue_op = vision::AdjustHue(0.2);
142   ///
143   ///     /* dataset is an instance of Dataset object */
144   ///     dataset = dataset->Map({decode_op, adjust_contrast_op},  // operations
145   ///                            {"image"});                       // input columns
146   /// \endcode
147   explicit AdjustHue(float hue_factor);
148 
149   /// \brief Destructor.
150   ~AdjustHue() override = default;
151 
152  protected:
153   /// \brief Function to convert TensorTransform object into a TensorOperation object.
154   /// \return Shared pointer to TensorOperation object.
155   std::shared_ptr<TensorOperation> Parse() override;
156 
157  private:
158   struct Data;
159   std::shared_ptr<Data> data_;
160 };
161 
162 /// \brief Adjust the color saturation of the input image.
163 class DATASET_API AdjustSaturation final : public TensorTransform {
164  public:
165   /// \brief Constructor.
166   /// \param[in] saturation_factor Adjust image saturation, non negative real number.
167   /// \par Example
168   /// \code
169   ///     /* Define operations */
170   ///     auto decode_op = vision::Decode();
171   ///     auto adjust_saturation_op = vision::AdjustSaturation(2.0);
172   ///
173   ///     /* dataset is an instance of Dataset object */
174   ///     dataset = dataset->Map({decode_op, adjust_saturation_op},  // operations
175   ///                            {"image"});                         // input columns
176   /// \endcode
177   explicit AdjustSaturation(float saturation_factor);
178 
179   /// \brief Destructor.
180   ~AdjustSaturation() override = default;
181 
182  protected:
183   /// \brief Function to convert TensorTransform object into a TensorOperation object.
184   /// \return Shared pointer to TensorOperation object.
185   std::shared_ptr<TensorOperation> Parse() override;
186 
187  private:
188   struct Data;
189   std::shared_ptr<Data> data_;
190 };
191 
192 /// \brief Apply adjust sharpness on input image. Input image is expected to be in [H, W, C] or [H, W] format.
193 class DATASET_API AdjustSharpness final : public TensorTransform {
194  public:
195   /// \brief Constructor.
196   /// \param[in] sharpness_factor How much to adjust the sharpness. Can be any Non negative real number.
197   ///     0 gives a blurred image, 1 gives the original image while 2 increases the Sharpness by a factor of 2.
198   /// \par Example
199   /// \code
200   ///     /* Define operations */
201   ///     auto decode_op = vision::Decode();
202   ///     auto adjust_sharpness_op = vision::AdjustSharpness(2.0);
203   ///
204   ///     /* dataset is an instance of Dataset object */
205   ///     dataset = dataset->Map({decode_op, adjust_sharpness_op},   // operations
206   ///                            {"image"});                         // input columns
207   /// \endcode
208   explicit AdjustSharpness(float sharpness_factor);
209 
210   /// \brief Destructor.
211   ~AdjustSharpness() override = default;
212 
213  protected:
214   /// \brief Function to convert TensorTransform object into a TensorOperation object.
215   /// \return Shared pointer to TensorOperation object.
216   std::shared_ptr<TensorOperation> Parse() override;
217 
218  private:
219   struct Data;
220   std::shared_ptr<Data> data_;
221 };
222 
223 /// \brief Apply AutoAugment data augmentation method.
224 class DATASET_API AutoAugment final : public TensorTransform {
225  public:
226   /// \brief Constructor.
227   /// \param[in] policy An enum for the data auto augmentation policy (default=AutoAugmentPolicy::kImageNet).
228   ///     - AutoAugmentPolicy::kImageNet, AutoAugment policy learned on the ImageNet dataset.
229   ///     - AutoAugmentPolicy::kCifar10, AutoAugment policy learned on the Cifar10 dataset.
230   ///     - AutoAugmentPolicy::kSVHN, AutoAugment policy learned on the SVHN dataset.
231   /// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kNearestNeighbour).
232   ///     - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
233   ///     - InterpolationMode::kLinear, Interpolation method is blinear interpolation.
234   ///     - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
235   ///     - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
236   /// \param[in] fill_value A vector representing the pixel intensity of the borders (default={0, 0, 0}).
237   /// \par Example
238   /// \code
239   ///     /* Define operations */
240   ///     auto decode_op = vision::Decode();
241   ///     auto auto_augment_op = vision::AutoAugment(AutoAugmentPolicy::kImageNet,
242   ///                                                InterpolationMode::kNearestNeighbour, {0, 0, 0});
243   ///     /* dataset is an instance of Dataset object */
244   ///     dataset = dataset->Map({decode_op, auto_augment_op}, // operations
245   ///                            {"image"});                   // input columns
246   /// \endcode
247   explicit AutoAugment(AutoAugmentPolicy policy = AutoAugmentPolicy::kImageNet,
248                        InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
249                        const std::vector<uint8_t> &fill_value = {0, 0, 0});
250 
251   /// \brief Destructor.
252   ~AutoAugment() override = default;
253 
254  protected:
255   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
256   /// \return Shared pointer to TensorOperation object.
257   std::shared_ptr<TensorOperation> Parse() override;
258 
259  private:
260   struct Data;
261   std::shared_ptr<Data> data_;
262 };
263 
264 /// \brief Apply automatic contrast on the input image.
265 class DATASET_API AutoContrast final : public TensorTransform {
266  public:
267   /// \brief Constructor.
268   /// \param[in] cutoff Percent of pixels to cut off from the histogram, the valid range of cutoff value is 0 to 50.
269   /// \param[in] ignore Pixel values to ignore.
270   /// \par Example
271   /// \code
272   ///     /* Define operations */
273   ///     auto decode_op = vision::Decode();
274   ///     auto autocontrast_op = vision::AutoContrast(10.0, {10, 20});
275   ///
276   ///     /* dataset is an instance of Dataset object */
277   ///     dataset = dataset->Map({decode_op, autocontrast_op},  // operations
278   ///                            {"image"});                    // input columns
279   /// \endcode
280   explicit AutoContrast(float cutoff = 0.0, const std::vector<uint32_t> &ignore = {});
281 
282   /// \brief Destructor.
283   ~AutoContrast() override = default;
284 
285  protected:
286   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
287   /// \return Shared pointer to TensorOperation object.
288   std::shared_ptr<TensorOperation> Parse() override;
289 
290  private:
291   struct Data;
292   std::shared_ptr<Data> data_;
293 };
294 
295 /// \brief BoundingBoxAugment TensorTransform.
296 /// \note  Apply a given image transform on a random selection of bounding box regions of a given image.
297 class DATASET_API BoundingBoxAugment final : public TensorTransform {
298  public:
299   /// \brief Constructor.
300   /// \param[in] transform Raw pointer to the TensorTransform operation.
301   /// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3).
302   /// \par Example
303   /// \code
304   ///     /* Define operations */
305   ///     TensorTransform *rotate_op = new vision::RandomRotation({-180, 180});
306   ///     auto bbox_aug_op = vision::BoundingBoxAugment(rotate_op, 0.5);
307   ///
308   ///     /* dataset is an instance of Dataset object */
309   ///     dataset = dataset->Map({bbox_aug_op},       // operations
310   ///                            {"image", "bbox"});  // input columns
311   /// \endcode
312   explicit BoundingBoxAugment(TensorTransform *transform, float ratio = 0.3);
313 
314   /// \brief Constructor.
315   /// \param[in] transform Smart pointer to the TensorTransform operation.
316   /// \param[in] ratio Ratio of bounding boxes where augmentation is applied to. Range: [0, 1] (default=0.3).
317   /// \par Example
318   /// \code
319   ///     /* Define operations */
320   ///     std::shared_ptr<TensorTransform> flip_op = std::make_shared<vision::RandomHorizontalFlip>(0.5);
321   ///     std::shared_ptr<TensorTransform> bbox_aug_op = std::make_shared<vision::BoundingBoxAugment>(flip_op, 0.1);
322   ///
323   ///     /* dataset is an instance of Dataset object */
324   ///     dataset = dataset->Map({bbox_aug_op},       // operations
325   ///                            {"image", "bbox"});  // input columns
326   /// \endcode
327   explicit BoundingBoxAugment(const std::shared_ptr<TensorTransform> &transform, float ratio = 0.3);
328 
329   /// \brief Constructor.
330   /// \param[in] transform Object pointer to the TensorTransform operation.
331   /// \param[in] ratio Ratio of bounding boxes where augmentation is applied to. Range: [0, 1] (default=0.3).
332   /// \par Example
333   /// \code
334   ///     /* Define operations */
335   ///     vision::RandomColor random_color_op = vision::RandomColor(0.5, 1.0);
336   ///     vision::BoundingBoxAugment bbox_aug_op = vision::BoundingBoxAugment(random_color_op, 0.8);
337   ///
338   ///     /* dataset is an instance of Dataset object */
339   ///     dataset = dataset->Map({bbox_aug_op},       // operations
340   ///                            {"image", "bbox"});  // input columns
341   /// \endcode
342   explicit BoundingBoxAugment(const std::reference_wrapper<TensorTransform> &transform, float ratio = 0.3);
343 
344   /// \brief Destructor.
345   ~BoundingBoxAugment() override = default;
346 
347  protected:
348   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
349   /// \return Shared pointer to TensorOperation object.
350   std::shared_ptr<TensorOperation> Parse() override;
351 
352  private:
353   struct Data;
354   std::shared_ptr<Data> data_;
355 };
356 
357 /// \brief Change the color space of the image.
358 class DATASET_API ConvertColor final : public TensorTransform {
359  public:
360   /// \brief Constructor.
361   /// \param[in] convert_mode The mode of image channel conversion.
362   /// \par Example
363   /// \code
364   ///     /* dataset is an instance of Dataset object */
365   ///     dataset = dataset->Map({std::make_shared<vision::Decode>(),
366   ///                             std::make_shared<vision::ConvertColor>(ConvertMode::COLOR_BGR2RGB)}, // operations
367   ///                            {"image"});                                                           // input columns
368   /// \endcode
369   explicit ConvertColor(ConvertMode convert_mode);
370 
371   /// \brief Destructor.
372   ~ConvertColor() override = default;
373 
374  protected:
375   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
376   /// \return Shared pointer to TensorOperation object.
377   std::shared_ptr<TensorOperation> Parse() override;
378 
379  private:
380   struct Data;
381   std::shared_ptr<Data> data_;
382 };
383 
384 /// \brief Mask a random section of each image with the corresponding part of another randomly
385 ///     selected image in that batch.
386 class DATASET_API CutMixBatch final : public TensorTransform {
387  public:
388   /// \brief Constructor.
389   /// \param[in] image_batch_format The format of the batch.
390   /// \param[in] alpha The hyperparameter of beta distribution (default = 1.0).
391   /// \param[in] prob The probability by which CutMix is applied to each image (default = 1.0).
392   /// \par Example
393   /// \code
394   ///     /* dataset is an instance of Dataset object */
395   ///     dataset = dataset->Batch(5);
396   ///     dataset = dataset->Map({std::make_shared<vision::CutMixBatch>(ImageBatchFormat::kNHWC)}, // operations
397   ///                            {"image", "label"});                                             // input columns
398   /// \endcode
399   explicit CutMixBatch(ImageBatchFormat image_batch_format, float alpha = 1.0, float prob = 1.0);
400 
401   /// \brief Destructor.
402   ~CutMixBatch() override = default;
403 
404  protected:
405   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
406   /// \return Shared pointer to TensorOperation object.
407   std::shared_ptr<TensorOperation> Parse() override;
408 
409  private:
410   struct Data;
411   std::shared_ptr<Data> data_;
412 };
413 
414 /// \brief Randomly cut (mask) out a given number of square patches from the input image.
415 class DATASET_API CutOut final : public TensorTransform {
416  public:
417   /// \brief Constructor.
418   /// \param[in] length Integer representing the side length of each square patch.
419   /// \param[in] num_patches Integer representing the number of patches to be cut out of an image.
420   /// \param[in] is_hwc A boolean to indicate whether the input image is in HWC format (true) or CHW
421   ///     format (false) (default = true).
422   /// \par Example
423   /// \code
424   ///     /* dataset is an instance of Dataset object */
425   ///     dataset = dataset->Map({std::make_shared<vision::Decode>(),
426   ///                             std::make_shared<vision::CutOut>(1, 4, true)}, // operations
427   ///                            {"image"});                                     // input columns
428   /// \endcode
429   explicit CutOut(int32_t length, int32_t num_patches = 1, bool is_hwc = true);
430 
431   /// \brief Destructor.
432   ~CutOut() override = default;
433 
434  protected:
435   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
436   /// \return Shared pointer to TensorOperation object.
437   std::shared_ptr<TensorOperation> Parse() override;
438 
439  private:
440   struct Data;
441   std::shared_ptr<Data> data_;
442 };
443 
444 /// \brief Decode the input video.
445 class DATASET_API DecodeVideo final : public TensorTransform {
446  public:
447   /// \brief Constructor. It will decode a vector containing a raw video tensor into a vector containing two tensors.
448   ///     The raw video tensor in the input vector should be 1D array of UINT8.
449   ///     The first tensor in the output vector is a visual tensor, the shape is <T,H,W,C>, the type is DE_UINT8. Pixel
450   ///     order is RGB. The second tensor in the output vector is an audio tensor, the shape is <C, L>.
451   /// \par Example
452   /// \code
453   ///     /* Read video file into tensor */
454   ///     mindspore::MSTensor video;
455   ///     ASSERT_OK(mindspore::dataset::vision::ReadFile("/path/to/video/file", &video));
456   ///     std::vector<mindspore::MSTensor> input_tensor;
457   ///     std::vector<mindspore::MSTensor> output_tensor;
458   ///     input_tensor.push_back(video);
459   ///     auto decode_video = vision::DecodeVideo();
460   ///     auto transform = Execute(decode_video);
461   ///     Status rc = transform(input_tensor, &output_tensor);
462   /// \endcode
463   DecodeVideo();
464 
465   /// \brief Destructor.
466   ~DecodeVideo() = default;
467 
468  protected:
469   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
470   /// \return Shared pointer to TensorOperation object.
471   std::shared_ptr<TensorOperation> Parse() override;
472 };
473 
474 /// \brief Encode the image as JPEG data.
475 /// \param[in] image The image to be encoded.
476 /// \param[out] output The Tensor data.
477 /// \param[in] quality The quality for the output tensor, in range of [1, 100]. Default: 75.
478 /// \return The status code.
479 Status DATASET_API EncodeJpeg(const mindspore::MSTensor &image, mindspore::MSTensor *output, int quality = 75);
480 
481 /// \brief Encode the image as PNG data.
482 /// \param[in] image The image to be encoded.
483 /// \param[out] output The Tensor data.
484 /// \param[in] compression_level The compression_level for encoding, in range of [0, 9]. Default: 6.
485 /// \return The status code.
486 Status DATASET_API EncodePng(const mindspore::MSTensor &image, mindspore::MSTensor *output, int compression_level = 6);
487 
488 /// \brief Apply histogram equalization on the input image.
489 class DATASET_API Equalize final : public TensorTransform {
490  public:
491   /// \brief Constructor.
492   /// \par Example
493   /// \code
494   ///     /* dataset is an instance of Dataset object */
495   ///     dataset = dataset->Map({std::make_shared<vision::Decode>(),
496   ///                             std::make_shared<vision::Equalize>()}, // operations
497   ///                            {"image"});                             // input columns
498   /// \endcode
499   Equalize();
500 
501   /// \brief Destructor.
502   ~Equalize() override = default;
503 
504  protected:
505   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
506   /// \return Shared pointer to TensorOperation object.
507   std::shared_ptr<TensorOperation> Parse() override;
508 };
509 
510 /// \brief Erase the input image with given value.
511 class DATASET_API Erase final : public TensorTransform {
512  public:
513   /// \brief Constructor.
514   /// \param[in] top Vertical ordinate of the upper left corner of erased region.
515   /// \param[in] left Horizontal ordinate of the upper left corner of erased region.
516   /// \param[in] height Height of erased region.
517   /// \param[in] width Width of erased region.
518   /// \param[in] value Pixel value used to pad the erased area.
519   ///     If a single integer is provided, it will be used for all RGB channels.
520   ///     If a sequence of length 3 is provided, it will be used for R, G, B channels respectively. Default: 0.
521   /// \param[in] inplace Whether to erase inplace. Default: False.
522   /// \par Example
523   /// \code
524   ///     /* dataset is an instance of Dataset object */
525   ///     dataset = dataset->Map({std::make_shared<vision::Decode>(),
526   ///                             std::make_shared<vision::Erase>(10, 10, 10, 10)}, // operations
527   ///                            {"image"});                                        // input columns
528   /// \endcode
529   Erase(int32_t top, int32_t left, int32_t height, int32_t width, const std::vector<float> &value = {0., 0., 0.},
530         bool inplace = false);
531 
532   /// \brief Destructor.
533   ~Erase() override = default;
534 
535  protected:
536   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
537   /// \return Shared pointer to TensorOperation object.
538   std::shared_ptr<TensorOperation> Parse() override;
539 
540  private:
541   struct Data;
542   std::shared_ptr<Data> data_;
543 };
544 
545 /// \brief Get the number of input image channels.
546 /// \param[in] image Tensor of the image.
547 /// \param[out] channels Channels of the image.
548 /// \return The status code.
549 Status DATASET_API GetImageNumChannels(const mindspore::MSTensor &image, dsize_t *channels);
550 
551 /// \brief Get the size of input image.
552 /// \param[in] image Tensor of the image.
553 /// \param[out] size Size of the image as [height, width].
554 /// \return The status code.
555 Status DATASET_API GetImageSize(const mindspore::MSTensor &image, std::vector<dsize_t> *size);
556 
557 /// \brief Flip the input image horizontally.
558 class DATASET_API HorizontalFlip final : public TensorTransform {
559  public:
560   /// \brief Constructor.
561   /// \par Example
562   /// \code
563   ///     /* dataset is an instance of Dataset object */
564   ///     dataset = dataset->Map({std::make_shared<vision::Decode>(),
565   ///                             std::make_shared<vision::HorizontalFlip>()}, // operations
566   ///                            {"image"});                                   // input columns
567   /// \endcode
568   HorizontalFlip();
569 
570   /// \brief Destructor.
571   ~HorizontalFlip() override = default;
572 
573  protected:
574   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
575   /// \return Shared pointer to TensorOperation object.
576   std::shared_ptr<TensorOperation> Parse() override;
577 };
578 
579 /// \brief Apply invert on the input image in RGB mode.
580 class DATASET_API Invert final : public TensorTransform {
581  public:
582   /// \brief Constructor.
583   /// \par Example
584   /// \code
585   ///     /* dataset is an instance of Dataset object */
586   ///     dataset = dataset->Map({std::make_shared<vision::Decode>(),
587   ///                             std::make_shared<vision::Invert>()}, // operations
588   ///                            {"image"});                           // input columns
589   /// \endcode
590   Invert();
591 
592   /// \brief Destructor.
593   ~Invert() override = default;
594 
595  protected:
596   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
597   /// \return Shared pointer to TensorOperation object.
598   std::shared_ptr<TensorOperation> Parse() override;
599 };
600 
601 /// \brief Apply MixUp transformation on an input batch of images and labels. The labels must be in
602 ///     one-hot format and Batch must be called before calling this function.
603 class DATASET_API MixUpBatch final : public TensorTransform {
604  public:
605   /// \brief Constructor.
606   /// \param[in] alpha hyperparameter of beta distribution (default = 1.0).
607   /// \par Example
608   /// \code
609   ///     /* dataset is an instance of Dataset object */
610   ///     dataset = dataset->Batch(5);
611   ///     dataset = dataset->Map({std::make_shared<vision::MixUpBatch>()}, // operations
612   ///                            {"image"});                               // input columns
613   /// \endcode
614   explicit MixUpBatch(float alpha = 1.0);
615 
616   /// \brief Destructor.
617   ~MixUpBatch() override = default;
618 
619  protected:
620   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
621   /// \return Shared pointer to TensorOperation object.
622   std::shared_ptr<TensorOperation> Parse() override;
623 
624  private:
625   struct Data;
626   std::shared_ptr<Data> data_;
627 };
628 
629 /// \brief Normalize the input image with respect to mean and standard deviation and pads an extra
630 ///     channel with value zero.
631 class DATASET_API NormalizePad final : public TensorTransform {
632  public:
633   /// \brief Constructor.
634   /// \param[in] mean A vector of mean values for each channel, with respect to channel order.
635   ///     The mean values must be in range [0.0, 255.0].
636   /// \param[in] std A vector of standard deviations for each channel, with respect to channel order.
637   ///     The standard deviation values must be in range (0.0, 255.0].
638   /// \param[in] dtype The output datatype of Tensor.
639   ///     The standard deviation values must be "float32" or "float16"(default = "float32").
640   /// \param[in] is_hwc A boolean to indicate whether the input image is in HWC format (true) or CHW
641   ///     format (false) (default = true).
642   /// \par Example
643   /// \code
644   ///     /* Define operations */
645   ///     auto decode_op = vision::Decode();
646   ///     auto normalize_pad_op = vision::NormalizePad({121.0, 115.0, 100.0}, {70.0, 68.0, 71.0});
647   ///
648   ///     /* dataset is an instance of Dataset object */
649   ///     dataset = dataset->Map({decode_op, normalize_pad_op},  // operations
650   ///                            {"image"});                     // input columns
651   /// \endcode
652   NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::string &dtype = "float32",
653                bool is_hwc = true)
NormalizePad(mean,std,StringToChar (dtype),is_hwc)654       : NormalizePad(mean, std, StringToChar(dtype), is_hwc) {}
655 
656   NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::vector<char> &dtype,
657                bool is_hwc = true);
658 
659   /// \brief Destructor.
660   ~NormalizePad() override = default;
661 
662  protected:
663   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
664   /// \return Shared pointer to TensorOperation object.
665   std::shared_ptr<TensorOperation> Parse() override;
666 
667  private:
668   struct Data;
669   std::shared_ptr<Data> data_;
670 };
671 
672 /// \brief Pad the image to a fixed size.
673 class DATASET_API PadToSize final : public TensorTransform {
674  public:
675   /// \brief Constructor.
676   /// \param[in] size A two-element vector representing the target size to pad to, in order of [height, width].
677   /// \param[in] offset A two-element vector representing the lengths to pad on the top and left,
678   ///    in order of [top, left]. Default: {}, which means to pad symmetrically, keeping the original image centered.
679   /// \param[in] fill_value A vector representing the pixel intensity of the borders. Only valid if the
680   ///    padding_mode is BorderType::kConstant. If 1 value is provided, it is used for all RGB channels.
681   ///    If 3 values are provided, they are used to fill R, G, B channels respectively. Default: {0}.
682   /// \param[in] padding_mode The method of padding, which can be one of BorderType::kConstant, BorderType::kEdge,
683   ///    BorderType::kReflect or BorderType::kSymmetric. Default: BorderType::kConstant.
684   ///    - BorderType::kConstant, pads with a constant value.
685   ///    - BorderType::kEdge, pads with the last value at the edge of the image.
686   ///    - BorderType::kReflect, pads with reflection of the image omitting the last value on the edge.
687   ///    - BorderType::kSymmetric, pads with reflection of the image repeating the last value on the edge.
688   /// \par Example
689   /// \code
690   ///     /* Define operations */
691   ///     auto decode_op = vision::Decode();
692   ///     auto pad_to_size_op = vision::PadToSize({256, 256}, {10, 20}, {255, 255, 255});
693   ///
694   ///     /* dataset is an instance of Dataset object */
695   ///     dataset = dataset->Map({decode_op, pad_to_size_op},  // operations
696   ///                            {"image"});                   // input columns
697   /// \endcode
698   explicit PadToSize(const std::vector<int32_t> &size, const std::vector<int32_t> &offset = {},
699                      const std::vector<uint8_t> &fill_value = {0}, BorderType padding_mode = BorderType::kConstant);
700 
701   /// \brief Destructor.
702   ~PadToSize() override = default;
703 
704  protected:
705   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
706   /// \return Shared pointer to TensorOperation object.
707   std::shared_ptr<TensorOperation> Parse() override;
708 
709  private:
710   struct Data;
711   std::shared_ptr<Data> data_;
712 };
713 
714 /// \brief Perform perspective transform on the image.
715 class DATASET_API Perspective final : public TensorTransform {
716  public:
717   /// \brief Constructor.
718   /// \param[in] start_points List containing four lists of two integers corresponding to four
719   ///     corners [top-left, top-right, bottom-right, bottom-left] of the original image.
720   /// \param[in] end_points List containing four lists of two integers corresponding to four
721   ///     corners [top-left, top-right, bottom-right, bottom-left] of the transformed image.
722   /// \param[in] interpolation An enum for the mode of interpolation. Required: this constructor declares no default.
723   ///     - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
724   ///     - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
725   ///     - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
726   ///     - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
727   ///     - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
728   /// \par Example
729   /// \code
730   ///     /* Define operations */
731   ///     auto decode_op = vision::Decode();
732   ///     std::vector<std::vector<int32_t>> start_points = {{0, 0}, {1, 0}, {1, 1}, {0, 1}};
733   ///     std::vector<std::vector<int32_t>> end_points = {{0, 2}, {2, 0}, {2, 2}, {0, 2}};
734   ///     auto perspective_op = vision::Perspective(start_points, end_points, InterpolationMode::kLinear);
735   ///
736   ///     /* dataset is an instance of Dataset object */
737   ///     dataset = dataset->Map({decode_op, perspective_op},  // operations
738   ///                            {"image"});                   // input columns
739   /// \endcode
740   Perspective(const std::vector<std::vector<int32_t>> &start_points,
741               const std::vector<std::vector<int32_t>> &end_points, InterpolationMode interpolation);
742 
743   /// \brief Destructor.
744   ~Perspective() override = default;
745 
746  protected:
747   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
748   /// \return Shared pointer to TensorOperation object.
749   std::shared_ptr<TensorOperation> Parse() override;
750 
751  private:
752   struct Data;
753   std::shared_ptr<Data> data_;
754 };
755 
756 /// \brief Posterize an image by reducing the number of bits for each color channel.
757 class DATASET_API Posterize final : public TensorTransform {
758  public:
759   /// \brief Constructor.
760   /// \param[in] bits The number of bits to keep for each channel,
761   ///     which should be in the range [0, 8].
762   /// \par Example
763   /// \code
764   ///     /* Define operations */
765   ///     auto decode_op = vision::Decode();
766   ///     auto posterize_op = vision::Posterize(8);
767   ///
768   ///     /* dataset is an instance of Dataset object */
769   ///     dataset = dataset->Map({decode_op, posterize_op},  // operations
770   ///                            {"image"});                 // input columns
771   /// \endcode
772   explicit Posterize(uint8_t bits);
773 
774   /// \brief Destructor.
775   ~Posterize() override = default;
776 
777  protected:
778   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
779   /// \return Shared pointer to TensorOperation object.
780   std::shared_ptr<TensorOperation> Parse() override;
781 
782  private:
783   struct Data;
784   std::shared_ptr<Data> data_;
785 };
786 
787 /// \brief Apply RandAugment data augmentation method.
788 class DATASET_API RandAugment final : public TensorTransform {
789  public:
790   /// \brief Constructor.
791   /// \param[in] num_ops Number of augmentation transformations to apply sequentially. Default: 2.
792   /// \param[in] magnitude Magnitude for all the transformations. Default: 9.
793   /// \param[in] num_magnitude_bins The number of different magnitude values. Default: 31.
794   /// \param[in] interpolation An enum for the mode of interpolation. Default: InterpolationMode::kNearestNeighbour.
795   ///     - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
796   ///     - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
797   ///     - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
798   /// \param[in] fill_value A vector representing the pixel intensity of the borders. Default: {0, 0, 0}.
799   /// \par Example
800   /// \code
801   ///     /* Define operations */
802   ///     auto decode_op = vision::Decode();
803   ///     auto rand_augment_op = vision::RandAugment();  // all parameters take their default values
804   ///     /* dataset is an instance of Dataset object */
805   ///     dataset = dataset->Map({decode_op, rand_augment_op},  // operations
806   ///                            {"image"});                    // input columns
807   /// \endcode
808   explicit RandAugment(int32_t num_ops = 2, int32_t magnitude = 9, int32_t num_magnitude_bins = 31,
809                        InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
810                        const std::vector<uint8_t> &fill_value = {0, 0, 0});
811 
812   /// \brief Destructor.
813   ~RandAugment() override = default;
814 
815  protected:
816   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
817   /// \return Shared pointer to TensorOperation object.
818   std::shared_ptr<TensorOperation> Parse() override;
819 
820  private:
821   struct Data;
822   std::shared_ptr<Data> data_;
823 };
824 
825 /// \brief Automatically adjust the contrast of the image with a given probability.
826 class DATASET_API RandomAutoContrast final : public TensorTransform {
827  public:
828   /// \brief Constructor.
829   /// \param[in] cutoff Percent of the lightest and darkest pixels to be cut off from
830   ///     the histogram of the input image. The value must be in the range [0.0, 50.0) (default=0.0).
831   /// \param[in] ignore The background pixel values to be ignored, each of which must be
832   ///     in the range [0, 255] (default={}).
833   /// \param[in] prob A float representing the probability of applying AutoContrast, which must be
834   ///     in the range [0, 1] (default=0.5).
835   /// \par Example
836   /// \code
837   ///     /* Define operations */
838   ///     auto decode_op = vision::Decode();
839   ///     auto random_auto_contrast_op = vision::RandomAutoContrast(5.0);
840   ///
841   ///     /* dataset is an instance of Dataset object */
842   ///     dataset = dataset->Map({decode_op, random_auto_contrast_op},  // operations
843   ///                            {"image"});                            // input columns
844   /// \endcode
845   explicit RandomAutoContrast(float cutoff = 0.0, const std::vector<uint32_t> &ignore = {}, float prob = 0.5);
846 
847   /// \brief Destructor.
848   ~RandomAutoContrast() override = default;
849 
850  protected:
851   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
852   /// \return Shared pointer to TensorOperation object.
853   std::shared_ptr<TensorOperation> Parse() override;
854 
855  private:
856   struct Data;
857   std::shared_ptr<Data> data_;
858 };
859 
860 /// \brief Randomly adjust the sharpness of the input image with a given probability.
861 class DATASET_API RandomAdjustSharpness final : public TensorTransform {
862  public:
863   /// \brief Constructor.
864   /// \param[in] degree A float representing sharpness adjustment degree, which must be non-negative.
865   /// \param[in] prob A float representing the probability of the image being sharpness adjusted, which
866   ///     must be in the range [0, 1] (default=0.5).
867   /// \par Example
868   /// \code
869   ///     /* Define operations */
870   ///     auto decode_op = vision::Decode();
871   ///     auto random_adjust_sharpness_op = vision::RandomAdjustSharpness(30.0);
872   ///
873   ///     /* dataset is an instance of Dataset object */
874   ///     dataset = dataset->Map({decode_op, random_adjust_sharpness_op},  // operations
875   ///                            {"image"});                               // input columns
876   /// \endcode
877   explicit RandomAdjustSharpness(float degree, float prob = 0.5);
878 
879   /// \brief Destructor.
880   ~RandomAdjustSharpness() override = default;
881 
882  protected:
883   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
884   /// \return Shared pointer to TensorOperation object.
885   std::shared_ptr<TensorOperation> Parse() override;
886 
887  private:
888   struct Data;
889   std::shared_ptr<Data> data_;
890 };
891 
892 /// \brief Blend an image with its grayscale version with random weights
893 ///        t and 1 - t generated from a given range. If the range is trivial,
894 ///        then the weights are deterministic and t equals the bound of the interval.
895 class DATASET_API RandomColor final : public TensorTransform {
896  public:
897   /// \brief Constructor.
898   /// \param[in] t_lb Lower bound of the random weights.
899   /// \param[in] t_ub Upper bound of the random weights.
900   /// \par Example
901   /// \code
902   ///     /* Define operations */
903   ///     auto decode_op = vision::Decode();
904   ///     auto random_color_op = vision::RandomColor(5.0, 50.0);
905   ///
906   ///     /* dataset is an instance of Dataset object */
907   ///     dataset = dataset->Map({decode_op, random_color_op},  // operations
908   ///                            {"image"});                    // input columns
909   /// \endcode
910   RandomColor(float t_lb, float t_ub);
911 
912   /// \brief Destructor.
913   ~RandomColor() override = default;
914 
915  protected:
916   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
917   /// \return Shared pointer to TensorOperation object.
918   std::shared_ptr<TensorOperation> Parse() override;
919 
920  private:
921   struct Data;
922   std::shared_ptr<Data> data_;
923 };
924 
925 /// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image.
926 class DATASET_API RandomColorAdjust final : public TensorTransform {
927  public:
928   /// \brief Constructor.
929   /// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values;
930   ///     if it is a vector of two values, it needs to be in the form of [min, max] (default={1, 1}).
931   /// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values;
932   ///     if it is a vector of two values, it needs to be in the form of [min, max] (default={1, 1}).
933   /// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values;
934   ///     if it is a vector of two values, it needs to be in the form of [min, max] (default={1, 1}).
935   /// \param[in] hue Hue adjustment factor. Must be a vector of one or two values;
936   ///     if it is a vector of two values, it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5
937   ///     (default={0, 0}).
938   /// \par Example
939   /// \code
940   ///     /* Define operations */
941   ///     auto decode_op = vision::Decode();
942   ///     auto random_color_adjust_op = vision::RandomColorAdjust({1.0, 5.0}, {10.0, 20.0}, {40.0, 40.0});
943   ///
944   ///     /* dataset is an instance of Dataset object */
945   ///     dataset = dataset->Map({decode_op, random_color_adjust_op},  // operations
946   ///                            {"image"});                           // input columns
947   /// \endcode
948   explicit RandomColorAdjust(const std::vector<float> &brightness = {1.0, 1.0},
949                              const std::vector<float> &contrast = {1.0, 1.0},
950                              const std::vector<float> &saturation = {1.0, 1.0},
951                              const std::vector<float> &hue = {0.0, 0.0});
952 
953   /// \brief Destructor.
954   ~RandomColorAdjust() override = default;
955 
956  protected:
957   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
958   /// \return Shared pointer to TensorOperation object.
959   std::shared_ptr<TensorOperation> Parse() override;
960 
961  private:
962   struct Data;
963   std::shared_ptr<Data> data_;
964 };
965 
966 /// \brief Crop the input image at a random location.
967 class DATASET_API RandomCrop final : public TensorTransform {
968  public:
969   /// \brief Constructor.
970   /// \param[in] size A vector representing the output size of the cropped image.
971   ///     If the size is a single value, a squared crop of size (size, size) is returned.
972   ///     If the size has 2 values, it should be (height, width).
973   /// \param[in] padding A vector representing the number of pixels to pad the image.
974   ///    If the vector has one value, it pads all sides of the image with that value.
975   ///    If the vector has two values, it pads left and right with the first and
976   ///    top and bottom with the second value.
977   ///    If the vector has four values, it pads left, top, right, and bottom with
978   ///    those values respectively (default={0, 0, 0, 0}).
979   /// \param[in] pad_if_needed A boolean indicating whether to pad the image
980   ///    if either side is smaller than the given output size (default=false).
981   /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
982   ///     BorderType::kConstant. If 1 value is provided, it is used for all RGB channels.
983   ///     If 3 values are provided, they are used to fill R, G, B channels respectively (default={0, 0, 0}).
984   /// \param[in] padding_mode The method of padding (default=BorderType::kConstant). It can be any of
985   ///     [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric].
986   ///   - BorderType::kConstant, Fill the border with constant values.
987   ///   - BorderType::kEdge, Fill the border with the last value on the edge.
988   ///   - BorderType::kReflect, Reflect the values on the edge omitting the last value of edge.
989   ///   - BorderType::kSymmetric, Reflect the values on the edge repeating the last value of edge.
990   /// \note If more than one input image is given, make sure that all images have the same size.
991   /// \par Example
992   /// \code
993   ///     /* Define operations */
994   ///     auto decode_op = vision::Decode();
995   ///     auto random_crop_op = vision::RandomCrop({255, 255}, {10, 10, 10, 10});
996   ///
997   ///     /* dataset is an instance of Dataset object */
998   ///     dataset = dataset->Map({decode_op, random_crop_op},  // operations
999   ///                            {"image"});                   // input columns
1000   /// \endcode
1001   explicit RandomCrop(const std::vector<int32_t> &size, const std::vector<int32_t> &padding = {0, 0, 0, 0},
1002                       bool pad_if_needed = false, const std::vector<uint8_t> &fill_value = {0, 0, 0},
1003                       BorderType padding_mode = BorderType::kConstant);
1004 
1005   /// \brief Destructor.
1006   ~RandomCrop() override = default;
1007 
1008  protected:
1009   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1010   /// \return Shared pointer to TensorOperation object.
1011   std::shared_ptr<TensorOperation> Parse() override;
1012 
1013  private:
1014   struct Data;
1015   std::shared_ptr<Data> data_;
1016 };
1017 
1018 /// \brief Equivalent to RandomResizedCrop TensorTransform, but crop the image before decoding.
1019 class DATASET_API RandomCropDecodeResize final : public TensorTransform {
1020  public:
1021   /// \brief Constructor.
1022   /// \param[in] size A vector representing the output size of the cropped image.
1023   ///               If the size is a single value, a squared crop of size (size, size) is returned.
1024   ///               If the size has 2 values, it should be (height, width).
1025   /// \param[in] scale Range [min, max) of respective size of the
1026   ///               original size to be cropped (default=(0.08, 1.0)).
1027   /// \param[in] ratio Range [min, max) of aspect ratio to be
1028   ///               cropped (default=(3. / 4., 4. / 3.)).
1029   /// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
1030   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
1031   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
1032   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
1033   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
1034   ///   - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
1035   /// \param[in] max_attempts The maximum number of attempts to propose a valid crop_area (default=10).
1036   ///               If exceeded, fall back to use center_crop instead.
1037   /// \par Example
1038   /// \code
1039   ///     /* Define operations */
1040   ///     auto random_op = vision::RandomCropDecodeResize({255, 255}, {0.1, 0.5});
1041   ///
1042   ///     /* dataset is an instance of Dataset object */
1043   ///     dataset = dataset->Map({random_op},  // operations
1044   ///                            {"image"});   // input columns
1045   /// \endcode
1046   explicit RandomCropDecodeResize(const std::vector<int32_t> &size, const std::vector<float> &scale = {0.08, 1.0},
1047                                   const std::vector<float> &ratio = {3. / 4., 4. / 3.},
1048                                   InterpolationMode interpolation = InterpolationMode::kLinear,
1049                                   int32_t max_attempts = 10);
1050 
1051   /// \brief Destructor.
1052   ~RandomCropDecodeResize() override = default;
1053 
1054  protected:
1055   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1056   /// \return Shared pointer to TensorOperation object.
1057   std::shared_ptr<TensorOperation> Parse() override;
1058 
1059  private:
1060   struct Data;
1061   std::shared_ptr<Data> data_;
1062 };
1063 
1064 /// \brief Crop the input image at a random location and adjust bounding boxes accordingly.
1065 ///        If the cropped area is out of bbox, the returned bbox will be empty.
1066 class DATASET_API RandomCropWithBBox final : public TensorTransform {
1067  public:
1068   /// \brief Constructor.
1069   /// \param[in] size A vector representing the output size of the cropped image.
1070   ///     If the size is a single value, a squared crop of size (size, size) is returned.
1071   ///     If the size has 2 values, it should be (height, width).
1072   /// \param[in] padding A vector representing the number of pixels to pad the image.
1073   ///    If the vector has one value, it pads all sides of the image with that value.
1074   ///    If the vector has two values, it pads left and right with the first and
1075   ///    top and bottom with the second value.
1076   ///    If the vector has four values, it pads left, top, right, and bottom with
1077   ///    those values respectively (default={0, 0, 0, 0}).
1078   /// \param[in] pad_if_needed A boolean indicating whether to pad the image
1079   ///    if either side is smaller than the given output size (default=false).
1080   /// \param[in] fill_value A vector representing the pixel intensity of the borders. Only valid
1081   ///    if the padding_mode is BorderType::kConstant. If 1 value is provided, it is used for all RGB
1082   ///    channels. If 3 values are provided, they fill R, G, B channels respectively (default={0, 0, 0}).
1083   /// \param[in] padding_mode The method of padding (default=BorderType::kConstant). It can be any of
1084   ///     [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric].
1085   ///   - BorderType::kConstant, Fill the border with constant values.
1086   ///   - BorderType::kEdge, Fill the border with the last value on the edge.
1087   ///   - BorderType::kReflect, Reflect the values on the edge omitting the last value of edge.
1088   ///   - BorderType::kSymmetric, Reflect the values on the edge repeating the last value of edge.
1089   /// \par Example
1090   /// \code
1091   ///     /* Define operations */
1092   ///     auto random_op = vision::RandomCropWithBBox({224, 224}, {0, 0, 0, 0});
1093   ///
1094   ///     /* dataset is an instance of Dataset object */
1095   ///     dataset = dataset->Map({random_op},         // operations
1096   ///                            {"image", "bbox"});  // input columns
1097   /// \endcode
1098   explicit RandomCropWithBBox(const std::vector<int32_t> &size, const std::vector<int32_t> &padding = {0, 0, 0, 0},
1099                               bool pad_if_needed = false, const std::vector<uint8_t> &fill_value = {0, 0, 0},
1100                               BorderType padding_mode = BorderType::kConstant);
1101 
1102   /// \brief Destructor.
1103   ~RandomCropWithBBox() override = default;
1104 
1105  protected:
1106   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1107   /// \return Shared pointer to TensorOperation object.
1108   std::shared_ptr<TensorOperation> Parse() override;
1109 
1110  private:
1111   struct Data;
1112   std::shared_ptr<Data> data_;
1113 };
1114 
1115 /// \brief Randomly apply histogram equalization on the input image with a given probability.
1116 class DATASET_API RandomEqualize final : public TensorTransform {
1117  public:
1118   /// \brief Constructor.
1119   /// \param[in] prob A float representing the probability of equalization, which
1120   ///     must be in the range [0, 1] (default=0.5).
1121   /// \par Example
1122   /// \code
1123   ///     /* Define operations */
1124   ///     auto decode_op = vision::Decode();
1125   ///     auto random_op = vision::RandomEqualize(0.5);
1126   ///
1127   ///     /* dataset is an instance of Dataset object */
1128   ///     dataset = dataset->Map({decode_op, random_op},  // operations
1129   ///                            {"image"});              // input columns
1130   /// \endcode
1131   explicit RandomEqualize(float prob = 0.5);
1132 
1133   /// \brief Destructor.
1134   ~RandomEqualize() override = default;
1135 
1136  protected:
1137   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1138   /// \return Shared pointer to TensorOperation object.
1139   std::shared_ptr<TensorOperation> Parse() override;
1140 
1141  private:
1142   struct Data;
1143   std::shared_ptr<Data> data_;
1144 };
1145 
1146 /// \brief Randomly flip the input image horizontally with a given probability.
1147 class DATASET_API RandomHorizontalFlip final : public TensorTransform {
1148  public:
1149   /// \brief Constructor.
1150   /// \param[in] prob A float representing the probability of flip (default=0.5).
1151   /// \par Example
1152   /// \code
1153   ///     /* Define operations */
1154   ///     auto decode_op = vision::Decode();
1155   ///     auto random_op = vision::RandomHorizontalFlip(0.8);
1156   ///
1157   ///     /* dataset is an instance of Dataset object */
1158   ///     dataset = dataset->Map({decode_op, random_op},  // operations
1159   ///                            {"image"});              // input columns
1160   /// \endcode
1161   explicit RandomHorizontalFlip(float prob = 0.5);
1162 
1163   /// \brief Destructor.
1164   ~RandomHorizontalFlip() override = default;
1165 
1166  protected:
1167   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1168   /// \return Shared pointer to TensorOperation object.
1169   std::shared_ptr<TensorOperation> Parse() override;
1170 
1171  private:
1172   struct Data;
1173   std::shared_ptr<Data> data_;
1174 };
1175 
1176 /// \brief Randomly flip the input image horizontally with a given probability and adjust bounding boxes accordingly.
1177 class DATASET_API RandomHorizontalFlipWithBBox final : public TensorTransform {
1178  public:
1179   /// \brief Constructor.
1180   /// \param[in] prob A float representing the probability of flip (default=0.5).
1181   /// \par Example
1182   /// \code
1183   ///     /* Define operations */
1184   ///     auto random_op = vision::RandomHorizontalFlipWithBBox(1.0);
1185   ///
1186   ///     /* dataset is an instance of Dataset object */
1187   ///     dataset = dataset->Map({random_op},         // operations
1188   ///                            {"image", "bbox"});  // input columns
1189   /// \endcode
1190   explicit RandomHorizontalFlipWithBBox(float prob = 0.5);
1191 
1192   /// \brief Destructor.
1193   ~RandomHorizontalFlipWithBBox() override = default;
1194 
1195  protected:
1196   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1197   /// \return Shared pointer to TensorOperation object.
1198   std::shared_ptr<TensorOperation> Parse() override;
1199 
1200  private:
1201   struct Data;
1202   std::shared_ptr<Data> data_;
1203 };
1204 
1205 /// \brief Randomly invert the input image with a given probability.
1206 class DATASET_API RandomInvert final : public TensorTransform {
1207  public:
1208   /// \brief Constructor.
1209   /// \param[in] prob A float representing the probability of the image being inverted, which
1210   ///     must be in the range [0, 1] (default=0.5).
1211   /// \par Example
1212   /// \code
1213   ///     /* Define operations */
1214   ///     auto decode_op = vision::Decode();
1215   ///     auto random_op = vision::RandomInvert(0.8);
1216   ///
1217   ///     /* dataset is an instance of Dataset object */
1218   ///     dataset = dataset->Map({decode_op, random_op},  // operations
1219   ///                            {"image"});              // input columns
1220   /// \endcode
1221   explicit RandomInvert(float prob = 0.5);
1222 
1223   /// \brief Destructor.
1224   ~RandomInvert() override = default;
1225 
1226  protected:
1227   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1228   /// \return Shared pointer to TensorOperation object.
1229   std::shared_ptr<TensorOperation> Parse() override;
1230 
1231  private:
1232   struct Data;
1233   std::shared_ptr<Data> data_;
1234 };
1235 
1236 /// \brief Add AlexNet-style PCA-based noise to an image.
1237 class DATASET_API RandomLighting final : public TensorTransform {
1238  public:
1239   /// \brief Constructor.
1240   /// \param[in] alpha A float representing the intensity of the image (default=0.05).
1241   /// \par Example
1242   /// \code
1243   ///     /* Define operations */
1244   ///     auto decode_op = vision::Decode();
1245   ///     auto random_op = vision::RandomLighting(0.1);  // overrides the default alpha
1246   ///
1247   ///     /* dataset is an instance of Dataset object */
1248   ///     dataset = dataset->Map({decode_op, random_op},  // operations
1249   ///                            {"image"});              // input columns
1250   /// \endcode
1251   explicit RandomLighting(float alpha = 0.05);
1252 
1253   /// \brief Destructor.
1254   ~RandomLighting() override = default;
1255 
1256  protected:
1257   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1258   /// \return Shared pointer to TensorOperation object.
1259   std::shared_ptr<TensorOperation> Parse() override;
1260 
1261  private:
1262   struct Data;
1263   std::shared_ptr<Data> data_;
1264 };
1265 
1266 /// \brief Reduce the number of bits for each color channel randomly.
1267 class DATASET_API RandomPosterize final : public TensorTransform {
1268  public:
1269   /// \brief Constructor.
1270   /// \param[in] bit_range Range of random posterize to compress image.
1271   ///     A uint8_t vector representing the minimum and maximum bit, in the range [1, 8] (default={4, 8}).
1272   /// \par Example
1273   /// \code
1274   ///     /* Define operations */
1275   ///     auto decode_op = vision::Decode();
1276   ///     auto random_op = vision::RandomPosterize({4, 8});
1277   ///
1278   ///     /* dataset is an instance of Dataset object */
1279   ///     dataset = dataset->Map({decode_op, random_op},  // operations
1280   ///                            {"image"});              // input columns
1281   /// \endcode
1282   explicit RandomPosterize(const std::vector<uint8_t> &bit_range = {4, 8});
1283 
1284   /// \brief Destructor.
1285   ~RandomPosterize() override = default;
1286 
1287  protected:
1288   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1289   /// \return Shared pointer to TensorOperation object.
1290   std::shared_ptr<TensorOperation> Parse() override;
1291 
1292  private:
1293   struct Data;
1294   std::shared_ptr<Data> data_;
1295 };
1296 
1297 /// \brief Resize the input image using a randomly selected interpolation mode.
1298 class DATASET_API RandomResize final : public TensorTransform {
1299  public:
1300   /// \brief Constructor.
1301   /// \param[in] size A vector representing the output size of the resized image.
1302   ///     If the size is a single value, the smaller edge of the image will be resized to this value,
1303   ///     keeping the same image aspect ratio. If the size has 2 values, it should be (height, width).
1304   /// \par Example
1305   /// \code
1306   ///     /* Define operations */
1307   ///     auto decode_op = vision::Decode();
1308   ///     auto random_op = vision::RandomResize({32, 32});
1309   ///
1310   ///     /* dataset is an instance of Dataset object */
1311   ///     dataset = dataset->Map({decode_op, random_op},  // operations
1312   ///                            {"image"});              // input columns
1313   /// \endcode
1314   explicit RandomResize(const std::vector<int32_t> &size);
1315 
1316   /// \brief Destructor.
1317   ~RandomResize() override = default;
1318 
1319  protected:
1320   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1321   /// \return Shared pointer to TensorOperation object.
1322   std::shared_ptr<TensorOperation> Parse() override;
1323 
1324  private:
1325   struct Data;
1326   std::shared_ptr<Data> data_;
1327 };
1328 
1329 /// \brief Resize the input image using a randomly selected interpolation mode and adjust
1330 ///     bounding boxes accordingly.
1331 class DATASET_API RandomResizeWithBBox final : public TensorTransform {
1332  public:
1333   /// \brief Constructor.
1334   /// \param[in] size A vector representing the output size of the resized image.
1335   ///     If the size is a single value, the smaller edge of the image will be resized to this value,
1336   ///     keeping the same image aspect ratio. If the size has 2 values, it should be (height, width).
1337   /// \par Example
1338   /// \code
1339   ///     /* Define operations */
1340   ///     auto random_op = vision::RandomResizeWithBBox({50, 50});
1341   ///
1342   ///     /* dataset is an instance of Dataset object */
1343   ///     dataset = dataset->Map({random_op},         // operations
1344   ///                            {"image", "bbox"});  // input columns
1345   /// \endcode
1346   explicit RandomResizeWithBBox(const std::vector<int32_t> &size);
1347 
1348   /// \brief Destructor.
1349   ~RandomResizeWithBBox() override = default;
1350 
1351  protected:
1352   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1353   /// \return Shared pointer to TensorOperation object.
1354   std::shared_ptr<TensorOperation> Parse() override;
1355 
1356  private:
1357   struct Data;
1358   std::shared_ptr<Data> data_;
1359 };
1360 
1361 /// \brief Crop the input image to a random size and aspect ratio.
1362 class DATASET_API RandomResizedCrop final : public TensorTransform {
1363  public:
1364   /// \brief Constructor.
1365   /// \param[in] size A vector representing the output size of the cropped image.
1366   ///     If the size is a single value, a square crop of size (size, size) is returned.
1367   ///     If the size has 2 values, it should be (height, width).
1368   /// \param[in] scale Range [min, max) of the size to be cropped, relative to the
1369   ///     original size (default=(0.08, 1.0)).
1370   /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
1371   ///     (default=(3. / 4., 4. / 3.)).
1372   /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear).
1373   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
1374   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
1375   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
1376   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
1377   ///   - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
1378   /// \param[in] max_attempts The maximum number of attempts to propose a valid
1379   ///     crop_area (default=10). If exceeded, fall back to use center_crop instead.
1380   /// \note If more than one input image is given, make sure that all images have the same size.
1381   /// \par Example
1382   /// \code
1383   ///     /* Define operations */
1384   ///     auto decode_op = vision::Decode();
1385   ///     auto random_op = vision::RandomResizedCrop({32, 32}, {0.08, 1.0});
1386   ///
1387   ///     /* dataset is an instance of Dataset object */
1388   ///     dataset = dataset->Map({decode_op, random_op},  // operations
1389   ///                            {"image"});              // input columns
1390   /// \endcode
1391   explicit RandomResizedCrop(const std::vector<int32_t> &size, const std::vector<float> &scale = {0.08, 1.0},
1392                              const std::vector<float> &ratio = {3. / 4., 4. / 3.},
1393                              InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
1394 
1395   /// \brief Destructor (defaulted).
1396   ~RandomResizedCrop() override = default;
1397 
1398  protected:
1399   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1400   /// \return Shared pointer to TensorOperation object.
1401   std::shared_ptr<TensorOperation> Parse() override;
1402 
1403  private:
1404   struct Data;  // Opaque type: only forward-declared here and held through data_.
1405   std::shared_ptr<Data> data_;
1406 };
1407 
1408 /// \brief Crop the input image to a random size and aspect ratio.
1409 ///        If cropped area is out of bbox, the return bbox will be empty.
1410 class DATASET_API RandomResizedCropWithBBox final : public TensorTransform {
1411  public:
1412   /// \brief Constructor.
1413   /// \param[in] size A vector representing the output size of the cropped image.
1414   ///     If the size is a single value, a square crop of size (size, size) is returned.
1415   ///     If the size has 2 values, it should be (height, width).
1416   /// \param[in] scale Range [min, max) of the size to be cropped, relative to the
1417   ///     original size (default=(0.08, 1.0)).
1418   /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
1419   ///     (default=(3. / 4., 4. / 3.)).
1420   /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear).
1421   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
1422   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
1423   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
1424   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
1425   ///   - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
1426   /// \param[in] max_attempts The maximum number of attempts to propose a valid
1427   ///     crop_area (default=10). If exceeded, fall back to use center_crop instead.
1428   /// \par Example
1429   /// \code
1430   ///     /* Define operations */
1431   ///     auto random_op = vision::RandomResizedCropWithBBox({50, 50}, {0.05, 0.5}, {0.2, 0.4},
1432   ///                                                        InterpolationMode::kCubic);
1433   ///
1434   ///     /* dataset is an instance of Dataset object */
1435   ///     dataset = dataset->Map({random_op},             // operations
1436   ///                            {"image", "bbox"});      // input columns
1437   /// \endcode
1438   explicit RandomResizedCropWithBBox(const std::vector<int32_t> &size, const std::vector<float> &scale = {0.08, 1.0},
1439                                      const std::vector<float> &ratio = {3. / 4., 4. / 3.},
1440                                      InterpolationMode interpolation = InterpolationMode::kLinear,
1441                                      int32_t max_attempts = 10);
1442 
1443   /// \brief Destructor (defaulted).
1444   ~RandomResizedCropWithBBox() override = default;
1445 
1446  protected:
1447   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1448   /// \return Shared pointer to TensorOperation object.
1449   std::shared_ptr<TensorOperation> Parse() override;
1450 
1451  private:
1452   struct Data;  // Opaque type: only forward-declared here and held through data_.
1453   std::shared_ptr<Data> data_;
1454 };
1455 
1456 /// \brief Rotate the image according to parameters.
1457 class DATASET_API RandomRotation final : public TensorTransform {
1458  public:
1459   /// \brief Constructor.
1460   /// \param[in] degrees A float vector of size 2, representing the starting and ending degrees.
1461   /// \param[in] resample An enum for the mode of interpolation (default=InterpolationMode::kNearestNeighbour).
1462   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
1463   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
1464   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
1465   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
1466   ///   - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
1467   /// \param[in] expand A boolean representing whether the image is expanded after rotation (default=false).
1468   /// \param[in] center A float vector of size 2 giving the x and y center of rotation, or an empty vector
1469   ///     to use the center of the image (default={}).
1470   /// \param[in] fill_value A vector representing the value to fill the area outside the transform
1471   ///    in the output image. If 1 value is provided, it is used for all RGB channels.
1472   ///    If 3 values are provided, they are used to fill R, G, B channels respectively (default={0, 0, 0}).
1473   /// \par Example
1474   /// \code
1475   ///     /* Define operations */
1476   ///     auto decode_op = vision::Decode();
1477   ///     auto random_op = vision::RandomRotation({30, 60}, InterpolationMode::kNearestNeighbour);
1478   ///
1479   ///     /* dataset is an instance of Dataset object */
1480   ///     dataset = dataset->Map({decode_op, random_op},  // operations
1481   ///                            {"image"});              // input columns
1482   /// \endcode
1483   explicit RandomRotation(const std::vector<float> &degrees,
1484                           InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false,
1485                           const std::vector<float> &center = {}, const std::vector<uint8_t> &fill_value = {0, 0, 0});
1486 
1487   /// \brief Destructor (defaulted).
1488   ~RandomRotation() override = default;
1489 
1490  protected:
1491   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1492   /// \return Shared pointer to TensorOperation object.
1493   std::shared_ptr<TensorOperation> Parse() override;
1494 
1495  private:
1496   struct Data;  // Opaque type: only forward-declared here and held through data_.
1497   std::shared_ptr<Data> data_;
1498 };
1499 
1500 /// \brief Choose a random sub-policy from a list to be applied on the input image. A sub-policy is a list of tuples
1501 ///     (operation, prob), where operation is a TensorTransform operation and prob is the probability that this
1502 ///     operation will be applied. Once a sub-policy is selected, each operation within the sub-policy will be
1503 ///     applied in sequence according to its probability.
1504 class DATASET_API RandomSelectSubpolicy final : public TensorTransform {
1505  public:
1506   /// \brief Constructor.
1507   /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are raw pointers.
1508   /// \par Example
1509   /// \code
1510   ///     /* Define operations */
1511   ///     auto invert_op(new vision::Invert());
1512   ///     auto equalize_op(new vision::Equalize());
1513   ///
1514   ///     std::vector<std::pair<TensorTransform *, double>> policy = {{invert_op, 0.5}, {equalize_op, 0.4}};
1515   ///     vision::RandomSelectSubpolicy random_select_subpolicy_op = vision::RandomSelectSubpolicy({policy});
1516   ///
1517   ///     /* dataset is an instance of Dataset object */
1518   ///     dataset = dataset->Map({random_select_subpolicy_op},    // operations
1519   ///                            {"image"});                      // input columns
1520   /// \endcode
1521   explicit RandomSelectSubpolicy(const std::vector<std::vector<std::pair<TensorTransform *, double>>> &policy);
1522 
1523   /// \brief Constructor.
1524   /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are shared pointers.
1525   /// \par Example
1526   /// \code
1527   ///     /* Define operations */
1528   ///     std::shared_ptr<TensorTransform> invert_op(new vision::Invert());
1529   ///     std::shared_ptr<TensorTransform> equalize_op(new vision::Equalize());
1530   ///     std::shared_ptr<TensorTransform> resize_op(new vision::Resize({15, 15}));
1531   ///
1532   ///     auto random_select_subpolicy_op = vision::RandomSelectSubpolicy({
1533   ///                                          {{invert_op, 0.5}, {equalize_op, 0.4}},
1534   ///                                          {{resize_op, 0.1}}
1535   ///                                       });
1536   ///
1537   ///     /* dataset is an instance of Dataset object */
1538   ///     dataset = dataset->Map({random_select_subpolicy_op},    // operations
1539   ///                            {"image"});                      // input columns
1540   /// \endcode
1541   explicit RandomSelectSubpolicy(
1542     const std::vector<std::vector<std::pair<std::shared_ptr<TensorTransform>, double>>> &policy);
1543 
1544   /// \brief Constructor.
1545   /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are references
1546   /// \par Example
1547   /// \code
1548   ///     /* Define operations */
1549   ///     vision::Invert invert_op = vision::Invert();
1550   ///     vision::Equalize equalize_op = vision::Equalize();
1551   ///     vision::Resize resize_op = vision::Resize({15, 15});
1552   ///
1553   ///     auto random_select_subpolicy_op = vision::RandomSelectSubpolicy({
1554   ///                                          {{invert_op, 0.5}, {equalize_op, 0.4}},
1555   ///                                          {{resize_op, 0.1}}
1556   ///                                       });
1557   ///
1558   ///     /* dataset is an instance of Dataset object */
1559   ///     dataset = dataset->Map({random_select_subpolicy_op},    // operations
1560   ///                            {"image"});                      // input columns
1561   /// \endcode
1562   explicit RandomSelectSubpolicy(
1563     const std::vector<std::vector<std::pair<std::reference_wrapper<TensorTransform>, double>>> &policy);
1564 
1565   /// \brief Destructor (defaulted).
1566   ~RandomSelectSubpolicy() override = default;
1567 
1568  protected:
1569   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1570   /// \return Shared pointer to TensorOperation object.
1571   std::shared_ptr<TensorOperation> Parse() override;
1572 
1573  private:
1574   struct Data;  // Opaque type: only forward-declared here and held through data_.
1575   std::shared_ptr<Data> data_;
1576 };
1577 
1578 /// \brief Adjust the sharpness of the input image by a fixed or random degree.
1579 class DATASET_API RandomSharpness final : public TensorTransform {
1580  public:
1581   /// \brief Constructor.
1582   /// \param[in] degrees A float vector of size 2, representing the range of random sharpness
1583   ///     adjustment degrees. It should be in (min, max) format. If min=max, then it is a
1584   ///     single fixed magnitude operation (default=(0.1, 1.9)).
1585   /// \par Example
1586   /// \code
1587   ///     /* Define operations */
1588   ///     auto decode_op = vision::Decode();
1589   ///     auto random_op = vision::RandomSharpness({0.1, 1.5});
1590   ///
1591   ///     /* dataset is an instance of Dataset object */
1592   ///     dataset = dataset->Map({decode_op, random_op},  // operations
1593   ///                            {"image"});              // input columns
1594   /// \endcode
1595   explicit RandomSharpness(const std::vector<float> &degrees = {0.1, 1.9});
1596 
1597   /// \brief Destructor (defaulted).
1598   ~RandomSharpness() override = default;
1599 
1600  protected:
1601   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1602   /// \return Shared pointer to TensorOperation object.
1603   std::shared_ptr<TensorOperation> Parse() override;
1604 
1605  private:
1606   struct Data;  // Opaque type: only forward-declared here and held through data_.
1607   std::shared_ptr<Data> data_;
1608 };
1609 
1610 /// \brief Invert pixels randomly within a specified range.
1611 class DATASET_API RandomSolarize final : public TensorTransform {
1612  public:
1613   /// \brief Constructor.
1614   /// \param[in] threshold A vector with two elements specifying the pixel range to invert (default={0, 255}).
1615   ///     Threshold values should always be in (min, max) format.
1616   ///     If min=max, it will invert all pixels above min(max).
1617   /// \par Example
1618   /// \code
1619   ///     /* Define operations */
1620   ///     auto decode_op = vision::Decode();
1621   ///     auto random_op = vision::RandomSolarize({0, 255});
1622   ///
1623   ///     /* dataset is an instance of Dataset object */
1624   ///     dataset = dataset->Map({decode_op, random_op},  // operations
1625   ///                            {"image"});              // input columns
1626   /// \endcode
1627   explicit RandomSolarize(const std::vector<uint8_t> &threshold = {0, 255});
1628 
1629   /// \brief Destructor (defaulted).
1630   ~RandomSolarize() override = default;
1631 
1632  protected:
1633   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1634   /// \return Shared pointer to TensorOperation object.
1635   std::shared_ptr<TensorOperation> Parse() override;
1636 
1637  private:
1638   struct Data;  // Opaque type: only forward-declared here and held through data_.
1639   std::shared_ptr<Data> data_;
1640 };
1641 
1642 /// \brief Randomly flip the input image vertically with a given probability.
1643 class DATASET_API RandomVerticalFlip final : public TensorTransform {
1644  public:
1645   /// \brief Constructor.
1646   /// \param[in] prob A float representing the probability of flip (default=0.5).
1647   /// \par Example
1648   /// \code
1649   ///     /* Define operations */
1650   ///     auto decode_op = vision::Decode();
1651   ///     auto random_op = vision::RandomVerticalFlip();
1652   ///
1653   ///     /* dataset is an instance of Dataset object */
1654   ///     dataset = dataset->Map({decode_op, random_op},  // operations
1655   ///                            {"image"});              // input columns
1656   /// \endcode
1657   explicit RandomVerticalFlip(float prob = 0.5);
1658 
1659   /// \brief Destructor (defaulted).
1660   ~RandomVerticalFlip() override = default;
1661 
1662  protected:
1663   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1664   /// \return Shared pointer to TensorOperation object.
1665   std::shared_ptr<TensorOperation> Parse() override;
1666 
1667  private:
1668   struct Data;  // Opaque type: only forward-declared here and held through data_.
1669   std::shared_ptr<Data> data_;
1670 };
1671 
1672 /// \brief Randomly flip the input image vertically with a given probability and adjust bounding boxes accordingly.
1673 class DATASET_API RandomVerticalFlipWithBBox final : public TensorTransform {
1674  public:
1675   /// \brief Constructor.
1676   /// \param[in] prob A float representing the probability of flip (default=0.5).
1677   /// \par Example
1678   /// \code
1679   ///     /* Define operations */
1680   ///     auto random_op = vision::RandomVerticalFlipWithBBox();
1681   ///
1682   ///     /* dataset is an instance of Dataset object */
1683   ///     dataset = dataset->Map({random_op},             // operations
1684   ///                            {"image", "bbox"});      // input columns
1685   /// \endcode
1686   explicit RandomVerticalFlipWithBBox(float prob = 0.5);
1687 
1688   /// \brief Destructor (defaulted).
1689   ~RandomVerticalFlipWithBBox() override = default;
1690 
1691  protected:
1692   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1693   /// \return Shared pointer to TensorOperation object.
1694   std::shared_ptr<TensorOperation> Parse() override;
1695 
1696  private:
1697   struct Data;  // Opaque type: only forward-declared here and held through data_.
1698   std::shared_ptr<Data> data_;
1699 };
1700 
1701 /// \brief Read a file in binary mode.
1702 /// \param[in] filename The path to the file to be read.
1703 /// \param[out] output Pointer to the MSTensor that receives the binary data.
1704 /// \return The status code.
1705 Status DATASET_API ReadFile(const std::string &filename, mindspore::MSTensor *output);
1706 
1707 /// \brief Read an image file and decode it into one or three channels data.
1708 /// \param[in] filename The path to the file to be read.
1709 /// \param[out] output Pointer to the MSTensor that receives the Tensor data.
1710 /// \param[in] mode The read mode used for optionally converting the image, can be one of
1711 ///    [ImageReadMode::kUNCHANGED, ImageReadMode::kGRAYSCALE, ImageReadMode::kCOLOR]. Default:
1712 ///    ImageReadMode::kUNCHANGED.
1713 ///    - ImageReadMode::kUNCHANGED, keep the output in the original format.
1714 ///    - ImageReadMode::kGRAYSCALE, convert the output into one channel grayscale data.
1715 ///    - ImageReadMode::kCOLOR, convert the output into three channels RGB color data.
1716 /// \return The status code.
1717 Status DATASET_API ReadImage(const std::string &filename, mindspore::MSTensor *output,
1718                              ImageReadMode mode = ImageReadMode::kUNCHANGED);
1719 
1720 /// \brief Read the video, audio, metadata from a video file. It supports AVI, H264, H265, MOV, MP4, WMV file formats.
1721 /// \param[in] filename The path to the video file to be read.
1722 /// \param[out] video_output The video frames of the video file.
1723 /// \param[out] audio_output The audio frames of the video file.
1724 /// \param[out] metadata_output The metadata, which contains video_fps and audio_fps.
1725 /// \param[in] start_pts The start presentation timestamp of the video. Default: 0.0.
1726 /// \param[in] end_pts The end presentation timestamp of the video. Default: 2147483647.0.
1727 /// \param[in] pts_unit The unit for the timestamps, can be one of ["pts", "sec"]. Default: "pts".
1728 /// \return The status code.
1729 Status DATASET_API ReadVideo(const std::string &filename, mindspore::MSTensor *video_output,
1730                              mindspore::MSTensor *audio_output, std::map<std::string, std::string> *metadata_output,
1731                              float start_pts = 0.0, float end_pts = 2147483647.0, const std::string &pts_unit = "pts");
1732 
1733 /// \brief Read the timestamps and frame rate of a video file. It supports AVI, H264, H265, MOV, MP4, WMV files.
1734 /// \param[in] filename The path to the video file to be read.
1735 /// \param[out] output The tuple (video_timestamps, video_fps) of the video.
1736 /// \param[in] pts_unit The unit for the timestamps, can be one of ["pts", "sec"]. Default: "pts".
1737 /// \return The status code.
1738 Status DATASET_API ReadVideoTimestamps(const std::string &filename, std::tuple<std::vector<float>, float> *output,
1739                                        const std::string &pts_unit = "pts");
1740 
1741 /// \brief Crop the given image and zoom to the specified size.
1742 class DATASET_API ResizedCrop final : public TensorTransform {
1743  public:
1744   /// \brief Constructor.
1745   /// \param[in] top Horizontal ordinate of the upper left corner of the crop image.
1746   /// \param[in] left Vertical ordinate of the upper left corner of the crop image.
1747   /// \param[in] height Height of cropped image.
1748   /// \param[in] width Width of cropped image.
1749   /// \param[in] size A vector representing the output size of the image.
1750   ///     If the size is a single value, a square output of size (size, size) is returned.
1751   ///     If the size has 2 values, it should be (height, width).
1752   /// \param[in] interpolation Image interpolation mode. Default: InterpolationMode::kLinear.
1753   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
1754   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
1755   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
1756   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
1757   ///   - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
1758   /// \note If more than one input image is given, make sure that all images have the same size.
1759   /// \par Example
1760   /// \code
1761   ///     /* Define operations */
1762   ///     auto decode_op = vision::Decode();
1763   ///     auto resized_crop_op = vision::ResizedCrop(128, 128, 256, 256, {128, 128});
1764   ///
1765   ///     /* dataset is an instance of Dataset object */
1766   ///     dataset = dataset->Map({decode_op, resized_crop_op},  // operations
1767   ///                            {"image"});                    // input columns
1768   /// \endcode
1769   ResizedCrop(int32_t top, int32_t left, int32_t height, int32_t width, const std::vector<int32_t> &size,
1770               InterpolationMode interpolation = InterpolationMode::kLinear);
1771 
1772   /// \brief Destructor (defaulted).
1773   ~ResizedCrop() override = default;
1774 
1775  protected:
1776   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1777   /// \return Shared pointer to TensorOperation object.
1778   std::shared_ptr<TensorOperation> Parse() override;
1779 
1780  private:
1781   struct Data;  // Opaque type: only forward-declared here and held through data_.
1782   std::shared_ptr<Data> data_;
1783 };
1784 
1785 /// \brief Resize the input image to the given size and adjust bounding boxes accordingly.
1786 class DATASET_API ResizeWithBBox final : public TensorTransform {
1787  public:
1788   /// \brief Constructor.
1789   /// \param[in] size The output size of the resized image.
1790   ///     If the size is a single value, the smaller edge of the image will be resized to this value with the same
1791   ///     image aspect ratio. If the size has 2 values, it should be (height, width).
1792   /// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
1793   ///   - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
1794   ///   - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
1795   ///   - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
1796   ///   - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
1797   ///   - InterpolationMode::kCubicPil, Interpolation method is bicubic interpolation like implemented in pillow.
1798   /// \par Example
1799   /// \code
1800   ///     /* Define operations */
1801   ///     auto resize_op = vision::ResizeWithBBox({100, 100}, InterpolationMode::kNearestNeighbour);
1802   ///
1803   ///     /* dataset is an instance of Dataset object */
1804   ///     dataset = dataset->Map({resize_op},             // operations
1805   ///                            {"image", "bbox"});      // input columns
1806   /// \endcode
1807   explicit ResizeWithBBox(const std::vector<int32_t> &size,
1808                           InterpolationMode interpolation = InterpolationMode::kLinear);
1809 
1810   /// \brief Destructor (defaulted).
1811   ~ResizeWithBBox() override = default;
1812 
1813  protected:
1814   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1815   /// \return Shared pointer to TensorOperation object.
1816   std::shared_ptr<TensorOperation> Parse() override;
1817 
1818  private:
1819   struct Data;  // Opaque type: only forward-declared here and held through data_.
1820   std::shared_ptr<Data> data_;
1821 };
1822 
1823 /// \brief Change the format of input tensor from 4-channel RGBA to 3-channel BGR.
1824 class DATASET_API RGBA2BGR final : public TensorTransform {
1825  public:
1826   /// \brief Constructor.
1827   /// \par Example
1828   /// \code
1829   ///     /* Define operations */
1830   ///     auto decode_op = vision::Decode();
1831   ///     auto rgba2bgr_op = vision::RGBA2BGR();
1832   ///
1833   ///     /* dataset is an instance of Dataset object */
1834   ///     dataset = dataset->Map({decode_op, rgba2bgr_op},  // operations
1835   ///                            {"image"});                // input columns
1836   /// \endcode
1837   RGBA2BGR();
1838 
1839   /// \brief Destructor (defaulted).
1840   ~RGBA2BGR() override = default;
1841 
1842  protected:
1843   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1844   /// \return Shared pointer to TensorOperation object.
1845   std::shared_ptr<TensorOperation> Parse() override;
1846 };
1847 
1848 /// \brief Change the format of input tensor from 4-channel RGBA to 3-channel RGB.
1849 class DATASET_API RGBA2RGB final : public TensorTransform {
1850  public:
1851   /// \brief Constructor.
1852   /// \par Example
1853   /// \code
1854   ///     /* Define operations */
1855   ///     auto decode_op = vision::Decode();
1856   ///     auto rgba2rgb_op = vision::RGBA2RGB();
1857   ///
1858   ///     /* dataset is an instance of Dataset object */
1859   ///     dataset = dataset->Map({decode_op, rgba2rgb_op},  // operations
1860   ///                            {"image"});                // input columns
1861   /// \endcode
1862   RGBA2RGB();
1863 
1864   /// \brief Destructor (defaulted).
1865   ~RGBA2RGB() override = default;
1866 
1867  protected:
1868   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1869   /// \return Shared pointer to TensorOperation object.
1870   std::shared_ptr<TensorOperation> Parse() override;
1871 };
1872 
1873 /// \brief Slice the tensor to multiple patches in horizontal and vertical directions.
1874 class DATASET_API SlicePatches final : public TensorTransform {
1875  public:
1876   /// \brief Constructor.
1877   /// \param[in] num_height The number of patches in vertical direction (default=1).
1878   /// \param[in] num_width The number of patches in horizontal direction (default=1).
1879   /// \param[in] slice_mode An enum for the mode of slice (default=SliceMode::kPad).
1880   /// \param[in] fill_value A value representing the pixel to fill the padding area in right and
1881   ///     bottom border if slice_mode is kPad. Then padded tensor could be just sliced to multiple patches (default=0).
1882   /// \note The usage scenario is suitable to tensor with large height and width. The tensor will keep the same
1883   ///     if set both num_height and num_width to 1. And the number of output tensors is equal to num_height*num_width.
1884   /// \par Example
1885   /// \code
1886   ///     /* Define operations */
1887   ///     auto decode_op = vision::Decode();
1888   ///     auto slice_patch_op = vision::SlicePatches(255, 255);
1889   ///
1890   ///     /* dataset is an instance of Dataset object */
1891   ///     dataset = dataset->Map({decode_op, slice_patch_op},  // operations
1892   ///                            {"image"});                   // input columns
1893   /// \endcode
1894   explicit SlicePatches(int32_t num_height = 1, int32_t num_width = 1, SliceMode slice_mode = SliceMode::kPad,
1895                         uint8_t fill_value = 0);
1896 
1897   /// \brief Destructor (defaulted).
1898   ~SlicePatches() override = default;
1899 
1900  protected:
1901   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1902   /// \return Shared pointer to TensorOperation object.
1903   std::shared_ptr<TensorOperation> Parse() override;
1904 
1905  private:
1906   struct Data;  // Opaque type: only forward-declared here and held through data_.
1907   std::shared_ptr<Data> data_;
1908 };
1909 
1910 /// \brief Invert pixels within a specified range.
1911 class DATASET_API Solarize final : public TensorTransform {
1912  public:
1913   /// \brief Constructor.
1914   /// \param[in] threshold A vector with two elements specifying the pixel range to invert.
1915   ///     Threshold values should always be in (min, max) format.
1916   ///     If min=max, it will invert all pixels above min(max).
1917   /// \par Example
1918   /// \code
1919   ///     /* Define operations */
1920   ///     auto decode_op = vision::Decode();
1921   ///     auto solarize_op = vision::Solarize({0, 255});
1922   ///
1923   ///     /* dataset is an instance of Dataset object */
1924   ///     dataset = dataset->Map({decode_op, solarize_op},  // operations
1925   ///                            {"image"});                // input columns
1926   /// \endcode
1927   explicit Solarize(const std::vector<float> &threshold);
1928 
1929   /// \brief Destructor (defaulted).
1930   ~Solarize() override = default;
1931 
1932  protected:
1933   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1934   /// \return Shared pointer to TensorOperation object.
1935   std::shared_ptr<TensorOperation> Parse() override;
1936 
1937  private:
1938   struct Data;  // Opaque type: only forward-declared here and held through data_.
1939   std::shared_ptr<Data> data_;
1940 };
1941 
1942 /// \brief Divide the pixel values by 255 and convert from HWC format to CHW format with required datatype.
1943 class DATASET_API ToTensor final : public TensorTransform {
1944  public:
1945   /// \brief Constructors.
1946   /// \param[in] output_type The type of the output tensor, given either as a mindspore::DataType or as a string
1947   ///   (default=mindspore::DataType::kNumberTypeFloat32).
1948   /// \par Example
1949   /// \code
1950   ///     /* Define operations */
1951   ///     auto to_tensor_op = vision::ToTensor();
1952   ///
1953   ///     /* dataset is an instance of Dataset object */
1954   ///     dataset = dataset->Map({to_tensor_op},  // operations
1955   ///                            {"image"});      // input columns
1956   /// \endcode
1957   ToTensor();  // Overload without an explicit output type.
1958   explicit ToTensor(std::string output_type);  // Overload taking the output type as a string.
1959   explicit ToTensor(mindspore::DataType output_type);  // Overload taking the output type as a mindspore::DataType.
1960 
1961   /// \brief Destructor (defaulted).
1962   ~ToTensor() override = default;
1963 
1964  protected:
1965   /// \brief The function to convert a TensorTransform object into a TensorOperation object.
1966   /// \return Shared pointer to TensorOperation object.
1967   std::shared_ptr<TensorOperation> Parse() override;
1968 
1969  private:
1970   struct Data;  // Opaque type: only forward-declared here and held through data_.
1971   std::shared_ptr<Data> data_;
1972 };
1973 
1974 /// \brief Apply dataset-independent data augmentation with TrivialAugment Wide.
1975 class DATASET_API TrivialAugmentWide final : public TensorTransform {
1976  public:
1977   /// \brief Constructor.
1978   /// \param[in] num_magnitude_bins The number of different magnitude values. Default: 31.
1979   /// \param[in] interpolation An enum for the mode of interpolation. Default: InterpolationMode::kNearestNeighbour.
1980   ///     - InterpolationMode::kNearestNeighbour, Interpolation method is nearest-neighbor interpolation.
1981   ///     - InterpolationMode::kLinear, Interpolation method is bilinear interpolation.
1982   ///     - InterpolationMode::kCubic, Interpolation method is bicubic interpolation.
1983   ///     - InterpolationMode::kArea, Interpolation method is pixel area interpolation.
1984   /// \param[in] fill_value A vector representing the pixel intensity of the borders. Default: {0, 0, 0}.
1985   /// \par Example
1986   /// \code
1987   ///     /* Define operations */
1988   ///     auto decode_op = vision::Decode();
1989   ///     auto trivial_augment_wide_op = vision::TrivialAugmentWide();
1990   ///     /* dataset is an instance of Dataset object */
1991   ///     dataset = dataset->Map({decode_op, trivial_augment_wide_op}, // operations
1992   ///                            {"image"});                           // input columns
1993   /// \endcode
1994   explicit TrivialAugmentWide(int32_t num_magnitude_bins = 31,
1995                               InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
1996                               const std::vector<uint8_t> &fill_value = {0, 0, 0});
1997 
1998   /// \brief Destructor.
1999   ~TrivialAugmentWide() override = default;
2000 
2001  protected:
2002   /// \brief Convert this TensorTransform object into a TensorOperation object.
2003   /// \return Shared pointer to the resulting TensorOperation object.
2004   std::shared_ptr<TensorOperation> Parse() override;
2005 
2006  private:
2007   struct Data;
2008   std::shared_ptr<Data> data_;
2009 };
2010 
2011 /// \brief Apply transformations randomly selected from the input transform list to the input tensor.
2012 class DATASET_API UniformAugment final : public TensorTransform {
2013  public:
2014   /// \brief Constructor taking the candidate operations as raw pointers.
2015   /// \param[in] transforms A vector of raw pointers to TensorTransform operations.
2016   /// \param[in] num_ops An integer representing the number of operations to be selected and applied. Default: 2.
2017   /// \par Example
2018   /// \code
2019   ///     /* Define operations */
2020   ///     auto resize_op(new vision::Resize({30, 30}));
2021   ///     auto random_crop_op(new vision::RandomCrop({28, 28}));
2022   ///     auto center_crop_op(new vision::CenterCrop({16, 16}));
2023   ///     auto uniform_op(new vision::UniformAugment({random_crop_op, center_crop_op}, 2));
2024   ///
2025   ///     /* dataset is an instance of Dataset object */
2026   ///     dataset = dataset->Map({resize_op, uniform_op},  // operations
2027   ///                            {"image"});               // input columns
2028   /// \endcode
2029   explicit UniformAugment(const std::vector<TensorTransform *> &transforms, int32_t num_ops = 2);
2030 
2031   /// \brief Constructor taking the candidate operations as shared pointers.
2032   /// \param[in] transforms A vector of shared pointers to TensorTransform operations.
2033   /// \param[in] num_ops An integer representing the number of operations to be selected and applied. Default: 2.
2034   /// \par Example
2035   /// \code
2036   ///     /* Define operations */
2037   ///     std::shared_ptr<TensorTransform> resize_op(new vision::Resize({30, 30}));
2038   ///     std::shared_ptr<TensorTransform> random_crop_op(new vision::RandomCrop({28, 28}));
2039   ///     std::shared_ptr<TensorTransform> center_crop_op(new vision::CenterCrop({16, 16}));
2040   ///     std::shared_ptr<TensorTransform> uniform_op(new vision::UniformAugment({random_crop_op, center_crop_op}, 2));
2041   ///
2042   ///     /* dataset is an instance of Dataset object */
2043   ///     dataset = dataset->Map({resize_op, uniform_op},  // operations
2044   ///                            {"image"});               // input columns
2045   /// \endcode
2046   explicit UniformAugment(const std::vector<std::shared_ptr<TensorTransform>> &transforms, int32_t num_ops = 2);
2047 
2048   /// \brief Constructor taking the candidate operations as object references.
2049   /// \param[in] transforms A vector of reference wrappers around TensorTransform objects.
2050   /// \param[in] num_ops An integer representing the number of operations to be selected and applied. Default: 2.
2051   /// \par Example
2052   /// \code
2053   ///     /* Define operations */
2054   ///     vision::Resize resize_op = vision::Resize({30, 30});
2055   ///     vision::RandomCrop random_crop_op = vision::RandomCrop({28, 28});
2056   ///     vision::CenterCrop center_crop_op = vision::CenterCrop({16, 16});
2057   ///     vision::UniformAugment uniform_op = vision::UniformAugment({random_crop_op, center_crop_op}, 2);
2058   ///
2059   ///     /* dataset is an instance of Dataset object */
2060   ///     dataset = dataset->Map({resize_op, uniform_op},  // operations
2061   ///                            {"image"});               // input columns
2062   /// \endcode
2063   explicit UniformAugment(const std::vector<std::reference_wrapper<TensorTransform>> &transforms, int32_t num_ops = 2);
2064 
2065   /// \brief Destructor.
2066   ~UniformAugment() override = default;
2067 
2068  protected:
2069   /// \brief Convert this TensorTransform object into a TensorOperation object.
2070   /// \return Shared pointer to the resulting TensorOperation object.
2071   std::shared_ptr<TensorOperation> Parse() override;
2072 
2073  private:
2074   struct Data;
2075   std::shared_ptr<Data> data_;
2076 };
2077 
2078 /// \brief Flip the input image vertically.
2079 class DATASET_API VerticalFlip final : public TensorTransform {
2080  public:
2081   /// \brief Constructor. Takes no parameters.
2082   /// \par Example
2083   /// \code
2084   ///     /* Define operations */
2085   ///     auto decode_op = vision::Decode();
2086   ///     auto flip_op = vision::VerticalFlip();
2087   ///
2088   ///     /* dataset is an instance of Dataset object */
2089   ///     dataset = dataset->Map({decode_op, flip_op},  // operations
2090   ///                            {"image"});            // input columns
2091   /// \endcode
2092   VerticalFlip();
2093 
2094   /// \brief Destructor.
2095   ~VerticalFlip() override = default;
2096 
2097  protected:
2098   /// \brief Convert this TensorTransform object into a TensorOperation object.
2099   /// \return Shared pointer to the resulting TensorOperation object.
2100   std::shared_ptr<TensorOperation> Parse() override;
2101 };
2102 
2103 /// \brief Write one-dimensional uint8 data to a file in binary mode.
2104 /// \param[in] filename The path to the file to be written.
2105 /// \param[in] data The tensor holding the data to be written.
2106 /// \return The status code.
2107 Status DATASET_API WriteFile(const std::string &filename, const mindspore::MSTensor &data);
2108 
2109 /// \brief Write the image data to a JPEG file.
2110 /// \param[in] filename The path to the file to be written.
2111 /// \param[in] image The tensor holding the image data.
2112 /// \param[in] quality The quality of the JPEG file, in the range [1, 100]. Default: 75.
2113 /// \return The status code.
2114 Status DATASET_API WriteJpeg(const std::string &filename, const mindspore::MSTensor &image, int quality = 75);
2115 
2116 /// \brief Write the image data to a PNG file.
2117 /// \param[in] filename The path to the file to be written.
2118 /// \param[in] image The tensor holding the image data.
2119 /// \param[in] compression_level The compression level of the PNG file, in the range [0, 9]. Default: 6.
2120 /// \return The status code.
2121 Status DATASET_API WritePng(const std::string &filename, const mindspore::MSTensor &image, int compression_level = 6);
2122 }  // namespace vision
2123 }  // namespace dataset
2124 }  // namespace mindspore
2125 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_
2126