• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2019-2024 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""
The module vision.transforms provides many kinds of image augmentation methods
and image-related conversion methods
(e.g. conversion between PIL.Image.Image and numpy.ndarray)
to perform various computer vision tasks.
Users can apply suitable augmentations on image data
to improve their training models.
Users can also define their own augmentation methods with Python Pillow (PIL).
23
24For the different methods in this module, implementation is based in C++ and/or Python.
25The C++ implementation is inherited from mindspore._c_dataengine, provides high performance
26and is mainly based on OpenCV.
27The Python implementation is mainly based on PIL.
28
29.. Note::
30    A constructor's arguments for every class in this module must be saved into the
31    class attributes (self.xxx) to support save() and load().
32
33Examples:
34    >>> import mindspore.dataset as ds
35    >>> import mindspore.dataset.vision as vision
36    >>> from mindspore.dataset.vision import Border, Inter
37    >>> import mindspore.dataset.transforms as transforms
38    >>>
39    >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
40    >>> # create a dataset that reads all files in dataset_dir with 8 threads
41    >>> image_folder_dataset = ds.ImageFolderDataset(image_folder_dataset_dir,
42    ...                                              num_parallel_workers=8)
43    >>> # create a list of transformations to be applied to the image data
44    >>> transforms_list = [vision.Decode(),
45    ...                    vision.Resize((256, 256), interpolation=Inter.LINEAR),
46    ...                    vision.RandomCrop(200, padding_mode=Border.EDGE),
47    ...                    vision.RandomRotation((0, 15)),
48    ...                    vision.Normalize((100, 115.0, 121.0), (71.0, 68.0, 70.0)),
49    ...                    vision.HWC2CHW()]
50    >>> onehot_op = transforms.OneHot(num_classes=10)
    >>> # apply the transformations to the dataset through image_folder_dataset.map()
52    >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
53    ...                                                 input_columns="image")
54    >>> image_folder_dataset = image_folder_dataset.map(operations=onehot_op,
55    ...                                                 input_columns="label")
56"""
57
58# pylint: disable=too-few-public-methods
59import numbers
60import random
61import numpy as np
62from PIL import Image
63
64import mindspore._c_dataengine as cde
65from mindspore._c_expression import typing
66from . import py_transforms_util as util
67from .py_transforms_util import is_pil
68from .utils import AutoAugmentPolicy, Border, ConvertMode, ImageBatchFormat, Inter, SliceMode, parse_padding
69from .validators import check_adjust_brightness, check_adjust_contrast, check_adjust_gamma, check_adjust_hue, \
70    check_adjust_saturation, check_adjust_sharpness, check_affine, check_alpha, check_auto_augment, \
71    check_auto_contrast, check_bounding_box_augment_cpp, check_center_crop, check_convert_color, check_crop, \
72    check_cut_mix_batch_c, check_cutout_new, check_decode, check_erase, check_five_crop, check_gaussian_blur, \
73    check_hsv_to_rgb, check_linear_transform, check_mix_up, check_mix_up_batch_c, check_normalize, \
74    check_normalizepad, check_num_channels, check_pad, check_pad_to_size, check_perspective, check_positive_degrees, \
75    check_posterize, check_prob, check_rand_augment, check_random_adjust_sharpness, check_random_affine, \
76    check_random_auto_contrast, check_random_color_adjust, check_random_crop, check_random_erasing, \
77    check_random_perspective, check_random_posterize, check_random_resize_crop, check_random_rotation, \
78    check_random_select_subpolicy_op, check_random_solarize, check_range, check_rescale, check_resize, \
79    check_resize_interpolation, check_resized_crop, check_rgb_to_hsv, check_rotate, check_slice_patches, \
80    check_solarize, check_ten_crop, check_trivial_augment_wide, check_uniform_augment, check_to_tensor, \
81    check_device_target, FLOAT_MAX_INTEGER
82from ..core.datatypes import mstype_to_detype, nptype_to_detype
83from ..transforms.py_transforms_util import Implementation
84from ..transforms.transforms import CompoundOperation, PyTensorOperation, TensorOperation, TypeCast
85
86
class ImageTensorOperation(TensorOperation):
    """
    Common parent of the image transforms, which accept NumPy arrays or PIL images.
    """

    def __call__(self, *input_tensor_list):
        """
        Validate the type of every input, then hand the call over to the parent class.

        Args:
            input_tensor_list: Positional inputs, each a numpy.ndarray or a PIL.Image.Image.

        Returns:
            The result of the parent ``__call__`` for the same inputs.

        Raises:
            TypeError: If any input is neither a numpy.ndarray nor a PIL.Image.Image.
        """
        supported_types = (np.ndarray, Image.Image)
        for candidate in input_tensor_list:
            if isinstance(candidate, supported_types):
                continue
            # The first unsupported input aborts the call.
            raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(candidate)))
        return super().__call__(*input_tensor_list)

    def parse(self):
        """
        Placeholder that concrete image operations have to override.

        Raises:
            NotImplementedError: Always, when the method is not overridden.
        """
        # Subclasses define `def parse(self)`, so this must stay an instance method
        # and must not be turned into a staticmethod.
        raise NotImplementedError("ImageTensorOperation has to implement parse() method.")
102
103
class VideoTensorOperation(TensorOperation):
    """
    Common parent of the video transforms, which accept NumPy arrays only.
    """

    def __call__(self, *input_tensor_list):
        """
        Validate the type of every input, then hand the call over to the parent class.

        Args:
            input_tensor_list: Positional inputs, each a numpy.ndarray.

        Returns:
            The result of the parent ``__call__`` for the same inputs.

        Raises:
            TypeError: If any input is not a numpy.ndarray.
        """
        for candidate in input_tensor_list:
            if isinstance(candidate, np.ndarray):
                continue
            # The first unsupported input aborts the call.
            raise TypeError("Input should be ndarray, got {}.".format(type(candidate)))
        return super().__call__(*input_tensor_list)

    def parse(self):
        """
        Placeholder that concrete video operations have to override.

        Raises:
            NotImplementedError: Always, when the method is not overridden.
        """
        raise NotImplementedError("VideoTensorOperation has to implement parse() method.")
118
119
class AdjustBrightness(ImageTensorOperation, PyTensorOperation):
    """
    Change the brightness of the input image by a given factor.

    Ascend hardware acceleration is supported; call the `.device("Ascend")` method to enable it.

    Args:
        brightness_factor (float): Amount of brightness adjustment, which cannot be negative.
            ``0`` produces a black image, ``1`` leaves the image as it is,
            and ``2`` makes the image twice as bright.

    Raises:
        TypeError: If `brightness_factor` is not of type float.
        ValueError: If `brightness_factor` is less than 0.
        RuntimeError: If shape of the input image is not <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Run the transform as part of a dataset pipeline
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.AdjustBrightness(brightness_factor=2.0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Run the transform eagerly on a single image
        >>> data = np.random.randint(0, 256, (20, 20, 3)) / 255.0
        >>> data = data.astype(np.float32)
        >>> output = vision.AdjustBrightness(2.666)(data)
        >>> print(output.shape, output.dtype)
        (20, 20, 3) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_adjust_brightness
    def __init__(self, brightness_factor):
        super().__init__()
        # Constructor arguments are stored on the instance to support save() and load().
        self.brightness_factor = brightness_factor

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Choose the device on which this operator is executed.

        - On Ascend, the input shape should be limited from [4, 6] to [8192, 4096].

        Args:
            device_target (str, optional): Device that executes the operator. ``CPU`` and
                ``Ascend`` are currently supported. Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Run the transform as part of a dataset pipeline
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> transforms_list = [vision.AdjustBrightness(2.0).device("Ascend")]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Run the transform eagerly on a single image
            >>> data = np.random.randint(0, 256, (20, 20, 3)) / 255.0
            >>> data = data.astype(np.float32)
            >>> output = vision.AdjustBrightness(2.666).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (20, 20, 3) float32

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        self.device_target = device_target
        # Returning the instance allows chaining, e.g. AdjustBrightness(2.0).device("Ascend")(data).
        return self

    def parse(self):
        """
        Create the C++ operation from the stored factor and device target.
        """
        factor, target = self.brightness_factor, self.device_target
        return cde.AdjustBrightnessOperation(factor, target)

    def _execute_py(self, img):
        """
        Python implementation of the transform.

        Args:
            img (PIL Image): Image whose brightness is adjusted.

        Returns:
            PIL Image, the image with adjusted brightness.
        """
        factor = self.brightness_factor
        return util.adjust_brightness(img, factor)
232
233
class AdjustContrast(ImageTensorOperation, PyTensorOperation):
    """
    Change the contrast of the input image by a given factor.

    Ascend hardware acceleration is supported; call the `.device("Ascend")` method to enable it.

    Args:
        contrast_factor (float): Amount of contrast adjustment, which cannot be negative.
            ``0`` produces a solid gray image, ``1`` leaves the image as it is,
            and ``2`` doubles the contrast.

    Raises:
        TypeError: If `contrast_factor` is not of type float.
        ValueError: If `contrast_factor` is less than 0.
        RuntimeError: If shape of the input image is not <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Run the transform as part of a dataset pipeline
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.AdjustContrast(contrast_factor=2.0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Run the transform eagerly on a single image
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.AdjustContrast(2.0)(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_adjust_contrast
    def __init__(self, contrast_factor):
        super().__init__()
        # Constructor arguments are stored on the instance to support save() and load().
        self.contrast_factor = contrast_factor

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Choose the device on which this operator is executed.

        - On Ascend, the input shape should be limited from [4, 6] to [8192, 4096].

        Args:
            device_target (str, optional): Device that executes the operator. ``CPU`` and
                ``Ascend`` are currently supported. Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Run the transform as part of a dataset pipeline
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> transforms_list = [vision.AdjustContrast(0).device("Ascend")]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Run the transform eagerly on a single image
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.AdjustContrast(2.0).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        self.device_target = device_target
        # Returning the instance allows chaining, e.g. AdjustContrast(2.0).device("Ascend")(data).
        return self

    def parse(self):
        """
        Create the C++ operation from the stored factor and device target.
        """
        factor, target = self.contrast_factor, self.device_target
        return cde.AdjustContrastOperation(factor, target)

    def _execute_py(self, img):
        """
        Python implementation of the transform.

        Args:
            img (PIL Image): Image whose contrast is adjusted.

        Returns:
            PIL Image, the image with adjusted contrast.
        """
        factor = self.contrast_factor
        return util.adjust_contrast(img, factor)
344
345
class AdjustGamma(ImageTensorOperation, PyTensorOperation):
    r"""
    Apply gamma correction on input image. Input image is expected to be in <..., H, W, C> or <H, W> format.

    .. math::
        I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}

    See `Gamma Correction`_ for more details.

    .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction

    Args:
        gamma (float): Non negative real number.
            The output image pixel value is exponentially related to the input image pixel value.
            gamma larger than 1 make the shadows darker,
            while gamma smaller than 1 make dark regions lighter.
        gain (float, optional): The constant multiplier. Default: ``1.0``.

    Raises:
        TypeError: If `gain` is not of type float.
        TypeError: If `gamma` is not of type float.
        ValueError: If `gamma` is less than 0.
        RuntimeError: If given tensor shape is not <H, W> or <..., H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.AdjustGamma(gamma=10.0, gain=1.0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.AdjustGamma(gamma=0.1, gain=1.0)(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_adjust_gamma
    def __init__(self, gamma, gain=1.0):
        # The default of `gain` is the float 1.0 so that it agrees with the documented
        # type and default; it is numerically identical to the former integer 1.
        super().__init__()
        # Constructor arguments are stored on the instance to support save() and load().
        self.gamma = gamma
        self.gain = gain
        # NOTE(review): presumably marks the transform as non-random for the base
        # classes - confirm where this flag is read.
        self.random = False

    def parse(self):
        """
        Create the C++ operation from the stored gamma and gain.
        """
        return cde.AdjustGammaOperation(self.gamma, self.gain)

    def _execute_py(self, img):
        """
        Execute method.

        Args:
            img (PIL Image): Image to be gamma adjusted.

        Returns:
            PIL Image, gamma adjusted image.
        """
        return util.adjust_gamma(img, self.gamma, self.gain)
420
421
class AdjustHue(ImageTensorOperation, PyTensorOperation):
    """
    Shift the hue of the input image by a given amount.

    Ascend hardware acceleration is supported; call the `.device("Ascend")` method to enable it.

    Args:
        hue_factor (float): Value added to the hue channel,
            which has to lie in the range [-0.5, 0.5].

    Raises:
        TypeError: If `hue_factor` is not of type float.
        ValueError: If `hue_factor` is not in the interval [-0.5, 0.5].
        RuntimeError: If shape of the input image is not <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Run the transform as part of a dataset pipeline
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.AdjustHue(hue_factor=0.2)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Run the transform eagerly on a single image
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.AdjustHue(hue_factor=0.2)(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_adjust_hue
    def __init__(self, hue_factor):
        super().__init__()
        # Constructor arguments are stored on the instance to support save() and load().
        self.hue_factor = hue_factor

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Choose the device on which this operator is executed.

        - On Ascend, the input shape should be limited from [4, 6] to [8192, 4096].

        Args:
            device_target (str, optional): Device that executes the operator. ``CPU`` and
                ``Ascend`` are currently supported. Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Run the transform as part of a dataset pipeline
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> transforms_list = [vision.AdjustHue(0.5).device("Ascend")]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Run the transform eagerly on a single image
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.AdjustHue(hue_factor=0.2).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        self.device_target = device_target
        # Returning the instance allows chaining, e.g. AdjustHue(0.2).device("Ascend")(data).
        return self

    def parse(self):
        """
        Create the C++ operation from the stored factor and device target.
        """
        factor, target = self.hue_factor, self.device_target
        return cde.AdjustHueOperation(factor, target)

    def _execute_py(self, img):
        """
        Python implementation of the transform.

        Args:
            img (PIL Image): Image whose hue is adjusted.

        Returns:
            PIL Image, the image with adjusted hue.
        """
        factor = self.hue_factor
        return util.adjust_hue(img, factor)
531
532
class AdjustSaturation(ImageTensorOperation, PyTensorOperation):
    """
    Change the saturation of the input image by a given factor.

    Ascend hardware acceleration is supported; call the `.device("Ascend")` method to enable it.

    Args:
        saturation_factor (float): Amount of saturation adjustment, which cannot be negative.
            ``0`` produces a grayscale (black and white) image, ``1`` leaves the image as it is,
            and ``2`` doubles the saturation.

    Raises:
        TypeError: If `saturation_factor` is not of type float.
        ValueError: If `saturation_factor` is less than 0.
        RuntimeError: If shape of the input image is not <H, W, C>.
        RuntimeError: If channel of the input image is not 3.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Run the transform as part of a dataset pipeline
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.AdjustSaturation(saturation_factor=2.0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Run the transform eagerly on a single image
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.AdjustSaturation(saturation_factor=2.0)(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_adjust_saturation
    def __init__(self, saturation_factor):
        super().__init__()
        # Constructor arguments are stored on the instance to support save() and load().
        self.saturation_factor = saturation_factor

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Choose the device on which this operator is executed.

        - On Ascend, the input shape should be limited from [4, 6] to [8192, 4096].

        Args:
            device_target (str, optional): Device that executes the operator. ``CPU`` and
                ``Ascend`` are currently supported. Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Run the transform as part of a dataset pipeline
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> transforms_list = [vision.AdjustSaturation(2.0).device("Ascend")]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Run the transform eagerly on a single image
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.AdjustSaturation(saturation_factor=2.0).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        self.device_target = device_target
        # Returning the instance allows chaining, e.g. AdjustSaturation(2.0).device("Ascend")(data).
        return self

    def parse(self):
        """
        Create the C++ operation from the stored factor and device target.
        """
        factor, target = self.saturation_factor, self.device_target
        return cde.AdjustSaturationOperation(factor, target)

    def _execute_py(self, img):
        """
        Python implementation of the transform.

        Args:
            img (PIL Image): Image whose saturation is adjusted.

        Returns:
            PIL Image, the image with adjusted saturation.
        """
        factor = self.saturation_factor
        return util.adjust_saturation(img, factor)
644
645
class AdjustSharpness(ImageTensorOperation):
    """
    Change the sharpness of the input image by a given factor.

    Ascend hardware acceleration is supported; call the `.device("Ascend")` method to enable it.

    Args:
        sharpness_factor (float): Amount of sharpness adjustment, which cannot be
            negative. ``0`` produces a blurred image, ``1`` leaves the image
            as it is, and ``2`` doubles the sharpness.

    Raises:
        TypeError: If `sharpness_factor` is not of type float.
        ValueError: If `sharpness_factor` is less than 0.
        RuntimeError: If shape of the input image is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Run the transform as part of a dataset pipeline
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.AdjustSharpness(sharpness_factor=2.0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Run the transform eagerly on a single image
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((3, 4))
        >>> output = vision.AdjustSharpness(sharpness_factor=0)(data)
        >>> print(output.shape, output.dtype)
        (3, 4) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_adjust_sharpness
    def __init__(self, sharpness_factor):
        super().__init__()
        # This class defines no _execute_py, so the C++ implementation is selected explicitly.
        self.implementation = Implementation.C
        # Constructor arguments are stored on the instance to support save() and load().
        self.sharpness_factor = sharpness_factor

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Choose the device on which this operator is executed.

        - On Ascend, the input type has to be `uint8` or `float32` and the input channel has to be 1 or 3.
          The input data has a height limit of [4, 8192] and a width limit of [6, 4096].

        Args:
            device_target (str, optional): Device that executes the operator. ``CPU`` and
                ``Ascend`` are currently supported. Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Run the transform as part of a dataset pipeline
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> transforms_list = [vision.AdjustSharpness(sharpness_factor=2.0).device("Ascend")]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Run the transform eagerly on a single image
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.AdjustSharpness(sharpness_factor=0).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        self.device_target = device_target
        # Returning the instance allows chaining, e.g. AdjustSharpness(2.0).device("Ascend")(data).
        return self

    def parse(self):
        """
        Create the C++ operation from the stored factor and device target.
        """
        factor, target = self.sharpness_factor, self.device_target
        return cde.AdjustSharpnessOperation(factor, target)
748
749
class Affine(ImageTensorOperation):
    """
    Apply Affine transformation to the input image, keeping the center of the image unchanged.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        degrees (float): Rotation angle in degrees between -180 and 180, clockwise direction.
        translate (Sequence[float, float]): The horizontal and vertical translations, must be a sequence of size 2
            and value between -1 and 1.
        scale (float): Scaling factor, which must be positive.
        shear (Union[float, Sequence[float, float]]): Shear angle value in degrees between -180 to 180.
            If float is provided, shear along the x axis with this value, without shearing along the y axis;
            If Sequence[float, float] is provided, shear along the x axis and y axis with these two values separately.
        resample (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.NEAREST``.
        fill_value (Union[int, tuple[int, int, int]], optional): Value used to fill the area outside the
            transform in the output image. If it is an int, it is used for all three channels. If it is a tuple,
            it must have three elements. Each value must be in range [0, 255]. Default: ``0``.

    Raises:
        TypeError: If `degrees` is not of type float.
        TypeError: If `translate` is not of type Sequence[float, float].
        TypeError: If `scale` is not of type float.
        ValueError: If `scale` is non positive.
        TypeError: If `shear` is not of float or Sequence[float, float].
        TypeError: If `resample` is not of type :class:`~.vision.Inter` .
        TypeError: If `fill_value` is not of type int or tuple[int, int, int].
        RuntimeError: If shape of the input image is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> affine_op = vision.Affine(degrees=15, translate=[0.2, 0.2], scale=1.1, shear=[1.0, 1.0],
        ...                           resample=Inter.BILINEAR)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[affine_op], input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.Affine(degrees=15, translate=[0.2, 0.2], scale=1.1,
        ...                        shear=[1.0, 1.0], resample=Inter.BILINEAR)(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_affine
    def __init__(self, degrees, translate, scale, shear, resample=Inter.NEAREST, fill_value=0):
        super().__init__()
        # Normalize the scalar shorthand forms into the sequence forms stored on the operator.
        if isinstance(shear, numbers.Number):
            # A single number shears along the x axis only.
            shear = (shear, 0.)

        if isinstance(fill_value, numbers.Number):
            # A single number is repeated for all three channels.
            fill_value = (fill_value, fill_value, fill_value)

        self.degrees = degrees
        self.translate = translate
        # NOTE: the scaling factor is stored as `scale_`, which is the attribute parse() reads.
        self.scale_ = scale
        self.shear = shear
        self.resample = resample
        self.fill_value = fill_value
        self.implementation = Implementation.C

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input shape should be limited from [4, 6] to [32768, 32768].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            Affine, the operator itself, so that the call can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].
            RuntimeError: If `device_target` is ``Ascend`` and `resample` is neither ``Inter.BILINEAR``
                nor ``Inter.NEAREST`` . In this case the previously configured device is kept.

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>> from mindspore.dataset.vision import Inter
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> affine_op = vision.Affine(degrees=15, translate=[0.2, 0.2], scale=1.1,
            ...                           shear=[1.0, 1.0], resample=Inter.BILINEAR).device("Ascend")
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[affine_op], input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.Affine(degrees=15, translate=[0.2, 0.2], scale=1.1,
            ...                        shear=[1.0, 1.0], resample=Inter.BILINEAR).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Validate before touching any state, so that a rejected request does not leave the operator
        # configured for Ascend together with an interpolation mode it cannot run there.
        if device_target == "Ascend" and self.resample not in (Inter.BILINEAR, Inter.NEAREST):
            raise RuntimeError("Invalid interpolation mode, only support BILINEAR and NEAREST.")
        self.device_target = device_target
        return self

    def parse(self):
        """
        Build the backend operation for this transform.

        Returns:
            cde.AffineOperation, created from the stored arguments, with `resample` converted by
            `Inter.to_c_type` .
        """
        return cde.AffineOperation(self.degrees, self.translate, self.scale_, self.shear,
                                   Inter.to_c_type(self.resample), self.fill_value, self.device_target)
884
885
class AutoAugment(ImageTensorOperation):
    """
    Augment the input image with the AutoAugment method described in
    `AutoAugment: Learning Augmentation Strategies from Data <https://arxiv.org/pdf/1805.09501.pdf>`_ .
    Only 3-channel RGB images are supported.

    Args:
        policy (AutoAugmentPolicy, optional): AutoAugment policy learned on a specific dataset.
            Randomly apply 2 operations from a candidate set. See auto augmentation details in AutoAugmentPolicy.
            It can be ``AutoAugmentPolicy.IMAGENET``, ``AutoAugmentPolicy.CIFAR10``, ``AutoAugmentPolicy.SVHN``.
            Default: ``AutoAugmentPolicy.IMAGENET``.

            - ``AutoAugmentPolicy.IMAGENET``, means to apply AutoAugment learned on ImageNet dataset.

            - ``AutoAugmentPolicy.CIFAR10``, means to apply AutoAugment learned on Cifar10 dataset.

            - ``AutoAugmentPolicy.SVHN``, means to apply AutoAugment learned on SVHN dataset.

        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.NEAREST``.
        fill_value (Union[int, tuple[int]], optional): Pixel value used to fill the area outside the
            transformed image. An integer is applied to all RGB channels, while a 3-tuple supplies the
            values for the R, G and B channels respectively. Every value must be in range [0, 255].
            Default: ``0``.

    Raises:
        TypeError: If `policy` is not of type :class:`mindspore.dataset.vision.AutoAugmentPolicy` .
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        TypeError: If `fill_value` is not an integer or a tuple of length 3.
        RuntimeError: If given tensor shape is not <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import AutoAugmentPolicy, Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> transforms_list = [vision.AutoAugment(policy=AutoAugmentPolicy.IMAGENET,
        ...                                       interpolation=Inter.NEAREST,
        ...                                       fill_value=0)]
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.AutoAugment()(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_auto_augment
    def __init__(self, policy=AutoAugmentPolicy.IMAGENET, interpolation=Inter.NEAREST, fill_value=0):
        super().__init__()
        self.implementation = Implementation.C
        self.policy = policy
        self.interpolation = interpolation
        # A single integer is expanded to one value per RGB channel; any other value is stored as given.
        self.fill_value = (fill_value,) * 3 if isinstance(fill_value, int) else fill_value

    def parse(self):
        """
        Build the backend operation for this transform.

        Returns:
            cde.AutoAugmentOperation, created from the policy and interpolation converted by their
            `to_c_type` helpers, together with the stored fill value.
        """
        c_policy = AutoAugmentPolicy.to_c_type(self.policy)
        c_interpolation = Inter.to_c_type(self.interpolation)
        return cde.AutoAugmentOperation(c_policy, c_interpolation, self.fill_value)
962
963
class AutoContrast(ImageTensorOperation, PyTensorOperation):
    """
    Apply automatic contrast on input image. This operation calculates the histogram of the image, reassigns
    the `cutoff` percent of the lightest pixels from the histogram to 255, and reassigns the `cutoff` percent
    of the darkest pixels from the histogram to 0.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        cutoff (float, optional): Percent of lightest and darkest pixels to cut off from
            the histogram of input image. The value must be in the range [0.0, 50.0). Default: ``0.0``.
        ignore (Union[int, sequence], optional): The background pixel values to ignore.
            The ignore values must be in range [0, 255]. Default: ``None``, no pixel value is ignored.

    Raises:
        TypeError: If `cutoff` is not of type float.
        TypeError: If `ignore` is not of type int or sequence.
        ValueError: If `cutoff` is not in range [0.0, 50.0).
        ValueError: If `ignore` is not in range [0, 255].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.AutoContrast(cutoff=10.0, ignore=[10, 20])]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.AutoContrast(cutoff=10.0, ignore=[10, 20])(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_auto_contrast
    def __init__(self, cutoff=0.0, ignore=None):
        super().__init__()
        # Normalize `ignore` to a list: None becomes an empty list, a single int becomes a one-element list.
        if ignore is None:
            ignore = []
        elif isinstance(ignore, int):
            ignore = [ignore]
        self.cutoff = cutoff
        self.ignore = ignore
        self.random = False

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type supports `uint8` or `float32` , input channel supports 1 and 3.
          If the data type is float32, the expected input value is in the range [0, 1].
          The input data has a height limit of [4, 8192] and a width limit of [6, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            AutoContrast, the operator itself, so that the call can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> transforms_list = [vision.AutoContrast(cutoff=10.0, ignore=[10, 20]).device("Ascend")]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.AutoContrast(cutoff=10.0, ignore=[10, 20]).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        self.device_target = device_target
        return self

    def parse(self):
        """
        Build the backend operation for this transform.

        Returns:
            cde.AutoContrastOperation, created from the stored cutoff, ignore list and device target.
        """
        return cde.AutoContrastOperation(self.cutoff, self.ignore, self.device_target)

    def _execute_py(self, img):
        """
        Execute method.

        Args:
            img (PIL Image): Image to be automatically contrasted.

        Returns:
            PIL Image, automatically contrasted image.
        """
        return util.auto_contrast(img, self.cutoff, self.ignore)
1086
1087
class BoundingBoxAugment(ImageTensorOperation):
    """
    Apply a given image processing operation on a random selection of bounding box regions of a given image.

    Args:
        transform (TensorOperation): Transformation operation to be applied on random selection
            of bounding box regions of a given image.
        ratio (float, optional): Ratio of bounding boxes to apply augmentation on.
            Range: [0.0, 1.0]. Default: ``0.3``.

    Raises:
        TypeError: If `transform` is not an image processing operation in `mindspore.dataset.vision` .
        TypeError: If `ratio` is not of type float.
        ValueError: If `ratio` is not in range [0.0, 1.0].
        RuntimeError: If given bounding box is invalid.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.transforms as transforms
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32))
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func],
        ...                                                 input_columns=["image"],
        ...                                                 output_columns=["image", "bbox"])
        >>> # set bounding box operation with ratio of 1 to apply rotation on all bounding boxes
        >>> bbox_aug_op = vision.BoundingBoxAugment(vision.RandomRotation(90), 1)
        >>> # map to apply ops
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[bbox_aug_op],
        ...                                                 input_columns=["image", "bbox"],
        ...                                                 output_columns=["image", "bbox"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["bbox"].shape, item["bbox"].dtype)
        ...     break
        (100, 100, 3) float32
        (1, 4) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((3, 4))
        >>> data = data.astype(np.float32)
        >>> func = lambda img, bboxes: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(bboxes.dtype))
        >>> func_data, func_bboxes = func(data, data)
        >>> output = vision.BoundingBoxAugment(transforms.Fill(100), 1.0)(func_data, func_bboxes)
        >>> print(output[0].shape, output[0].dtype)
        (3, 4) float32
        >>> print(output[1].shape, output[1].dtype)
        (1, 4) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_bounding_box_augment_cpp
    def __init__(self, transform, ratio=0.3):
        super().__init__()
        self.ratio = ratio
        self.transform = transform
        self.implementation = Implementation.C

    def parse(self):
        """
        Build the backend operation for this transform.

        Returns:
            cde.BoundingBoxAugmentOperation, created from the wrapped transform and the stored ratio.
        """
        transform = self.transform
        # A wrapped transform that exposes parse() is converted to its parsed form first;
        # anything else is handed over unchanged.
        if transform and getattr(transform, 'parse', None):
            transform = transform.parse()
        return cde.BoundingBoxAugmentOperation(transform, self.ratio)
1161
1162
class CenterCrop(ImageTensorOperation, PyTensorOperation):
    """
    Crop the central region of the input image to the given size. When the input image is smaller than the
    requested output size, it is padded with 0 before being cropped.

    Args:
        size (Union[int, sequence]): The output size of the cropped image.
            An integer yields a square crop of size (size, size).
            A sequence of length 2 is interpreted as (height, width).
            The size value(s) must be larger than 0.

    Raises:
        TypeError: If `size` is not of type integer or sequence.
        ValueError: If `size` is less than or equal to 0.
        RuntimeError: If given tensor shape is not <H, W> or <..., H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>>
        >>> # crop image to a square
        >>> transforms_list1 = [vision.CenterCrop(50)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list1, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (50, 50, 3) uint8
        >>>
        >>> # crop image to portrait style
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list2 = [vision.CenterCrop((60, 40))]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list2, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (60, 40, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.CenterCrop(1)(data)
        >>> print(output.shape, output.dtype)
        (1, 1, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_center_crop
    def __init__(self, size):
        super().__init__()
        # An integer size describes a square crop and is expanded to a (size, size) pair.
        self.size = (size, size) if isinstance(size, int) else size
        self.random = False

    def parse(self):
        """
        Build the backend operation for this transform.

        Returns:
            cde.CenterCropOperation, created from the stored output size.
        """
        crop_size = self.size
        return cde.CenterCropOperation(crop_size)

    def _execute_py(self, img):
        """
        Center crop the image through the Python implementation.

        Args:
            img (PIL Image): Image to be center cropped.

        Returns:
            PIL Image, cropped image.
        """
        crop_size = self.size
        return util.center_crop(img, crop_size)
1241
1242
class ConvertColor(ImageTensorOperation):
    """
    Convert the image from one color space to another.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        convert_mode (ConvertMode): The mode of image channel conversion.

            - ``ConvertMode.COLOR_BGR2BGRA``, convert BGR image to BGRA image.

            - ``ConvertMode.COLOR_RGB2RGBA``, convert RGB image to RGBA image.

            - ``ConvertMode.COLOR_BGRA2BGR``, convert BGRA image to BGR image.

            - ``ConvertMode.COLOR_RGBA2RGB``, convert RGBA image to RGB image.

            - ``ConvertMode.COLOR_BGR2RGBA``, convert BGR image to RGBA image.

            - ``ConvertMode.COLOR_RGB2BGRA``, convert RGB image to BGRA image.

            - ``ConvertMode.COLOR_RGBA2BGR``, convert RGBA image to BGR image.

            - ``ConvertMode.COLOR_BGRA2RGB``, convert BGRA image to RGB image.

            - ``ConvertMode.COLOR_BGR2RGB``, convert BGR image to RGB image.

            - ``ConvertMode.COLOR_RGB2BGR``, convert RGB image to BGR image.

            - ``ConvertMode.COLOR_BGRA2RGBA``, convert BGRA image to RGBA image.

            - ``ConvertMode.COLOR_RGBA2BGRA``, convert RGBA image to BGRA image.

            - ``ConvertMode.COLOR_BGR2GRAY``, convert BGR image to GRAY image.

            - ``ConvertMode.COLOR_RGB2GRAY``, convert RGB image to GRAY image.

            - ``ConvertMode.COLOR_GRAY2BGR``, convert GRAY image to BGR image.

            - ``ConvertMode.COLOR_GRAY2RGB``, convert GRAY image to RGB image.

            - ``ConvertMode.COLOR_GRAY2BGRA``, convert GRAY image to BGRA image.

            - ``ConvertMode.COLOR_GRAY2RGBA``, convert GRAY image to RGBA image.

            - ``ConvertMode.COLOR_BGRA2GRAY``, convert BGRA image to GRAY image.

            - ``ConvertMode.COLOR_RGBA2GRAY``, convert RGBA image to GRAY image.

    Raises:
        TypeError: If `convert_mode` is not of type :class:`mindspore.dataset.vision.ConvertMode` .
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>>
        >>> # Convert RGB images to GRAY images
        >>> convert_op = vision.ConvertColor(vision.ConvertMode.COLOR_RGB2GRAY)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=convert_op, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100) uint8
        >>> # Convert RGB images to BGR images
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> convert_op = vision.ConvertColor(vision.ConvertMode.COLOR_RGB2BGR)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=convert_op, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.ConvertColor(vision.ConvertMode.COLOR_RGB2GRAY)(data)
        >>> print(output.shape, output.dtype)
        (100, 100) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_convert_color
    def __init__(self, convert_mode):
        super().__init__()
        # This operator is always dispatched through the C implementation.
        self.implementation = Implementation.C
        self.convert_mode = convert_mode

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Choose the device on which this operator will be executed.

        - When the device is Ascend, input type only supports `uint8` , input channel supports 1 and 3.
          The input data has a height limit of [4, 8192] and a width limit of [6, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            ConvertColor, the operator itself, so that the call can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> transforms_list = [vision.ConvertColor(vision.ConvertMode.COLOR_RGB2BGR).device("Ascend")]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.ConvertColor(vision.ConvertMode.COLOR_RGB2BGR).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        self.device_target = device_target
        return self

    def parse(self):
        """
        Build the backend operation for this transform.

        Returns:
            cde.ConvertColorOperation, created from the conversion mode converted by
            `ConvertMode.to_c_type` and the stored device target.
        """
        c_mode = ConvertMode.to_c_type(self.convert_mode)
        return cde.ConvertColorOperation(c_mode, self.device_target)
1390
1391
class Crop(ImageTensorOperation):
    """
    Crop the input image at a specific location.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        coordinates (sequence): Coordinates of the upper left corner of the cropping image. Must be a sequence of
            two values, in the form of (top, left).
        size (Union[int, sequence]): The output size of the cropped image.
            If size is an integer, a square crop of size (size, size) is returned.
            If size is a sequence of length 2, it should be (height, width).
            The size value(s) must be larger than 0.

    Raises:
        TypeError: If `coordinates` is not of type sequence.
        TypeError: If `size` is not of type integer or sequence.
        ValueError: If `coordinates` is less than 0.
        ValueError: If `size` is less than or equal to 0.
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> crop_op = vision.Crop((0, 0), 32)
        >>> transforms_list = [crop_op]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (32, 32, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.Crop((0, 0), 1)(data)
        >>> print(output.shape, output.dtype)
        (1, 1, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_crop
    def __init__(self, coordinates, size):
        super().__init__()
        self.implementation = Implementation.C
        self.coordinates = coordinates
        # A single integer stands for a square crop, so it is expanded to (size, size).
        self.size = (size, size) if isinstance(size, int) else size

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input/output shape should be limited from [4, 6] to [32768, 32768].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            Crop, the operator itself with `device_target` stored, so that calls can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> crop_op = vision.Crop((0, 0), (100, 75)).device("Ascend")
            >>> transforms_list = [crop_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 75, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.Crop((0, 0), 64).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (64, 64, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Only record the target; parse() hands it to the C++ operation.
        self.device_target = device_target
        return self

    def parse(self):
        """Return the `cde.CropOperation` built from the stored coordinates, size and device target."""
        top_left, crop_size = self.coordinates, self.size
        return cde.CropOperation(top_left, crop_size, self.device_target)
1501
1502
class CutMixBatch(ImageTensorOperation):
    """
    Apply CutMix transformation on input batch of images and labels.
    Note that you need to make labels into one-hot format and batched before calling this operation.

    Args:
        image_batch_format (ImageBatchFormat): The layout of the input image batch. Can be any of
            [ImageBatchFormat.NHWC, ImageBatchFormat.NCHW].
        alpha (float, optional): Hyperparameter of beta distribution, must be larger than 0. Default: ``1.0``.
        prob (float, optional): The probability by which CutMix is applied to each image,
            which must be in range: [0.0, 1.0]. Default: ``1.0``.

    Raises:
        TypeError: If `image_batch_format` is not of type :class:`mindspore.dataset.vision.ImageBatchFormat` .
        TypeError: If `alpha` is not of type float.
        TypeError: If `prob` is not of type float.
        ValueError: If `alpha` is less than or equal to 0.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.transforms as transforms
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import ImageBatchFormat
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(28, 28, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(
        ...     operations=lambda img: (data, np.random.randint(0, 5, (3, 1))),
        ...     input_columns=["image"],
        ...     output_columns=["image", "label"])
        >>> onehot_op = transforms.OneHot(num_classes=10)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=onehot_op, input_columns=["label"])
        >>> cutmix_batch_op = vision.CutMixBatch(ImageBatchFormat.NHWC, 1.0, 0.5)
        >>> numpy_slices_dataset = numpy_slices_dataset.batch(5)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=cutmix_batch_op,
        ...                                                 input_columns=["image", "label"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["label"].shape, item["label"].dtype)
        ...     break
        (5, 28, 28, 3) uint8
        (5, 3, 10) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, (3, 3, 10, 10)).astype(np.uint8)
        >>> label = np.array([[0, 1], [1, 0], [1, 0]])
        >>> output = vision.CutMixBatch(vision.ImageBatchFormat.NCHW, 1.0, 1.0)(data, label)
        >>> print(output[0].shape, output[0].dtype)
        (3, 3, 10, 10) uint8
        >>> print(output[1].shape, output[1].dtype)
        (3, 2) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_cut_mix_batch_c
    def __init__(self, image_batch_format, alpha=1.0, prob=1.0):
        super().__init__()
        self.implementation = Implementation.C
        # Only the enum's underlying value is stored; parse() converts it to the C++ type.
        self.image_batch_format = image_batch_format.value
        self.alpha, self.prob = alpha, prob

    def parse(self):
        """Return the `cde.CutMixBatchOperation` built from the stored batch format, alpha and prob."""
        c_batch_format = ImageBatchFormat.to_c_type(self.image_batch_format)
        return cde.CutMixBatchOperation(c_batch_format, self.alpha, self.prob)
1577
1578
class CutOut(ImageTensorOperation):
    """
    Randomly cut (mask) out a given number of square patches from the input image array.

    Args:
        length (int): The side length of each square patch, must be larger than 0.
        num_patches (int, optional): Number of patches to be cut out of an image, must be larger than 0.
            Default: ``1``.
        is_hwc (bool, optional): Whether the input image is in HWC format.
            ``True`` - HWC format, ``False`` - CHW format. Default: ``True``.

    Raises:
        TypeError: If `length` is not of type integer.
        TypeError: If `is_hwc` is not of type bool.
        TypeError: If `num_patches` is not of type integer.
        ValueError: If `length` is less than or equal to 0.
        ValueError: If `num_patches` is less than or equal to 0.
        RuntimeError: If given tensor shape is not <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.CutOut(80, num_patches=10)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.CutOut(20)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_cutout_new
    def __init__(self, length, num_patches=1, is_hwc=True):
        super().__init__()
        self.implementation = Implementation.C
        self.random = False
        # Constructor arguments are kept as attributes, as this module requires for save()/load().
        self.length, self.num_patches, self.is_hwc = length, num_patches, is_hwc

    def parse(self):
        """Return the `cde.CutOutOperation` built from the stored length, patch count and layout flag."""
        operation_args = (self.length, self.num_patches, self.is_hwc)
        return cde.CutOutOperation(*operation_args)
1637
1638
class Decode(ImageTensorOperation, PyTensorOperation):
    """
    Decode the input image in RGB mode.
    Supported image formats: JPEG, BMP, PNG, TIFF, GIF(need `to_pil=True` ), WEBP(need `to_pil=True` ).

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        to_pil (bool, optional): Whether to decode the image to the PIL data type. If ``True``,
            the image will be decoded to the PIL data type, otherwise it will be decoded to the
            NumPy data type. Default: ``False``.

    Raises:
        RuntimeError: If given tensor is not a 1D sequence.
        RuntimeError: If the input is not raw image bytes.
        RuntimeError: If the input image is already decoded.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import os
        >>> import numpy as np
        >>> from PIL import Image, ImageDraw
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> class MyDataset:
        ...     def __init__(self):
        ...         self.data = []
        ...         img = Image.new("RGB", (300, 300), (255, 255, 255))
        ...         draw = ImageDraw.Draw(img)
        ...         draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5)
        ...         img.save("./1.jpg")
        ...         data = np.fromfile("./1.jpg", np.uint8)
        ...         self.data.append(data)
        ...
        ...     def __getitem__(self, index):
        ...         return self.data[0]
        ...
        ...     def __len__(self):
        ...         return 5
        >>>
        >>> my_dataset = MyDataset()
        >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image")
        >>> transforms_list = [vision.Decode(), vision.RandomHorizontalFlip()]
        >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (300, 300, 3) uint8
        >>> os.remove("./1.jpg")
        >>>
        >>> # Use the transform in eager mode
        >>> img = Image.new("RGB", (300, 300), (255, 255, 255))
        >>> draw = ImageDraw.Draw(img)
        >>> draw.polygon([(50, 50), (150, 50), (100, 150)], fill=(0, 255, 0), outline=(0, 255, 0))
        >>> img.save("./2.jpg")
        >>> data = np.fromfile("./2.jpg", np.uint8)
        >>> output = vision.Decode()(data)
        >>> print(output.shape, output.dtype)
        (300, 300, 3) uint8
        >>> os.remove("./2.jpg")

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_decode
    def __init__(self, to_pil=False):
        super().__init__()
        self.to_pil = to_pil
        # Decoding to PIL uses the Python implementation; otherwise the C++ one is used.
        self.implementation = Implementation.PY if to_pil else Implementation.C

    def __call__(self, img):
        """
        Validate and normalize the encoded input for eager mode, then run the operation.

        Args:
            img (Union[bytes, numpy.ndarray]): Encoded image. A `bytes` object is viewed as a 1D uint8 array.

        Returns:
            The result of the parent class `__call__` on the validated 1D array.

        Raises:
            TypeError: If `img` is neither bytes nor numpy.ndarray.
            TypeError: If the array holds string data.
            TypeError: If the array is not one-dimensional.
        """
        encoded = np.frombuffer(img, dtype=np.uint8) if isinstance(img, bytes) else img
        if not isinstance(encoded, np.ndarray):
            raise TypeError("The type of the encoded image should be {0}, but got {1}.".format(np.ndarray,
                                                                                                  type(encoded)))
        if encoded.dtype.type is np.str_:
            raise TypeError("The data type of the encoded image can not be {}.".format(encoded.dtype.type))
        if encoded.ndim != 1:
            raise TypeError("The number of array dimensions of the encoded image should be 1, "
                            "but got {0}.".format(encoded.ndim))
        return super().__call__(encoded)

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            Decode, the operator itself with `device_target` stored, so that calls can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].
            ValueError: If `device_target` is ``Ascend`` while the operator was created with `to_pil=True` .

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import os
            >>> import numpy as np
            >>> from PIL import Image, ImageDraw
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>> from mindspore.dataset.vision import Inter
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> class MyDataset:
            ...     def __init__(self):
            ...         self.data = []
            ...         img = Image.new("RGB", (300, 300), (255, 255, 255))
            ...         draw = ImageDraw.Draw(img)
            ...         draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5)
            ...         img.save("./1.jpg")
            ...         data = np.fromfile("./1.jpg", np.uint8)
            ...         self.data.append(data)
            ...
            ...     def __getitem__(self, index):
            ...         return self.data[0]
            ...
            ...     def __len__(self):
            ...         return 5
            >>>
            >>> my_dataset = MyDataset()
            >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image")
            >>> decode_op = vision.Decode().device("Ascend")
            >>> resize_op = vision.Resize([100, 75], Inter.BICUBIC)
            >>> transforms_list = [decode_op, resize_op]
            >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 75, 3) uint8
            >>> os.remove("./1.jpg")
            >>>
            >>> # Use the transform in eager mode
            >>> img = Image.new("RGB", (300, 300), (255, 255, 255))
            >>> draw = ImageDraw.Draw(img)
            >>> draw.polygon([(50, 50), (150, 50), (100, 150)], fill=(0, 255, 0), outline=(0, 255, 0))
            >>> img.save("./2.jpg")
            >>> data = np.fromfile("./2.jpg", np.uint8)
            >>> output = vision.Decode().device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (300, 300, 3) uint8
            >>> os.remove("./2.jpg")

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # The Python (PIL) implementation cannot be dispatched to Ascend, so reject that combination early.
        if device_target == "Ascend" and self.implementation == Implementation.PY:
            raise ValueError("The transform \"Decode(to_pil=True)\" cannot be performed on Ascend device, "
                             "please set \"to_pil=False\".")

        self.device_target = device_target
        return self

    def parse(self):
        """Return the `cde.DecodeOperation` for the stored device target."""
        # NOTE(review): the literal True is presumably the RGB flag of the C++ binding - confirm.
        return cde.DecodeOperation(True, self.device_target)

    def _execute_py(self, img):
        """
        Decode the image through the Python implementation.

        Args:
            img (NumPy): Image to be decoded.

        Returns:
            img (NumPy, PIL Image), Decoded image.
        """
        decoded = util.decode(img)
        return decoded
1822
1823
class DecodeVideo(VideoTensorOperation):
    """
    Decode the input raw video bytes.

    Supported video formats: AVI, H264, H265, MOV, MP4, WMV.

    Raises:
        RuntimeError: If the input ndarray is not 1D array.
        RuntimeError: If data type of the elements is not uint8.
        RuntimeError: If the input ndarray is empty.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> # Custom class to generate and read video dataset
        >>> class VideoDataset:
        ...     def __init__(self, file_list):
        ...         self.file_list = file_list
        ...
        ...     def __getitem__(self, index):
        ...         filename = self.file_list[index]
        ...         return np.fromfile(filename, np.uint8)
        ...
        ...     def __len__(self):
        ...         return len(self.file_list)
        >>>
        >>> dataset = ds.GeneratorDataset(VideoDataset(["/path/to/video/file"]), ["data"])
        >>> decode_video = vision.DecodeVideo()
        >>> dataset = dataset.map(operations=[decode_video], input_columns=["data"], output_columns=["video", "audio"])
        >>>
        >>> # Use the transform in eager mode
        >>> filename = "/path/to/video/file"
        >>> raw_ndarray = np.fromfile(filename, np.uint8)
        >>> mindspore_output = vision.DecodeVideo()(raw_ndarray)
    """

    def __init__(self):
        # The operator takes no arguments; it always uses the C++ implementation.
        super().__init__()
        self.implementation = Implementation.C

    def parse(self):
        """Return a new `cde.DecodeVideoOperation`."""
        return cde.DecodeVideoOperation()
1872
1873
class Equalize(ImageTensorOperation, PyTensorOperation):
    """
    Apply histogram equalization on input image.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Raises:
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.Equalize()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.Equalize()(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): presumably flags the operation as deterministic for the base class - confirm.
        self.random = False

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type only supports `uint8` , input channel supports 1 and 3.
          The input data has a height limit of [4, 8192] and a width limit of [6, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            Equalize, the operator itself with `device_target` stored, so that calls can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> transforms_list = [vision.Equalize().device("Ascend")]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.Equalize().device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # The target is only recorded here; parse() forwards it to the C++ operation.
        self.device_target = device_target
        return self

    def parse(self):
        """Return the `cde.EqualizeOperation` for the stored device target."""
        return cde.EqualizeOperation(self.device_target)

    def _execute_py(self, img):
        """
        Execute method.

        Args:
            img (PIL Image): Image to be equalized.

        Returns:
            PIL Image, equalized image.
        """

        return util.equalize(img)
1978
1979
class Erase(ImageTensorOperation):
    """
    Erase the input image with given value.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        top (int): Vertical ordinate of the upper left corner of erased region.
        left (int): Horizontal ordinate of the upper left corner of erased region.
        height (int): Height of erased region.
        width (int): Width of erased region.
        value (Union[float, Sequence[float, float, float]], optional): Pixel value used to pad the erased area.
            Default: ``0``. If float is provided, it will be used for all RGB channels.
            If Sequence[float, float, float] is provided, it will be used for R, G, B channels respectively.
        inplace (bool, optional): Whether to apply erasing inplace. Default: ``False``.

    Raises:
        TypeError: If `top` is not of type int.
        ValueError: If `top` is negative.
        TypeError: If `left` is not of type int.
        ValueError: If `left` is negative.
        TypeError: If `height` is not of type int.
        ValueError: If `height` is not positive.
        TypeError: If `width` is not of type int.
        ValueError: If `width` is not positive.
        TypeError: If `value` is not of type float or Sequence[float, float, float].
        ValueError: If `value` is not in range of [0, 255].
        TypeError: If `inplace` is not of type bool.
        RuntimeError: If shape of the input image is not <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.Erase(10, 10, 10, 10)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.Erase(10, 10, 10, 10)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_erase
    def __init__(self, top, left, height, width, value=0, inplace=False):
        super().__init__()
        self.top, self.left = top, left
        self.height, self.width = height, width
        # A scalar fill value is wrapped into a one-element tuple so `value` is always a sequence.
        self.value = (value,) if isinstance(value, (int, float)) else value
        self.inplace = inplace

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type supports `uint8` or `float32` , input channel supports 1 and 3.
          The input data has a height limit of [4, 8192] and a width limit of [6, 4096].
          The inplace parameter is not supported.

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            Erase, the operator itself with `device_target` stored, so that calls can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> transforms_list = [vision.Erase(10, 10, 10, 10, (100, 100, 100)).device("Ascend")]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.Erase(10, 10, 10, 10, (100, 100, 100)).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Only record the target; parse() hands it to the C++ operation.
        self.device_target = device_target
        return self

    def parse(self):
        """Return the `cde.EraseOperation` built from the stored region, fill value, inplace flag and device."""
        region = (self.top, self.left, self.height, self.width)
        return cde.EraseOperation(*region, self.value, self.inplace, self.device_target)
2102
2103
class FiveCrop(PyTensorOperation):
    """
    Crop the given image into five patches: the four corners and one central crop.

    Args:
        size (Union[int, Sequence[int, int]]): The size of the cropped image.
            If a single integer is provided, a square of size (size, size) will be cropped with this value.
            If a Sequence of length 2 is provided, an image of size (height, width) will be cropped.

    Raises:
        TypeError: If `size` is not of type integer or Sequence of integer.
        ValueError: If `size` is not positive.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import os
        >>> import numpy as np
        >>> from PIL import Image, ImageDraw
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> class MyDataset:
        ...     def __init__(self):
        ...         self.data = []
        ...         img = Image.new("RGB", (300, 300), (255, 255, 255))
        ...         draw = ImageDraw.Draw(img)
        ...         draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5)
        ...         img.save("./1.jpg")
        ...         data = np.fromfile("./1.jpg", np.uint8)
        ...         self.data.append(data)
        ...
        ...     def __getitem__(self, index):
        ...         return self.data[0]
        ...
        ...     def __len__(self):
        ...         return 5
        >>>
        >>> my_dataset = MyDataset()
        >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image")
        >>> transforms_list = Compose([vision.Decode(to_pil=True),
        ...                            vision.FiveCrop(size=200),
        ...                            # 4D stack of 5 images
        ...                            lambda *images: np.stack([vision.ToTensor()(image) for image in images])])
        >>> # apply the transform to dataset through map function
        >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (5, 3, 200, 200) float32
        >>> os.remove("./1.jpg")
        >>>
        >>> # Use the transform in eager mode
        >>> img = Image.new("RGB", (300, 300), (255, 255, 255))
        >>> draw = ImageDraw.Draw(img)
        >>> draw.polygon([(50, 50), (150, 50), (100, 150)], fill=(0, 255, 0), outline=(0, 255, 0))
        >>> img.save("./2.jpg")
        >>> data = Image.open("./2.jpg")
        >>> output = vision.FiveCrop(size=20)(data)
        >>> for cropped_img in output:
        ...     print(cropped_img.size)
        ...     break
        (20, 20)
        >>> os.remove("./2.jpg")

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_five_crop
    def __init__(self, size):
        super().__init__()
        # This transform is flagged as non-random and uses the Python implementation.
        self.implementation = Implementation.PY
        self.random = False
        # Constructor arguments are kept as attributes, as this module
        # requires in order to support save() and load().
        self.size = size

    def _execute_py(self, img):
        """
        Produce the five crops of a single image.

        Args:
            img (PIL Image): Image to be cropped.

        Returns:
            tuple, a tuple of five PIL Image in order of top_left, top_right, bottom_left, bottom_right, center.
        """
        crops = util.five_crop(img, self.size)
        return crops
2196
2197
class GaussianBlur(ImageTensorOperation):
    r"""
    Blur input image with the specified Gaussian kernel.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        kernel_size (Union[int, Sequence[int, int]]): The size of the Gaussian kernel. Must be positive and odd.
            If the input type is int, the value will be used as both the width and height of the Gaussian kernel.
            If the input type is Sequence[int, int], the two elements will be used as the width and height of the
            Gaussian kernel respectively.
        sigma (Union[float, Sequence[float, float]], optional): The standard deviation of the Gaussian kernel.
            Must be positive.
            If the input type is float, the value will be used as the standard deviation of both the width and
            height of the Gaussian kernel.
            If the input type is Sequence[float, float], the two elements will be used as the standard deviation
            of the width and height of the Gaussian kernel respectively.
            Default: ``None`` , the standard deviation of the Gaussian kernel will be obtained by the
            formula :math:`((kernel\_size - 1) * 0.5 - 1) * 0.3 + 0.8` .

    Raises:
        TypeError: If `kernel_size` is not of type int or Sequence[int].
        TypeError: If `sigma` is not of type float or Sequence[float].
        ValueError: If `kernel_size` is not positive and odd.
        ValueError: If `sigma` is not positive.
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.GaussianBlur(3, 3)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.GaussianBlur(3, 3)(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_gaussian_blur
    def __init__(self, kernel_size, sigma=None):
        super().__init__()
        # Scalar arguments are normalized to one-element tuples so that
        # `kernel_size` and `sigma` are always stored as sequences.
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size,)
        if sigma is None:
            # A zero sigma is forwarded when none is given; per the class
            # docstring the standard deviation is then derived from the
            # kernel size (presumably by the backend operation).
            sigma = (0,)
        elif isinstance(sigma, (int, float)):
            sigma = (float(sigma),)
        # Constructor arguments are kept as attributes, as this module
        # requires in order to support save() and load().
        self.kernel_size = kernel_size
        self.sigma = sigma
        self.implementation = Implementation.C

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, the parameter `kernel_size` only supports values 1, 3, and 5.
          input shape should be limited from [4, 6] to [8192, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            GaussianBlur, the operation itself, so that the call can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].
            RuntimeError: If `device_target` is ``Ascend`` and `kernel_size` contains a value other than
                1, 3 or 5.

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> blur_op = vision.GaussianBlur(3, 3).device("Ascend")
            >>> transforms_list = [blur_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.GaussianBlur(3, 3).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Validate before touching any state: a rejected request must not
        # leave the operation switched to Ascend with an unsupported kernel.
        if device_target == "Ascend" and any(k not in (1, 3, 5) for k in self.kernel_size):
            raise RuntimeError("When target is Ascend, `kernel_size` only supports values 1, 3, and 5.")
        self.device_target = device_target
        return self

    def parse(self):
        # Build the backend operation from the normalized arguments and the selected device.
        return cde.GaussianBlurOperation(self.kernel_size, self.sigma, self.device_target)
2321
2322
class Grayscale(PyTensorOperation):
    """
    Convert the input PIL Image to grayscale.

    Args:
        num_output_channels (int, optional): The number of channels desired for the output image,
            must be ``1`` or ``3``. If ``3`` is provided, the returned image will have 3 identical
            RGB channels. Default: ``1``.

    Raises:
        TypeError: If `num_output_channels` is not of type integer.
        ValueError: If `num_output_channels` is not ``1`` or ``3``.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import os
        >>> import numpy as np
        >>> from PIL import Image, ImageDraw
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> class MyDataset:
        ...     def __init__(self):
        ...         self.data = []
        ...         img = Image.new("RGB", (300, 300), (255, 255, 255))
        ...         draw = ImageDraw.Draw(img)
        ...         draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5)
        ...         img.save("./1.jpg")
        ...         data = np.fromfile("./1.jpg", np.uint8)
        ...         self.data.append(data)
        ...
        ...     def __getitem__(self, index):
        ...         return self.data[0]
        ...
        ...     def __len__(self):
        ...         return 5
        >>>
        >>> my_dataset = MyDataset()
        >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image")
        >>> transforms_list = Compose([vision.Decode(to_pil=True),
        ...                            vision.Grayscale(3),
        ...                            vision.ToTensor()])
        >>> # apply the transform to dataset through map function
        >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 300, 300) float32
        >>> os.remove("./1.jpg")
        >>>
        >>> # Use the transform in eager mode
        >>> img = Image.new("RGB", (300, 300), (255, 255, 255))
        >>> draw = ImageDraw.Draw(img)
        >>> draw.polygon([(50, 50), (150, 50), (100, 150)], fill=(0, 255, 0), outline=(0, 255, 0))
        >>> img.save("./2.jpg")
        >>> data = Image.open("./2.jpg")
        >>> output = vision.Grayscale(3)(data)
        >>> print(np.array(output).shape, np.array(output).dtype)
        (300, 300, 3) uint8
        >>> os.remove("./2.jpg")

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_num_channels
    def __init__(self, num_output_channels=1):
        super().__init__()
        # This transform is flagged as non-random and uses the Python implementation.
        self.implementation = Implementation.PY
        self.random = False
        # Constructor arguments are kept as attributes, as this module
        # requires in order to support save() and load().
        self.num_output_channels = num_output_channels

    def _execute_py(self, img):
        """
        Convert a single image to grayscale.

        Args:
            img (PIL Image): Image to be converted to grayscale.

        Returns:
            PIL Image, converted grayscale image.
        """
        channels = self.num_output_channels
        return util.grayscale(img, num_output_channels=channels)
2410
2411
class HorizontalFlip(ImageTensorOperation):
    """
    Flip the input image horizontally.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Raises:
        RuntimeError: If given tensor shape is not <H, W> or <..., H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.HorizontalFlip()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.HorizontalFlip()(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    def __init__(self):
        super().__init__()
        # This operation always uses the C implementation.
        self.implementation = Implementation.C

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Select the device on which the current operator will be executed.

        - When the device is Ascend, input type supports `uint8` and `float32`,
          input channel supports 1 and 3. The input data has a height limit of [4, 8192]
          and a width limit of [6, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            HorizontalFlip, the operation itself, so that the call can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> horizontal_flip_op = vision.HorizontalFlip().device("Ascend")
            >>> transforms_list = [horizontal_flip_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.HorizontalFlip().device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Remember the requested target; parse() forwards it to the backend operation.
        self.device_target = device_target
        # Hand back the operation so that `.device(...)` can be used inline.
        return self

    def parse(self):
        # Build the backend operation for the currently selected device.
        target = self.device_target
        return cde.HorizontalFlipOperation(target)
2505
2506
class HsvToRgb(PyTensorOperation):
    """
    Convert the input numpy.ndarray images from HSV to RGB.

    Args:
        is_hwc (bool, optional): If ``True``, means the input image is in shape of <H, W, C> or <N, H, W, C>.
            Otherwise, it is in shape of <C, H, W> or <N, C, H, W>. Default: ``False``.

    Raises:
        TypeError: If `is_hwc` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> transforms_list = Compose([vision.CenterCrop(20),
        ...                            vision.ToTensor(),
        ...                            vision.HsvToRgb()])
        >>> # apply the transform to dataset through map function
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 20, 20) float64
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.HsvToRgb(is_hwc=True)(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) float64

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_hsv_to_rgb
    def __init__(self, is_hwc=False):
        super().__init__()
        # This transform is flagged as non-random and uses the Python implementation.
        self.implementation = Implementation.PY
        self.random = False
        # Constructor arguments are kept as attributes, as this module
        # requires in order to support save() and load().
        self.is_hwc = is_hwc

    def _execute_py(self, hsv_imgs):
        """
        Convert the given HSV images to RGB.

        Args:
            hsv_imgs (numpy.ndarray): HSV images to be converted.

        Returns:
            numpy.ndarray, converted RGB images.
        """
        rgb_imgs = util.hsv_to_rgbs(hsv_imgs, self.is_hwc)
        return rgb_imgs
2569
2570
class HWC2CHW(ImageTensorOperation):
    """
    Transpose the input image from shape <H, W, C> to <C, H, W>.
    If the input image is of shape <H, W>, it will remain unchanged.

    Note:
        This operation is executed on the CPU by default, but it is also supported
        to be executed on the GPU or Ascend via heterogeneous acceleration.

    Raises:
        RuntimeError: If shape of the input image is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``GPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomHorizontalFlip(0.75),
        ...                    vision.RandomCrop(64),
        ...                    vision.HWC2CHW()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 64, 64) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.HWC2CHW()(data)
        >>> print(output.shape, output.dtype)
        (3, 2, 2) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    def __init__(self):
        super().__init__()
        # This transform is flagged as non-random and uses the C implementation.
        self.random = False
        self.implementation = Implementation.C

    def parse(self):
        # The backend operation takes no arguments.
        hwc_to_chw_op = cde.HwcToChwOperation()
        return hwc_to_chw_op
2621
2622
class Invert(ImageTensorOperation, PyTensorOperation):
    """
    Invert the colors of the input RGB image.

    For each pixel in the image, if the original pixel value is `pixel`,
    the inverted pixel value will be `255 - pixel`.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Raises:
        RuntimeError: If the input image is not in shape of <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.Invert()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.Invert()(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    def __init__(self):
        super().__init__()
        # This transform is flagged as non-random. No implementation is pinned
        # here, since the class provides both parse() and _execute_py().
        self.random = False

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is CPU, input type only supports `uint8` , input channel supports 1/2/3.
        - When the device is Ascend, input type supports `uint8`/`float32`, input channel supports 1/3.
          input shape should be limited from [4, 6] to [8192, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            Invert, the operation itself, so that the call can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> invert_op = vision.Invert().device("Ascend")
            >>> transforms_list = [invert_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.Invert().device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Remember the requested target; parse() forwards it to the backend operation.
        self.device_target = device_target
        return self

    def parse(self):
        # Build the backend operation for the currently selected device.
        return cde.InvertOperation(self.device_target)

    def _execute_py(self, img):
        """
        Execute method.

        Args:
            img (PIL Image): Image to be color inverted.

        Returns:
            PIL Image, color inverted image.
        """
        return util.invert_color(img)
2733
2734
class LinearTransformation(PyTensorOperation):
    r"""
    Linearly transform the input numpy.ndarray image with a square transformation matrix and a mean vector.

    The input image is first flattened and the mean vector is subtracted from it. The dot product with the
    transformation matrix is then computed, and the result is finally reshaped back to the original shape.

    Args:
        transformation_matrix (numpy.ndarray): A square transformation matrix in shape of (D, D), where
            :math:`D = C \times H \times W` .
        mean_vector (numpy.ndarray): A mean vector in shape of (D,), where :math:`D = C \times H \times W` .

    Raises:
        TypeError: If `transformation_matrix` is not of type :class:`numpy.ndarray` .
        TypeError: If `mean_vector` is not of type :class:`numpy.ndarray` .

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> height, width = 32, 32
        >>> dim = 3 * height * width
        >>> transformation_matrix = np.ones([dim, dim])
        >>> mean_vector = np.zeros(dim)
        >>> transforms_list = Compose([vision.Resize((height,width)),
        ...                            vision.ToTensor(),
        ...                            vision.LinearTransformation(transformation_matrix, mean_vector)])
        >>> # apply the transform to dataset through map function
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 32, 32) float64
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randn(10, 10, 3)
        >>> transformation_matrix = np.random.randn(300, 300)
        >>> mean_vector = np.random.randn(300,)
        >>> output = vision.LinearTransformation(transformation_matrix, mean_vector)(data)
        >>> print(output.shape, output.dtype)
        (10, 10, 3) float64

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_linear_transform
    def __init__(self, transformation_matrix, mean_vector):
        super().__init__()
        # This transform is flagged as non-random and uses the Python implementation.
        self.implementation = Implementation.PY
        self.random = False
        # Constructor arguments are kept as attributes, as this module
        # requires in order to support save() and load().
        self.transformation_matrix = transformation_matrix
        self.mean_vector = mean_vector

    def _execute_py(self, np_img):
        """
        Apply the stored transformation matrix and mean vector to one image.

        Args:
            np_img (numpy.ndarray): Image in shape of <C, H, W> to be linearly transformed.

        Returns:
            numpy.ndarray, linearly transformed image.
        """
        transformed = util.linear_transform(np_img, self.transformation_matrix, self.mean_vector)
        return transformed
2809
2810
class MixUp(PyTensorOperation):
    """
    Randomly mix up a batch of numpy.ndarray images together with their labels.

    Every image is multiplied by a random weight :math:`lambda` drawn from the Beta distribution and added
    to another image multiplied by :math:`1 - lambda`. The labels are combined in the same way, using the
    same value of :math:`lambda`. The labels must be one-hot encoded in advance.

    Args:
        batch_size (int): The number of images in a batch.
        alpha (float): The alpha and beta parameter for the Beta distribution.
        is_single (bool, optional): If ``True``, it will randomly mix up [img0, ..., img(n-1), img(n)] with
            [img1, ..., img(n), img0] in each batch. Otherwise, it will randomly mix up images with the
            output of the previous batch. Default: ``True``.

    Raises:
        TypeError: If `batch_size` is not of type integer.
        TypeError: If `alpha` is not of type float.
        TypeError: If `is_single` is not of type boolean.
        ValueError: If `batch_size` is not positive.
        ValueError: If `alpha` is not positive.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> import mindspore.dataset.transforms as transforms
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(64, 64, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(
        ...     operations=lambda img: (data, np.random.randint(0, 5, (3, 1))),
        ...     input_columns=["image"],
        ...     output_columns=["image", "label"])
        >>> # one-hot encode the label
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms.OneHot(10), input_columns="label")
        >>> # batch the samples
        >>> numpy_slices_dataset = numpy_slices_dataset.batch(batch_size=4)
        >>> # finally mix up the images and labels
        >>> numpy_slices_dataset = numpy_slices_dataset.map(
        ...     operations=vision.MixUp(batch_size=1, alpha=0.2),
        ...     input_columns=["image", "label"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["label"].shape, item["label"].dtype)
        ...     break
        (4, 64, 64, 3) float64
        (4, 3, 10) float64
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> label = np.array([[0, 1]])
        >>> output = vision.MixUp(batch_size=2, alpha=0.2, is_single=False)(data, label)
        >>> print(output[0].shape, output[0].dtype)
        (2, 100, 100, 3) float64
        >>> print(output[1].shape, output[1].dtype)
        (2, 2) float64

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_mix_up
    def __init__(self, batch_size, alpha, is_single=True):
        super().__init__()
        # Constructor arguments are stored as attributes, as the module note requires for save()/load().
        self.batch_size = batch_size
        self.alpha = alpha
        self.is_single = is_single
        # Mutable state; the whole object is handed to util.mix_up_muti when is_single is False.
        # NOTE(review): presumably these hold the previous batch and a first-call flag - confirm in util.
        self.image = 0
        self.label = 0
        self.is_first = True
        self.random = False
        self.implementation = Implementation.PY

    def __call__(self, image, label):
        """
        Apply the mix up transformation to the given images and labels.

        Note: MixUp has no execute method; the work is done directly in this call.

        Args:
            image (numpy.ndarray): Images to be mixed up.
            label (numpy.ndarray): Labels to be mixed up.

        Returns:
            numpy.ndarray, images after mixing up.
            numpy.ndarray, labels after mixing up.
        """
        if not self.is_single:
            # Multi-batch mode: the helper receives this object itself as its first argument.
            return util.mix_up_muti(self, self.batch_size, image, label, self.alpha)
        return util.mix_up_single(self.batch_size, image, label, self.alpha)
2907
2908
class MixUpBatch(ImageTensorOperation):
    """
    Apply the MixUp transformation to an input batch of images and labels. Every image is
    multiplied by a random weight (lambda) and added to a randomly selected image from the batch
    multiplied by (1 - lambda). The one-hot labels are combined with the same formula.

    The lambda is generated based on the specified alpha value. Two coefficients x1, x2 are randomly generated
    in the range [alpha, 1], and lambda = (x1 / (x1 + x2)).

    Note that the labels must be converted to one-hot format and batched before this operation is called.

    Args:
        alpha (float, optional): Hyperparameter of beta distribution. The value must be positive.
            Default: ``1.0``.

    Raises:
        TypeError: If `alpha` is not of type float.
        ValueError: If `alpha` is not positive.
        RuntimeError: If given tensor shape is not <N, H, W, C> or <N, C, H, W>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> import mindspore.dataset.transforms as transforms
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(64, 64, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(
        ...     operations=lambda img: (data, np.random.randint(0, 5, (3, 1))),
        ...     input_columns=["image"],
        ...     output_columns=["image", "label"])
        >>> onehot_op = transforms.OneHot(num_classes=10)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=onehot_op,
        ...                                                 input_columns=["label"])
        >>> mixup_batch_op = vision.MixUpBatch(alpha=0.9)
        >>> numpy_slices_dataset = numpy_slices_dataset.batch(5)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=mixup_batch_op,
        ...                                                 input_columns=["image", "label"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["label"].shape, item["label"].dtype)
        ...     break
        (5, 64, 64, 3) uint8
        (5, 3, 10) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, (2, 10, 10, 3)).astype(np.uint8)
        >>> label = np.array([[0, 1], [1, 0]])
        >>> output = vision.MixUpBatch(1)(data, label)
        >>> print(output[0].shape, output[0].dtype)
        (2, 10, 10, 3) uint8
        >>> print(output[1].shape, output[1].dtype)
        (2, 2) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_mix_up_batch_c
    def __init__(self, alpha=1.0):
        super().__init__()
        # This operation only has a C++ implementation.
        self.implementation = Implementation.C
        # Stored as an attribute, as the module note requires for save()/load().
        self.alpha = alpha

    def parse(self):
        """Build the C++ operation object, passing the stored alpha value."""
        mixup_alpha = self.alpha
        return cde.MixUpBatchOperation(mixup_alpha)
2981
2982
class Normalize(ImageTensorOperation):
    """
    Normalize the input image using the given mean and standard deviation. The image is normalized
    with: output[channel] = (input[channel] - mean[channel]) / std[channel], where channel >= 1.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Note:
        This operation is executed on the CPU by default, but it is also supported
        to be executed on the GPU or Ascend via heterogeneous acceleration.

    Args:
        mean (sequence): List or tuple of mean values for each channel, with respect to channel order.
            The mean values must be in range [0.0, 255.0].
        std (sequence): List or tuple of standard deviations for each channel, with respect to channel order.
            The standard deviation values must be in range (0.0, 255.0].
        is_hwc (bool, optional): Whether the input image is HWC.
            ``True`` - HWC format, ``False`` - CHW format. Default: ``True``.

    Raises:
        TypeError: If `mean` is not of type sequence.
        TypeError: If `std` is not of type sequence.
        TypeError: If `is_hwc` is not of type bool.
        ValueError: If `mean` is not in range [0.0, 255.0].
        ValueError: If `std` is not in range (0.0, 255.0].
        RuntimeError: If given tensor format is not <H, W> or <..., H, W, C>.

    Supported Platforms:
        ``CPU`` ``GPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> normalize_op = vision.Normalize(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0], is_hwc=True)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[normalize_op],
        ...                                                 input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.Normalize(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0])(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_normalize
    def __init__(self, mean, std, is_hwc=True):
        super().__init__()
        # Mark the operation as non-random and select the C++ implementation.
        self.implementation = Implementation.C
        self.random = False
        # Constructor arguments are stored as attributes, as the module note requires for save()/load().
        self.is_hwc = is_hwc
        self.std = std
        self.mean = mean

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Choose the device on which this operator is executed.

        - When the device is CPU, input type supports `uint8`/`float32`/`float64`, input channel supports 1/2/3.
        - When the device is Ascend, input type supports `uint8`/`float32`, input channel supports 1/3.
          input shape should be limited from [4, 6] to [8192, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>> from mindspore.dataset.vision import Inter
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> resize_op = vision.Resize([100, 75], Inter.BICUBIC)
            >>> transforms_list = [resize_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> normalize_op = vision.Normalize(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0]).device("Ascend")
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=normalize_op, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 75, 3) float32
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.Normalize(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0]).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) float32

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Record the target and return the operation itself so the call can be chained.
        self.device_target = device_target
        return self

    def parse(self):
        """Build the C++ operation object from the stored mean, std, layout flag and device target."""
        mean, std = self.mean, self.std
        layout_is_hwc = self.is_hwc
        target = self.device_target
        return cde.NormalizeOperation(mean, std, layout_is_hwc, target)
3103
3104
class NormalizePad(ImageTensorOperation):
    """
    Normalize the input image with respect to mean and standard deviation then pad an extra channel with value zero.

    Args:
        mean (sequence): List or tuple of mean values for each channel, with respect to channel order.
            The mean values must be in range [0.0, 255.0].
        std (sequence): List or tuple of standard deviations for each channel, with respect to channel order.
            The standard deviation values must be in range (0.0, 255.0].
        dtype (str, optional): Set the output data type of normalized image. Default: ``"float32"``.
        is_hwc (bool, optional): Specify the format of input image.
            ``True`` - HW(C) format, ``False`` - CHW format. Default: ``True``.

    Raises:
        TypeError: If `mean` is not of type sequence.
        TypeError: If `std` is not of type sequence.
        TypeError: If `dtype` is not of type string.
        TypeError: If `is_hwc` is not of type bool.
        ValueError: If `mean` is not in range [0.0, 255.0].
        ValueError: If `std` is not in range (0.0, 255.0].
        RuntimeError: If given tensor shape is not <H, W>, <H, W, C> or <C, H, W>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> normalize_pad_op = vision.NormalizePad(mean=[121.0, 115.0, 100.0],
        ...                                        std=[70.0, 68.0, 71.0],
        ...                                        dtype="float32")
        >>> transforms_list = [normalize_pad_op]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 4) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.NormalizePad(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0], dtype="float32")(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 4) float32
    """

    @check_normalizepad
    def __init__(self, mean, std, dtype="float32", is_hwc=True):
        super().__init__()
        # Constructor arguments are stored as attributes, as the module note requires for save()/load().
        self.mean = mean
        self.std = std
        self.dtype = dtype
        self.is_hwc = is_hwc
        # Mark the operation as non-random and select the C++ implementation.
        self.random = False
        self.implementation = Implementation.C

    def parse(self):
        """Build the C++ operation object from the stored mean, std, output dtype and layout flag."""
        return cde.NormalizePadOperation(self.mean, self.std, self.dtype, self.is_hwc)
3167
3168
class Pad(ImageTensorOperation, PyTensorOperation):
    """
    Pad the borders of the image as described by the padding parameters.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        padding (Union[int, Sequence[int, int], Sequence[int, int, int, int]]): The number of pixels
            to pad each border of the image.
            If a single number is provided, it pads all borders with this value.
            If a tuple or list of 2 values is provided, it pads the (left and right)
            with the first value and (top and bottom) with the second value.
            If 4 values are provided as a list or tuple, it pads the left, top, right and bottom respectively.
            The pad values must be non-negative.
        fill_value (Union[int, tuple[int]], optional): The pixel intensity of the borders, only valid for
            `padding_mode` ``Border.CONSTANT``. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
            If it is an integer, it is used for all RGB channels.
            The fill_value values must be in range [0, 255]. Default: ``0``.
        padding_mode (Border, optional): The method of padding. Default: ``Border.CONSTANT``. Can be
            ``Border.CONSTANT``, ``Border.EDGE``, ``Border.REFLECT``, ``Border.SYMMETRIC``.

            - ``Border.CONSTANT`` , means it fills the border with constant values.

            - ``Border.EDGE`` , means it pads with the last value on the edge.

            - ``Border.REFLECT`` , means it reflects the values on the edge omitting the last
              value of edge.

            - ``Border.SYMMETRIC`` , means it reflects the values on the edge repeating the last
              value of edge.

    Raises:
        TypeError: If `padding` is not of type int or Sequence[int, int], Sequence[int, int, int, int].
        TypeError: If `fill_value` is not of type int or tuple[int].
        TypeError: If `padding_mode` is not of type :class:`mindspore.dataset.vision.Border` .
        ValueError: If `padding` is negative.
        ValueError: If `fill_value` is not in range [0, 255].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.Pad([100, 100, 100, 100])]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (300, 300, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.Pad([100, 100, 100, 100])(data)
        >>> print(output.shape, output.dtype)
        (300, 300, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_pad
    def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT):
        super().__init__()
        # The user-supplied padding is normalised by parse_padding before it is stored.
        self.padding = parse_padding(padding)
        # A single integer fill value is expanded to a 3-tuple, one entry per RGB channel.
        self.fill_value = tuple([fill_value] * 3) if isinstance(fill_value, int) else fill_value
        self.padding_mode = padding_mode
        self.random = False

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Choose the device on which this operator is executed.

        - When the device is Ascend, input/output shape should be limited from [4, 6] to [32768, 32768].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> pad_op = vision.Pad([100, 100, 100, 100]).device("Ascend")
            >>> transforms_list = [pad_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (300, 300, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.Pad([100, 100, 100, 100]).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (300, 300, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Record the target and return the operation itself so the call can be chained.
        self.device_target = device_target
        return self

    def parse(self):
        """Build the C++ operation object, converting the padding mode to its C type."""
        border_type = Border.to_c_type(self.padding_mode)
        return cde.PadOperation(self.padding, self.fill_value, border_type, self.device_target)

    def _execute_py(self, img):
        """
        Pad one image with the Python implementation.

        Args:
            img (PIL Image): Image to be padded.

        Returns:
            PIL Image, padded image.
        """
        border_type = Border.to_python_type(self.padding_mode)
        return util.pad(img, self.padding, self.fill_value, border_type)
3309
3310
class PadToSize(ImageTensorOperation):
    """
    Pad the image to a fixed size.

    Args:
        size (Union[int, Sequence[int, int]]): The target size to pad.
            If int is provided, pad the image to [size, size].
            If Sequence[int, int] is provided, it should be in order of [height, width].
        offset (Union[int, Sequence[int, int]], optional): The lengths to pad on the top and left.
            If int is provided, pad both top and left borders with this value.
            If Sequence[int, int] is provided, it should be in order of [top, left].
            Default: ``None``, means to pad symmetrically, keeping the original image in center.
        fill_value (Union[int, tuple[int, int, int]], optional): Pixel value used to pad the borders,
            only valid when `padding_mode` is ``Border.CONSTANT``.
            If int is provided, it will be used for all RGB channels.
            If tuple[int, int, int] is provided, it will be used for R, G, B channels respectively.
            Default: ``0``.
        padding_mode (Border, optional): Method of padding. It can be ``Border.CONSTANT``, ``Border.EDGE``,
            ``Border.REFLECT`` or ``Border.SYMMETRIC``. Default: ``Border.CONSTANT``.

            - ``Border.CONSTANT`` , pads with a constant value.
            - ``Border.EDGE`` , pads with the last value at the edge of the image.
            - ``Border.REFLECT`` , pads with reflection of the image omitting the last value on the edge.
            - ``Border.SYMMETRIC`` , pads with reflection of the image repeating the last value on the edge.

    Raises:
        TypeError: If `size` is not of type int or Sequence[int, int].
        TypeError: If `offset` is not of type int or Sequence[int, int].
        TypeError: If `fill_value` is not of type int or tuple[int, int, int].
        TypeError: If `padding_mode` is not of type :class:`mindspore.dataset.vision.Border` .
        ValueError: If `size` is not positive.
        ValueError: If `offset` is negative.
        ValueError: If `fill_value` is not in range of [0, 255].
        RuntimeError: If shape of the input image is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.PadToSize([256, 256])]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (256, 256, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.PadToSize([256, 256])(data)
        >>> print(output.shape, output.dtype)
        (256, 256, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_pad_to_size
    def __init__(self, size, offset=None, fill_value=0, padding_mode=Border.CONSTANT):
        super().__init__()
        # A single integer size is expanded to a two-element [size, size] list.
        self.size = [size, size] if isinstance(size, int) else size
        # A missing offset is stored as an empty list; an integer offset is expanded to two elements.
        if offset is None:
            self.offset = []
        else:
            self.offset = [offset, offset] if isinstance(offset, int) else offset
        # A single integer fill value is expanded to a 3-tuple, one entry per RGB channel.
        self.fill_value = tuple([fill_value] * 3) if isinstance(fill_value, int) else fill_value
        self.padding_mode = padding_mode
        # Padding to a fixed size involves no randomness; flag it like the other deterministic
        # operations in this module (Pad, Normalize, NormalizePad).
        self.random = False
        self.implementation = Implementation.C

    def parse(self):
        """Build the C++ operation object, converting the padding mode to its C type."""
        return cde.PadToSizeOperation(self.size, self.offset, self.fill_value, Border.to_c_type(self.padding_mode))
3388
3389
class Perspective(ImageTensorOperation, PyTensorOperation):
    """
    Apply perspective transformation on input image.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        start_points (Sequence[Sequence[int, int]]): Sequence of the starting point coordinates, containing four
            two-element subsequences, corresponding to [top-left, top-right, bottom-right, bottom-left] of the
            quadrilateral in the original image.
        end_points (Sequence[Sequence[int, int]]): Sequence of the ending point coordinates, containing four
            two-element subsequences, corresponding to [top-left, top-right, bottom-right, bottom-left] of the
            quadrilateral in the target image.
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.BILINEAR``.

    Raises:
        TypeError: If `start_points` is not of type Sequence[Sequence[int, int]].
        TypeError: If `end_points` is not of type Sequence[Sequence[int, int]].
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        RuntimeError: If shape of the input image is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> start_points = [[0, 63], [63, 63], [63, 0], [0, 0]]
        >>> end_points = [[0, 32], [32, 32], [32, 0], [0, 0]]
        >>> transforms_list = [vision.Perspective(start_points, end_points, Inter.BILINEAR)]
        >>> # apply the transform to dataset through map function
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> start_points = [[0, 63], [63, 63], [63, 0], [0, 0]]
        >>> end_points = [[0, 32], [32, 32], [32, 0], [0, 0]]
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.Perspective(start_points, end_points, Inter.BILINEAR)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_perspective
    def __init__(self, start_points, end_points, interpolation=Inter.BILINEAR):
        super().__init__()
        self.start_points = start_points
        self.end_points = end_points
        self.interpolation = interpolation
        # AREA and PILCUBIC are rejected by `_execute_py`, and ANTIALIAS is rejected by `parse`,
        # so those modes pin the implementation. Other modes do not set `implementation` here.
        if interpolation in (Inter.AREA, Inter.PILCUBIC):
            self.implementation = Implementation.C
        elif interpolation == Inter.ANTIALIAS:
            self.implementation = Implementation.PY
        self.random = False

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type supports `uint8` and `float32`,
          input channel supports 1 and 3. The input data has a height limit of [6, 8192]
          and a width limit of [10, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].
            RuntimeError: If `device_target` is ``Ascend`` and `interpolation` is neither
                ``Inter.BILINEAR`` nor ``Inter.NEAREST``.

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>> from mindspore.dataset.vision import Inter
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> start_points = [[0, 63], [63, 63], [63, 0], [0, 0]]
            >>> end_points = [[0, 32], [32, 32], [32, 0], [0, 0]]
            >>> perspective_op = vision.Perspective(start_points, end_points).device("Ascend")
            >>> transforms_list = [perspective_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> start_points = [[0, 63], [63, 63], [63, 0], [0, 0]]
            >>> end_points = [[0, 32], [32, 32], [32, 0], [0, 0]]
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.Perspective(start_points, end_points, Inter.BILINEAR).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Validate before touching any state, so that a rejected call leaves the previously
        # configured device target in place instead of a half-applied "Ascend" setting.
        if device_target == "Ascend" and self.interpolation not in (Inter.BILINEAR, Inter.NEAREST):
            raise RuntimeError("Invalid interpolation mode, only support BILINEAR and NEAREST.")
        self.device_target = device_target
        return self

    def parse(self):
        """
        Create the `cde.PerspectiveOperation` from the stored constructor arguments.

        Raises:
            TypeError: If `interpolation` is ``Inter.ANTIALIAS``.
        """
        if self.interpolation == Inter.ANTIALIAS:
            raise TypeError("Current Interpolation is not supported with NumPy input.")
        return cde.PerspectiveOperation(self.start_points, self.end_points,
                                        Inter.to_c_type(self.interpolation), self.device_target)

    def _execute_py(self, img):
        """
        Execute method.

        Args:
            img (PIL Image): Image to apply the perspective transformation to.

        Returns:
            PIL Image, the perspective-transformed image.

        Raises:
            TypeError: If `interpolation` is ``Inter.AREA`` or ``Inter.PILCUBIC``.
        """
        if self.interpolation in (Inter.AREA, Inter.PILCUBIC):
            raise TypeError("Current Interpolation is not supported with PIL input.")
        return util.perspective(img, self.start_points, self.end_points, Inter.to_python_type(self.interpolation))
3534
3535
class Posterize(ImageTensorOperation):
    """
    Reduce the bit depth of the color channels of image to create a high contrast and vivid color effect,
    similar to that seen in posters or printed materials.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        bits (int): The number of bits to keep for each channel, should be in range of [0, 8].

    Raises:
        TypeError: If `bits` is not of type int.
        ValueError: If `bits` is not in range [0, 8].
        RuntimeError: If shape of the input image is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.Posterize(4)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.Posterize(4)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_posterize
    def __init__(self, bits):
        super().__init__()
        # This operation only has a C implementation.
        self.implementation = Implementation.C
        self.bits = bits

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type supports  `uint8`/`float32`, input channel supports 1 and 3.
          The input data has a height limit of [4, 8192] and a width limit of [6, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> posterize_op = vision.Posterize(4).device("Ascend")
            >>> transforms_list = [posterize_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.Posterize(4).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Returning self allows chaining, e.g. `vision.Posterize(4).device("Ascend")(data)`.
        self.device_target = device_target
        return self

    def parse(self):
        """Create the `cde.PosterizeOperation` from the stored bit count and device target."""
        bits, target = self.bits, self.device_target
        return cde.PosterizeOperation(bits, target)
3633
3634
class RandAugment(ImageTensorOperation):
    """
    Apply RandAugment data augmentation method on the input image.

    Refer to `RandAugment: Learning Augmentation Strategies from Data <https://arxiv.org/pdf/1909.13719.pdf>`_ .

    Only support 3-channel RGB image.

    Args:
        num_ops (int, optional): Number of augmentation transformations to apply sequentially. Default: ``2``.
        magnitude (int, optional): Magnitude for all the transformations, must be smaller than
            `num_magnitude_bins`. Default: ``9``.
        num_magnitude_bins (int, optional): The number of different magnitude values,
            must be no less than 2. Default: ``31``.
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.NEAREST``.
        fill_value (Union[int, tuple[int, int, int]], optional): Pixel fill value for the area outside the
            transformed image, must be in range of [0, 255]. Default: ``0``.
            If int is provided, pad all RGB channels with this value.
            If tuple[int, int, int] is provided, pad R, G, B channels respectively.

    Raises:
        TypeError: If `num_ops` is not of type int.
        ValueError: If `num_ops` is negative.
        TypeError: If `magnitude` is not of type int.
        ValueError: If `magnitude` is not positive.
        TypeError: If `num_magnitude_bins` is not of type int.
        ValueError: If `num_magnitude_bins` is less than 2.
        TypeError: If `interpolation` not of type :class:`~.vision.Inter` .
        TypeError: If `fill_value` is not of type int or tuple[int, int, int].
        ValueError: If `fill_value` is not in range of [0, 255].
        RuntimeError: If shape of the input image is not <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandAugment()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandAugment(interpolation=Inter.BILINEAR, fill_value=255)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_rand_augment
    def __init__(self, num_ops=2, magnitude=9, num_magnitude_bins=31, interpolation=Inter.NEAREST, fill_value=0):
        super().__init__()
        self.num_ops = num_ops
        self.magnitude = magnitude
        self.num_magnitude_bins = num_magnitude_bins
        self.interpolation = interpolation
        # An int fill value is repeated three times; any other value is stored unchanged.
        self.fill_value = (fill_value,) * 3 if isinstance(fill_value, int) else fill_value
        self.implementation = Implementation.C

    def parse(self):
        """Create the `cde.RandAugmentOperation` from the stored constructor arguments."""
        c_interpolation = Inter.to_c_type(self.interpolation)
        return cde.RandAugmentOperation(self.num_ops, self.magnitude, self.num_magnitude_bins,
                                        c_interpolation, self.fill_value)
3713
3714
class RandomAdjustSharpness(ImageTensorOperation):
    """
    Randomly adjust the sharpness of the input image with a given probability.

    Args:
        degree (float): Sharpness adjustment degree, which must be non negative.
            Degree of ``0.0`` gives a blurred image, degree of ``1.0`` gives the original image,
            and degree of ``2.0`` increases the sharpness by a factor of 2.
        prob (float, optional): Probability of the image being sharpness adjusted, which
            must be in range of [0.0, 1.0]. Default: ``0.5``.

    Raises:
        TypeError: If `degree` is not of type float.
        TypeError: If `prob` is not of type float.
        ValueError: If `degree` is negative.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomAdjustSharpness(2.0, 0.5)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomAdjustSharpness(2.0, 1.0)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_adjust_sharpness
    def __init__(self, degree, prob=0.5):
        super().__init__()
        # This operation only has a C implementation.
        self.implementation = Implementation.C
        self.degree = degree
        self.prob = prob

    def parse(self):
        """Create the `cde.RandomAdjustSharpnessOperation` from the stored degree and probability."""
        degree, prob = self.degree, self.prob
        return cde.RandomAdjustSharpnessOperation(degree, prob)
3771
3772
class RandomAffine(ImageTensorOperation, PyTensorOperation):
    """
    Apply Random affine transformation to the input image.

    Args:
        degrees (Union[int, float, sequence]): Range of the rotation degrees.
            If `degrees` is a number, the range will be (-degrees, degrees).
            If `degrees` is a sequence, it should be (min, max).
        translate (sequence, optional): Sequence (tx_min, tx_max, ty_min, ty_max) of minimum/maximum translation in
            x(horizontal) and y(vertical) directions, range [-1.0, 1.0]. Default: ``None``.
            The horizontal and vertical shift is selected randomly from the range:
            (tx_min*width, tx_max*width) and (ty_min*height, ty_max*height), respectively.
            If a tuple or list of size 2, then a translate parallel to the X axis in the range of
            (translate[0], translate[1]) is applied.
            If a tuple or list of size 4, then a translate parallel to the X axis in the range of
            (translate[0], translate[1]) and a translate parallel to the Y axis in the range of
            (translate[2], translate[3]) are applied.
            If ``None``, no translation is applied.
        scale (sequence, optional): Scaling factor interval, which must be non negative.
            Default: ``None``, original scale is used.
        shear (Union[float, Sequence[float, float], Sequence[float, float, float, float]], optional):
            Range of shear factor to select from.
            If float is provided, a shearing parallel to X axis with a factor selected from
            ( `-shear` , `shear` ) will be applied.
            If Sequence[float, float] is provided, a shearing parallel to X axis with a factor selected
            from ( `shear` [0], `shear` [1]) will be applied.
            If Sequence[float, float, float, float] is provided, a shearing parallel to X axis with a factor selected
            from ( `shear` [0], `shear` [1]) and a shearing parallel to Y axis with a factor selected from
            ( `shear` [2], `shear` [3]) will be applied. Default: ``None``, means no shearing.
        resample (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.NEAREST``.
        fill_value (Union[int, tuple[int]], optional): Optional fill_value to fill the area outside the transform
            in the output image. There must be three elements in tuple and the value of single element is [0, 255].
            Default: ``0``, filling is performed.

    Raises:
        TypeError: If `degrees` is not of type int, float or sequence.
        TypeError: If `translate` is not of type sequence.
        TypeError: If `scale` is not of type sequence.
        TypeError: If `shear` is not of type int, float or sequence.
        TypeError: If `resample` is not of type :class:`~.vision.Inter` .
        TypeError: If `fill_value` is not of type int or tuple[int].
        ValueError: If `degrees` is negative.
        ValueError: If `translate` is not in range [-1.0, 1.0].
        ValueError: If `scale` is negative.
        ValueError: If `shear` is not positive.
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> random_affine_op = vision.RandomAffine(degrees=15,
        ...                                        translate=(-0.1, 0.1, 0, 0),
        ...                                        scale=(0.9, 1.1),
        ...                                        resample=Inter.NEAREST)
        >>> transforms_list = [random_affine_op]
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomAffine(degrees=15, translate=(-0.1, 0.1, 0, 0),
        ...                              scale=(0.9, 1.1), resample=Inter.NEAREST)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_affine
    def __init__(self, degrees, translate=None, scale=None, shear=None, resample=Inter.NEAREST, fill_value=0):
        super().__init__()

        # Normalise every argument to the form stored on the instance.

        # degrees: a single number becomes the symmetric range (-degrees, degrees)
        if isinstance(degrees, numbers.Number):
            degrees = (-1 * degrees, degrees)

        # fill_value: a single number is repeated three times
        if isinstance(fill_value, numbers.Number):
            fill_value = (fill_value, fill_value, fill_value)

        # translate: None becomes an all-zero range
        if translate is None:
            translate = (0.0, 0.0, 0.0, 0.0)

        # scale: None becomes the fixed factor range (1.0, 1.0)
        if scale is None:
            scale = (1.0, 1.0)

        # shear: always stored with four entries when it is None, a number, or has length 2 or 4;
        # a sequence of any other length is stored unchanged.
        if shear is None:
            shear = (0.0, 0.0, 0.0, 0.0)
        elif isinstance(shear, numbers.Number):
            shear = (-1 * shear, shear, 0., 0.)
        elif len(shear) == 2:
            shear = [shear[0], shear[1], 0., 0.]
        elif len(shear) == 4:
            shear = list(shear)

        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.fill_value = fill_value
        self.resample = resample

        # AREA and PILCUBIC are rejected by `_execute_py`, and ANTIALIAS is rejected by `parse`,
        # so those modes pin the implementation. Other modes do not set `implementation` here.
        if resample in (Inter.AREA, Inter.PILCUBIC):
            self.implementation = Implementation.C
        elif resample == Inter.ANTIALIAS:
            self.implementation = Implementation.PY

    def parse(self):
        """
        Create the `cde.RandomAffineOperation` from the stored constructor arguments.

        Raises:
            TypeError: If `resample` is ``Inter.ANTIALIAS``.
        """
        if self.resample == Inter.ANTIALIAS:
            raise TypeError("Current Interpolation is not supported with NumPy input.")
        c_resample = Inter.to_c_type(self.resample)
        return cde.RandomAffineOperation(self.degrees, self.translate, self.scale, self.shear,
                                         c_resample, self.fill_value)

    def _execute_py(self, img):
        """
        Execute method.

        Args:
            img (PIL Image): Image to be randomly affine transformed.

        Returns:
            PIL Image, randomly affine transformed image.

        Raises:
            TypeError: If `resample` is ``Inter.AREA`` or ``Inter.PILCUBIC``.
        """
        if self.resample in (Inter.AREA, Inter.PILCUBIC):
            raise TypeError("Current Interpolation is not supported with PIL input.")
        py_resample = Inter.to_python_type(self.resample)
        return util.random_affine(img, self.degrees, self.translate, self.scale, self.shear,
                                  py_resample, self.fill_value)
3923
3924
class RandomAutoContrast(ImageTensorOperation):
    """
    Automatically adjust the contrast of the image with a given probability.

    Args:
        cutoff (float, optional): Percent of the lightest and darkest pixels to be cut off from
            the histogram of the input image. The value must be in range of [0.0, 50.0]. Default: ``0.0``.
        ignore (Union[int, sequence], optional): The background pixel values to be ignored, each of
            which must be in range of [0, 255]. Default: ``None``.
        prob (float, optional): Probability of the image being automatically contrasted, which
            must be in range of [0.0, 1.0]. Default: ``0.5``.

    Raises:
        TypeError: If `cutoff` is not of type float.
        TypeError: If `ignore` is not of type integer or sequence of integer.
        TypeError: If `prob` is not of type float.
        ValueError: If `cutoff` is not in range [0.0, 50.0).
        ValueError: If `ignore` is not in range [0, 255].
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomAutoContrast(cutoff=0.0, ignore=None, prob=0.5)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomAutoContrast(cutoff=0.0, ignore=None, prob=1.0)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_auto_contrast
    def __init__(self, cutoff=0.0, ignore=None, prob=0.5):
        super().__init__()
        # `ignore` is always stored as a list-like value: None -> [], a single int -> [int].
        if ignore is None:
            ignore = []
        elif isinstance(ignore, int):
            ignore = [ignore]
        self.implementation = Implementation.C
        self.cutoff = cutoff
        self.ignore = ignore
        self.prob = prob

    def parse(self):
        """Create the `cde.RandomAutoContrastOperation` from the stored constructor arguments."""
        cutoff, ignore, prob = self.cutoff, self.ignore, self.prob
        return cde.RandomAutoContrastOperation(cutoff, ignore, prob)
3989
3990
class RandomColor(ImageTensorOperation, PyTensorOperation):
    """
    Adjust the color of the input image by a fixed or random degree.
    This operation works only with 3-channel color images.

    Args:
        degrees (Sequence[float], optional): Range of random color adjustment degrees, which must be non-negative.
            It should be in (min, max) format. If min=max, then it is a
            single fixed magnitude operation. Default: ``(0.1, 1.9)``.

    Raises:
        TypeError: If `degrees` is not of type Sequence[float].
        ValueError: If `degrees` is negative.
        RuntimeError: If given tensor shape is not <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomColor((0.5, 2.0))]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomColor((0.1, 1.9))(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_positive_degrees
    def __init__(self, degrees=(0.1, 1.9)):
        super().__init__()
        # Stored unchanged; `parse` unpacks it and `_execute_py` passes it through as-is.
        self.degrees = degrees

    def parse(self):
        """Create the `cde.RandomColorOperation`, unpacking `degrees` into its positional arguments."""
        degree_range = self.degrees
        return cde.RandomColorOperation(*degree_range)

    def _execute_py(self, img):
        """
        Execute method.

        Args:
            img (PIL Image): Image to be color adjusted.

        Returns:
            PIL Image, color adjusted image.
        """
        adjusted = util.random_color(img, self.degrees)
        return adjusted
4055
4056
class RandomColorAdjust(ImageTensorOperation, PyTensorOperation):
    """
    Apply random changes to the brightness, contrast, saturation and hue of the input image.

    Note:
        By default this operation runs on the CPU; it can also be executed on the GPU
        or Ascend through heterogeneous acceleration.

    Args:
        brightness (Union[float, Sequence[float]], optional): Factor used to adjust brightness,
            which cannot be negative. Default: ``(1, 1)``.
            For a float, the factor is drawn uniformly from [max(0, 1-brightness), 1+brightness].
            For a sequence, it is interpreted as the range [min, max].
        contrast (Union[float, Sequence[float]], optional): Factor used to adjust contrast,
            which cannot be negative. Default: ``(1, 1)``.
            For a float, the factor is drawn uniformly from [max(0, 1-contrast), 1+contrast].
            For a sequence, it is interpreted as the range [min, max].
        saturation (Union[float, Sequence[float]], optional): Factor used to adjust saturation,
            which cannot be negative. Default: ``(1, 1)``.
            For a float, the factor is drawn uniformly from [max(0, 1-saturation), 1+saturation].
            For a sequence, it is interpreted as the range [min, max].
        hue (Union[float, Sequence[float]], optional): Factor used to adjust hue. Default: ``(0, 0)``.
            For a float, the range is [-hue, hue] and the value must satisfy 0 <= hue <= 0.5.
            For a sequence, it is interpreted as [min, max] with -0.5 <= min <= max <= 0.5.

    Raises:
        TypeError: If `brightness` is not of type float or Sequence[float].
        TypeError: If `contrast` is not of type float or Sequence[float].
        TypeError: If `saturation` is not of type float or Sequence[float].
        TypeError: If `hue` is not of type float or Sequence[float].
        ValueError: If `brightness` is negative.
        ValueError: If `contrast` is negative.
        ValueError: If `saturation` is negative.
        ValueError: If `hue` is not in range [-0.5, 0.5].
        RuntimeError: If given tensor shape is not <H, W, C>.

    Supported Platforms:
        ``CPU`` ``GPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transform_op = vision.RandomColorAdjust(brightness=(0.5, 1),
        ...                                         contrast=(0.4, 1),
        ...                                         saturation=(0.3, 1))
        >>> transforms_list = [transform_op]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomColorAdjust(brightness=(0.5, 1), contrast=(0.4, 1), saturation=(0.3, 1))(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_color_adjust
    def __init__(self, brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)):
        super().__init__()
        # Every factor is normalized to a (min, max) tuple before being stored.
        self.brightness = self.__expand_values(brightness)
        self.contrast = self.__expand_values(contrast)
        self.saturation = self.__expand_values(saturation)
        # Hue is centered on 0 and may be negative, so it is not clamped at 0.
        self.hue = self.__expand_values(hue, center=0, bound=(-0.5, 0.5), non_negative=False)

    def parse(self):
        """Build the `cde.RandomColorAdjustOperation` from the four stored (min, max) ranges."""
        return cde.RandomColorAdjustOperation(self.brightness, self.contrast, self.saturation, self.hue)

    def _execute_py(self, img):
        """
        Run the random color adjustment through the Python implementation.

        Args:
            img (PIL image): Image to be randomly color adjusted.

        Returns:
            PIL Image, randomly color adjusted image.
        """
        return util.random_color_adjust(img, self.brightness, self.contrast, self.saturation, self.hue)

    def __expand_values(self, value, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True):
        """
        Normalize an adjustment factor into a (min, max) tuple.

        Args:
            value: Either a single number or an indexable holding the range in its first two items.
            center: Midpoint around which a single number is expanded.
            bound: Limits handed to `check_range` when `value` is a single number.
            non_negative: Whether the lower end of an expanded range is clamped at 0.

        Returns:
            tuple, the (min, max) range.
        """
        if not isinstance(value, numbers.Number):
            # A range was given directly; only its first two items are kept.
            return (value[0], value[1])

        lower = center - value
        upper = center + value
        if non_negative:
            lower = max(0, lower)
        # Only the expanded scalar form is range-checked here.
        check_range([lower, upper], bound)
        return (lower, upper)
4162
4163
class RandomCrop(ImageTensorOperation, PyTensorOperation):
    """
    Crop a region of the input image at a random position. When the input image is smaller than
    the output size, it is padded before being cropped.

    Note:
        When more than one input image is given, make sure that all images have the same size.


    Args:
        size (Union[int, Sequence[int]]): Output size of the cropped image. The size value(s) must be positive.
            An integer produces a square crop of size (size, size).
            A sequence of length 2 produces a crop of size (height, width).
        padding (Union[int, Sequence[int]], optional): Number of pixels used to pad each border of the image.
            The padding value(s) must be non-negative. Default: ``None``.
            When `padding` is not ``None``, the image is first padded with the padding values.
            A single number pads all borders with that value.
            A tuple or list of 2 values pads the (left and right) with the first value
            and the (top and bottom) with the second value.
            A tuple or list of 4 values pads the left, top, right and bottom respectively.
        pad_if_needed (bool, optional): Whether to pad the image when either side is smaller than
            the given output size. Default: ``False``.
        fill_value (Union[int, tuple[int]], optional): Pixel intensity of the borders, only valid for
            padding_mode Border.CONSTANT. A 3-tuple fills the R, G, B channels respectively.
            An integer is used for all RGB channels.
            The fill_value values must be in range [0, 255]. Default: ``0``.
        padding_mode (Border, optional): Padding method. Default: ``Border.CONSTANT``. It can be any of
            ``Border.CONSTANT``, ``Border.EDGE``, ``Border.REFLECT``, ``Border.SYMMETRIC``.

            - ``Border.CONSTANT`` , means it fills the border with constant values.

            - ``Border.EDGE`` , means it pads with the last value on the edge.

            - ``Border.REFLECT`` , means it reflects the values on the edge omitting the last
              value of edge.

            - ``Border.SYMMETRIC`` , means it reflects the values on the edge repeating the last
              value of edge.

    Raises:
        TypeError: If `size` is not of type int or Sequence[int].
        TypeError: If `padding` is not of type int or Sequence[int].
        TypeError: If `pad_if_needed` is not of type boolean.
        TypeError: If `fill_value` is not of type int or tuple[int].
        TypeError: If `padding_mode` is not of type :class:`mindspore.dataset.vision.Border` .
        ValueError: If `size` is not positive.
        ValueError: If `padding` is negative.
        ValueError: If `fill_value` is not in range [0, 255].
        RuntimeError: If given tensor shape is not <H, W> or <..., H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Border
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> random_crop_op = vision.RandomCrop(64, [16, 16, 16, 16], padding_mode=Border.EDGE)
        >>> transforms_list = [random_crop_op]
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (64, 64, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomCrop(8, [10, 10, 10, 10], padding_mode=Border.EDGE)(data)
        >>> print(output.shape, output.dtype)
        (8, 8, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_crop
    def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT):
        super().__init__()
        # A single int means a square crop.
        self.size = (size, size) if isinstance(size, int) else size
        # No padding is stored as an explicit all-zero 4-tuple.
        self.padding = (0, 0, 0, 0) if padding is None else parse_padding(padding)
        self.pad_if_needed = pad_if_needed
        # A single int is replicated into a 3-tuple, one value per channel.
        self.fill_value = (fill_value,) * 3 if isinstance(fill_value, int) else fill_value
        self.padding_mode = padding_mode

    def parse(self):
        """Build the `cde.RandomCropOperation` from the stored arguments, with the border mode in its C type."""
        c_border = Border.to_c_type(self.padding_mode)
        return cde.RandomCropOperation(self.size, self.padding, self.pad_if_needed, self.fill_value, c_border)

    def _execute_py(self, img):
        """
        Run the random crop through the Python implementation.

        Args:
            img (PIL Image): Image to be randomly cropped.

        Returns:
            PIL Image, cropped image.
        """
        py_border = Border.to_python_type(self.padding_mode)
        return util.random_crop(img, self.size, self.padding, self.pad_if_needed, self.fill_value, py_border)
4280
4281
class RandomCropDecodeResize(ImageTensorOperation):
    """
    A combination of `Crop` , `Decode` and `Resize` that gives better performance for JPEG images. The input
    image is cropped at a random location, the cropped image is decoded in RGB mode, and the decoded image
    is then resized.

    Args:
        size (Union[int, Sequence[int]]): Output size of the resized image. The size value(s) must be positive.
            An integer produces a square crop of size (size, size).
            A sequence of length 2 is interpreted as (height, width).
        scale (Union[list, tuple], optional): Range [min, max) of the respective size of the
            original size to be cropped, which must be non-negative. Default: ``(0.08, 1.0)``.
        ratio (Union[list, tuple], optional): Range [min, max) of the aspect ratio to be
            cropped, which must be non-negative. Default: ``(3. / 4., 4. / 3.)``.
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.BILINEAR``.
        max_attempts (int, optional): Maximum number of attempts to propose a valid crop_area. Default: ``10``.
            Once exceeded, center_crop is used as a fallback. The `max_attempts` value must be positive.

    Raises:
        TypeError: If `size` is not of type int or Sequence[int].
        TypeError: If `scale` is not of type tuple.
        TypeError: If `ratio` is not of type tuple.
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        TypeError: If `max_attempts` is not of type integer.
        ValueError: If `size` is not positive.
        ValueError: If `scale` is negative.
        ValueError: If `ratio` is negative.
        ValueError: If `max_attempts` is not positive.
        RuntimeError: If given tensor is not a 1D sequence.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import os
        >>> import numpy as np
        >>> from PIL import Image, ImageDraw
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> class MyDataset:
        ...     def __init__(self):
        ...         self.data = []
        ...         img = Image.new("RGB", (300, 300), (255, 255, 255))
        ...         draw = ImageDraw.Draw(img)
        ...         draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5)
        ...         img.save("./1.jpg")
        ...         data = np.fromfile("./1.jpg", np.uint8)
        ...         self.data.append(data)
        ...
        ...     def __getitem__(self, index):
        ...         return self.data[0]
        ...
        ...     def __len__(self):
        ...         return 5
        >>>
        >>> my_dataset = MyDataset()
        >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image")
        >>> resize_crop_decode_op = vision.RandomCropDecodeResize(size=(50, 75),
        ...                                                       scale=(0.25, 0.5),
        ...                                                       interpolation=Inter.NEAREST,
        ...                                                       max_attempts=5)
        >>> transforms_list = [resize_crop_decode_op]
        >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (50, 75, 3) uint8
        >>> os.remove("./1.jpg")
        >>>
        >>> # Use the transform in eager mode
        >>> img = Image.new("RGB", (300, 300), (255, 255, 255))
        >>> draw = ImageDraw.Draw(img)
        >>> draw.polygon([(50, 50), (150, 50), (100, 150)], fill=(0, 255, 0), outline=(0, 255, 0))
        >>> img.save("./2.jpg")
        >>> data = np.fromfile("./2.jpg", np.uint8)
        >>> output = vision.RandomCropDecodeResize(size=(50, 75), scale=(0, 10.0), ratio=(0.5, 0.5),
        ...                                        interpolation=Inter.BILINEAR, max_attempts=1)(data)
        >>> print(np.array(output).shape, np.array(output).dtype)
        (50, 75, 3) uint8
        >>> os.remove("./2.jpg")

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_resize_crop
    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
                 interpolation=Inter.BILINEAR, max_attempts=10):
        super().__init__()
        self.implementation = Implementation.C
        # A single int means a square output.
        self.size = (size, size) if isinstance(size, int) else size
        self.scale = scale
        self.ratio = ratio
        self.interpolation = interpolation
        self.max_attempts = max_attempts

    def __call__(self, img):
        """
        Check that the input is a 1-D uint8 NumPy array, then delegate to the parent `__call__`.

        Args:
            img (numpy.ndarray): Encoded image as a 1-D array of uint8.

        Returns:
            The result of the parent class `__call__` for `img`.

        Raises:
            TypeError: If `img` is not a numpy.ndarray, is not 1-D, or is not of uint8 type.
        """
        if not isinstance(img, np.ndarray):
            raise TypeError(
                "Input should be an encoded image in 1-D NumPy format, got {}.".format(type(img)))

        is_flat_uint8 = img.ndim == 1 and img.dtype.type is np.uint8
        if not is_flat_uint8:
            detail = "got format:{}, dtype:{}.".format(type(img), img.dtype.type)
            raise TypeError("Input should be an encoded image with uint8 type in 1-D NumPy format, " + detail)

        return super().__call__(img)

    def parse(self):
        """Build the `cde.RandomCropDecodeResizeOperation`, with the interpolation mode in its C type."""
        c_interpolation = Inter.to_c_type(self.interpolation)
        return cde.RandomCropDecodeResizeOperation(self.size, self.scale, self.ratio,
                                                   c_interpolation, self.max_attempts)
4397
4398
class RandomCropWithBBox(ImageTensorOperation):
    """
    Crop a region of the input image at a random position and adjust the bounding boxes accordingly.

    Args:
        size (Union[int, Sequence[int]]): Output size of the cropped image. The size value(s) must be positive.
            An integer produces a square crop of size (size, size).
            A sequence of length 2 produces a crop of size (height, width).
        padding (Union[int, Sequence[int]], optional): Number of pixels used to pad the image.
            The padding value(s) must be non-negative. Default: ``None``.
            When `padding` is not ``None``, the image is first padded with the padding values.
            A single number pads all borders with that value.
            A tuple or list of 2 values pads the (left and right) with the first value
            and the (top and bottom) with the second value.
            A tuple or list of 4 values pads the left, top, right and bottom respectively.
        pad_if_needed (bool, optional): Whether to pad the image when either side is smaller than
            the given output size. Default: ``False``.
        fill_value (Union[int, tuple[int]], optional): Pixel intensity of the borders, only valid for
            padding_mode Border.CONSTANT. A 3-tuple fills the R, G, B channels respectively.
            An integer is used for all RGB channels.
            The fill_value values must be in range [0, 255]. Default: ``0``.
        padding_mode (Border, optional): Padding method. Default: ``Border.CONSTANT``. It can be any of
            ``Border.CONSTANT``, ``Border.EDGE``, ``Border.REFLECT``, ``Border.SYMMETRIC``.

            - ``Border.CONSTANT`` , means it fills the border with constant values.

            - ``Border.EDGE`` , means it pads with the last value on the edge.

            - ``Border.REFLECT`` , means it reflects the values on the edge omitting the last
              value of edge.

            - ``Border.SYMMETRIC`` , means it reflects the values on the edge repeating the last
              value of edge.

    Raises:
        TypeError: If `size` is not of type int or Sequence[int].
        TypeError: If `padding` is not of type int or Sequence[int].
        TypeError: If `pad_if_needed` is not of type boolean.
        TypeError: If `fill_value` is not of type int or tuple[int].
        TypeError: If `padding_mode` is not of type :class:`mindspore.dataset.vision.Border` .
        ValueError: If `size` is not positive.
        ValueError: If `padding` is negative.
        ValueError: If `fill_value` is not in range [0, 255].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32))
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func],
        ...                                                 input_columns=["image"],
        ...                                                 output_columns=["image", "bbox"])
        >>> random_crop_with_bbox_op = vision.RandomCropWithBBox([64, 64], [20, 20, 20, 20])
        >>> transforms_list = [random_crop_with_bbox_op]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image", "bbox"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["bbox"].shape, item["bbox"].dtype)
        ...     break
        (64, 64, 3) float32
        (1, 4) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(data.dtype))
        >>> func_data, func_bboxes = func(data)
        >>> output = vision.RandomCropWithBBox([64, 64], [20, 20, 20, 20])(func_data, func_bboxes)
        >>> print(output[0].shape, output[0].dtype)
        (64, 64, 3) float32
        >>> print(output[1].shape, output[1].dtype)
        (1, 4) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_crop
    def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT):
        super().__init__()
        self.implementation = Implementation.C
        # A single int means a square crop.
        self.size = (size, size) if isinstance(size, int) else size
        # No padding is stored as an explicit all-zero 4-tuple.
        self.padding = (0, 0, 0, 0) if padding is None else parse_padding(padding)
        self.pad_if_needed = pad_if_needed
        # A single int is replicated into a 3-tuple, one value per channel.
        self.fill_value = (fill_value,) * 3 if isinstance(fill_value, int) else fill_value
        self.padding_mode = padding_mode

    def parse(self):
        """Build the `cde.RandomCropWithBBoxOperation` from the stored arguments, with the border mode in its C type."""
        return cde.RandomCropWithBBoxOperation(self.size, self.padding, self.pad_if_needed, self.fill_value,
                                               Border.to_c_type(self.padding_mode))
4509
4510
class RandomEqualize(ImageTensorOperation):
    """
    Perform histogram equalization on the input image with a given probability.

    Args:
        prob (float, optional): Probability that the image is equalized, which
            must be in range of [0.0, 1.0]. Default: ``0.5``.

    Raises:
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomEqualize(0.5)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomEqualize(1.0)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_prob
    def __init__(self, prob=0.5):
        super().__init__()
        self.implementation = Implementation.C
        self.prob = prob

    def parse(self):
        """Build the `cde.RandomEqualizeOperation` from the stored probability."""
        equalize_prob = self.prob
        return cde.RandomEqualizeOperation(equalize_prob)
4561
4562
class RandomErasing(PyTensorOperation):
    """
    Randomly erase the pixels inside a randomly selected rectangular area of the input numpy.ndarray image.

    See `Random Erasing Data Augmentation <https://arxiv.org/pdf/1708.04896.pdf>`_ .

    Args:
        prob (float, optional): Probability of performing erasing, which
            must be in range of [0.0, 1.0]. Default: ``0.5``.
        scale (Sequence[float, float], optional): Range to select from for the area scale of the erased
            area relative to the original image, arranged in order of (min, max).
            Default: ``(0.02, 0.33)``.
        ratio (Sequence[float, float], optional): Range to select from for the aspect ratio of the
            erased area, arranged in order of (min, max). Default: ``(0.3, 3.3)``.
        value (Union[int, str, Sequence[int, int, int]]): Pixel value used to pad the erased area.
            A single integer is used for all RGB channels.
            A sequence of length 3 is used for the R, G, B channels respectively.
            With the string ``'random'``, each pixel is erased with a random value obtained
            from a standard normal distribution. Default: ``0``.
        inplace (bool, optional): Whether to apply erasing inplace. Default: ``False``.
        max_attempts (int, optional): Maximum number of attempts to propose a valid
            erased area, beyond which the original image will be returned. Default: ``10``.

    Raises:
        TypeError: If `prob` is not of type float.
        TypeError: If `scale` is not of type sequence.
        TypeError: If `ratio` is not of type sequence.
        TypeError: If `value` is not of type integer, string, or sequence.
        TypeError: If `inplace` is not of type boolean.
        TypeError: If `max_attempts` is not of type integer.
        ValueError: If `prob` is not in range of [0.0, 1.0].
        ValueError: If `scale` is negative.
        ValueError: If `ratio` is negative.
        ValueError: If `value` is not in range of [0, 255].
        ValueError: If `max_attempts` is not positive.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> transforms_list = Compose([vision.ToTensor(),
        ...                            vision.RandomErasing(value='random')])
        >>> # apply the transform to dataset through map function
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 100, 100) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(254, 255, size=(3, 100, 100)).astype(np.uint8)
        >>> output = vision.RandomErasing(prob=1.0, max_attempts=1)(data)
        >>> print(output.shape, output.dtype)
        (3, 100, 100) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_erasing
    def __init__(self, prob=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False, max_attempts=10):
        super().__init__()
        self.implementation = Implementation.PY
        self.prob = prob
        self.scale = scale
        self.ratio = ratio
        self.value = value
        self.inplace = inplace
        self.max_attempts = max_attempts

    def _execute_py(self, np_img):
        """
        Erase a random rectangle of the image with probability `self.prob`.

        Args:
            np_img (numpy.ndarray): image in shape of <C, H, W> to be randomly erased.

        Returns:
            numpy.ndarray, erased image.
        """
        # One random draw decides whether this call erases anything; otherwise the input is returned as is.
        if not self.prob > random.random():
            return np_img

        top, left, height, width, fill = util.get_erase_params(np_img, self.scale, self.ratio,
                                                               self.value, True, self.max_attempts)
        return util.erase(np_img, top, left, height, width, fill, self.inplace)
4658
4659
class RandomGrayscale(PyTensorOperation):
    """
    Randomly convert the input PIL Image to grayscale.

    Args:
        prob (float, optional): Probability of performing grayscale conversion,
            which must be in range of [0.0, 1.0]. Default: ``0.1``.

    Raises:
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range of [0.0, 1.0].

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import os
        >>> import numpy as np
        >>> from PIL import Image, ImageDraw
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> class MyDataset:
        ...     def __init__(self):
        ...         self.data = []
        ...         img = Image.new("RGB", (300, 300), (255, 255, 255))
        ...         draw = ImageDraw.Draw(img)
        ...         draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5)
        ...         img.save("./1.jpg")
        ...         data = np.fromfile("./1.jpg", np.uint8)
        ...         self.data.append(data)
        ...
        ...     def __getitem__(self, index):
        ...         return self.data[0]
        ...
        ...     def __len__(self):
        ...         return 5
        >>>
        >>> my_dataset = MyDataset()
        >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image")
        >>> transforms_list = Compose([vision.Decode(to_pil=True),
        ...                            vision.RandomGrayscale(0.3),
        ...                            vision.ToTensor()])
        >>> # apply the transform to dataset through map function
        >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 300, 300) float32
        >>> os.remove("./1.jpg")
        >>>
        >>> # Use the transform in eager mode
        >>> img = Image.new("RGB", (300, 300), (255, 255, 255))
        >>> draw = ImageDraw.Draw(img)
        >>> draw.polygon([(50, 50), (150, 50), (100, 150)], fill=(0, 255, 0), outline=(0, 255, 0))
        >>> img.save("./2.jpg")
        >>> data = Image.open("./2.jpg")
        >>> output = vision.RandomGrayscale(1.0)(data)
        >>> print(np.array(output).shape, np.array(output).dtype)
        (300, 300, 3) uint8
        >>> os.remove("./2.jpg")

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_prob
    def __init__(self, prob=0.1):
        super().__init__()
        self.prob = prob
        # Mark this transform as running through its Python implementation.
        self.implementation = Implementation.PY

    def _execute_py(self, img):
        """
        Convert the image to grayscale with probability `self.prob`.

        Args:
            img (PIL Image): Image to be randomly converted to grayscale.

        Returns:
            PIL Image, either the unchanged input or its grayscale version. The grayscale
                version is requested with 1 output channel when the input mode is 'L'
                and with 3 output channels for every other mode.
        """
        # The channel count is resolved before the random draw, exactly as in the
        # sequential form, so `img.mode` is always read.
        channels = 1 if img.mode == 'L' else 3

        if not self.prob > random.random():
            return img
        return util.grayscale(img, num_output_channels=channels)
4755
4756
class RandomHorizontalFlip(ImageTensorOperation, PyTensorOperation):
    """
    Randomly flip the input image horizontally with a given probability.

    Args:
        prob (float, optional): Probability of the image being flipped,
            which must be in range of [0.0, 1.0]. Default: ``0.5``.

    Raises:
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomHorizontalFlip(0.75)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomHorizontalFlip(1.0)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_prob
    def __init__(self, prob=0.5):
        super().__init__()
        # Kept as an attribute so both execution paths below can read it.
        self.prob = prob

    def parse(self):
        """
        Create the `cde.RandomHorizontalFlipOperation` configured with `self.prob`.
        """
        flip_operation = cde.RandomHorizontalFlipOperation(self.prob)
        return flip_operation

    def _execute_py(self, img):
        """
        Run the Python path of the transform.

        Args:
            img (PIL Image): Image to be horizontally flipped.

        Returns:
            PIL Image, randomly horizontally flipped image.
        """
        # The probability is handed to the helper; no random draw happens in this method.
        maybe_flipped = util.random_horizontal_flip(img, self.prob)
        return maybe_flipped
4818
4819
class RandomHorizontalFlipWithBBox(ImageTensorOperation):
    """
    Randomly flip the input image and its bounding box horizontally with a given probability.

    Args:
        prob (float, optional): Probability of the image being flipped,
            which must be in range of [0.0, 1.0]. Default: ``0.5``.

    Raises:
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32))
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func],
        ...                                                 input_columns=["image"],
        ...                                                 output_columns=["image", "bbox"])
        >>> transforms_list = [vision.RandomHorizontalFlipWithBBox(0.70)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image", "bbox"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["bbox"].shape, item["bbox"].dtype)
        ...     break
        (100, 100, 3) float32
        (1, 4) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(data.dtype))
        >>> func_data, func_bboxes = func(data)
        >>> output = vision.RandomHorizontalFlipWithBBox(1)(func_data, func_bboxes)
        >>> print(output[0].shape, output[0].dtype)
        (100, 100, 3) float32
        >>> print(output[1].shape, output[1].dtype)
        (1, 4) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_prob
    def __init__(self, prob=0.5):
        super().__init__()
        self.prob = prob
        # Mark this transform as running through its C implementation.
        self.implementation = Implementation.C

    def parse(self):
        """
        Create the `cde.RandomHorizontalFlipWithBBoxOperation` configured with `self.prob`.
        """
        flip_with_bbox_operation = cde.RandomHorizontalFlipWithBBoxOperation(self.prob)
        return flip_with_bbox_operation
4881
4882
class RandomInvert(ImageTensorOperation):
    """
    Randomly invert the colors of image with a given probability.

    Args:
        prob (float, optional): Probability of the image being inverted,
            which must be in range of [0.0, 1.0]. Default: ``0.5``.

    Raises:
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomInvert(0.5)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomInvert(1.0)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_prob
    def __init__(self, prob=0.5):
        super().__init__()
        self.prob = prob
        # Mark this transform as running through its C implementation.
        self.implementation = Implementation.C

    def parse(self):
        """
        Create the `cde.RandomInvertOperation` configured with `self.prob`.
        """
        invert_operation = cde.RandomInvertOperation(self.prob)
        return invert_operation
4933
4934
class RandomLighting(ImageTensorOperation, PyTensorOperation):
    """
    Add AlexNet-style PCA-based noise to an image. The eigenvalues and eigenvectors for AlexNet's PCA noise
    are calculated from the ImageNet dataset.

    Args:
        alpha (float, optional): Intensity of the image, which must be non-negative. Default: ``0.05``.

    Raises:
        TypeError: If `alpha` is not of type float.
        ValueError: If `alpha` is negative.
        RuntimeError: If given tensor shape is not <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomLighting(0.1)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomLighting(0.1)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_alpha
    def __init__(self, alpha=0.05):
        super().__init__()
        # Kept as an attribute so both execution paths below can read it.
        self.alpha = alpha

    def parse(self):
        """
        Create the `cde.RandomLightingOperation` configured with `self.alpha`.
        """
        lighting_operation = cde.RandomLightingOperation(self.alpha)
        return lighting_operation

    def _execute_py(self, img):
        """
        Run the Python path of the transform.

        Args:
            img (PIL Image): Image to be added AlexNet-style PCA-based noise.

        Returns:
            PIL Image, image with noise added.
        """
        noisy_image = util.random_lighting(img, self.alpha)
        return noisy_image
4997
4998
class RandomPerspective(PyTensorOperation):
    """
    Randomly apply perspective transformation to the input PIL Image with a given probability.

    Args:
        distortion_scale (float, optional): Scale of distortion, in range of [0.0, 1.0]. Default: ``0.5``.
        prob (float, optional): Probability of performing perspective transformation, which
            must be in range of [0.0, 1.0]. Default: ``0.5``.
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.BICUBIC``.

    Raises:
        TypeError: If `distortion_scale` is not of type float.
        TypeError: If `prob` is not of type float.
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        ValueError: If `distortion_scale` is not in range of [0.0, 1.0].
        ValueError: If `prob` is not in range of [0.0, 1.0].

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import os
        >>> import numpy as np
        >>> from PIL import Image, ImageDraw
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> class MyDataset:
        ...     def __init__(self):
        ...         self.data = []
        ...         img = Image.new("RGB", (300, 300), (255, 255, 255))
        ...         draw = ImageDraw.Draw(img)
        ...         draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5)
        ...         img.save("./1.jpg")
        ...         data = np.fromfile("./1.jpg", np.uint8)
        ...         self.data.append(data)
        ...
        ...     def __getitem__(self, index):
        ...         return self.data[0]
        ...
        ...     def __len__(self):
        ...         return 5
        >>>
        >>> my_dataset = MyDataset()
        >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image")
        >>> transforms_list = Compose([vision.Decode(to_pil=True),
        ...                            vision.RandomPerspective(prob=0.1),
        ...                            vision.ToTensor()])
        >>> # apply the transform to dataset through map function
        >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 300, 300) float32
        >>> os.remove("./1.jpg")
        >>>
        >>> # Use the transform in eager mode
        >>> img = Image.new("RGB", (300, 300), (255, 255, 255))
        >>> draw = ImageDraw.Draw(img)
        >>> draw.polygon([(50, 50), (150, 50), (100, 150)], fill=(0, 255, 0), outline=(0, 255, 0))
        >>> img.save("./2.jpg")
        >>> data = Image.open("./2.jpg")
        >>> output = vision.RandomPerspective(prob=1.0)(data)
        >>> print(np.array(output).shape, np.array(output).dtype)
        (300, 300, 3) uint8
        >>> os.remove("./2.jpg")

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_perspective
    def __init__(self, distortion_scale=0.5, prob=0.5, interpolation=Inter.BICUBIC):
        super().__init__()
        self.distortion_scale = distortion_scale
        self.prob = prob
        self.interpolation = interpolation
        # Mark this transform as running through its Python implementation.
        self.implementation = Implementation.PY

    def _execute_py(self, img):
        """
        Apply a perspective transformation to the image with probability `self.prob`.

        Args:
            img (PIL Image): Image to be applied randomly perspective transformation.

        Returns:
            PIL Image, the transformed image, or the input itself when the random draw
                decides not to transform.

        Raises:
            ValueError: If `img` is not a Pillow image.
        """
        if not is_pil(img):
            raise ValueError("Input image should be a Pillow image.")

        # Guard clause: return the input untouched unless the draw falls below `prob`.
        if not self.prob > random.random():
            return img

        start_points, end_points = util.get_perspective_params(img, self.distortion_scale)
        pil_interpolation = Inter.to_python_type(self.interpolation)
        return util.perspective(img, start_points, end_points, pil_interpolation)
5099
5100
class RandomPosterize(ImageTensorOperation):
    """
    Reduce the bit depth of the color channels of image with a given probability
    to create a high contrast and vivid color image.

    Reduce the number of bits for each color channel to posterize the input image randomly with a given probability.

    Args:
        bits (Union[int, Sequence[int]], optional): Range of random posterize to compress image.
            Bits values must be in range of [1,8], and include at
            least one integer value in the given range. It must be in
            (min, max) or integer format. If min=max, then it is a single fixed
            magnitude operation. Default: ``(8, 8)``.

    Raises:
        TypeError: If `bits` is not of type integer or sequence of integer.
        ValueError: If `bits` is not in range [1, 8].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomPosterize((6, 8))]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomPosterize(1)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_posterize
    def __init__(self, bits=(8, 8)):
        super().__init__()
        # Stored exactly as given; an int is only expanded to a pair inside parse().
        self.bits = bits
        # Mark this transform as running through its C implementation.
        self.implementation = Implementation.C

    def parse(self):
        """
        Create the `cde.RandomPosterizeOperation`, expanding an integer `bits` to `(bits, bits)`.
        """
        bits_range = (self.bits, self.bits) if isinstance(self.bits, int) else self.bits
        return cde.RandomPosterizeOperation(bits_range)
5160
5161
class RandomResizedCrop(ImageTensorOperation, PyTensorOperation):
    """
    This operation will crop the input image randomly,
    and resize the cropped image using a selected interpolation mode :class:`~.vision.Inter` .

    Note:
        If the input image is more than one, then make sure that the image size is the same.

    Args:
        size (Union[int, Sequence[int]]): The output size of the resized image. The size value(s) must be positive.
            If size is an integer, a square of size (size, size) will be cropped with this value.
            If size is a sequence of length 2, an image of size (height, width) will be cropped.
        scale (Union[list, tuple], optional): Range [min, max) of respective size of the original
            size to be cropped, which must be non-negative. Default: ``(0.08, 1.0)``.
        ratio (Union[list, tuple], optional): Range [min, max) of aspect ratio to be
            cropped, which must be non-negative. Default: ``(3. / 4., 4. / 3.)``.
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.BILINEAR``.
        max_attempts (int, optional): The maximum number of attempts to propose a valid
            crop_area. Default: ``10``. If exceeded, fall back to use center_crop instead.

    Raises:
        TypeError: If `size` is not of type int or Sequence[int].
        TypeError: If `scale` is not of type tuple or list.
        TypeError: If `ratio` is not of type tuple or list.
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        TypeError: If `max_attempts` is not of type int.
        ValueError: If `size` is not positive.
        ValueError: If `scale` is negative.
        ValueError: If `ratio` is negative.
        ValueError: If `max_attempts` is not positive.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> resize_crop_op = vision.RandomResizedCrop(size=(50, 75), scale=(0.25, 0.5),
        ...                                           interpolation=Inter.BILINEAR)
        >>> transforms_list = [resize_crop_op]
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (50, 75, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomResizedCrop(size=(50, 75), scale=(0.25, 0.5), interpolation=Inter.BILINEAR)(data)
        >>> print(output.shape, output.dtype)
        (50, 75, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_resize_crop
    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
                 interpolation=Inter.BILINEAR, max_attempts=10):
        super().__init__()
        # An integer size is stored as a square (size, size).
        self.size = (size, size) if isinstance(size, int) else size
        self.scale = scale
        self.ratio = ratio
        self.interpolation = interpolation
        self.max_attempts = max_attempts

        # AREA and PILCUBIC are pinned to the C implementation and ANTIALIAS to the
        # Python one; for any other mode the attribute is left as super().__init__() set it.
        if interpolation in (Inter.AREA, Inter.PILCUBIC):
            self.implementation = Implementation.C
        elif interpolation == Inter.ANTIALIAS:
            self.implementation = Implementation.PY

    def parse(self):
        """
        Create the `cde.RandomResizedCropOperation` from the stored parameters.

        Raises:
            TypeError: If the interpolation is ``Inter.ANTIALIAS``.
        """
        if self.interpolation == Inter.ANTIALIAS:
            raise TypeError("Current Interpolation is not supported with NumPy input.")
        c_interpolation = Inter.to_c_type(self.interpolation)
        return cde.RandomResizedCropOperation(self.size, self.scale, self.ratio,
                                              c_interpolation, self.max_attempts)

    def _execute_py(self, img):
        """
        Run the Python path of the transform.

        Args:
            img (PIL Image): Image to be randomly cropped and resized.

        Returns:
            PIL Image, randomly cropped and resized image.

        Raises:
            TypeError: If the interpolation is ``Inter.AREA`` or ``Inter.PILCUBIC``.
        """
        if self.interpolation in (Inter.AREA, Inter.PILCUBIC):
            raise TypeError("Current Interpolation is not supported with PIL input.")
        pil_interpolation = Inter.to_python_type(self.interpolation)
        return util.random_resize_crop(img, self.size, self.scale, self.ratio,
                                       pil_interpolation, self.max_attempts)
5262
5263
class RandomResizedCropWithBBox(ImageTensorOperation):
    """
    Crop the input image to a random size and aspect ratio and adjust bounding boxes accordingly.

    Args:
        size (Union[int, Sequence[int]]): The size of the output image. The size value(s) must be positive.
            If size is an integer, a square crop of size (size, size) is returned.
            If size is a sequence of length 2, it should be (height, width).
        scale (Union[list, tuple], optional): Range (min, max) of respective size of the original
            size to be cropped, which must be non-negative. Default: ``(0.08, 1.0)``.
        ratio (Union[list, tuple], optional): Range (min, max) of aspect ratio to be
            cropped, which must be non-negative. Default: ``(3. / 4., 4. / 3.)``.
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.BILINEAR``.
        max_attempts (int, optional): The maximum number of attempts to propose a valid
            crop area. Default: ``10``. If exceeded, fall back to use center crop instead.

    Raises:
        TypeError: If `size` is not of type int or Sequence[int].
        TypeError: If `scale` is not of type tuple or list.
        TypeError: If `ratio` is not of type tuple or list.
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        TypeError: If `max_attempts` is not of type int.
        ValueError: If `size` is not positive.
        ValueError: If `scale` is negative.
        ValueError: If `ratio` is negative.
        ValueError: If `max_attempts` is not positive.
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32))
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func],
        ...                                                 input_columns=["image"],
        ...                                                 output_columns=["image", "bbox"])
        >>> bbox_op = vision.RandomResizedCropWithBBox(size=50, interpolation=Inter.NEAREST)
        >>> transforms_list = [bbox_op]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image", "bbox"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["bbox"].shape, item["bbox"].dtype)
        ...     break
        (50, 50, 3) float32
        (1, 4) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(data.dtype))
        >>> func_data, func_bboxes = func(data)
        >>> output = vision.RandomResizedCropWithBBox((16, 64), (0.5, 0.5), (0.5, 0.5))(func_data, func_bboxes)
        >>> print(output[0].shape, output[0].dtype)
        (16, 64, 3) float32
        >>> print(output[1].shape, output[1].dtype)
        (1, 4) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_resize_crop
    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
                 interpolation=Inter.BILINEAR, max_attempts=10):
        super().__init__()
        # An integer size is stored as a square (size, size).
        if isinstance(size, int):
            size = (size, size)
        self.size = size
        self.scale = scale
        self.ratio = ratio
        self.interpolation = interpolation
        self.max_attempts = max_attempts
        # Mark this transform as running through its C implementation.
        self.implementation = Implementation.C

    def parse(self):
        """
        Create the `cde.RandomResizedCropWithBBoxOperation` from the stored parameters.
        """
        return cde.RandomResizedCropWithBBoxOperation(self.size, self.scale, self.ratio,
                                                      Inter.to_c_type(self.interpolation), self.max_attempts)
5351
5352
class RandomResize(ImageTensorOperation):
    """
    Resize the input image using :class:`~.vision.Inter` , a randomly selected interpolation mode.

    Args:
        size (Union[int, Sequence[int]]): The output size of the resized image. The size value(s) must be positive.
            If size is an integer, smaller edge of the image will be resized to this value with
            the same image aspect ratio.
            If size is a sequence of length 2, it should be (height, width).

    Raises:
        TypeError: If `size` is not of type int or Sequence[int].
        ValueError: If `size` is not positive.
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> # 1) randomly resize image, keeping aspect ratio
        >>> transforms_list1 = [vision.RandomResize(50)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list1, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (50, 50, 3) uint8
        >>> # 2) randomly resize image to landscape style
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list2 = [vision.RandomResize((40, 60))]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list2, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (40, 60, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomResize(10)(data)
        >>> print(output.shape, output.dtype)
        (10, 10, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_resize
    def __init__(self, size):
        super().__init__()
        # Stored exactly as given; an int is only wrapped into a 1-tuple inside parse().
        self.size = size
        # Mark this transform as running through its C implementation.
        self.implementation = Implementation.C

    def parse(self):
        """
        Create the `cde.RandomResizeOperation`, wrapping an integer `size` as `(size,)`.
        """
        target_size = (self.size,) if isinstance(self.size, int) else self.size
        return cde.RandomResizeOperation(target_size)
5417
5418
class RandomResizeWithBBox(ImageTensorOperation):
    """
    Tensor operation that resizes the input image with an interpolation mode chosen at random
    from :class:`~.vision.Inter` and adjusts the bounding boxes accordingly.

    Args:
        size (Union[int, Sequence[int]]): Size of the resized output image. All size values must be positive.
            An integer resizes the smaller edge of the image to that value while keeping
            the aspect ratio of the image.
            A sequence of length 2 is interpreted as (height, width).

    Raises:
        TypeError: If `size` is neither an int nor a Sequence[int].
        ValueError: If `size` is not positive.
        RuntimeError: If the shape of the given tensor is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import copy
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32))
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func],
        ...                                                 input_columns=["image"],
        ...                                                 output_columns=["image", "bbox"])
        >>> numpy_slices_dataset2 = copy.deepcopy(numpy_slices_dataset)
        >>>
        >>> # 1) randomly resize image with bounding boxes, keeping aspect ratio
        >>> transforms_list1 = [vision.RandomResizeWithBBox(60)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list1,
        ...                                                 input_columns=["image", "bbox"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["bbox"].shape, item["bbox"].dtype)
        ...     break
        (60, 60, 3) float32
        (1, 4) float32
        >>>
        >>> # 2) randomly resize image with bounding boxes to portrait style
        >>> transforms_list2 = [vision.RandomResizeWithBBox((80, 60))]
        >>> numpy_slices_dataset2 = numpy_slices_dataset2.map(operations=transforms_list2,
        ...                                                   input_columns=["image", "bbox"])
        >>> for item in numpy_slices_dataset2.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["bbox"].shape, item["bbox"].dtype)
        ...     break
        (80, 60, 3) float32
        (1, 4) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(data.dtype))
        >>> func_data, func_bboxes = func(data)
        >>> output = vision.RandomResizeWithBBox(64)(func_data, func_bboxes)
        >>> print(output[0].shape, output[0].dtype)
        (64, 64, 3) float32
        >>> print(output[1].shape, output[1].dtype)
        (1, 4) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_resize
    def __init__(self, size):
        super().__init__()
        # `size` is stored exactly as given; any int-to-tuple conversion is deferred to parse().
        self.size = size
        self.implementation = Implementation.C

    def parse(self):
        """Create the cde.RandomResizeWithBBoxOperation; an integer size is passed on as a 1-tuple."""
        target_size = (self.size,) if isinstance(self.size, int) else self.size
        return cde.RandomResizeWithBBoxOperation(target_size)
5502
5503
class RandomRotation(ImageTensorOperation, PyTensorOperation):
    """
    Rotate the input image by a random angle drawn from a specified range of degrees.

    Args:
        degrees (Union[int, float, sequence]): Range of random rotation degrees.
            A number is first reduced modulo 360 and the range then becomes (-degrees, degrees).
            A sequence should be given as (min, max). A sequence spanning 360 degrees or more is
            replaced by (-180, 180); otherwise both ends are reduced modulo 360, and 360 is added
            to max when it ends up below min.
        resample (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.NEAREST``.
        expand (bool, optional):  Optional expansion flag. Default: ``False``. When ``True``,
            the output image is enlarged so that it can hold the entire rotated image.
            When ``False`` or omitted, the output image keeps the size of the input.
            Note that the expand flag assumes rotation around the center and no translation.
        center (tuple, optional): Optional center of rotation (a 2-tuple). Default: ``None``.
            Origin is the top left corner. ``None`` sets to the center of the image.
        fill_value (Union[int, tuple[int]], optional): Optional fill color for the area outside the rotated image.
            A 3-tuple fills the R, G, B channels respectively.
            An integer is used for all RGB channels.
            The fill_value values must be in range [0, 255]. Default: ``0``.

    Raises:
        TypeError: If `degrees` is not of type integer, float or sequence.
        TypeError: If `resample` is not of type Inter.
        TypeError: If `expand` is not of type boolean.
        TypeError: If `center` is not of type tuple.
        TypeError: If `fill_value` is not of type int or tuple[int].
        ValueError: If `fill_value` is not in range [0, 255].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> seed = ds.config.get_seed()
        >>> ds.config.set_seed(12345)
        >>> transforms_list = [vision.RandomRotation(degrees=5.0, resample=Inter.NEAREST, expand=True)]
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (107, 107, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomRotation(degrees=90, resample=Inter.NEAREST, expand=True)(data)
        >>> print(output.shape, output.dtype)
        (119, 119, 3) uint8
        >>> ds.config.set_seed(seed)

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_rotation
    def __init__(self, degrees, resample=Inter.NEAREST, expand=False, center=None, fill_value=0):
        super().__init__()

        # Bring `degrees` into a two-element [min, max] list.
        if isinstance(degrees, (int, float)):
            bound = degrees % 360
            degrees = [-bound, bound]
        elif isinstance(degrees, (list, tuple)):
            lower, upper = degrees[0], degrees[1]
            if upper - lower >= 360:
                # The requested span already covers a full turn.
                degrees = [-180, 180]
            else:
                lower, upper = lower % 360, upper % 360
                if lower > upper:
                    # Keep the range ascending after the modulo reduction.
                    upper += 360
                degrees = [lower, upper]

        # A single integer fill value is replicated into a 3-tuple.
        if isinstance(fill_value, int):
            fill_value = (fill_value,) * 3

        self.degrees = degrees
        self.resample = resample
        # Some interpolation modes pin the implementation; for the others it is left untouched here.
        if resample in (Inter.AREA, Inter.PILCUBIC):
            self.implementation = Implementation.C
        elif resample == Inter.ANTIALIAS:
            self.implementation = Implementation.PY
        self.expand = expand
        # The Python path keeps `center` as given, the C path receives () in place of None.
        self.py_center = center
        self.c_center = () if center is None else center
        self.fill_value = fill_value

    def parse(self):
        """Create the cde.RandomRotationOperation; Inter.ANTIALIAS is rejected on this path."""
        if self.resample == Inter.ANTIALIAS:
            raise TypeError("Current Interpolation is not supported with NumPy input.")
        c_resample = Inter.to_c_type(self.resample)
        return cde.RandomRotationOperation(self.degrees, c_resample, self.expand, self.c_center,
                                           self.fill_value)

    def _execute_py(self, img):
        """
        Execute method.

        Args:
            img (PIL Image): Image to be randomly rotated.

        Returns:
            PIL Image, randomly rotated image.
        """
        # Inter.AREA and Inter.PILCUBIC are rejected on the PIL path.
        if self.resample in (Inter.AREA, Inter.PILCUBIC):
            raise TypeError("Current Interpolation is not supported with PIL input.")
        py_resample = Inter.to_python_type(self.resample)
        return util.random_rotation(img, self.degrees, py_resample, self.expand,
                                    self.py_center, self.fill_value)
5615
5616
class RandomSelectSubpolicy(ImageTensorOperation):
    """
    Pick a random sub-policy from a policy list and apply it to the input image.

    Args:
        policy (list[list[tuple[TensorOperation, float]]]): List of sub-policies to choose from.
            A sub-policy is a list of tuple[operation, prob], where operation is a data processing operation and prob
            is the probability that this operation will be applied, and the prob values must be in range [0.0, 1.0].
            Once a sub-policy is selected, each operation within the sub-policy will be applied in sequence according
            to its probability.

    Raises:
        TypeError: If `policy` contains invalid data processing operations.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> policy = [[(vision.RandomRotation((45, 45)), 0.5),
        ...            (vision.RandomVerticalFlip(), 1),
        ...            (vision.RandomColorAdjust(), 0.8)],
        ...           [(vision.RandomRotation((90, 90)), 1),
        ...            (vision.RandomColorAdjust(), 0.2)]]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=vision.RandomSelectSubpolicy(policy),
        ...                                                 input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> policy = [[(vision.RandomRotation((90, 90)), 1), (vision.RandomColorAdjust(), 1)]]
        >>> output = vision.RandomSelectSubpolicy(policy)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_select_subpolicy_op
    def __init__(self, policy):
        super().__init__()
        self.policy = policy
        self.implementation = Implementation.C

    def parse(self):
        """Create the cde.RandomSelectSubpolicyOperation from the stored policy."""

        def _parsed(operation):
            # A truthy operation exposing a `parse` attribute is parsed; anything else is passed through unchanged.
            if operation and getattr(operation, 'parse', None):
                return operation.parse()
            return operation

        # Rebuild the nested policy with the same shape, keeping each probability next to its operation.
        parsed_policy = [[(_parsed(entry[0]), entry[1]) for entry in sub_policy]
                         for sub_policy in self.policy]
        return cde.RandomSelectSubpolicyOperation(parsed_policy)
5683
5684
class RandomSharpness(ImageTensorOperation, PyTensorOperation):
    """
    Adjust the sharpness of the input image by a fixed or random degree. A degree of 0.0 produces a blurred
    image, a degree of 1.0 produces the original image, and a degree of 2.0 produces a sharpened image.

    Args:
        degrees (Union[list, tuple], optional): Range of random sharpness adjustment degrees,
            which must be non-negative. It should be given in (min, max) format. When min=max,
            the operation uses that single fixed magnitude. Default: ``(0.1, 1.9)``.

    Raises:
        TypeError: If `degrees` is not a list or a tuple.
        ValueError: If `degrees` is negative.
        ValueError: If `degrees` is in (max, min) format instead of (min, max).

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomSharpness(degrees=(0.2, 1.9))]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomSharpness(degrees=(0, 0.6))(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_positive_degrees
    def __init__(self, degrees=(0.1, 1.9)):
        super().__init__()
        # Stored as given and forwarded unchanged by both parse() and _execute_py().
        self.degrees = degrees

    def parse(self):
        """Create the cde.RandomSharpnessOperation for the stored degree range."""
        degree_range = self.degrees
        return cde.RandomSharpnessOperation(degree_range)

    def _execute_py(self, img):
        """
        Execute method.

        Args:
            img (PIL Image): Image to be sharpness adjusted.

        Returns:
            PIL Image, sharpness adjusted image.
        """
        degree_range = self.degrees
        return util.random_sharpness(img, degree_range)
5749
5750
class RandomSolarize(ImageTensorOperation):
    """
    Randomly select a subrange within the specified threshold range and set each pixel value inside
    that subrange to (255 - pixel).

    Args:
        threshold (tuple, optional): Range of random solarize threshold. Default: ``(0, 255)``.
            Threshold values should always be given in (min, max) format,
            where min and max are integers in the range [0, 255], and min <= max. The pixel values
            belonging to the [min, max] range will be inverted.
            If min=max, then all pixel values greater than or equal to min(max) are inverted.

    Raises:
        TypeError: If `threshold` is not of type tuple.
        ValueError: If `threshold` is not in range of [0, 255].

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomSolarize(threshold=(10,100))]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomSolarize(threshold=(1, 10))(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_solarize
    def __init__(self, threshold=(0, 255)):
        super().__init__()
        # Stored as given and forwarded unchanged by parse().
        self.threshold = threshold
        self.implementation = Implementation.C

    def parse(self):
        """Create the cde.RandomSolarizeOperation for the stored threshold range."""
        threshold_range = self.threshold
        return cde.RandomSolarizeOperation(threshold_range)
5804
5805
class RandomVerticalFlip(ImageTensorOperation, PyTensorOperation):
    """
    Flip the input image vertically at random, with a given probability.

    Args:
        prob (float, optional): Probability that the image is flipped, which
            must be in range of [0.0, 1.0]. Default: ``0.5``.

    Raises:
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomVerticalFlip(0.25)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 3))
        >>> output = vision.RandomVerticalFlip(1.0)(data)
        >>> print(output.shape, output.dtype)
        (2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_prob
    def __init__(self, prob=0.5):
        super().__init__()
        # Stored as given and forwarded unchanged by both parse() and _execute_py().
        self.prob = prob

    def parse(self):
        """Create the cde.RandomVerticalFlipOperation for the stored probability."""
        flip_prob = self.prob
        return cde.RandomVerticalFlipOperation(flip_prob)

    def _execute_py(self, img):
        """
        Execute method.

        Args:
            img (PIL Image): Image to be vertically flipped.

        Returns:
            PIL Image, randomly vertically flipped image.
        """
        flip_prob = self.prob
        return util.random_vertical_flip(img, flip_prob)
5867
5868
class RandomVerticalFlipWithBBox(ImageTensorOperation):
    """
    Flip the input image vertically at random, with a given probability, and adjust the bounding boxes accordingly.

    Args:
        prob (float, optional): Probability that the image is flipped,
            which must be in range of [0.0, 1.0]. Default: ``0.5``.

    Raises:
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32))
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func],
        ...                                                 input_columns=["image"],
        ...                                                 output_columns=["image", "bbox"])
        >>> transforms_list = [vision.RandomVerticalFlipWithBBox(0.20)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image", "bbox"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["bbox"].shape, item["bbox"].dtype)
        ...     break
        (100, 100, 3) float32
        (1, 4) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(data.dtype))
        >>> func_data, func_bboxes = func(data)
        >>> output = vision.RandomVerticalFlipWithBBox(1)(func_data, func_bboxes)
        >>> print(output[0].shape, output[0].dtype)
        (100, 100, 3) float32
        >>> print(output[1].shape, output[1].dtype)
        (1, 4) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_prob
    def __init__(self, prob=0.5):
        super().__init__()
        # Stored as given and forwarded unchanged by parse().
        self.prob = prob
        self.implementation = Implementation.C

    def parse(self):
        """Create the cde.RandomVerticalFlipWithBBoxOperation for the stored probability."""
        flip_prob = self.prob
        return cde.RandomVerticalFlipWithBBoxOperation(flip_prob)
5929
5930
class Rescale(ImageTensorOperation):
    """
    Rescale the input image with the given rescale and shift factors. The output is computed
    as: output = image * rescale + shift.

    Note:
        This operation is executed on the CPU by default, but it is also supported
        to be executed on the GPU or Ascend via heterogeneous acceleration.

    Args:
        rescale (float): Rescale factor.
        shift (float): Shift factor.

    Raises:
        TypeError: If `rescale` is not of type float.
        TypeError: If `shift` is not of type float.

    Supported Platforms:
        ``CPU`` ``GPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.Rescale(1.0 / 255.0, -1.0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.Rescale(1.0 / 255.0, -1.0)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_rescale
    def __init__(self, rescale, shift):
        super().__init__()
        # Both factors are stored as given and forwarded unchanged by parse().
        self.rescale, self.shift = rescale, shift
        self.implementation = Implementation.C

    def parse(self):
        """Create the cde.RescaleOperation for the stored rescale and shift factors."""
        factor, offset = self.rescale, self.shift
        return cde.RescaleOperation(factor, offset)
5986
5987
5988class Resize(ImageTensorOperation, PyTensorOperation):
5989    """
5990    Resize the input image to the given size with a given interpolation mode :class:`~.vision.Inter` .
5991
5992    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.
5993
5994    Args:
5995        size (Union[int, Sequence[int]]): The output size of the resized image. The size value(s) must be positive.
5996            If size is an integer, the smaller edge of the image will be resized to this value with
5997            the same image aspect ratio.
5998            If size is a sequence of length 2, it should be (height, width).
5999        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
6000            Default: ``Inter.LINEAR``.
6001
6002    Raises:
6003        TypeError: If `size` is not of type int or Sequence[int].
6004        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
6005        ValueError: If `size` is not positive.
6006        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.
6007
6008    Supported Platforms:
6009        ``CPU`` ``Ascend``
6010
6011    Examples:
6012        >>> import numpy as np
6013        >>> import mindspore.dataset as ds
6014        >>> import mindspore.dataset.vision as vision
6015        >>> from mindspore.dataset.vision import Inter
6016        >>>
6017        >>> # Use the transform in dataset pipeline mode
6018        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
6019        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
6020        >>> resize_op = vision.Resize([100, 75], Inter.BICUBIC)
6021        >>> transforms_list = [resize_op]
6022        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
6023        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
6024        ...     print(item["image"].shape, item["image"].dtype)
6025        ...     break
6026        (100, 75, 3) uint8
6027        >>>
6028        >>> # Use the transform in eager mode
6029        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
6030        >>> output = vision.Resize([5, 5], Inter.BICUBIC)(data)
6031        >>> print(output.shape, output.dtype)
6032        (5, 5, 3) uint8
6033
6034    Tutorial Examples:
6035        - `Illustration of vision transforms
6036          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
6037    """
6038
6039    @check_resize_interpolation
6040    def __init__(self, size, interpolation=Inter.LINEAR):
6041        super().__init__()
6042        self.py_size = size
6043        if isinstance(size, int):
6044            size = (size,)
6045        self.c_size = size
6046        self.interpolation = interpolation
6047        if interpolation in [Inter.AREA, Inter.PILCUBIC]:
6048            self.implementation = Implementation.C
6049        elif interpolation == Inter.ANTIALIAS:
6050            self.implementation = Implementation.PY
6051        self.random = False
6052
6053    @check_device_target
6054    def device(self, device_target="CPU"):
6055        """
6056        Set the device for the current operator execution.
6057
6058        - When the device is Ascend, input/output shape should be limited from [4, 6] to [32768, 32768].
6059
6060        Args:
6061            device_target (str, optional): The operator will be executed on this device. Currently supports
6062                ``CPU`` and ``Ascend`` . Default: ``CPU`` .
6063
6064        Raises:
6065            TypeError: If `device_target` is not of type str.
6066            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].
6067
6068        Supported Platforms:
6069            ``CPU`` ``Ascend``
6070
6071        Examples:
6072            >>> import numpy as np
6073            >>> import mindspore.dataset as ds
6074            >>> import mindspore.dataset.vision as vision
6075            >>> from mindspore.dataset.vision import Inter
6076            >>>
6077            >>> # Use the transform in dataset pipeline mode
6078            >>> resize_op = vision.Resize([100, 75], Inter.BICUBIC).device("Ascend")
6079            >>> transforms_list = [resize_op]
6080            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
6081            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
6082            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
6083            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
6084            ...     print(item["image"].shape, item["image"].dtype)
6085            ...     break
6086            (100, 75, 3) uint8
6087            >>>
6088            >>> # Use the transform in eager mode
6089            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
6090            >>> output = vision.Resize([25, 25], Inter.BICUBIC).device("Ascend")(data)
6091            >>> print(output.shape, output.dtype)
6092            (25, 25, 3) uint8
6093
6094        Tutorial Examples:
6095            - `Illustration of vision transforms
6096              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
6097        """
6098        self.device_target = device_target
6099        if self.interpolation not in [Inter.BILINEAR, Inter.CUBIC, Inter.NEAREST] and self.device_target == "Ascend":
6100            raise RuntimeError("Invalid interpolation mode, only support BILINEAR, CUBIC and NEAREST.")
6101        return self
6102
6103    def parse(self):
6104        if self.interpolation == Inter.ANTIALIAS:
6105            raise TypeError("The current InterpolationMode is not supported with NumPy input.")
6106        return cde.ResizeOperation(self.c_size, Inter.to_c_type(self.interpolation), self.device_target)
6107
6108    def _execute_py(self, img):
6109        """
6110        Execute method.
6111
6112        Args:
6113            img (PIL Image): Image to be resized.
6114
6115        Returns:
6116            PIL Image, resized image.
6117        """
6118        if self.interpolation in [Inter.AREA, Inter.PILCUBIC]:
6119            raise TypeError("Current Interpolation is not supported with PIL input.")
6120        return util.resize(img, self.py_size, Inter.to_python_type(self.interpolation))
6121
6122
class ResizedCrop(ImageTensorOperation):
    """
    Crop the input image at a specific region and resize it to desired size.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        top (int): Horizontal ordinate of the upper left corner of the crop region.
        left (int): Vertical ordinate of the upper left corner of the crop region.
        height (int): Height of the crop region.
        width (int): Width of the crop region.
        size (Union[int, Sequence[int, int]]): The size of the output image.
            If int is provided, it is expanded to (size, size), so the output image is a square.
            If Sequence[int, int] is provided, it should be (height, width).
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.BILINEAR``.

    Raises:
        TypeError: If `top` is not of type int.
        ValueError: If `top` is negative.
        TypeError: If `left` is not of type int.
        ValueError: If `left` is negative.
        TypeError: If `height` is not of type int.
        ValueError: If `height` is not positive.
        TypeError: If `width` is not of type int.
        ValueError: If `width` is not positive.
        TypeError: If `size` is not of type int or Sequence[int, int].
        ValueError: If `size` is not positive.
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        RuntimeError: If shape of the input image is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> transforms_list = [vision.ResizedCrop(0, 0, 64, 64, (100, 75), Inter.BILINEAR)]
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 75, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.ResizedCrop(0, 0, 1, 1, (5, 5), Inter.BILINEAR)(data)
        >>> print(output.shape, output.dtype)
        (5, 5, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_resized_crop
    def __init__(self, top, left, height, width, size, interpolation=Inter.BILINEAR):
        super().__init__()
        # A single int is shorthand for a square output of (size, size).
        if isinstance(size, int):
            size = (size, size)

        self.top = top
        self.left = left
        self.height = height
        self.width = width
        self.size = size
        self.interpolation = interpolation
        self.implementation = Implementation.C

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type supports `uint8` and `float32`,
          input channel supports 1 and 3. The input data has a height limit of [4, 32768]
          and a width limit of [6, 32768].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            The operator itself, so that the call can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].
            RuntimeError: If `device_target` is ``Ascend`` and the interpolation mode of this operator
                is not one of BILINEAR, CUBIC and NEAREST.

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>> from mindspore.dataset.vision import Inter
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> resize_crop_op = vision.ResizedCrop(0, 0, 64, 64, (100, 75)).device("Ascend")
            >>> transforms_list = [resize_crop_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 75, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.ResizedCrop(0, 0, 64, 64, (32, 16), Inter.BILINEAR).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (32, 16, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Validate before touching instance state: a rejected request must not leave
        # the operator configured for Ascend with an unsupported interpolation mode.
        if device_target == "Ascend" and self.interpolation not in [Inter.BILINEAR, Inter.CUBIC, Inter.NEAREST]:
            raise RuntimeError("Invalid interpolation mode, only support BILINEAR, CUBIC and NEAREST.")
        self.device_target = device_target
        return self

    def parse(self):
        """Build the C++ resized-crop operation from the stored crop region, size, interpolation and device."""
        return cde.ResizedCropOperation(self.top, self.left, self.height,
                                        self.width, self.size, Inter.to_c_type(self.interpolation), self.device_target)
6254
6255
class ResizeWithBBox(ImageTensorOperation):
    """
    Resize the input image to the given size and update its bounding boxes to match.

    Args:
        size (Union[int, Sequence[int]]): The output size of the resized image.
            When an integer is given, the smaller edge of the image is resized to this value
            and the aspect ratio of the image is kept.
            When a sequence of length 2 is given, it is interpreted as (height, width).
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.LINEAR``.

    Raises:
        TypeError: If `size` is not of type int or Sequence[int].
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        ValueError: If `size` is not positive.
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32))
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func],
        ...                                                 input_columns=["image"],
        ...                                                 output_columns=["image", "bbox"])
        >>> bbox_op = vision.ResizeWithBBox(50, Inter.NEAREST)
        >>> transforms_list = [bbox_op]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image", "bbox"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["bbox"].shape, item["bbox"].dtype)
        ...     break
        (50, 50, 3) float32
        (1, 4) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(data.dtype))
        >>> func_data, func_bboxes = func(data)
        >>> output = vision.ResizeWithBBox(100)(func_data, func_bboxes)
        >>> print(output[0].shape, output[0].dtype)
        (100, 100, 3) float32
        >>> print(output[1].shape, output[1].dtype)
        (1, 4) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_resize_interpolation
    def __init__(self, size, interpolation=Inter.LINEAR):
        super().__init__()
        self.implementation = Implementation.C
        # `size` is stored exactly as given; parse() wraps a bare int on demand.
        self.size = size
        self.interpolation = interpolation

    def parse(self):
        """Build the C++ resize-with-bbox operation, wrapping an int size into a 1-tuple first."""
        c_size = (self.size,) if isinstance(self.size, int) else self.size
        c_mode = Inter.to_c_type(self.interpolation)
        return cde.ResizeWithBBoxOperation(c_size, c_mode)
6327
6328
class RgbToHsv(PyTensorOperation):
    """
    Convert the input numpy.ndarray images from RGB to HSV.

    Args:
        is_hwc (bool, optional): Layout flag of the input. ``True`` means the input image has shape
            <H, W, C> or <N, H, W, C>; ``False`` means it has shape <C, H, W> or <N, C, H, W>.
            Default: ``False``.

    Raises:
        TypeError: If `is_hwc` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> transforms_list = Compose([vision.CenterCrop(20),
        ...                            vision.ToTensor(),
        ...                            vision.RgbToHsv()])
        >>> # apply the transform to dataset through map function
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 20, 20) float64
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RgbToHsv(is_hwc=True)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) float64

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_rgb_to_hsv
    def __init__(self, is_hwc=False):
        super().__init__()
        # This operator is deterministic and only has a Python implementation.
        self.implementation = Implementation.PY
        self.random = False
        self.is_hwc = is_hwc

    def _execute_py(self, rgb_imgs):
        """
        Run the conversion through the Python helper.

        Args:
            rgb_imgs (numpy.ndarray): RGB images to be converted.

        Returns:
            numpy.ndarray, converted HSV images.
        """
        hsv_imgs = util.rgb_to_hsvs(rgb_imgs, self.is_hwc)
        return hsv_imgs
6391
6392
class Rotate(ImageTensorOperation):
    """
    Rotate the input image by specified degrees.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        degrees (Union[int, float]): Rotation degrees.
        resample (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.NEAREST``.
        expand (bool, optional): Optional expansion flag. Default: ``False``. If set to ``True``,
            expand the output image to make it large enough to hold the entire rotated image.
            If set to ``False`` or omitted, make the output image the same size as the input.
            Note that the expand flag assumes rotation around the center and no translation.
        center (tuple, optional): Optional center of rotation (a 2-tuple). Default: ``None``.
            Origin is the top left corner. ``None`` sets to the center of the image.
        fill_value (Union[int, tuple[int]], optional): Optional fill color for the area outside the rotated image.
            If it is a 3-tuple, it is used to fill R, G, B channels respectively.
            If it is an integer, it is used for all RGB channels.
            The fill_value values must be in range [0, 255]. Default: ``0``.

    Raises:
        TypeError: If `degrees` is not of type integer, float or sequence.
        TypeError: If `resample` is not of type :class:`~.vision.Inter` .
        TypeError: If `expand` is not of type bool.
        TypeError: If `center` is not of type tuple.
        TypeError: If `fill_value` is not of type int or tuple[int].
        ValueError: If `fill_value` is not in range [0, 255].
        RuntimeError: If given tensor shape is not <H, W> or <..., H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> transforms_list = [vision.Rotate(degrees=30.0, resample=Inter.NEAREST, expand=True)]
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (137, 137, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.Rotate(degrees=30.0, resample=Inter.NEAREST, expand=True)(data)
        >>> print(output.shape, output.dtype)
        (137, 137, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_rotate
    def __init__(self, degrees, resample=Inter.NEAREST, expand=False, center=None, fill_value=0):
        super().__init__()
        # Scalar angles are normalized into [0, 360).
        if isinstance(degrees, (int, float)):
            degrees %= 360
        # "No explicit center" is stored as an empty tuple.
        if center is None:
            center = ()
        # A single int is used for all three channels.
        if isinstance(fill_value, int):
            fill_value = (fill_value,) * 3
        self.degrees = degrees
        self.resample = resample
        self.expand = expand
        self.center = center
        self.fill_value = fill_value
        self.implementation = Implementation.C

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type supports `uint8`/`float32`, input channel supports 1 and 3.
          The input data has a height limit of [4, 8192] and a width limit of [6, 4096].
        - When the device is Ascend and `expand` is True, `center` does not take effect
          and the image is rotated according to the center of the image.

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            The operator itself, so that the call can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].
            RuntimeError: If `device_target` is ``Ascend`` and `resample` of this operator
                is not one of BILINEAR and NEAREST.

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>> from mindspore.dataset.vision import Inter
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 300, 400, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> rotate_op = vision.Rotate(degrees=90.0, resample=Inter.NEAREST, expand=True).device("Ascend")
            >>> transforms_list = [rotate_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (400, 300, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(300, 400, 3)).astype(np.uint8)
            >>> output = vision.Rotate(degrees=90.0, resample=Inter.NEAREST, expand=True).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (400, 300, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Validate before touching instance state: a rejected request must not leave
        # the operator configured for Ascend with an unsupported resample mode.
        if device_target == "Ascend" and self.resample not in [Inter.BILINEAR, Inter.NEAREST]:
            raise RuntimeError("Invalid interpolation mode, only support BILINEAR and NEAREST.")
        self.device_target = device_target
        return self

    def parse(self):
        """Build the C++ rotate operation from the stored angle, resample mode, flags, fill value and device."""
        return cde.RotateOperation(self.degrees, Inter.to_c_type(self.resample), self.expand, self.center,
                                   self.fill_value, self.device_target)
6525
6526
class SlicePatches(ImageTensorOperation):
    r"""
    Slice Tensor to multiple patches in horizontal and vertical directions.

    This is intended for Tensors with large height and width. When both `num_height`
    and `num_width` are 1 the Tensor is kept as it is. The number of output tensors
    equals :math:`num\_height * num\_width`.

    Args:
        num_height (int, optional): The number of patches in vertical direction, which must be positive. Default: ``1``.
        num_width (int, optional): The number of patches in horizontal direction, which must be positive.
            Default: ``1``.
        slice_mode (SliceMode, optional): A mode represents pad or drop. Default: ``SliceMode.PAD``.
            It can be ``SliceMode.PAD``, ``SliceMode.DROP``.
        fill_value (int, optional): The border width in number of pixels in
            right and bottom direction if slice_mode is set to be SliceMode.PAD.
            The `fill_value` must be in range [0, 255]. Default: ``0``.

    Raises:
        TypeError: If `num_height` is not of type integer.
        TypeError: If `num_width` is not of type integer.
        TypeError: If `slice_mode` is not of type SliceMode.
        TypeError: If `fill_value` is not of type integer.
        ValueError: If `num_height` is not positive.
        ValueError: If `num_width` is not positive.
        ValueError: If `fill_value` is not in range [0, 255].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> # default padding mode
        >>> num_h, num_w = (1, 4)
        >>> slice_patches_op = vision.SlicePatches(num_h, num_w)
        >>> transforms_list = [slice_patches_op]
        >>> cols = ['img' + str(x) for x in range(num_h*num_w)]
        >>>
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"],
        ...                                                 output_columns=cols)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(len(item), item["img0"].shape, item["img0"].dtype)
        ...     break
        4 (100, 25, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.SlicePatches(1, 2)(data)
        >>> print(np.array(output).shape, np.array(output).dtype)
        (2, 100, 50, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_slice_patches
    def __init__(self, num_height=1, num_width=1, slice_mode=SliceMode.PAD, fill_value=0):
        super().__init__()
        self.implementation = Implementation.C
        # Patch grid dimensions.
        self.num_height = num_height
        self.num_width = num_width
        # Slice mode and the fill value that accompanies it.
        self.slice_mode = slice_mode
        self.fill_value = fill_value

    def parse(self):
        """Build the C++ slice-patches operation from the stored grid, slice mode and fill value."""
        c_mode = SliceMode.to_c_type(self.slice_mode)
        return cde.SlicePatchesOperation(self.num_height, self.num_width, c_mode, self.fill_value)
6603
6604
class Solarize(ImageTensorOperation):
    """
    Solarize the image by inverting all pixel values within the threshold.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        threshold (Union[float, Sequence[float, float]]): Range of solarize threshold, always given
            in (min, max) format, where min and max are integers in range of [0, 255], and min <= max.
            Pixel values that fall inside the [min, max] range are inverted.
            If a single value is provided or min=max, then invert all pixel values greater than or equal min(max).

    Raises:
        TypeError: If `threshold` is not of type float or Sequence[float, float].
        ValueError: If `threshold` is not in range of [0, 255].

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.Solarize(threshold=(10, 100))]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.Solarize(threshold=(1, 10))(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_solarize
    def __init__(self, threshold):
        super().__init__()
        self.implementation = Implementation.C
        # A scalar threshold is stored as the degenerate range (threshold, threshold).
        self.threshold = (threshold, threshold) if isinstance(threshold, (float, int)) else threshold

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type only supports `uint8` , input channel supports 1 and 3.
          The input data has a height limit of [4, 8192] and a width limit of [6, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            The operator itself, so that the call can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> solarize_op = vision.Solarize(threshold=(10, 100)).device("Ascend")
            >>> transforms_list = [solarize_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.Solarize(threshold=(10, 100)).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # No per-device restriction is checked here; the target is simply recorded.
        self.device_target = device_target
        return self

    def parse(self):
        """Build the C++ solarize operation from the stored threshold range and device target."""
        threshold_range = self.threshold
        return cde.SolarizeOperation(threshold_range, self.device_target)
6708
6709
class TenCrop(PyTensorOperation):
    """
    Crop the given image into one central crop and four corner crops, plus the same five crops
    taken from the flipped version of the image.

    Args:
        size (Union[int, Sequence[int, int]]): The size of the cropped image.
            If a single integer is provided, a square of size (size, size) will be cropped with this value.
            If a sequence of length 2 is provided, an image of size (height, width) will be cropped.
        use_vertical_flip (bool, optional): If ``True``, flip the images vertically. Otherwise, flip them
            horizontally. Default: ``False``.

    Raises:
        TypeError: If `size` is not of type integer or sequence of integer.
        TypeError: If `use_vertical_flip` is not of type boolean.
        ValueError: If `size` is not positive.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import os
        >>> import numpy as np
        >>> from PIL import Image, ImageDraw
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> class MyDataset:
        ...     def __init__(self):
        ...         self.data = []
        ...         img = Image.new("RGB", (300, 300), (255, 255, 255))
        ...         draw = ImageDraw.Draw(img)
        ...         draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5)
        ...         img.save("./1.jpg")
        ...         data = np.fromfile("./1.jpg", np.uint8)
        ...         self.data.append(data)
        ...
        ...     def __getitem__(self, index):
        ...         return self.data[0]
        ...
        ...     def __len__(self):
        ...         return 5
        >>>
        >>> my_dataset = MyDataset()
        >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image")
        >>> transforms_list = Compose([vision.Decode(to_pil=True),
        ...                            vision.TenCrop(size=200),
        ...                            # 4D stack of 10 images
        ...                            lambda *images: np.stack([vision.ToTensor()(image) for image in images])])
        >>> # apply the transform to dataset through map function
        >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (10, 3, 200, 200) float32
        >>> os.remove("./1.jpg")
        >>>
        >>> # Use the transform in eager mode
        >>> img = Image.new("RGB", (300, 300), (255, 255, 255))
        >>> draw = ImageDraw.Draw(img)
        >>> draw.polygon([(50, 50), (150, 50), (100, 150)], fill=(0, 255, 0), outline=(0, 255, 0))
        >>> img.save("./2.jpg")
        >>> data = Image.open("./2.jpg")
        >>> output = vision.TenCrop(size=200)(data)
        >>> print(len(output), np.array(output[0]).shape, np.array(output[0]).dtype)
        10 (200, 200, 3) uint8
        >>> os.remove("./2.jpg")

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_ten_crop
    def __init__(self, size, use_vertical_flip=False):
        super().__init__()
        # A scalar size is expanded to a square (size, size) pair; sequences are stored unchanged.
        self.size = (size, size) if isinstance(size, int) else size
        self.use_vertical_flip = use_vertical_flip
        self.random = False
        self.implementation = Implementation.PY

    def _execute_py(self, img):
        """
        Produce the ten crops of the input image.

        Args:
            img (PIL Image): Image to be cropped.

        Returns:
            tuple, a tuple of 10 PIL Image, in order of top_left, top_right, bottom_left, bottom_right, center
                of the original image and top_left, top_right, bottom_left, bottom_right, center of the flipped image.
        """
        crop_size, flip_vertically = self.size, self.use_vertical_flip
        return util.ten_crop(img, crop_size, flip_vertically)
6806
6807
class ToNumpy(PyTensorOperation):
    """
    Convert the PIL input image to numpy.ndarray image.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> # Use ToNumpy to explicitly select C++ implementation of subsequent op
        >>> transforms_list = Compose([vision.RandomHorizontalFlip(0.5),
        ...                            vision.ToNumpy(),
        ...                            vision.Resize((50, 60))])
        >>> # apply the transform to dataset through map function
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (50, 60, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = list(np.random.randint(0, 255, size=(32, 32, 3, 3)).astype(np.int32))
        >>> output = vision.ToNumpy()(data)
        >>> print(type(output), output.shape, output.dtype)
        <class 'numpy.ndarray'> (32, 32, 3, 3) int32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    def __init__(self):
        super().__init__()
        self.random = False
        # Tagging this op as "Implementation.C" indicates that the C++ implementation
        # should be selected for the next op in the transforms list.
        self.implementation = Implementation.C

    def _execute_py(self, img):
        """
        Convert the given image into a numpy.ndarray.

        Args:
            img (PIL Image): Image to be converted to numpy.ndarray.

        Returns:
            Image converted to numpy.ndarray
        """
        converted = np.array(img)
        return converted
6863
6864
class ToPIL(PyTensorOperation):
    """
    Convert the input decoded numpy.ndarray image to PIL Image.

    Raises:
        TypeError: If the input image is not of type :class:`numpy.ndarray` or `PIL.Image.Image` .

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> # data is already decoded, but not in PIL Image format
        >>> transforms_list = Compose([vision.ToPIL(),
        ...                            vision.RandomHorizontalFlip(0.5),
        ...                            vision.ToTensor()])
        >>> # apply the transform to dataset through map function
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 100, 100) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.ToPIL()(data)
        >>> print(type(output), np.array(output).shape, np.array(output).dtype)
        <class 'PIL.Image.Image'> (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    def __init__(self):
        super().__init__()
        self.random = False
        self.implementation = Implementation.PY

    def _execute_py(self, img):
        """
        Convert the given image into a PIL Image via ``util.to_pil``.

        Args:
            img (numpy.ndarray): Decoded numpy.ndarray image to be converted to PIL Image.

        Returns:
            PIL Image, converted PIL Image.
        """
        pil_image = util.to_pil(img)
        return pil_image
6922
6923
class ToTensor(ImageTensorOperation):
    """
    Convert the input PIL Image or numpy.ndarray to numpy.ndarray of the desired dtype, rescale the pixel value
    range from [0, 255] to [0.0, 1.0] and change the shape from <H, W, C> to <C, H, W>.

    Args:
        output_type (Union[mindspore.dtype, numpy.dtype], optional): The desired dtype of the output image.
            Default: ``np.float32`` .

    Raises:
        TypeError: If the input image is not of type `PIL.Image.Image` or :class:`numpy.ndarray` .
        TypeError: If dimension of the input image is not 2 or 3.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> # create a list of transformations to be applied to the "image" column of each data row
        >>> transforms_list = Compose([vision.RandomHorizontalFlip(0.5),
        ...                            vision.ToTensor()])
        >>> # apply the transform to dataset through map function
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 100, 100) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.ToTensor()(data)
        >>> print(output.shape, output.dtype)
        (3, 100, 100) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_to_tensor
    def __init__(self, output_type=np.float32):
        super().__init__()
        # Pick the converter matching the kind of dtype supplied: instances of typing.Type go through
        # mstype_to_detype, everything else through nptype_to_detype.
        to_detype = mstype_to_detype if isinstance(output_type, typing.Type) else nptype_to_detype
        # The converted type is stored in its string form.
        self.output_type = str(to_detype(output_type))
        self.random = False
        self.implementation = Implementation.C

    def parse(self):
        """
        Build the C++ operation object for this transform.

        Returns:
            The object created by ``cde.ToTensorOperation`` from the stored output type string.
        """
        type_name = self.output_type
        return cde.ToTensorOperation(type_name)
6983
6984
class ToType(TypeCast):
    """
    Cast the input to a given MindSpore data type or NumPy data type.

    It is the same as that of :class:`mindspore.dataset.transforms.TypeCast` .

    Note:
        This operation is executed on the CPU by default, but it is also supported
        to be executed on the GPU or Ascend via heterogeneous acceleration.

    Args:
        data_type (Union[mindspore.dtype, numpy.dtype]): The desired data type of the output image,
            such as ``numpy.float32`` .

    Raises:
        TypeError: If `data_type` is not of type :class:`mindspore.dtype` or :class:`numpy.dtype` .

    Supported Platforms:
        ``CPU`` ``GPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = Compose([vision.RandomHorizontalFlip(0.5),
        ...                            vision.ToTensor(),
        ...                            vision.ToType(np.float32)])
        >>> # apply the transform to dataset through map function
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 100, 100) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([2.71606445312564e-03, 6.3476562564e-03]).astype(np.float64)
        >>> output = vision.ToType(np.float32)(data)
        >>> print(output, output.dtype)
        [0.00271606 0.00634766] float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """
7035
7036
class TrivialAugmentWide(ImageTensorOperation):
    """
    Apply TrivialAugmentWide data augmentation method on the input image.

    Refer to
    `TrivialAugmentWide: Tuning-free Yet State-of-the-Art Data Augmentation <https://arxiv.org/abs/2103.10158>`_ .

    Only support 3-channel RGB image.

    Args:
        num_magnitude_bins (int, optional): The number of different magnitude values,
            must be greater than or equal to 2. Default: ``31``.
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.NEAREST``.
        fill_value (Union[int, tuple[int, int, int]], optional): Pixel fill value for the area outside the
            transformed image, must be in range of [0, 255]. Default: ``0``.
            If int is provided, pad all RGB channels with this value.
            If tuple[int, int, int] is provided, pad R, G, B channels respectively.

    Raises:
        TypeError: If `num_magnitude_bins` is not of type int.
        ValueError: If `num_magnitude_bins` is less than 2.
        TypeError: If `interpolation` not of type :class:`~.vision.Inter` .
        TypeError: If `fill_value` is not of type int or tuple[int, int, int].
        ValueError: If `fill_value` is not in range of [0, 255].
        RuntimeError: If shape of the input image is not <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.TrivialAugmentWide(num_magnitude_bins=31,
        ...                                              interpolation=Inter.NEAREST,
        ...                                              fill_value=0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.TrivialAugmentWide()(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_trivial_augment_wide
    def __init__(self, num_magnitude_bins=31, interpolation=Inter.NEAREST, fill_value=0):
        super().__init__()
        self.num_magnitude_bins = num_magnitude_bins
        self.interpolation = interpolation
        # A single int is repeated for the three channels; a tuple is stored unchanged.
        self.fill_value = (fill_value,) * 3 if isinstance(fill_value, int) else fill_value
        self.implementation = Implementation.C

    def parse(self):
        """
        Build the C++ operation object for this transform.

        Returns:
            The object created by ``cde.TrivialAugmentWideOperation`` from the stored number of magnitude bins,
            the interpolation mode converted with ``Inter.to_c_type`` and the stored fill value.
        """
        c_interpolation = Inter.to_c_type(self.interpolation)
        return cde.TrivialAugmentWideOperation(self.num_magnitude_bins, c_interpolation, self.fill_value)
7109
7110
class UniformAugment(CompoundOperation):
    """
    Uniformly select a number of transformations from a sequence and apply them
    sequentially and randomly, which means that there is a chance that a chosen
    transformation will not be applied.

    All transformations in the sequence require the output type to be the same as
    the input. Thus, the latter one can deal with the output of the previous one.

    Args:
        transforms (Sequence): Sequence of transformations to select from.
        num_ops (int, optional): Number of transformations to be sequentially and randomly applied.
            Default: ``2``.

    Raises:
        TypeError: If `transforms` is not a sequence of data processing operations.
        TypeError: If `num_ops` is not of type integer.
        ValueError: If `num_ops` is not positive.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> seed = ds.config.get_seed()
        >>> ds.config.set_seed(12345)
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transform = [vision.CenterCrop(64),
        ...              vision.RandomColor(),
        ...              vision.RandomSharpness(),
        ...              vision.RandomRotation(30)]
        >>> transforms_list = Compose([vision.UniformAugment(transform),
        ...                            vision.ToTensor()])
        >>> # apply the transform to dataset through map function
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 100, 100) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> transform = [vision.RandomCrop(size=[20, 40], padding=[32, 32, 32, 32]),
        ...              vision.RandomCrop(size=[20, 40], padding=[32, 32, 32, 32])]
        >>> output = vision.UniformAugment(transform)(data)
        >>> print(output.shape, output.dtype)
        (20, 40, 3) uint8
        >>> ds.config.set_seed(seed)

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_uniform_augment
    def __init__(self, transforms, num_ops=2):
        super().__init__(transforms)
        self.num_ops = num_ops
        self.random = True

    def parse(self):
        """
        Build the C++ operation object for this transform.

        Returns:
            The object created by ``cde.UniformAugOperation`` from the result of ``self.parse_transforms()``
            and the stored number of operations.
        """
        return cde.UniformAugOperation(self.parse_transforms(), self.num_ops)

    def _execute_py(self, img):
        """
        Apply the augmentation to a single image.

        Args:
            img (PIL Image): Image to be transformed.

        Returns:
            PIL Image, transformed image.
        """
        # The helper receives a copy of the stored transforms rather than the stored object itself.
        candidates = self.transforms.copy()
        return util.uniform_augment(img, candidates, self.num_ops)
7192
7193
class VerticalFlip(ImageTensorOperation):
    """
    Flip the input image vertically.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Raises:
        RuntimeError: If given tensor shape is not <H, W> or <..., H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.VerticalFlip()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.VerticalFlip()(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    def __init__(self):
        super().__init__()
        self.implementation = Implementation.C

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type supports `uint8` and `float32`,
          input channel supports 1 and 3. The input data has a height limit of [4, 8192]
          and a width limit of [6, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> vertical_flip_op = vision.VerticalFlip().device("Ascend")
            >>> transforms_list = [vertical_flip_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.VerticalFlip().device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Store the target and hand back the same instance so calls can be chained,
        # as in ``VerticalFlip().device("Ascend")(data)``.
        self.device_target = device_target
        return self

    def parse(self):
        """
        Build the C++ operation object for this transform.

        Returns:
            The object created by ``cde.VerticalFlipOperation`` from the stored device target.
        """
        target = self.device_target
        return cde.VerticalFlipOperation(target)
7287
7288
def not_random(func):
    """
    Mark the given function as "not random", i.e., it produces deterministic result.
    A Python function can only be cached after it is specified as "not random".

    The same function object is returned, so this can be applied as a decorator.

    Args:
        func: The function to mark; its ``random`` attribute is set to ``False``.

    Returns:
        The same `func` object that was passed in.
    """
    setattr(func, "random", False)
    return func
7296