• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2019-2024 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""
15Interpolation Mode, Resampling Filters
16"""
17from enum import Enum, IntEnum
18from fractions import Fraction
19import numbers
20
21import numpy as np
22from PIL import Image
23
24import mindspore
25import mindspore._c_dataengine as cde
26
# Pillow 9.1.0 deprecated the module-level constants in favour of the
# Image.Transpose / Image.Transform / Image.Resampling enumerations.
# The (major, minor) version is compared numerically, because comparing the
# version strings lexicographically is only correct by coincidence of digit counts.
_PIL_MAJOR_MINOR = tuple(int(part) for part in Image.__version__.split(".")[:2])
if _PIL_MAJOR_MINOR >= (9, 1):
    FLIP_LEFT_RIGHT = Image.Transpose.FLIP_LEFT_RIGHT
    FLIP_TOP_BOTTOM = Image.Transpose.FLIP_TOP_BOTTOM
    PERSPECTIVE = Image.Transform.PERSPECTIVE
    AFFINE = Image.Transform.AFFINE
    NEAREST = Image.Resampling.NEAREST
    ANTIALIAS = Image.Resampling.LANCZOS
    LINEAR = Image.Resampling.BILINEAR
    CUBIC = Image.Resampling.BICUBIC
else:
    # Older Pillow releases only provide the legacy module-level names.
    FLIP_LEFT_RIGHT = Image.FLIP_LEFT_RIGHT
    FLIP_TOP_BOTTOM = Image.FLIP_TOP_BOTTOM
    PERSPECTIVE = Image.PERSPECTIVE
    AFFINE = Image.AFFINE
    NEAREST = Image.NEAREST
    ANTIALIAS = Image.ANTIALIAS
    LINEAR = Image.LINEAR
    CUBIC = Image.CUBIC
46
47
class AutoAugmentPolicy(str, Enum):
    """
    AutoAugment policy for different datasets.

    Possible enumeration values are: ``AutoAugmentPolicy.IMAGENET``, ``AutoAugmentPolicy.CIFAR10``,
    ``AutoAugmentPolicy.SVHN``.

    Each policy contains 25 pairs of augmentation operations. When using AutoAugment, each image is randomly
    transformed with one of these operation pairs. Each pair has 2 different operations. The following shows
    all of these augmentation operations, including operation names with their probabilities and random params.

    - ``AutoAugmentPolicy.IMAGENET``: dataset auto augment policy for ImageNet.

      .. code-block::

          Augmentation operations pair:
          [(("Posterize", 0.4, 8), ("Rotate", 0.6, 9)),        (("Solarize", 0.6, 5), ("AutoContrast", 0.6, None)),
           (("Equalize", 0.8, None), ("Equalize", 0.6, None)), (("Posterize", 0.6, 7), ("Posterize", 0.6, 6)),
           (("Equalize", 0.4, None), ("Solarize", 0.2, 4)),    (("Equalize", 0.4, None), ("Rotate", 0.8, 8)),
           (("Solarize", 0.6, 3), ("Equalize", 0.6, None)),    (("Posterize", 0.8, 5), ("Equalize", 1.0, None)),
           (("Rotate", 0.2, 3), ("Solarize", 0.6, 8)),         (("Equalize", 0.6, None), ("Posterize", 0.4, 6)),
           (("Rotate", 0.8, 8), ("Color", 0.4, 0)),            (("Rotate", 0.4, 9), ("Equalize", 0.6, None)),
           (("Equalize", 0.0, None), ("Equalize", 0.8, None)), (("Invert", 0.6, None), ("Equalize", 1.0, None)),
           (("Color", 0.6, 4), ("Contrast", 1.0, 8)),          (("Rotate", 0.8, 8), ("Color", 1.0, 2)),
           (("Color", 0.8, 8), ("Solarize", 0.8, 7)),          (("Sharpness", 0.4, 7), ("Invert", 0.6, None)),
           (("ShearX", 0.6, 5), ("Equalize", 1.0, None)),      (("Color", 0.4, 0), ("Equalize", 0.6, None)),
           (("Equalize", 0.4, None), ("Solarize", 0.2, 4)),    (("Solarize", 0.6, 5), ("AutoContrast", 0.6, None)),
           (("Invert", 0.6, None), ("Equalize", 1.0, None)),   (("Color", 0.6, 4), ("Contrast", 1.0, 8)),
           (("Equalize", 0.8, None), ("Equalize", 0.6, None))]

    - ``AutoAugmentPolicy.CIFAR10``: dataset auto augment policy for Cifar10.

      .. code-block::

          Augmentation operations pair:
          [(("Invert", 0.1, None), ("Contrast", 0.2, 6)),         (("Rotate", 0.7, 2), ("TranslateX", 0.3, 9)),
           (("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)),         (("ShearY", 0.5, 8), ("TranslateY", 0.7, 9)),
           (("AutoContrast", 0.5, None), ("Equalize", 0.9, None)), (("ShearY", 0.2, 7), ("Posterize", 0.3, 7)),
           (("Color", 0.4, 3), ("Brightness", 0.6, 7)),            (("Sharpness", 0.3, 9), ("Brightness", 0.7, 9)),
           (("Equalize", 0.6, None), ("Equalize", 0.5, None)),     (("Contrast", 0.6, 7), ("Sharpness", 0.6, 5)),
           (("Color", 0.7, 7), ("TranslateX", 0.5, 8)),            (("Equalize", 0.8, None), ("Invert", 0.1, None)),
           (("TranslateY", 0.4, 3), ("Sharpness", 0.2, 6)),        (("Brightness", 0.9, 6), ("Color", 0.2, 8)),
           (("Solarize", 0.5, 2), ("Invert", 0.0, None)),          (("TranslateY", 0.9, 9), ("TranslateY", 0.7, 9)),
           (("Equalize", 0.2, None), ("Equalize", 0.6, None)),     (("Color", 0.9, 9), ("Equalize", 0.6, None)),
           (("AutoContrast", 0.8, None), ("Solarize", 0.2, 8)),    (("Brightness", 0.1, 3), ("Color", 0.7, 0)),
           (("Solarize", 0.4, 5), ("AutoContrast", 0.9, None)),
           (("AutoContrast", 0.9, None), ("Solarize", 0.8, 3)),
           (("TranslateY", 0.7, 9), ("AutoContrast", 0.9, None)),
           (("Equalize", 0.3, None), ("AutoContrast", 0.4, None)),
           (("Equalize", 0.2, None), ("AutoContrast", 0.6, None))]

    - ``AutoAugmentPolicy.SVHN``: dataset auto augment policy for SVHN.

      .. code-block::

          Augmentation operations pair:
          [(("ShearX", 0.9, 4), ("Invert", 0.2, None)),          (("ShearY", 0.9, 8), ("Invert", 0.7, None)),
           (("Equalize", 0.6, None), ("Solarize", 0.6, 6)),      (("Invert", 0.9, None), ("Equalize", 0.6, None)),
           (("Equalize", 0.6, None), ("Rotate", 0.9, 3)),        (("ShearX", 0.9, 4), ("AutoContrast", 0.8, None)),
           (("ShearY", 0.9, 8), ("Invert", 0.4, None)),          (("ShearY", 0.9, 5), ("Solarize", 0.2, 6)),
           (("Invert", 0.9, None), ("AutoContrast", 0.8, None)), (("Equalize", 0.6, None), ("Rotate", 0.9, 3)),
           (("ShearX", 0.9, 4), ("Solarize", 0.3, 3)),           (("ShearY", 0.8, 8), ("Invert", 0.7, None)),
           (("Equalize", 0.9, None), ("TranslateY", 0.6, 6)),    (("Invert", 0.9, None), ("Equalize", 0.6, None)),
           (("Contrast", 0.3, 3), ("Rotate", 0.8, 4)),           (("Invert", 0.8, None), ("TranslateY", 0.0, 2)),
           (("ShearY", 0.7, 6), ("Solarize", 0.4, 8)),           (("Invert", 0.6, None), ("Rotate", 0.8, 4)),
           (("ShearY", 0.3, 7), ("TranslateX", 0.9, 3)),         (("ShearX", 0.1, 6), ("Invert", 0.6, None)),
           (("Solarize", 0.7, 2), ("TranslateY", 0.6, 7)),       (("ShearY", 0.8, 4), ("Invert", 0.8, None)),
           (("ShearX", 0.7, 9), ("TranslateY", 0.8, 3)),         (("ShearY", 0.8, 5), ("AutoContrast", 0.7, None)),
           (("ShearX", 0.7, 2), ("Invert", 0.1, None))]
    """
    IMAGENET: str = "imagenet"
    CIFAR10: str = "cifar10"
    SVHN: str = "svhn"

    @staticmethod
    def to_c_type(policy):
        """
        Look up the `cde.AutoAugmentPolicy` value that corresponds to `policy`.

        Raises RuntimeError when `policy` is not one of the supported policies.
        """
        policy_to_c = {
            AutoAugmentPolicy.IMAGENET: cde.AutoAugmentPolicy.DE_AUTO_AUGMENT_POLICY_IMAGENET,
            AutoAugmentPolicy.CIFAR10: cde.AutoAugmentPolicy.DE_AUTO_AUGMENT_POLICY_CIFAR10,
            AutoAugmentPolicy.SVHN: cde.AutoAugmentPolicy.DE_AUTO_AUGMENT_POLICY_SVHN,
        }

        c_policy = policy_to_c.get(policy)
        if c_policy is not None:
            return c_policy
        raise RuntimeError("Unsupported AutoAugmentPolicy, only support IMAGENET, CIFAR10, and SVHN.")
135
136
class Border(str, Enum):
    """
    Padding Mode, Border Type.

    Possible enumeration values are: ``Border.CONSTANT``, ``Border.EDGE``, ``Border.REFLECT``, ``Border.SYMMETRIC``.

    - ``Border.CONSTANT`` : fills the border with constant values.
    - ``Border.EDGE`` : pads with the last value on the edge.
    - ``Border.REFLECT`` : reflects the values on the edge, omitting the last value of the edge.
      For example, padding [1,2,3,4] with 2 elements on both sides will result in [3,2,1,2,3,4,3,2].
    - ``Border.SYMMETRIC`` : reflects the values on the edge, repeating the last value of the edge.
      For example, padding [1,2,3,4] with 2 elements on both sides will result in [2,1,1,2,3,4,4,3].

    Note:
        This class is derived from class str so that it is JSON serializable.
    """
    CONSTANT: str = "constant"
    EDGE: str = "edge"
    REFLECT: str = "reflect"
    SYMMETRIC: str = "symmetric"

    @staticmethod
    def to_python_type(border_type):
        """
        Look up the plain Python string that corresponds to `border_type`.

        Raises RuntimeError when `border_type` is not a supported border type.
        """
        border_to_name = {
            Border.CONSTANT: 'constant',
            Border.EDGE: 'edge',
            Border.REFLECT: 'reflect',
            Border.SYMMETRIC: 'symmetric',
        }

        name = border_to_name.get(border_type)
        if name is not None:
            return name
        raise RuntimeError("Unsupported Border type, only support CONSTANT, EDGE, REFLECT and SYMMETRIC.")

    @staticmethod
    def to_c_type(border_type):
        """
        Look up the `cde.BorderType` value that corresponds to `border_type`.

        Raises RuntimeError when `border_type` is not a supported border type.
        """
        border_to_c = {
            Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
            Border.EDGE: cde.BorderType.DE_BORDER_EDGE,
            Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
            Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC,
        }

        c_border = border_to_c.get(border_type)
        if c_border is not None:
            return c_border
        raise RuntimeError("Unsupported Border type, only support CONSTANT, EDGE, REFLECT and SYMMETRIC.")
187
188
class ConvertMode(IntEnum):
    """
    The color conversion mode.

    Possible enumeration values are as follows:

    - ``ConvertMode.COLOR_BGR2BGRA``: convert BGR format images to BGRA format images.
    - ``ConvertMode.COLOR_RGB2RGBA``: convert RGB format images to RGBA format images.
    - ``ConvertMode.COLOR_BGRA2BGR``: convert BGRA format images to BGR format images.
    - ``ConvertMode.COLOR_RGBA2RGB``: convert RGBA format images to RGB format images.
    - ``ConvertMode.COLOR_BGR2RGBA``: convert BGR format images to RGBA format images.
    - ``ConvertMode.COLOR_RGB2BGRA``: convert RGB format images to BGRA format images.
    - ``ConvertMode.COLOR_RGBA2BGR``: convert RGBA format images to BGR format images.
    - ``ConvertMode.COLOR_BGRA2RGB``: convert BGRA format images to RGB format images.
    - ``ConvertMode.COLOR_BGR2RGB``: convert BGR format images to RGB format images.
    - ``ConvertMode.COLOR_RGB2BGR``: convert RGB format images to BGR format images.
    - ``ConvertMode.COLOR_BGRA2RGBA``: convert BGRA format images to RGBA format images.
    - ``ConvertMode.COLOR_RGBA2BGRA``: convert RGBA format images to BGRA format images.
    - ``ConvertMode.COLOR_BGR2GRAY``: convert BGR format images to GRAY format images.
    - ``ConvertMode.COLOR_RGB2GRAY``: convert RGB format images to GRAY format images.
    - ``ConvertMode.COLOR_GRAY2BGR``: convert GRAY format images to BGR format images.
    - ``ConvertMode.COLOR_GRAY2RGB``: convert GRAY format images to RGB format images.
    - ``ConvertMode.COLOR_GRAY2BGRA``: convert GRAY format images to BGRA format images.
    - ``ConvertMode.COLOR_GRAY2RGBA``: convert GRAY format images to RGBA format images.
    - ``ConvertMode.COLOR_BGRA2GRAY``: convert BGRA format images to GRAY format images.
    - ``ConvertMode.COLOR_RGBA2GRAY``: convert RGBA format images to GRAY format images.
    """
    # Members assigned from an earlier member share its value and are therefore enum aliases.
    COLOR_BGR2BGRA = 0
    COLOR_RGB2RGBA = COLOR_BGR2BGRA
    COLOR_BGRA2BGR = 1
    COLOR_RGBA2RGB = COLOR_BGRA2BGR
    COLOR_BGR2RGBA = 2
    COLOR_RGB2BGRA = COLOR_BGR2RGBA
    COLOR_RGBA2BGR = 3
    COLOR_BGRA2RGB = COLOR_RGBA2BGR
    COLOR_BGR2RGB = 4
    COLOR_RGB2BGR = COLOR_BGR2RGB
    COLOR_BGRA2RGBA = 5
    COLOR_RGBA2BGRA = COLOR_BGRA2RGBA
    COLOR_BGR2GRAY = 6
    COLOR_RGB2GRAY = 7
    COLOR_GRAY2BGR = 8
    COLOR_GRAY2RGB = COLOR_GRAY2BGR
    COLOR_GRAY2BGRA = 9
    COLOR_GRAY2RGBA = COLOR_GRAY2BGRA
    COLOR_BGRA2GRAY = 10
    COLOR_RGBA2GRAY = 11

    @staticmethod
    def to_c_type(mode):
        """
        Look up the `cde.ConvertMode` value that corresponds to `mode`.

        Raises RuntimeError when `mode` is not a supported conversion mode.
        """
        # Alias members are the same key as their canonical member, so a later
        # entry replaces the value of an earlier one; the entry order is kept as is.
        mode_to_c = {
            ConvertMode.COLOR_BGR2BGRA: cde.ConvertMode.DE_COLOR_BGR2BGRA,
            ConvertMode.COLOR_RGB2RGBA: cde.ConvertMode.DE_COLOR_RGB2RGBA,
            ConvertMode.COLOR_BGRA2BGR: cde.ConvertMode.DE_COLOR_BGRA2BGR,
            ConvertMode.COLOR_RGBA2RGB: cde.ConvertMode.DE_COLOR_RGBA2RGB,
            ConvertMode.COLOR_BGR2RGBA: cde.ConvertMode.DE_COLOR_BGR2RGBA,
            ConvertMode.COLOR_RGB2BGRA: cde.ConvertMode.DE_COLOR_RGB2BGRA,
            ConvertMode.COLOR_RGBA2BGR: cde.ConvertMode.DE_COLOR_RGBA2BGR,
            ConvertMode.COLOR_BGRA2RGB: cde.ConvertMode.DE_COLOR_BGRA2RGB,
            ConvertMode.COLOR_BGR2RGB: cde.ConvertMode.DE_COLOR_BGR2RGB,
            ConvertMode.COLOR_RGB2BGR: cde.ConvertMode.DE_COLOR_RGB2BGR,
            ConvertMode.COLOR_BGRA2RGBA: cde.ConvertMode.DE_COLOR_BGRA2RGBA,
            ConvertMode.COLOR_RGBA2BGRA: cde.ConvertMode.DE_COLOR_RGBA2BGRA,
            ConvertMode.COLOR_BGR2GRAY: cde.ConvertMode.DE_COLOR_BGR2GRAY,
            ConvertMode.COLOR_RGB2GRAY: cde.ConvertMode.DE_COLOR_RGB2GRAY,
            ConvertMode.COLOR_GRAY2BGR: cde.ConvertMode.DE_COLOR_GRAY2BGR,
            ConvertMode.COLOR_GRAY2RGB: cde.ConvertMode.DE_COLOR_GRAY2RGB,
            ConvertMode.COLOR_GRAY2BGRA: cde.ConvertMode.DE_COLOR_GRAY2BGRA,
            ConvertMode.COLOR_GRAY2RGBA: cde.ConvertMode.DE_COLOR_GRAY2RGBA,
            ConvertMode.COLOR_BGRA2GRAY: cde.ConvertMode.DE_COLOR_BGRA2GRAY,
            ConvertMode.COLOR_RGBA2GRAY: cde.ConvertMode.DE_COLOR_RGBA2GRAY,
        }

        c_mode = mode_to_c.get(mode)
        if c_mode is not None:
            return c_mode
        raise RuntimeError("Unsupported ConvertMode, see https://www.mindspore.cn/docs/zh-CN/master/api_python/"
                           "dataset_vision/mindspore.dataset.vision.ConvertColor.html for more details.")
269
270
class ImageBatchFormat(IntEnum):
    """
    Data Format of images after batch operation.

    Possible enumeration values are: ``ImageBatchFormat.NHWC``, ``ImageBatchFormat.NCHW``.

    - ``ImageBatchFormat.NHWC``: store the data in the order batch N, height H, width W, channels C.
    - ``ImageBatchFormat.NCHW``: store the data in the order batch N, channels C, height H, width W.
    """
    NHWC = 0
    NCHW = 1

    @staticmethod
    def to_c_type(image_batch_format):
        """
        Look up the `cde.ImageBatchFormat` value that corresponds to `image_batch_format`.

        Raises RuntimeError when `image_batch_format` is not a supported format.
        """
        format_to_c = {
            ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC,
            ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW,
        }

        c_format = format_to_c.get(image_batch_format)
        if c_format is not None:
            return c_format
        raise RuntimeError("Unsupported ImageBatchFormat, only support NHWC and NCHW.")
295
296
class ImageReadMode(IntEnum):
    """
    The read mode used for the image file.

    Possible enumeration values are: ``ImageReadMode.UNCHANGED``, ``ImageReadMode.GRAYSCALE``, ``ImageReadMode.COLOR``.

    - ``ImageReadMode.UNCHANGED``: keep the output in the original format.
    - ``ImageReadMode.GRAYSCALE``: convert the output into one channel grayscale data.
    - ``ImageReadMode.COLOR``: convert the output into three channels RGB color data.
    """
    UNCHANGED = 0
    GRAYSCALE = 1
    COLOR = 2

    @staticmethod
    def to_c_type(image_read_mode):
        """
        Look up the `cde.ImageReadMode` value that corresponds to `image_read_mode`.

        Raises RuntimeError when `image_read_mode` is not a supported read mode.
        """
        read_mode_to_c = {
            ImageReadMode.UNCHANGED: cde.ImageReadMode.DE_IMAGE_READ_MODE_UNCHANGED,
            ImageReadMode.GRAYSCALE: cde.ImageReadMode.DE_IMAGE_READ_MODE_GRAYSCALE,
            ImageReadMode.COLOR: cde.ImageReadMode.DE_IMAGE_READ_MODE_COLOR,
        }

        c_read_mode = read_mode_to_c.get(image_read_mode)
        if c_read_mode is not None:
            return c_read_mode
        raise RuntimeError("Unsupported ImageReadMode, only support UNCHANGED, GRAYSCALE and COLOR.")
324
325
class Inter(IntEnum):
    """
    Interpolation methods.

    Available values are as follows:

    - ``Inter.NEAREST`` : Nearest neighbor interpolation.
    - ``Inter.ANTIALIAS`` : Antialias interpolation. Supported only when the input is PIL.Image.Image.
    - ``Inter.LINEAR`` : Linear interpolation, the same as ``Inter.BILINEAR``.
    - ``Inter.BILINEAR`` : Bilinear interpolation.
    - ``Inter.CUBIC`` : Cubic interpolation, the same as ``Inter.BICUBIC``.
    - ``Inter.BICUBIC`` : Bicubic interpolation.
    - ``Inter.AREA`` : Pixel area interpolation. Supported only when the input is numpy.ndarray.
    - ``Inter.PILCUBIC`` : Pillow implementation of bicubic interpolation. Supported only when the input
      is numpy.ndarray.
    """
    NEAREST = 0
    ANTIALIAS = 1
    # BILINEAR and BICUBIC are defined first, so LINEAR and CUBIC are their aliases.
    BILINEAR = 2
    LINEAR = BILINEAR
    BICUBIC = 3
    CUBIC = BICUBIC
    AREA = 4
    PILCUBIC = 5

    @staticmethod
    def to_python_type(inter_type):
        """
        Look up the module-level Pillow constant that corresponds to `inter_type`.

        Raises RuntimeError when `inter_type` has no Pillow counterpart.
        """
        # The bare names below resolve to the module-level constants, not to the enum members.
        inter_to_pil = {
            Inter.NEAREST: NEAREST,
            Inter.ANTIALIAS: ANTIALIAS,
            Inter.LINEAR: LINEAR,
            Inter.CUBIC: CUBIC,
        }

        pil_filter = inter_to_pil.get(inter_type)
        if pil_filter is not None:
            return pil_filter
        raise RuntimeError("Unsupported interpolation, only support NEAREST, ANTIALIAS, LINEAR and CUBIC.")

    @staticmethod
    def to_c_type(inter_type):
        """
        Look up the `cde.InterpolationMode` value that corresponds to `inter_type`.

        Raises RuntimeError when `inter_type` has no `cde` counterpart.
        """
        inter_to_c = {
            Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR,
            Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR,
            Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC,
            Inter.AREA: cde.InterpolationMode.DE_INTER_AREA,
            Inter.PILCUBIC: cde.InterpolationMode.DE_INTER_PILCUBIC,
        }

        c_inter = inter_to_c.get(inter_type)
        if c_inter is not None:
            return c_inter
        raise RuntimeError("Unsupported interpolation, only support NEAREST, LINEAR, CUBIC, AREA and PILCUBIC.")
380
381
class SliceMode(IntEnum):
    """
    Mode to Slice Tensor into multiple parts.

    Possible enumeration values are: ``SliceMode.PAD``, ``SliceMode.DROP``.

    - ``SliceMode.PAD``: pad some pixels before slicing the Tensor if needed.
    - ``SliceMode.DROP``: drop remainder pixels before slicing the Tensor if needed.
    """
    PAD = 0
    DROP = 1

    @staticmethod
    def to_c_type(mode):
        """
        Look up the `cde.SliceMode` value that corresponds to `mode`.

        Raises RuntimeError when `mode` is not a supported slice mode.
        """
        slice_mode_to_c = {
            SliceMode.PAD: cde.SliceMode.DE_SLICE_PAD,
            SliceMode.DROP: cde.SliceMode.DE_SLICE_DROP,
        }

        c_slice_mode = slice_mode_to_c.get(mode)
        if c_slice_mode is not None:
            return c_slice_mode
        raise RuntimeError("Unsupported SliceMode, only support PAD and DROP.")
406
407
def encode_jpeg(image, quality=75):
    """
    Encode the input image as JPEG data.

    Args:
        image (Union[numpy.ndarray, mindspore.Tensor]): The image to be encoded.
        quality (int, optional): Quality of the resulting JPEG data, in range of [1, 100]. Default: ``75``.

    Returns:
        numpy.ndarray, one dimension uint8 data.

    Raises:
        TypeError: If `image` is not of type numpy.ndarray or mindspore.Tensor.
        TypeError: If `quality` is not of type int.
        RuntimeError: If the data type of `image` is not uint8.
        RuntimeError: If the shape of `image` is not <H, W> or <H, W, 1> or <H, W, 3>.
        RuntimeError: If `quality` is less than 1 or greater than 100.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> import numpy as np
        >>> # Generate a random image with height=120, width=340, channels=3
        >>> image = np.random.randint(256, size=(120, 340, 3), dtype=np.uint8)
        >>> jpeg_data = vision.encode_jpeg(image)
    """
    # The quality argument is validated before the image, so its TypeError takes precedence.
    if not isinstance(quality, int):
        raise TypeError("Input quality is not of type {0}, but got: {1}.".format(int, type(quality)))

    # Normalise both accepted image types to a numpy array before handing over to cde.
    if isinstance(image, np.ndarray):
        pixels = image
    elif isinstance(image, mindspore.Tensor):
        pixels = image.asnumpy()
    else:
        raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray,
                                                                                      mindspore.Tensor, type(image)))
    return cde.encode_jpeg(cde.Tensor(pixels), quality).as_array()
444
445
def encode_png(image, compression_level=6):
    """
    Encode the input image as PNG data.

    Args:
        image (Union[numpy.ndarray, mindspore.Tensor]): The image to be encoded.
        compression_level (int, optional): The `compression_level` for encoding, in range of [0, 9].
            Default: ``6``.

    Returns:
        numpy.ndarray, one dimension uint8 data.

    Raises:
        TypeError: If `image` is not of type numpy.ndarray or mindspore.Tensor.
        TypeError: If `compression_level` is not of type int.
        RuntimeError: If the data type of `image` is not uint8.
        RuntimeError: If the shape of `image` is not <H, W> or <H, W, 1> or <H, W, 3>.
        RuntimeError: If `compression_level` is less than 0 or greater than 9.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> import numpy as np
        >>> # Generate a random image with height=120, width=340, channels=3
        >>> image = np.random.randint(256, size=(120, 340, 3), dtype=np.uint8)
        >>> png_data = vision.encode_png(image)
    """
    # The compression level is validated before the image, so its TypeError takes precedence.
    if not isinstance(compression_level, int):
        raise TypeError("Input compression_level is not of type {0}, but got: {1}.".format(int,
                                                                                           type(compression_level)))

    # Normalise both accepted image types to a numpy array before handing over to cde.
    if isinstance(image, np.ndarray):
        pixels = image
    elif isinstance(image, mindspore.Tensor):
        pixels = image.asnumpy()
    else:
        raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray,
                                                                                      mindspore.Tensor, type(image)))
    return cde.encode_png(cde.Tensor(pixels), compression_level).as_array()
484
485
def get_image_num_channels(image):
    """
    Get the number of input image channels.

    Args:
        image (Union[numpy.ndarray, PIL.Image.Image]): Image to get the number of channels.

    Returns:
        int, the number of input image channels.

    Raises:
        RuntimeError: If the dimension of `image` is less than 2.
        TypeError: If `image` is not of type <class 'numpy.ndarray'> or <class 'PIL.Image.Image'>.

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> from PIL import Image
        >>> image = Image.open("/path/to/image_file")
        >>> num_channels = vision.get_image_num_channels(image)
    """
    # numpy input is delegated to cde.
    if isinstance(image, np.ndarray):
        return cde.get_image_num_channels(cde.Tensor(image))

    if not isinstance(image, Image.Image):
        raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray, Image.Image,
                                                                                      type(image)))

    # For PIL input, the band count is used when available; otherwise fall back to `channels`.
    if hasattr(image, "getbands"):
        return len(image.getbands())
    return image.channels
517
518
def get_image_size(image):
    """
    Get the size of input image as [height, width].

    Args:
        image (Union[numpy.ndarray, PIL.Image.Image]): The image to get size.

    Returns:
        list[int, int], the image size.

    Raises:
        RuntimeError: If the dimension of `image` is less than 2.
        TypeError: If `image` is not of type <class 'numpy.ndarray'> or <class 'PIL.Image.Image'>.

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> from PIL import Image
        >>> image = Image.open("/path/to/image_file")
        >>> image_size = vision.get_image_size(image)
    """
    # numpy input is delegated to cde.
    if isinstance(image, np.ndarray):
        return cde.get_image_size(cde.Tensor(image))

    if not isinstance(image, Image.Image):
        raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray, Image.Image,
                                                                                      type(image)))

    # Swap the first two entries of the PIL size so the result is [height, width].
    dims = list(image.size)
    dims[:2] = dims[1], dims[0]
    return dims
548
549
def parse_padding(padding):
    """
    Normalise a padding specification.

    A single number is repeated for all four sides, a two-element sequence
    ``(a, b)`` expands to ``(a, b, a, b)`` (left, top, right, bottom), and a list
    is converted to a tuple. Any other input is returned unchanged.
    """
    if isinstance(padding, numbers.Number):
        return (padding,) * 4

    if len(padding) == 2:
        horizontal, vertical = padding[0], padding[1]
        return (horizontal, vertical, horizontal, vertical)

    return tuple(padding) if isinstance(padding, list) else padding
562
563
def read_file(filename):
    """
    Read a file in binary mode.

    Args:
        filename(str): The path to the file to be read.

    Returns:
        numpy.ndarray, the one dimension uint8 data.

    Raises:
        TypeError: If `filename` is not of type str.
        RuntimeError: If `filename` does not exist or is not a common file.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> output = vision.read_file("/path/to/file")
    """
    # Reject non-string paths up front; the read itself is delegated to cde.
    if not isinstance(filename, str):
        raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename)))
    return cde.read_file(filename).as_array()
588
589
def read_image(filename, mode=ImageReadMode.UNCHANGED):
    """
    Read an image file and decode it into one channel grayscale data or RGB color data.
    Supported file types are JPEG, PNG, BMP, TIFF.

    Args:
        filename(str): The path to the image file to be read.
        mode(ImageReadMode, optional): The mode used for decoding the image. It can be
            ``ImageReadMode.UNCHANGED``, ``ImageReadMode.GRAYSCALE``, ``ImageReadMode.COLOR``.
            Default: ``ImageReadMode.UNCHANGED``.

            - ImageReadMode.UNCHANGED, keep the output in the original format.

            - ImageReadMode.GRAYSCALE, convert the output into one channel grayscale data.

            - ImageReadMode.COLOR, convert the output into three channels RGB color data.

    Returns:
        numpy.ndarray, three dimensions uint8 data in the shape of (Height, Width, Channels).

    Raises:
        TypeError: If `filename` is not of type str.
        TypeError: If `mode` is not of type :class:`mindspore.dataset.vision.ImageReadMode` .
        RuntimeError: If `filename` does not exist, or not a regular file, or not a supported image file.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import ImageReadMode
        >>> output = vision.read_image("/path/to/image_file", ImageReadMode.UNCHANGED)
    """
    if not isinstance(filename, str):
        raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename)))
    if not isinstance(mode, ImageReadMode):
        raise TypeError("Input mode is not of type {0}, but got: {1}.".format(ImageReadMode, type(mode)))

    # Translate the Python enum into its cde counterpart, then let cde decode the file.
    c_read_mode = ImageReadMode.to_c_type(mode)
    decoded = cde.read_image(filename, c_read_mode)
    return decoded.as_array()
628
629
def read_video(filename, start_pts=0, end_pts=None, pts_unit="pts"):
    """
    Read the video, audio, metadata from a video file.

    It supports AVI, H264, H265, MOV, MP4, WMV file formats.

    Args:
        filename(str): The path to the video file to be read.
        start_pts(Union[float, Fraction, int], optional): The start presentation timestamp of the video. Default: 0.
        end_pts(Union[float, Fraction, int], optional): The end presentation timestamp of the video. Default: None.
            The None is represented by 2147483647.
        pts_unit(str, optional): The unit of the timestamps. It can be any of ["pts", "sec"]. Default: "pts".

    Returns:
        - numpy.ndarray, four dimensions uint8 data for video. The format is [T, H, W, C]. `T` is the number of frames,
          `H` is the height, `W` is the width, `C` is the channel for RGB.
        - numpy.ndarray, two dimensions float for audio. The format is [C, L]. `C` is the number of channels.
          `L` is the length of the points in one channel.
        - dict, metadata for the video and audio.
          It contains video_fps data of type float and audio_fps data of type int.

    Raises:
        TypeError: If `filename` is not of type str.
        TypeError: If `start_pts` is not of type [float, Fraction, int].
        TypeError: If `end_pts` is not of type [float, Fraction, int].
        TypeError: If `pts_unit` is not of type str.
        RuntimeError: If `filename` does not exist, or not a regular file, or not a supported video file.
        ValueError: If `start_pts` is less than 0.
        ValueError: If `end_pts` is less than `start_pts`.
        ValueError: If `pts_unit` is not in ["pts", "sec"].

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> video_output, audio_output, metadata_output = vision.read_video("/path/to/file")
    """
    timestamp_types = (float, Fraction, int)

    # Arguments are validated in declaration order; each type check precedes its range check.
    if not isinstance(filename, str):
        raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename)))

    if not isinstance(start_pts, timestamp_types):
        raise TypeError("Input start_pts is not of type [{0}, {1}, {2}], but got: {3}".format(float, Fraction, int,
                                                                                              type(start_pts)))
    if start_pts < 0.0:
        raise ValueError("Not supported start_pts for " + str(start_pts) + ". The start_pts should be >= 0.")

    # An omitted end timestamp is replaced by 2147483647.0.
    if end_pts is None:
        end_pts = 2147483647.0
    if not isinstance(end_pts, timestamp_types):
        raise TypeError("Input end_pts is not of type [{0}, {1}, {2}], but got: {3}".format(float, Fraction, int,
                                                                                            type(end_pts)))
    if end_pts < start_pts:
        raise ValueError("Not supported end_pts for " + str(end_pts) + ". start_pts = " + str(start_pts) + "."
                         + " The end_pts should be >= start_pts.")

    if not isinstance(pts_unit, str):
        raise TypeError("Input pts_unit is not of type {0}, but got: {1}.".format(str, type(pts_unit)))
    if pts_unit not in ["pts", "sec"]:
        raise ValueError("Not supported pts_unit for " + pts_unit)

    video_output, audio_output, raw_metadata = cde.read_video(filename, float(start_pts), float(end_pts), pts_unit)

    # Either stream may come back as None; only convert the ones that are present.
    if video_output is not None:
        video_output = video_output.as_array()
    if audio_output is not None:
        audio_output = audio_output.as_array()

    # The frame-rate entries are coerced to fixed Python types; other keys pass through unchanged.
    converters = {"video_fps": float, "audio_fps": int}
    metadata_output = {}
    for key in raw_metadata:
        convert = converters.get(key)
        value = raw_metadata[key]
        metadata_output[key] = value if convert is None else convert(value)
    return video_output, audio_output, metadata_output
706
707
def read_video_timestamps(filename, pts_unit="pts"):
    """
    Retrieve the frame timestamps and the frames per second of a video file.
    Supported formats are AVI, H264, H265, MOV, MP4 and WMV.

    Args:
        filename(str): Path of the video file to inspect.
        pts_unit(str, optional): Unit in which the timestamps are reported. It can be any of
            ["pts", "sec"]. Default: "pts".

    Returns:
        - list, the timestamps of the video. list[int] is returned when `pts_unit` is "pts" and
          list[float] is returned when `pts_unit` is "sec".
        - float, the frames per second of the video file. ``None`` is returned in its place when
          the timestamp list is empty.

    Raises:
        TypeError: If `filename` is not of type str.
        TypeError: If `pts_unit` is not of type str.
        RuntimeError: If `filename` does not exist, or not a regular file, or not a supported video file.
        RuntimeError: If `pts_unit` is not in ["pts", "sec"].

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> video_timestamps, video_fps = vision.read_video_timestamps("/path/to/file")
    """
    # Both arguments must be plain strings; filename is checked first.
    for arg_name, arg_value in (("filename", filename), ("pts_unit", pts_unit)):
        if not isinstance(arg_value, str):
            raise TypeError("Input {0} is not of type {1}, but got: {2}.".format(arg_name, str, type(arg_value)))

    timestamps, fps, time_base = cde.read_video_timestamps(filename, pts_unit)

    if timestamps == []:
        # Nothing was read: report no frame rate alongside the empty list.
        fps = None
    elif pts_unit != "pts":
        # Timestamps come back as raw pts values; scale them by the time base for any other unit.
        timestamps = [stamp * time_base for stamp in timestamps]
    return timestamps, fps
747
748
def write_file(filename, data):
    """
    Dump one-dimensional uint8 data to a file opened in binary mode.

    Args:
        filename (str): The path to the file to be written.
        data (Union[numpy.ndarray, mindspore.Tensor]): The one dimension uint8 data to be written.

    Raises:
        TypeError: If `filename` is not of type str.
        TypeError: If `data` is not of type numpy.ndarray or mindspore.Tensor.
        RuntimeError: If the `filename` is not a common file.
        RuntimeError: If the data type of `data` is not uint8.
        RuntimeError: If the shape of `data` is not a one-dimensional array.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> import numpy as np
        >>> # Generate a random data with 1024 bytes
        >>> data = np.random.randint(256, size=(1024), dtype=np.uint8)
        >>> vision.write_file("/path/to/file", data)
    """
    if not isinstance(filename, str):
        raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename)))

    # Normalise the payload to a NumPy array so a single backend call handles both input types.
    if isinstance(data, np.ndarray):
        payload = data
    elif isinstance(data, mindspore.Tensor):
        payload = data.asnumpy()
    else:
        raise TypeError("Input data is not of type {0} or {1}, but got: {2}.".format(np.ndarray,
                                                                                     mindspore.Tensor, type(data)))
    return cde.write_file(filename, cde.Tensor(payload))
782
783
def write_jpeg(filename, image, quality=75):
    """
    Encode the image data and store it as a JPEG file.

    Args:
        filename (str): The path to the file to be written.
        image (Union[numpy.ndarray, mindspore.Tensor]): The image data to be written.
        quality (int, optional): Quality of the resulting JPEG file, in range of [1, 100]. Default: ``75``.

    Raises:
        TypeError: If `filename` is not of type str.
        TypeError: If `image` is not of type numpy.ndarray or mindspore.Tensor.
        TypeError: If `quality` is not of type int.
        RuntimeError: If the `filename` does not exist or not a common file.
        RuntimeError: If the data type of `image` is not uint8.
        RuntimeError: If the shape of `image` is not <H, W> or <H, W, 1> or <H, W, 3>.
        RuntimeError: If `quality` is less than 1 or greater than 100.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> import numpy as np
        >>> # Generate a random image with height=120, width=340, channels=3
        >>> image = np.random.randint(256, size=(120, 340, 3), dtype=np.uint8)
        >>> vision.write_jpeg("/path/to/file", image)
    """
    # Scalar arguments are validated before the image: filename first, then quality.
    if not isinstance(filename, str):
        raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename)))
    if not isinstance(quality, int):
        raise TypeError("Input quality is not of type {0}, but got: {1}.".format(int, type(quality)))

    # Normalise the image to a NumPy array so a single backend call handles both input types.
    if isinstance(image, np.ndarray):
        pixels = image
    elif isinstance(image, mindspore.Tensor):
        pixels = image.asnumpy()
    else:
        raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray,
                                                                                      mindspore.Tensor, type(image)))
    return cde.write_jpeg(filename, cde.Tensor(pixels), quality)
822
823
def write_png(filename, image, compression_level=6):
    """
    Encode the image data and store it as a PNG file.

    Args:
        filename (str): The path to the file to be written.
        image (Union[numpy.ndarray, mindspore.Tensor]): The image data to be written.
        compression_level (int, optional): Compression level for the resulting PNG file, in range of [0, 9].
            Default: ``6``.

    Raises:
        TypeError: If `filename` is not of type str.
        TypeError: If `image` is not of type numpy.ndarray or mindspore.Tensor.
        TypeError: If `compression_level` is not of type int.
        RuntimeError: If the `filename` does not exist or not a common file.
        RuntimeError: If the data type of `image` is not uint8.
        RuntimeError: If the shape of `image` is not <H, W> or <H, W, 1> or <H, W, 3>.
        RuntimeError: If `compression_level` is less than 0 or greater than 9.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> import numpy as np
        >>> # Generate a random image with height=120, width=340, channels=3
        >>> image = np.random.randint(256, size=(120, 340, 3), dtype=np.uint8)
        >>> vision.write_png("/path/to/file", image)
    """
    # Scalar arguments are validated before the image: filename first, then compression_level.
    if not isinstance(filename, str):
        raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename)))
    if not isinstance(compression_level, int):
        raise TypeError("Input compression_level is not of type {0}, but got: {1}.".format(int,
                                                                                           type(compression_level)))

    # Normalise the image to a NumPy array so a single backend call handles both input types.
    if isinstance(image, np.ndarray):
        pixels = image
    elif isinstance(image, mindspore.Tensor):
        pixels = image.asnumpy()
    else:
        raise TypeError("The input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray,
                                                                                          mindspore.Tensor,
                                                                                          type(image)))
    return cde.write_png(filename, cde.Tensor(pixels), compression_level)
864