1# Copyright 2019-2024 Huawei Technologies Co., Ltd 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14""" 15Interpolation Mode, Resampling Filters 16""" 17from enum import Enum, IntEnum 18from fractions import Fraction 19import numbers 20 21import numpy as np 22from PIL import Image 23 24import mindspore 25import mindspore._c_dataengine as cde 26 27# The following constants have been deprecated by Pillow since version 9.1.0 28if int(Image.__version__.split(".")[0]) > 9 or Image.__version__ >= "9.1.0": 29 FLIP_LEFT_RIGHT = Image.Transpose.FLIP_LEFT_RIGHT 30 FLIP_TOP_BOTTOM = Image.Transpose.FLIP_TOP_BOTTOM 31 PERSPECTIVE = Image.Transform.PERSPECTIVE 32 AFFINE = Image.Transform.AFFINE 33 NEAREST = Image.Resampling.NEAREST 34 ANTIALIAS = Image.Resampling.LANCZOS 35 LINEAR = Image.Resampling.BILINEAR 36 CUBIC = Image.Resampling.BICUBIC 37else: 38 FLIP_LEFT_RIGHT = Image.FLIP_LEFT_RIGHT 39 FLIP_TOP_BOTTOM = Image.FLIP_TOP_BOTTOM 40 PERSPECTIVE = Image.PERSPECTIVE 41 AFFINE = Image.AFFINE 42 NEAREST = Image.NEAREST 43 ANTIALIAS = Image.ANTIALIAS 44 LINEAR = Image.LINEAR 45 CUBIC = Image.CUBIC 46 47 48class AutoAugmentPolicy(str, Enum): 49 """ 50 AutoAugment policy for different datasets. 51 52 Possible enumeration values are: ``AutoAugmentPolicy.IMAGENET``, ``AutoAugmentPolicy.CIFAR10``, 53 AutoAugmentPolicy.SVHN. 54 55 Each policy contains 25 pairs of augmentation operations. When using AutoAugment, each image is randomly 56 transformed with one of these operation pairs. Each pair has 2 different operations. The following shows 57 all of these augmentation operations, including operation names with their probabilities and random params. 58 59 - ``AutoAugmentPolicy.IMAGENET``: dataset auto augment policy for ImageNet. 60 61 .. code-block:: 62 63 Augmentation operations pair: 64 [(("Posterize", 0.4, 8), ("Rotate", 0.6, 9)), (("Solarize", 0.6, 5), ("AutoContrast", 0.6, None)), 65 (("Equalize", 0.8, None), ("Equalize", 0.6, None)), (("Posterize", 0.6, 7), ("Posterize", 0.6, 6)), 66 (("Equalize", 0.4, None), ("Solarize", 0.2, 4)), (("Equalize", 0.4, None), ("Rotate", 0.8, 8)), 67 (("Solarize", 0.6, 3), ("Equalize", 0.6, None)), (("Posterize", 0.8, 5), ("Equalize", 1.0, None)), 68 (("Rotate", 0.2, 3), ("Solarize", 0.6, 8)), (("Equalize", 0.6, None), ("Posterize", 0.4, 6)), 69 (("Rotate", 0.8, 8), ("Color", 0.4, 0)), (("Rotate", 0.4, 9), ("Equalize", 0.6, None)), 70 (("Equalize", 0.0, None), ("Equalize", 0.8, None)), (("Invert", 0.6, None), ("Equalize", 1.0, None)), 71 (("Color", 0.6, 4), ("Contrast", 1.0, 8)), (("Rotate", 0.8, 8), ("Color", 1.0, 2)), 72 (("Color", 0.8, 8), ("Solarize", 0.8, 7)), (("Sharpness", 0.4, 7), ("Invert", 0.6, None)), 73 (("ShearX", 0.6, 5), ("Equalize", 1.0, None)), (("Color", 0.4, 0), ("Equalize", 0.6, None)), 74 (("Equalize", 0.4, None), ("Solarize", 0.2, 4)), (("Solarize", 0.6, 5), ("AutoContrast", 0.6, None)), 75 (("Invert", 0.6, None), ("Equalize", 1.0, None)), (("Color", 0.6, 4), ("Contrast", 1.0, 8)), 76 (("Equalize", 0.8, None), ("Equalize", 0.6, None))] 77 78 - ``AutoAugmentPolicy.CIFAR10``: dataset auto augment policy for Cifar10. 79 80 .. code-block:: 81 82 Augmentation operations pair: 83 [(("Invert", 0.1, None), ("Contrast", 0.2, 6)), (("Rotate", 0.7, 2), ("TranslateX", 0.3, 9)), 84 (("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)), (("ShearY", 0.5, 8), ("TranslateY", 0.7, 9)), 85 (("AutoContrast", 0.5, None), ("Equalize", 0.9, None)), (("ShearY", 0.2, 7), ("Posterize", 0.3, 7)), 86 (("Color", 0.4, 3), ("Brightness", 0.6, 7)), (("Sharpness", 0.3, 9), ("Brightness", 0.7, 9)), 87 (("Equalize", 0.6, None), ("Equalize", 0.5, None)), (("Contrast", 0.6, 7), ("Sharpness", 0.6, 5)), 88 (("Color", 0.7, 7), ("TranslateX", 0.5, 8)), (("Equalize", 0.8, None), ("Invert", 0.1, None)), 89 (("TranslateY", 0.4, 3), ("Sharpness", 0.2, 6)), (("Brightness", 0.9, 6), ("Color", 0.2, 8)), 90 (("Solarize", 0.5, 2), ("Invert", 0.0, None)), (("TranslateY", 0.9, 9), ("TranslateY", 0.7, 9)), 91 (("Equalize", 0.2, None), ("Equalize", 0.6, None)), (("Color", 0.9, 9), ("Equalize", 0.6, None)), 92 (("AutoContrast", 0.8, None), ("Solarize", 0.2, 8)), (("Brightness", 0.1, 3), ("Color", 0.7, 0)), 93 (("Solarize", 0.4, 5), ("AutoContrast", 0.9, None)), 94 (("AutoContrast", 0.9, None), ("Solarize", 0.8, 3)), 95 (("TranslateY", 0.7, 9), ("AutoContrast", 0.9, None)), 96 (("Equalize", 0.3, None), ("AutoContrast", 0.4, None)), 97 (("Equalize", 0.2, None), ("AutoContrast", 0.6, None))] 98 99 - ``AutoAugmentPolicy.SVHN``: dataset auto augment policy for SVHN. 100 101 .. code-block:: 102 103 Augmentation operations pair: 104 [(("ShearX", 0.9, 4), ("Invert", 0.2, None)), (("ShearY", 0.9, 8), ("Invert", 0.7, None)), 105 (("Equalize", 0.6, None), ("Solarize", 0.6, 6)), (("Invert", 0.9, None), ("Equalize", 0.6, None)), 106 (("Equalize", 0.6, None), ("Rotate", 0.9, 3)), (("ShearX", 0.9, 4), ("AutoContrast", 0.8, None)), 107 (("ShearY", 0.9, 8), ("Invert", 0.4, None)), (("ShearY", 0.9, 5), ("Solarize", 0.2, 6)), 108 (("Invert", 0.9, None), ("AutoContrast", 0.8, None)), (("Equalize", 0.6, None), ("Rotate", 0.9, 3)), 109 (("ShearX", 0.9, 4), ("Solarize", 0.3, 3)), (("ShearY", 0.8, 8), ("Invert", 0.7, None)), 110 (("Equalize", 0.9, None), ("TranslateY", 0.6, 6)), (("Invert", 0.9, None), ("Equalize", 0.6, None)), 111 (("Contrast", 0.3, 3), ("Rotate", 0.8, 4)), (("Invert", 0.8, None), ("TranslateY", 0.0, 2)), 112 (("ShearY", 0.7, 6), ("Solarize", 0.4, 8)), (("Invert", 0.6, None), ("Rotate", 0.8, 4)), 113 (("ShearY", 0.3, 7), ("TranslateX", 0.9, 3)), (("ShearX", 0.1, 6), ("Invert", 0.6, None)), 114 (("Solarize", 0.7, 2), ("TranslateY", 0.6, 7)), (("ShearY", 0.8, 4), ("Invert", 0.8, None)), 115 (("ShearX", 0.7, 9), ("TranslateY", 0.8, 3)), (("ShearY", 0.8, 5), ("AutoContrast", 0.7, None)), 116 (("ShearX", 0.7, 2), ("Invert", 0.1, None))] 117 """ 118 IMAGENET: str = "imagenet" 119 CIFAR10: str = "cifar10" 120 SVHN: str = "svhn" 121 122 @staticmethod 123 def to_c_type(policy): 124 """ 125 Function to return C type for AutoAugment policy. 126 """ 127 c_values = {AutoAugmentPolicy.IMAGENET: cde.AutoAugmentPolicy.DE_AUTO_AUGMENT_POLICY_IMAGENET, 128 AutoAugmentPolicy.CIFAR10: cde.AutoAugmentPolicy.DE_AUTO_AUGMENT_POLICY_CIFAR10, 129 AutoAugmentPolicy.SVHN: cde.AutoAugmentPolicy.DE_AUTO_AUGMENT_POLICY_SVHN} 130 131 value = c_values.get(policy) 132 if value is None: 133 raise RuntimeError("Unsupported AutoAugmentPolicy, only support IMAGENET, CIFAR10, and SVHN.") 134 return value 135 136 137class Border(str, Enum): 138 """ 139 Padding Mode, Border Type. 140 141 Possible enumeration values are: ``Border.CONSTANT``, ``Border.EDGE``, ``Border.REFLECT``, ``Border.SYMMETRIC``. 142 143 - ``Border.CONSTANT`` : means it fills the border with constant values. 144 - ``Border.EDGE`` : means it pads with the last value on the edge. 145 - ``Border.REFLECT`` : means it reflects the values on the edge omitting the last value of edge. 146 For example, padding [1,2,3,4] with 2 elements on both sides will result in [3,2,1,2,3,4,3,2]. 147 - ``Border.SYMMETRIC`` : means it reflects the values on the edge repeating the last value of edge. 148 For example, padding [1,2,3,4] with 2 elements on both sides will result in [2,1,1,2,3,4,4,3]. 149 150 Note: 151 This class derived from class str to support json serializable. 152 """ 153 CONSTANT: str = "constant" 154 EDGE: str = "edge" 155 REFLECT: str = "reflect" 156 SYMMETRIC: str = "symmetric" 157 158 @staticmethod 159 def to_python_type(border_type): 160 """ 161 Function to return Python type for Border Type. 162 """ 163 python_values = {Border.CONSTANT: 'constant', 164 Border.EDGE: 'edge', 165 Border.REFLECT: 'reflect', 166 Border.SYMMETRIC: 'symmetric'} 167 168 value = python_values.get(border_type) 169 if value is None: 170 raise RuntimeError("Unsupported Border type, only support CONSTANT, EDGE, REFLECT and SYMMETRIC.") 171 return value 172 173 @staticmethod 174 def to_c_type(border_type): 175 """ 176 Function to return C type for Border Type. 177 """ 178 c_values = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT, 179 Border.EDGE: cde.BorderType.DE_BORDER_EDGE, 180 Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT, 181 Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC} 182 183 value = c_values.get(border_type) 184 if value is None: 185 raise RuntimeError("Unsupported Border type, only support CONSTANT, EDGE, REFLECT and SYMMETRIC.") 186 return value 187 188 189class ConvertMode(IntEnum): 190 """ 191 The color conversion mode. 192 193 Possible enumeration values are as follows: 194 195 - ConvertMode.COLOR_BGR2BGRA: convert BGR format images to BGRA format images. 196 - ConvertMode.COLOR_RGB2RGBA: convert RGB format images to RGBA format images. 197 - ConvertMode.COLOR_BGRA2BGR: convert BGRA format images to BGR format images. 198 - ConvertMode.COLOR_RGBA2RGB: convert RGBA format images to RGB format images. 199 - ConvertMode.COLOR_BGR2RGBA: convert BGR format images to RGBA format images. 200 - ConvertMode.COLOR_RGB2BGRA: convert RGB format images to BGRA format images. 201 - ConvertMode.COLOR_RGBA2BGR: convert RGBA format images to BGR format images. 202 - ConvertMode.COLOR_BGRA2RGB: convert BGRA format images to RGB format images. 203 - ConvertMode.COLOR_BGR2RGB: convert BGR format images to RGB format images. 204 - ConvertMode.COLOR_RGB2BGR: convert RGB format images to BGR format images. 205 - ConvertMode.COLOR_BGRA2RGBA: convert BGRA format images to RGBA format images. 206 - ConvertMode.COLOR_RGBA2BGRA: convert RGBA format images to BGRA format images. 207 - ConvertMode.COLOR_BGR2GRAY: convert BGR format images to GRAY format images. 208 - ConvertMode.COLOR_RGB2GRAY: convert RGB format images to GRAY format images. 209 - ConvertMode.COLOR_GRAY2BGR: convert GRAY format images to BGR format images. 210 - ConvertMode.COLOR_GRAY2RGB: convert GRAY format images to RGB format images. 211 - ConvertMode.COLOR_GRAY2BGRA: convert GRAY format images to BGRA format images. 212 - ConvertMode.COLOR_GRAY2RGBA: convert GRAY format images to RGBA format images. 213 - ConvertMode.COLOR_BGRA2GRAY: convert BGRA format images to GRAY format images. 214 - ConvertMode.COLOR_RGBA2GRAY: convert RGBA format images to GRAY format images. 215 """ 216 COLOR_BGR2BGRA = 0 217 COLOR_RGB2RGBA = COLOR_BGR2BGRA 218 COLOR_BGRA2BGR = 1 219 COLOR_RGBA2RGB = COLOR_BGRA2BGR 220 COLOR_BGR2RGBA = 2 221 COLOR_RGB2BGRA = COLOR_BGR2RGBA 222 COLOR_RGBA2BGR = 3 223 COLOR_BGRA2RGB = COLOR_RGBA2BGR 224 COLOR_BGR2RGB = 4 225 COLOR_RGB2BGR = COLOR_BGR2RGB 226 COLOR_BGRA2RGBA = 5 227 COLOR_RGBA2BGRA = COLOR_BGRA2RGBA 228 COLOR_BGR2GRAY = 6 229 COLOR_RGB2GRAY = 7 230 COLOR_GRAY2BGR = 8 231 COLOR_GRAY2RGB = COLOR_GRAY2BGR 232 COLOR_GRAY2BGRA = 9 233 COLOR_GRAY2RGBA = COLOR_GRAY2BGRA 234 COLOR_BGRA2GRAY = 10 235 COLOR_RGBA2GRAY = 11 236 237 @staticmethod 238 def to_c_type(mode): 239 """ 240 Function to return C type for color mode. 241 """ 242 c_values = {ConvertMode.COLOR_BGR2BGRA: cde.ConvertMode.DE_COLOR_BGR2BGRA, 243 ConvertMode.COLOR_RGB2RGBA: cde.ConvertMode.DE_COLOR_RGB2RGBA, 244 ConvertMode.COLOR_BGRA2BGR: cde.ConvertMode.DE_COLOR_BGRA2BGR, 245 ConvertMode.COLOR_RGBA2RGB: cde.ConvertMode.DE_COLOR_RGBA2RGB, 246 ConvertMode.COLOR_BGR2RGBA: cde.ConvertMode.DE_COLOR_BGR2RGBA, 247 ConvertMode.COLOR_RGB2BGRA: cde.ConvertMode.DE_COLOR_RGB2BGRA, 248 ConvertMode.COLOR_RGBA2BGR: cde.ConvertMode.DE_COLOR_RGBA2BGR, 249 ConvertMode.COLOR_BGRA2RGB: cde.ConvertMode.DE_COLOR_BGRA2RGB, 250 ConvertMode.COLOR_BGR2RGB: cde.ConvertMode.DE_COLOR_BGR2RGB, 251 ConvertMode.COLOR_RGB2BGR: cde.ConvertMode.DE_COLOR_RGB2BGR, 252 ConvertMode.COLOR_BGRA2RGBA: cde.ConvertMode.DE_COLOR_BGRA2RGBA, 253 ConvertMode.COLOR_RGBA2BGRA: cde.ConvertMode.DE_COLOR_RGBA2BGRA, 254 ConvertMode.COLOR_BGR2GRAY: cde.ConvertMode.DE_COLOR_BGR2GRAY, 255 ConvertMode.COLOR_RGB2GRAY: cde.ConvertMode.DE_COLOR_RGB2GRAY, 256 ConvertMode.COLOR_GRAY2BGR: cde.ConvertMode.DE_COLOR_GRAY2BGR, 257 ConvertMode.COLOR_GRAY2RGB: cde.ConvertMode.DE_COLOR_GRAY2RGB, 258 ConvertMode.COLOR_GRAY2BGRA: cde.ConvertMode.DE_COLOR_GRAY2BGRA, 259 ConvertMode.COLOR_GRAY2RGBA: cde.ConvertMode.DE_COLOR_GRAY2RGBA, 260 ConvertMode.COLOR_BGRA2GRAY: cde.ConvertMode.DE_COLOR_BGRA2GRAY, 261 ConvertMode.COLOR_RGBA2GRAY: cde.ConvertMode.DE_COLOR_RGBA2GRAY, 262 } 263 264 mode = c_values.get(mode) 265 if mode is None: 266 raise RuntimeError("Unsupported ConvertMode, see https://www.mindspore.cn/docs/zh-CN/master/api_python/" 267 "dataset_vision/mindspore.dataset.vision.ConvertColor.html for more details.") 268 return mode 269 270 271class ImageBatchFormat(IntEnum): 272 """ 273 Data Format of images after batch operation. 274 275 Possible enumeration values are: ``ImageBatchFormat.NHWC``, ``ImageBatchFormat.NCHW``. 276 277 - ``ImageBatchFormat.NHWC``: in orders like, batch N, height H, width W, channels C to store the data. 278 - ``ImageBatchFormat.NCHW``: in orders like, batch N, channels C, height H, width W to store the data. 279 """ 280 NHWC = 0 281 NCHW = 1 282 283 @staticmethod 284 def to_c_type(image_batch_format): 285 """ 286 Function to return C type for ImageBatchFormat. 287 """ 288 c_values = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC, 289 ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW} 290 291 value = c_values.get(image_batch_format) 292 if value is None: 293 raise RuntimeError("Unsupported ImageBatchFormat, only support NHWC and NCHW.") 294 return value 295 296 297class ImageReadMode(IntEnum): 298 """ 299 The read mode used for the image file. 300 301 Possible enumeration values are: ``ImageReadMode.UNCHANGED``, ``ImageReadMode.GRAYSCALE``, ``ImageReadMode.COLOR``. 302 303 - ``ImageReadMode.UNCHANGED``: remain the output in the original format. 304 - ``ImageReadMode.GRAYSCALE``: convert the output into one channel grayscale data. 305 - ``ImageReadMode.COLOR``: convert the output into three channels RGB color data. 306 """ 307 UNCHANGED = 0 308 GRAYSCALE = 1 309 COLOR = 2 310 311 @staticmethod 312 def to_c_type(image_read_mode): 313 """ 314 Function to return C type for ImageReadMode. 315 """ 316 c_values = {ImageReadMode.UNCHANGED: cde.ImageReadMode.DE_IMAGE_READ_MODE_UNCHANGED, 317 ImageReadMode.GRAYSCALE: cde.ImageReadMode.DE_IMAGE_READ_MODE_GRAYSCALE, 318 ImageReadMode.COLOR: cde.ImageReadMode.DE_IMAGE_READ_MODE_COLOR} 319 320 value = c_values.get(image_read_mode) 321 if value is None: 322 raise RuntimeError("Unsupported ImageReadMode, only support UNCHANGED, GRAYSCALE and COLOR.") 323 return value 324 325 326class Inter(IntEnum): 327 """ 328 Interpolation methods. 329 330 Available values are as follows: 331 332 - ``Inter.NEAREST`` : Nearest neighbor interpolation. 333 - ``Inter.ANTIALIAS`` : Antialias interpolation. Supported only when the input is PIL.Image.Image. 334 - ``Inter.LINEAR`` : Linear interpolation, the same as ``Inter.BILINEAR``. 335 - ``Inter.BILINEAR`` : Bilinear interpolation. 336 - ``Inter.CUBIC`` : Cubic interpolation, the same as ``Inter.BICUBIC``. 337 - ``Inter.BICUBIC`` : Bicubic interpolation. 338 - ``Inter.AREA`` : Pixel area interpolation. Supported only when the input is numpy.ndarray. 339 - ``Inter.PILCUBIC`` : Pillow implementation of bicubic interpolation. Supported only when the input 340 is numpy.ndarray. 341 """ 342 NEAREST = 0 343 ANTIALIAS = 1 344 BILINEAR = LINEAR = 2 345 BICUBIC = CUBIC = 3 346 AREA = 4 347 PILCUBIC = 5 348 349 @staticmethod 350 def to_python_type(inter_type): 351 """ 352 Function to return Python type for Interpolation Mode. 353 """ 354 python_values = {Inter.NEAREST: NEAREST, 355 Inter.ANTIALIAS: ANTIALIAS, 356 Inter.LINEAR: LINEAR, 357 Inter.CUBIC: CUBIC} 358 359 value = python_values.get(inter_type) 360 if value is None: 361 raise RuntimeError("Unsupported interpolation, only support NEAREST, ANTIALIAS, LINEAR and CUBIC.") 362 return value 363 364 @staticmethod 365 def to_c_type(inter_type): 366 """ 367 Function to return C type for Interpolation Mode. 368 """ 369 c_values = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR, 370 Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR, 371 Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC, 372 Inter.AREA: cde.InterpolationMode.DE_INTER_AREA, 373 Inter.PILCUBIC: cde.InterpolationMode.DE_INTER_PILCUBIC} 374 375 value = c_values.get(inter_type) 376 if value is None: 377 raise RuntimeError("Unsupported interpolation, only support NEAREST, LINEAR, CUBIC, AREA and PILCUBIC.") 378 379 return value 380 381 382class SliceMode(IntEnum): 383 """ 384 Mode to Slice Tensor into multiple parts. 385 386 Possible enumeration values are: ``SliceMode.PAD``, ``SliceMode.DROP``. 387 388 - ``SliceMode.PAD``: pad some pixels before slice the Tensor if needed. 389 - ``SliceMode.DROP``: drop remainder pixels before slice the Tensor if needed. 390 """ 391 PAD = 0 392 DROP = 1 393 394 @staticmethod 395 def to_c_type(mode): 396 """ 397 Function to return C type for SliceMode. 398 """ 399 c_values = {SliceMode.PAD: cde.SliceMode.DE_SLICE_PAD, 400 SliceMode.DROP: cde.SliceMode.DE_SLICE_DROP} 401 402 value = c_values.get(mode) 403 if value is None: 404 raise RuntimeError("Unsupported SliceMode, only support PAD and DROP.") 405 return value 406 407 408def encode_jpeg(image, quality=75): 409 """ 410 Encode the input image as JPEG data. 411 412 Args: 413 image (Union[numpy.ndarray, mindspore.Tensor]): The image to be encoded. 414 quality (int, optional): Quality of the resulting JPEG data, in range of [1, 100]. Default: ``75``. 415 416 Returns: 417 numpy.ndarray, one dimension uint8 data. 418 419 Raises: 420 TypeError: If `image` is not of type numpy.ndarray or mindspore.Tensor. 421 TypeError: If `quality` is not of type int. 422 RuntimeError: If the data type of `image` is not uint8. 423 RuntimeError: If the shape of `image` is not <H, W> or <H, W, 1> or <H, W, 3>. 424 RuntimeError: If `quality` is less than 1 or greater than 100. 425 426 Supported Platforms: 427 ``CPU`` 428 429 Examples: 430 >>> import mindspore.dataset.vision as vision 431 >>> import numpy as np 432 >>> # Generate a random image with height=120, width=340, channels=3 433 >>> image = np.random.randint(256, size=(120, 340, 3), dtype=np.uint8) 434 >>> jpeg_data = vision.encode_jpeg(image) 435 """ 436 if not isinstance(quality, int): 437 raise TypeError("Input quality is not of type {0}, but got: {1}.".format(int, type(quality))) 438 if isinstance(image, np.ndarray): 439 return cde.encode_jpeg(cde.Tensor(image), quality).as_array() 440 if isinstance(image, mindspore.Tensor): 441 return cde.encode_jpeg(cde.Tensor(image.asnumpy()), quality).as_array() 442 raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray, 443 mindspore.Tensor, type(image))) 444 445 446def encode_png(image, compression_level=6): 447 """ 448 Encode the input image as PNG data. 449 450 Args: 451 image (Union[numpy.ndarray, mindspore.Tensor]): The image to be encoded. 452 compression_level (int, optional): The `compression_level` for encoding, in range of [0, 9]. 453 Default: ``6``. 454 455 Returns: 456 numpy.ndarray, one dimension uint8 data. 457 458 Raises: 459 TypeError: If `image` is not of type numpy.ndarray or mindspore.Tensor. 460 TypeError: If `compression_level` is not of type int. 461 RuntimeError: If the data type of `image` is not uint8. 462 RuntimeError: If the shape of `image` is not <H, W> or <H, W, 1> or <H, W, 3>. 463 RuntimeError: If `compression_level` is less than 0 or greater than 9. 464 465 Supported Platforms: 466 ``CPU`` 467 468 Examples: 469 >>> import mindspore.dataset.vision as vision 470 >>> import numpy as np 471 >>> # Generate a random image with height=120, width=340, channels=3 472 >>> image = np.random.randint(256, size=(120, 340, 3), dtype=np.uint8) 473 >>> png_data = vision.encode_png(image) 474 """ 475 if not isinstance(compression_level, int): 476 raise TypeError("Input compression_level is not of type {0}, but got: {1}.".format(int, 477 type(compression_level))) 478 if isinstance(image, np.ndarray): 479 return cde.encode_png(cde.Tensor(image), compression_level).as_array() 480 if isinstance(image, mindspore.Tensor): 481 return cde.encode_png(cde.Tensor(image.asnumpy()), compression_level).as_array() 482 raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray, 483 mindspore.Tensor, type(image))) 484 485 486def get_image_num_channels(image): 487 """ 488 Get the number of input image channels. 489 490 Args: 491 image (Union[numpy.ndarray, PIL.Image.Image]): Image to get the number of channels. 492 493 Returns: 494 int, the number of input image channels. 495 496 Raises: 497 RuntimeError: If the dimension of `image` is less than 2. 498 TypeError: If `image` is not of type <class 'numpy.ndarray'> or <class 'PIL.Image.Image'>. 499 500 Examples: 501 >>> import mindspore.dataset.vision as vision 502 >>> from PIL import Image 503 >>> image = Image.open("/path/to/image_file") 504 >>> num_channels = vision.get_image_num_channels(image) 505 """ 506 507 if isinstance(image, np.ndarray): 508 return cde.get_image_num_channels(cde.Tensor(image)) 509 510 if isinstance(image, Image.Image): 511 if hasattr(image, "getbands"): 512 return len(image.getbands()) 513 514 return image.channels 515 516 raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray, Image.Image, type(image))) 517 518 519def get_image_size(image): 520 """ 521 Get the size of input image as [height, width]. 522 523 Args: 524 image (Union[numpy.ndarray, PIL.Image.Image]): The image to get size. 525 526 Returns: 527 list[int, int], the image size. 528 529 Raises: 530 RuntimeError: If the dimension of `image` is less than 2. 531 TypeError: If `image` is not of type <class 'numpy.ndarray'> or <class 'PIL.Image.Image'>. 532 533 Examples: 534 >>> import mindspore.dataset.vision as vision 535 >>> from PIL import Image 536 >>> image = Image.open("/path/to/image_file") 537 >>> image_size = vision.get_image_size(image) 538 """ 539 540 if isinstance(image, np.ndarray): 541 return cde.get_image_size(cde.Tensor(image)) 542 if isinstance(image, Image.Image): 543 size_list = list(image.size) 544 size_list[0], size_list[1] = size_list[1], size_list[0] 545 return size_list 546 547 raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray, Image.Image, type(image))) 548 549 550def parse_padding(padding): 551 """ Parses and prepares the padding tuple""" 552 553 if isinstance(padding, numbers.Number): 554 padding = [padding] * 4 555 if len(padding) == 2: 556 left = right = padding[0] 557 top = bottom = padding[1] 558 padding = (left, top, right, bottom,) 559 if isinstance(padding, list): 560 padding = tuple(padding) 561 return padding 562 563 564def read_file(filename): 565 """ 566 Read a file in binary mode. 567 568 Args: 569 filename(str): The path to the file to be read. 570 571 Returns: 572 numpy.ndarray, the one dimension uint8 data. 573 574 Raises: 575 TypeError: If `filename` is not of type str. 576 RuntimeError: If `filename` does not exist or is not a common file. 577 578 Supported Platforms: 579 ``CPU`` 580 581 Examples: 582 >>> import mindspore.dataset.vision as vision 583 >>> output = vision.read_file("/path/to/file") 584 """ 585 if isinstance(filename, str): 586 return cde.read_file(filename).as_array() 587 raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename))) 588 589 590def read_image(filename, mode=ImageReadMode.UNCHANGED): 591 """ 592 Read a image file and decode it into one channel grayscale data or RGB color data. 593 Supported file types are JPEG, PNG, BMP, TIFF. 594 595 Args: 596 filename(str): The path to the image file to be read. 597 mode(ImageReadMode, optional): The mode used for decoding the image. It can be 598 ``ImageReadMode.UNCHANGED``, ``ImageReadMode.GRAYSCALE``, ``IMageReadMode.COLOR``. 599 Default: ``ImageReadMode.UNCHANGED``. 600 601 - ImageReadMode.UNCHANGED, remain the output in the original format. 602 603 - ImageReadMode.GRAYSCALE, convert the output into one channel grayscale data. 604 605 - IMageReadMode.COLOR, convert the output into three channels RGB color data. 606 607 Returns: 608 numpy.ndarray, three dimensions uint8 data in the shape of (Height, Width, Channels). 609 610 Raises: 611 TypeError: If `filename` is not of type str. 612 TypeError: If `mode` is not of type :class:`mindspore.dataset.vision.ImageReadMode` . 613 RuntimeError: If `filename` does not exist, or not a regular file, or not a supported image file. 614 615 Supported Platforms: 616 ``CPU`` 617 618 Examples: 619 >>> import mindspore.dataset.vision as vision 620 >>> from mindspore.dataset.vision import ImageReadMode 621 >>> output = vision.read_image("/path/to/image_file", ImageReadMode.UNCHANGED) 622 """ 623 if not isinstance(filename, str): 624 raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename))) 625 if not isinstance(mode, ImageReadMode): 626 raise TypeError("Input mode is not of type {0}, but got: {1}.".format(ImageReadMode, type(mode))) 627 return cde.read_image(filename, ImageReadMode.to_c_type(mode)).as_array() 628 629 630def read_video(filename, start_pts=0, end_pts=None, pts_unit="pts"): 631 """ 632 Read the video, audio, metadata from a video file. 633 634 It supports AVI, H264, H265, MOV, MP4, WMV file formats. 635 636 Args: 637 filename(str): The path to the video file to be read. 638 start_pts(Union[float, Fraction, int], optional): The start presentation timestamp of the video. Default: 0. 639 end_pts(Union[float, Fraction, int], optional): The end presentation timestamp of the video. Default: None. 640 The None is represented by 2147483647. 641 pts_unit(str, optional): The unit of the timestamps. It can be any of ["pts", "sec"]. Default: "pts". 642 643 Returns: 644 - numpy.ndarray, four dimensions uint8 data for video. The format is [T, H, W, C]. `T` is the number of frames, 645 `H` is the height, `W` is the width, `C` is the channel for RGB. 646 - numpy.ndarray, two dimensions float for audio. The format is [C, L]. `C` is the number of channels. 647 `L` is the length of the points in one channel. 648 - dict, metadata for the video and audio. 649 It contains video_fps data of type float and audio_fps data of type int. 650 651 Raises: 652 TypeError: If `filename` is not of type str. 653 TypeError: If `start_pts` is not of type [float, Fraction, int]. 654 TypeError: If `end_pts` is not of type [float, Fraction, int]. 655 TypeError: If `pts_unit` is not of type str. 656 RuntimeError: If `filename` does not exist, or not a regular file, or not a supported video file. 657 ValueError: If `start_pts` is less than 0. 658 ValueError: If `end_pts` is less than `start_pts`. 659 ValueError: If `pts_unit` is not in ["pts", "sec"]. 660 661 Supported Platforms: 662 ``CPU`` 663 664 Examples: 665 >>> import mindspore.dataset.vision as vision 666 >>> video_output, audio_output, metadata_output = vision.read_video("/path/to/file") 667 """ 668 if not isinstance(filename, str): 669 raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename))) 670 if not isinstance(start_pts, (float, Fraction, int)): 671 raise TypeError("Input start_pts is not of type [{0}, {1}, {2}], but got: {3}".format(float, Fraction, int, 672 type(start_pts))) 673 if start_pts < 0.0: 674 err_msg = "Not supported start_pts for " + str(start_pts) + ". The start_pts should be >= 0." 675 raise ValueError(err_msg) 676 if end_pts is None: 677 end_pts = 2147483647.0 678 if not isinstance(end_pts, (float, Fraction, int)): 679 raise TypeError("Input end_pts is not of type [{0}, {1}, {2}], but got: {3}".format(float, Fraction, int, 680 type(end_pts))) 681 if end_pts < start_pts: 682 err_msg = "Not supported end_pts for " + str(end_pts) + ". start_pts = " + str(start_pts) + "." 683 err_msg += " The end_pts should be >= start_pts." 684 raise ValueError(err_msg) 685 if not isinstance(pts_unit, str): 686 raise TypeError("Input pts_unit is not of type {0}, but got: {1}.".format(str, type(pts_unit))) 687 if pts_unit not in ["pts", "sec"]: 688 raise ValueError("Not supported pts_unit for " + pts_unit) 689 690 video_output, audio_output, raw_metadata = cde.read_video(filename, float(start_pts), float(end_pts), pts_unit) 691 692 if video_output is not None: 693 video_output = video_output.as_array() 694 if audio_output is not None: 695 audio_output = audio_output.as_array() 696 metadata_output = {} 697 for key in raw_metadata: 698 if key == "video_fps": 699 metadata_output[key] = float(raw_metadata[key]) 700 continue 701 if key == "audio_fps": 702 metadata_output[key] = int(raw_metadata[key]) 703 continue 704 metadata_output[key] = raw_metadata[key] 705 return video_output, audio_output, metadata_output 706 707 708def read_video_timestamps(filename, pts_unit="pts"): 709 """ 710 Read the timestamps and frames per second of a video file. 711 It supports AVI, H264, H265, MOV, MP4, WMV files. 712 713 Args: 714 filename(str): The path to the video file to be read. 715 pts_unit(str, optional): The unit of the timestamps. It can be any of ["pts", "sec"]. Default: "pts". 716 717 Returns: 718 - list, when `pts_unit` is set to "pts", list[int] is returned, when `pts_unit` is set to "sec", 719 list[float] is returned. 720 - float, the frames per second of the video file. 721 722 Raises: 723 TypeError: If `filename` is not of type str. 724 TypeError: If `pts_unit` is not of type str. 725 RuntimeError: If `filename` does not exist, or not a regular file, or not a supported video file. 726 RuntimeError: If `pts_unit` is not in ["pts", "sec"]. 727 728 Supported Platforms: 729 ``CPU`` 730 731 Examples: 732 >>> import mindspore.dataset.vision as vision 733 >>> video_timestamps, video_fps = vision.read_video_timestamps("/path/to/file") 734 """ 735 if not isinstance(filename, str): 736 raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename))) 737 if not isinstance(pts_unit, str): 738 raise TypeError("Input pts_unit is not of type {0}, but got: {1}.".format(str, type(pts_unit))) 739 740 video_pts, video_fps, time_base = cde.read_video_timestamps(filename, pts_unit) 741 742 if video_pts == []: 743 return video_pts, None 744 if pts_unit == "pts": 745 return video_pts, video_fps 746 return [x * time_base for x in video_pts], video_fps 747 748 749def write_file(filename, data): 750 """ 751 Write the one dimension uint8 data into a file using binary mode. 752 753 Args: 754 filename (str): The path to the file to be written. 755 data (Union[numpy.ndarray, mindspore.Tensor]): The one dimension uint8 data to be written. 756 757 Raises: 758 TypeError: If `filename` is not of type str. 759 TypeError: If `data` is not of type numpy.ndarray or mindspore.Tensor. 760 RuntimeError: If the `filename` is not a common file. 761 RuntimeError: If the data type of `data` is not uint8. 762 RuntimeError: If the shape of `data` is not a one-dimensional array. 763 764 Supported Platforms: 765 ``CPU`` 766 767 Examples: 768 >>> import mindspore.dataset.vision as vision 769 >>> import numpy as np 770 >>> # Generate a random data with 1024 bytes 771 >>> data = np.random.randint(256, size=(1024), dtype=np.uint8) 772 >>> vision.write_file("/path/to/file", data) 773 """ 774 if not isinstance(filename, str): 775 raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename))) 776 if isinstance(data, np.ndarray): 777 return cde.write_file(filename, cde.Tensor(data)) 778 if isinstance(data, mindspore.Tensor): 779 return cde.write_file(filename, cde.Tensor(data.asnumpy())) 780 raise TypeError("Input data is not of type {0} or {1}, but got: {2}.".format(np.ndarray, 781 mindspore.Tensor, type(data))) 782 783 784def write_jpeg(filename, image, quality=75): 785 """ 786 Write the image data into a JPEG file. 787 788 Args: 789 filename (str): The path to the file to be written. 790 image (Union[numpy.ndarray, mindspore.Tensor]): The image data to be written. 791 quality (int, optional): Quality of the resulting JPEG file, in range of [1, 100]. Default: ``75``. 792 793 Raises: 794 TypeError: If `filename` is not of type str. 795 TypeError: If `image` is not of type numpy.ndarray or mindspore.Tensor. 796 TypeError: If `quality` is not of type int. 797 RuntimeError: If the `filename` does not exist or not a common file. 798 RuntimeError: If the data type of `image` is not uint8. 799 RuntimeError: If the shape of `image` is not <H, W> or <H, W, 1> or <H, W, 3>. 800 RuntimeError: If `quality` is less than 1 or greater than 100. 801 802 Supported Platforms: 803 ``CPU`` 804 805 Examples: 806 >>> import mindspore.dataset.vision as vision 807 >>> import numpy as np 808 >>> # Generate a random image with height=120, width=340, channels=3 809 >>> image = np.random.randint(256, size=(120, 340, 3), dtype=np.uint8) 810 >>> vision.write_jpeg("/path/to/file", image) 811 """ 812 if not isinstance(filename, str): 813 raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename))) 814 if not isinstance(quality, int): 815 raise TypeError("Input quality is not of type {0}, but got: {1}.".format(int, type(quality))) 816 if isinstance(image, np.ndarray): 817 return cde.write_jpeg(filename, cde.Tensor(image), quality) 818 if isinstance(image, mindspore.Tensor): 819 return cde.write_jpeg(filename, cde.Tensor(image.asnumpy()), quality) 820 raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray, 821 mindspore.Tensor, type(image))) 822 823 824def write_png(filename, image, compression_level=6): 825 """ 826 Write the image into a PNG file. 827 828 Args: 829 filename (str): The path to the file to be written. 830 image (Union[numpy.ndarray, mindspore.Tensor]): The image data to be written. 831 compression_level (int, optional): Compression level for the resulting PNG file, in range of [0, 9]. 832 Default: ``6``. 833 834 Raises: 835 TypeError: If `filename` is not of type str. 836 TypeError: If `image` is not of type numpy.ndarray or mindspore.Tensor. 837 TypeError: If `compression_level` is not of type int. 838 RuntimeError: If the `filename` does not exist or not a common file. 839 RuntimeError: If the data type of `image` is not uint8. 840 RuntimeError: If the shape of `image` is not <H, W> or <H, W, 1> or <H, W, 3>. 841 RuntimeError: If `compression_level` is less than 0 or greater than 9. 842 843 Supported Platforms: 844 ``CPU`` 845 846 Examples: 847 >>> import mindspore.dataset.vision as vision 848 >>> import numpy as np 849 >>> # Generate a random image with height=120, width=340, channels=3 850 >>> image = np.random.randint(256, size=(120, 340, 3), dtype=np.uint8) 851 >>> vision.write_png("/path/to/file", image) 852 """ 853 if not isinstance(filename, str): 854 raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename))) 855 if not isinstance(compression_level, int): 856 raise TypeError("Input compression_level is not of type {0}, but got: {1}.".format(int, 857 type(compression_level))) 858 if isinstance(image, np.ndarray): 859 return cde.write_png(filename, cde.Tensor(image), compression_level) 860 if isinstance(image, mindspore.Tensor): 861 return cde.write_png(filename, cde.Tensor(image.asnumpy()), compression_level) 862 raise TypeError("The input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray, 863 mindspore.Tensor, type(image))) 864