1# Copyright 2019-2024 Huawei Technologies Co., Ltd 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15""" 16The module vision.transforms provides many kinds of image augmentation methods 17and image-related conversion methods 18(e.g. including with PIL.Image.Image and numpy.ndarray). 19to perform various computer vision tasks. 20Users can apply suitable augmentations on image data 21to improve their training models. 22Users can also self-define their own augmentation methods with Python Pillow (PIL) 23 24For the different methods in this module, implementation is based in C++ and/or Python. 25The C++ implementation is inherited from mindspore._c_dataengine, provides high performance 26and is mainly based on OpenCV. 27The Python implementation is mainly based on PIL. 28 29.. Note:: 30 A constructor's arguments for every class in this module must be saved into the 31 class attributes (self.xxx) to support save() and load(). 32 33Examples: 34 >>> import mindspore.dataset as ds 35 >>> import mindspore.dataset.vision as vision 36 >>> from mindspore.dataset.vision import Border, Inter 37 >>> import mindspore.dataset.transforms as transforms 38 >>> 39 >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory" 40 >>> # create a dataset that reads all files in dataset_dir with 8 threads 41 >>> image_folder_dataset = ds.ImageFolderDataset(image_folder_dataset_dir, 42 ... 
num_parallel_workers=8) 43 >>> # create a list of transformations to be applied to the image data 44 >>> transforms_list = [vision.Decode(), 45 ... vision.Resize((256, 256), interpolation=Inter.LINEAR), 46 ... vision.RandomCrop(200, padding_mode=Border.EDGE), 47 ... vision.RandomRotation((0, 15)), 48 ... vision.Normalize((100, 115.0, 121.0), (71.0, 68.0, 70.0)), 49 ... vision.HWC2CHW()] 50 >>> onehot_op = transforms.OneHot(num_classes=10) 51 >>> # apply the transformation to the dataset through data1.map() 52 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 53 ... input_columns="image") 54 >>> image_folder_dataset = image_folder_dataset.map(operations=onehot_op, 55 ... input_columns="label") 56""" 57 58# pylint: disable=too-few-public-methods 59import numbers 60import random 61import numpy as np 62from PIL import Image 63 64import mindspore._c_dataengine as cde 65from mindspore._c_expression import typing 66from . import py_transforms_util as util 67from .py_transforms_util import is_pil 68from .utils import AutoAugmentPolicy, Border, ConvertMode, ImageBatchFormat, Inter, SliceMode, parse_padding 69from .validators import check_adjust_brightness, check_adjust_contrast, check_adjust_gamma, check_adjust_hue, \ 70 check_adjust_saturation, check_adjust_sharpness, check_affine, check_alpha, check_auto_augment, \ 71 check_auto_contrast, check_bounding_box_augment_cpp, check_center_crop, check_convert_color, check_crop, \ 72 check_cut_mix_batch_c, check_cutout_new, check_decode, check_erase, check_five_crop, check_gaussian_blur, \ 73 check_hsv_to_rgb, check_linear_transform, check_mix_up, check_mix_up_batch_c, check_normalize, \ 74 check_normalizepad, check_num_channels, check_pad, check_pad_to_size, check_perspective, check_positive_degrees, \ 75 check_posterize, check_prob, check_rand_augment, check_random_adjust_sharpness, check_random_affine, \ 76 check_random_auto_contrast, check_random_color_adjust, check_random_crop, 
check_random_erasing, \ 77 check_random_perspective, check_random_posterize, check_random_resize_crop, check_random_rotation, \ 78 check_random_select_subpolicy_op, check_random_solarize, check_range, check_rescale, check_resize, \ 79 check_resize_interpolation, check_resized_crop, check_rgb_to_hsv, check_rotate, check_slice_patches, \ 80 check_solarize, check_ten_crop, check_trivial_augment_wide, check_uniform_augment, check_to_tensor, \ 81 check_device_target, FLOAT_MAX_INTEGER 82from ..core.datatypes import mstype_to_detype, nptype_to_detype 83from ..transforms.py_transforms_util import Implementation 84from ..transforms.transforms import CompoundOperation, PyTensorOperation, TensorOperation, TypeCast 85 86 87class ImageTensorOperation(TensorOperation): 88 """ 89 Base class of Image Tensor Ops. 90 """ 91 92 def __call__(self, *input_tensor_list): 93 for tensor in input_tensor_list: 94 if not isinstance(tensor, (np.ndarray, Image.Image)): 95 raise TypeError( 96 "Input should be NumPy or PIL image, got {}.".format(type(tensor))) 97 return super().__call__(*input_tensor_list) 98 99 def parse(self): 100 # Note: subclasses must implement `def parse(self)` so do not make ImageTensorOperation's parse a staticmethod. 101 raise NotImplementedError("ImageTensorOperation has to implement parse() method.") 102 103 104class VideoTensorOperation(TensorOperation): 105 """ 106 Base class of Video Tensor Ops 107 """ 108 109 def __call__(self, *input_tensor_list): 110 for tensor in input_tensor_list: 111 if not isinstance(tensor, np.ndarray): 112 raise TypeError( 113 "Input should be ndarray, got {}.".format(type(tensor))) 114 return super().__call__(*input_tensor_list) 115 116 def parse(self): 117 raise NotImplementedError("VideoTensorOperation has to implement parse() method.") 118 119 120class AdjustBrightness(ImageTensorOperation, PyTensorOperation): 121 """ 122 Adjust the brightness of the input image. 
123 124 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 125 126 Args: 127 brightness_factor (float): How much to adjust the brightness, must be non negative. 128 ``0`` gives a black image, ``1`` gives the original image, 129 while ``2`` increases the brightness by a factor of 2. 130 131 Raises: 132 TypeError: If `brightness_factor` is not of type float. 133 ValueError: If `brightness_factor` is less than 0. 134 RuntimeError: If shape of the input image is not <H, W, C>. 135 136 Supported Platforms: 137 ``CPU`` ``Ascend`` 138 139 Examples: 140 >>> import numpy as np 141 >>> import mindspore.dataset as ds 142 >>> import mindspore.dataset.vision as vision 143 >>> 144 >>> # Use the transform in dataset pipeline mode 145 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 146 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 147 >>> transforms_list = [vision.AdjustBrightness(brightness_factor=2.0)] 148 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 149 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 150 ... print(item["image"].shape, item["image"].dtype) 151 ... 
break 152 (100, 100, 3) uint8 153 >>> 154 >>> # Use the transform in eager mode 155 >>> data = np.random.randint(0, 256, (20, 20, 3)) / 255.0 156 >>> data = data.astype(np.float32) 157 >>> output = vision.AdjustBrightness(2.666)(data) 158 >>> print(output.shape, output.dtype) 159 (20, 20, 3) float32 160 161 Tutorial Examples: 162 - `Illustration of vision transforms 163 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 164 """ 165 166 @check_adjust_brightness 167 def __init__(self, brightness_factor): 168 super().__init__() 169 self.brightness_factor = brightness_factor 170 171 @check_device_target 172 def device(self, device_target="CPU"): 173 """ 174 Set the device for the current operator execution. 175 176 - When the device is Ascend, input shape should be limited from [4, 6] to [8192, 4096]. 177 178 Args: 179 device_target (str, optional): The operator will be executed on this device. Currently supports 180 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 181 182 Raises: 183 TypeError: If `device_target` is not of type str. 184 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 185 186 Supported Platforms: 187 ``CPU`` ``Ascend`` 188 189 Examples: 190 >>> import numpy as np 191 >>> import mindspore.dataset as ds 192 >>> import mindspore.dataset.vision as vision 193 >>> 194 >>> # Use the transform in dataset pipeline mode 195 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 196 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 197 >>> transforms_list = [vision.AdjustBrightness(2.0).device("Ascend")] 198 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 199 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 200 ... print(item["image"].shape, item["image"].dtype) 201 ... 
break 202 (100, 100, 3) uint8 203 >>> 204 >>> # Use the transform in eager mode 205 >>> data = np.random.randint(0, 256, (20, 20, 3)) / 255.0 206 >>> data = data.astype(np.float32) 207 >>> output = vision.AdjustBrightness(2.666).device("Ascend")(data) 208 >>> print(output.shape, output.dtype) 209 (20, 20, 3) float32 210 211 Tutorial Examples: 212 - `Illustration of vision transforms 213 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 214 """ 215 self.device_target = device_target 216 return self 217 218 def parse(self): 219 return cde.AdjustBrightnessOperation(self.brightness_factor, self.device_target) 220 221 def _execute_py(self, img): 222 """ 223 Execute method. 224 225 Args: 226 img (PIL Image): Image to be brightness adjusted. 227 228 Returns: 229 PIL Image, brightness adjusted image. 230 """ 231 return util.adjust_brightness(img, self.brightness_factor) 232 233 234class AdjustContrast(ImageTensorOperation, PyTensorOperation): 235 """ 236 Adjust the contrast of the input image. 237 238 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 239 240 Args: 241 contrast_factor (float): How much to adjust the contrast, must be non negative. 242 ``0`` gives a solid gray image, ``1`` gives the original image, 243 while ``2`` increases the contrast by a factor of 2. 244 245 Raises: 246 TypeError: If `contrast_factor` is not of type float. 247 ValueError: If `contrast_factor` is less than 0. 248 RuntimeError: If shape of the input image is not <H, W, C>. 
249 250 Supported Platforms: 251 ``CPU`` ``Ascend`` 252 253 Examples: 254 >>> import numpy as np 255 >>> import mindspore.dataset as ds 256 >>> import mindspore.dataset.vision as vision 257 >>> 258 >>> # Use the transform in dataset pipeline mode 259 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 260 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 261 >>> transforms_list = [vision.AdjustContrast(contrast_factor=2.0)] 262 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 263 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 264 ... print(item["image"].shape, item["image"].dtype) 265 ... break 266 (100, 100, 3) uint8 267 >>> 268 >>> # Use the transform in eager mode 269 >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3)) 270 >>> output = vision.AdjustContrast(2.0)(data) 271 >>> print(output.shape, output.dtype) 272 (2, 2, 3) uint8 273 274 Tutorial Examples: 275 - `Illustration of vision transforms 276 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 277 """ 278 279 @check_adjust_contrast 280 def __init__(self, contrast_factor): 281 super().__init__() 282 self.contrast_factor = contrast_factor 283 284 @check_device_target 285 def device(self, device_target="CPU"): 286 """ 287 Set the device for the current operator execution. 288 289 - When the device is Ascend, input shape should be limited from [4, 6] to [8192, 4096]. 290 291 Args: 292 device_target (str, optional): The operator will be executed on this device. Currently supports 293 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 294 295 Raises: 296 TypeError: If `device_target` is not of type str. 297 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 
298 299 Supported Platforms: 300 ``CPU`` ``Ascend`` 301 302 Examples: 303 >>> import numpy as np 304 >>> import mindspore.dataset as ds 305 >>> import mindspore.dataset.vision as vision 306 >>> 307 >>> # Use the transform in dataset pipeline mode 308 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 309 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 310 >>> transforms_list = [vision.AdjustContrast(0).device("Ascend")] 311 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 312 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 313 ... print(item["image"].shape, item["image"].dtype) 314 ... break 315 (100, 100, 3) uint8 316 >>> 317 >>> # Use the transform in eager mode 318 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 319 >>> output = vision.AdjustContrast(2.0).device("Ascend")(data) 320 >>> print(output.shape, output.dtype) 321 (100, 100, 3) uint8 322 323 Tutorial Examples: 324 - `Illustration of vision transforms 325 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 326 """ 327 self.device_target = device_target 328 return self 329 330 def parse(self): 331 return cde.AdjustContrastOperation(self.contrast_factor, self.device_target) 332 333 def _execute_py(self, img): 334 """ 335 Execute method. 336 337 Args: 338 img (PIL Image): Image to be contrast adjusted. 339 340 Returns: 341 PIL Image, contrast adjusted image. 342 """ 343 return util.adjust_contrast(img, self.contrast_factor) 344 345 346class AdjustGamma(ImageTensorOperation, PyTensorOperation): 347 r""" 348 Apply gamma correction on input image. Input image is expected to be in <..., H, W, C> or <H, W> format. 349 350 .. math:: 351 I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma} 352 353 See `Gamma Correction`_ for more details. 354 355 .. 
_Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction 356 357 Args: 358 gamma (float): Non negative real number. 359 The output image pixel value is exponentially related to the input image pixel value. 360 gamma larger than 1 make the shadows darker, 361 while gamma smaller than 1 make dark regions lighter. 362 gain (float, optional): The constant multiplier. Default: ``1.0``. 363 364 Raises: 365 TypeError: If `gain` is not of type float. 366 TypeError: If `gamma` is not of type float. 367 ValueError: If `gamma` is less than 0. 368 RuntimeError: If given tensor shape is not <H, W> or <..., H, W, C>. 369 370 Supported Platforms: 371 ``CPU`` 372 373 Examples: 374 >>> import numpy as np 375 >>> import mindspore.dataset as ds 376 >>> import mindspore.dataset.vision as vision 377 >>> 378 >>> # Use the transform in dataset pipeline mode 379 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 380 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 381 >>> transforms_list = [vision.AdjustGamma(gamma=10.0, gain=1.0)] 382 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 383 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 384 ... print(item["image"].shape, item["image"].dtype) 385 ... 
break 386 (100, 100, 3) uint8 387 >>> 388 >>> # Use the transform in eager mode 389 >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3)) 390 >>> output = vision.AdjustGamma(gamma=0.1, gain=1.0)(data) 391 >>> print(output.shape, output.dtype) 392 (2, 2, 3) uint8 393 394 Tutorial Examples: 395 - `Illustration of vision transforms 396 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 397 """ 398 399 @check_adjust_gamma 400 def __init__(self, gamma, gain=1): 401 super().__init__() 402 self.gamma = gamma 403 self.gain = gain 404 self.random = False 405 406 def parse(self): 407 return cde.AdjustGammaOperation(self.gamma, self.gain) 408 409 def _execute_py(self, img): 410 """ 411 Execute method. 412 413 Args: 414 img (PIL Image): Image to be gamma adjusted. 415 416 Returns: 417 PIL Image, gamma adjusted image. 418 """ 419 return util.adjust_gamma(img, self.gamma, self.gain) 420 421 422class AdjustHue(ImageTensorOperation, PyTensorOperation): 423 """ 424 Adjust the hue of the input image. 425 426 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 427 428 Args: 429 hue_factor (float): How much to add to the hue channel, 430 must be in range of [-0.5, 0.5]. 431 432 Raises: 433 TypeError: If `hue_factor` is not of type float. 434 ValueError: If `hue_factor` is not in the interval [-0.5, 0.5]. 435 RuntimeError: If shape of the input image is not <H, W, C>. 
436 437 Supported Platforms: 438 ``CPU`` ``Ascend`` 439 440 Examples: 441 >>> import numpy as np 442 >>> import mindspore.dataset as ds 443 >>> import mindspore.dataset.vision as vision 444 >>> 445 >>> # Use the transform in dataset pipeline mode 446 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 447 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 448 >>> transforms_list = [vision.AdjustHue(hue_factor=0.2)] 449 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 450 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 451 ... print(item["image"].shape, item["image"].dtype) 452 ... break 453 (100, 100, 3) uint8 454 >>> 455 >>> # Use the transform in eager mode 456 >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3)) 457 >>> output = vision.AdjustHue(hue_factor=0.2)(data) 458 >>> print(output.shape, output.dtype) 459 (2, 2, 3) uint8 460 461 Tutorial Examples: 462 - `Illustration of vision transforms 463 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 464 """ 465 466 @check_adjust_hue 467 def __init__(self, hue_factor): 468 super().__init__() 469 self.hue_factor = hue_factor 470 471 @check_device_target 472 def device(self, device_target="CPU"): 473 """ 474 Set the device for the current operator execution. 475 476 - When the device is Ascend, input shape should be limited from [4, 6] to [8192, 4096]. 477 478 Args: 479 device_target (str, optional): The operator will be executed on this device. Currently supports 480 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 481 482 Raises: 483 TypeError: If `device_target` is not of type str. 484 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 
485 486 Supported Platforms: 487 ``CPU`` ``Ascend`` 488 489 Examples: 490 >>> import numpy as np 491 >>> import mindspore.dataset as ds 492 >>> import mindspore.dataset.vision as vision 493 >>> 494 >>> # Use the transform in dataset pipeline mode 495 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 496 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 497 >>> transforms_list = [vision.AdjustHue(0.5).device("Ascend")] 498 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 499 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 500 ... print(item["image"].shape, item["image"].dtype) 501 ... break 502 (100, 100, 3) uint8 503 >>> 504 >>> # Use the transform in eager mode 505 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 506 >>> output = vision.AdjustHue(hue_factor=0.2).device("Ascend")(data) 507 >>> print(output.shape, output.dtype) 508 (100, 100, 3) uint8 509 510 Tutorial Examples: 511 - `Illustration of vision transforms 512 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 513 """ 514 self.device_target = device_target 515 return self 516 517 def parse(self): 518 return cde.AdjustHueOperation(self.hue_factor, self.device_target) 519 520 def _execute_py(self, img): 521 """ 522 Execute method. 523 524 Args: 525 img (PIL Image): Image to be hue adjusted. 526 527 Returns: 528 PIL Image, hue adjusted image. 529 """ 530 return util.adjust_hue(img, self.hue_factor) 531 532 533class AdjustSaturation(ImageTensorOperation, PyTensorOperation): 534 """ 535 Adjust the saturation of the input image. 536 537 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 538 539 Args: 540 saturation_factor (float): How much to adjust the saturation, must be non negative. 
541 ``0`` gives a black image, ``1`` gives the original image 542 while ``2`` increases the saturation by a factor of 2. 543 544 Raises: 545 TypeError: If `saturation_factor` is not of type float. 546 ValueError: If `saturation_factor` is less than 0. 547 RuntimeError: If shape of the input image is not <H, W, C>. 548 RuntimeError: If channel of the input image is not 3. 549 550 Supported Platforms: 551 ``CPU`` ``Ascend`` 552 553 Examples: 554 >>> import numpy as np 555 >>> import mindspore.dataset as ds 556 >>> import mindspore.dataset.vision as vision 557 >>> 558 >>> # Use the transform in dataset pipeline mode 559 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 560 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 561 >>> transforms_list = [vision.AdjustSaturation(saturation_factor=2.0)] 562 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 563 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 564 ... print(item["image"].shape, item["image"].dtype) 565 ... break 566 (100, 100, 3) uint8 567 >>> 568 >>> # Use the transform in eager mode 569 >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3)) 570 >>> output = vision.AdjustSaturation(saturation_factor=2.0)(data) 571 >>> print(output.shape, output.dtype) 572 (2, 2, 3) uint8 573 574 Tutorial Examples: 575 - `Illustration of vision transforms 576 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 577 """ 578 579 @check_adjust_saturation 580 def __init__(self, saturation_factor): 581 super().__init__() 582 self.saturation_factor = saturation_factor 583 584 @check_device_target 585 def device(self, device_target="CPU"): 586 """ 587 Set the device for the current operator execution. 588 589 - When the device is Ascend, input shape should be limited from [4, 6] to [8192, 4096]. 
590 591 Args: 592 device_target (str, optional): The operator will be executed on this device. Currently supports 593 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 594 595 Raises: 596 TypeError: If `device_target` is not of type str. 597 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 598 599 Supported Platforms: 600 ``CPU`` ``Ascend`` 601 602 Examples: 603 >>> import numpy as np 604 >>> import mindspore.dataset as ds 605 >>> import mindspore.dataset.vision as vision 606 >>> 607 >>> # Use the transform in dataset pipeline mode 608 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 609 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 610 >>> transforms_list = [vision.AdjustSaturation(2.0).device("Ascend")] 611 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 612 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 613 ... print(item["image"].shape, item["image"].dtype) 614 ... break 615 (100, 100, 3) uint8 616 >>> 617 >>> # Use the transform in eager mode 618 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 619 >>> output = vision.AdjustSaturation(saturation_factor=2.0).device("Ascend")(data) 620 >>> print(output.shape, output.dtype) 621 (100, 100, 3) uint8 622 623 Tutorial Examples: 624 - `Illustration of vision transforms 625 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 626 """ 627 self.device_target = device_target 628 return self 629 630 def parse(self): 631 return cde.AdjustSaturationOperation(self.saturation_factor, self.device_target) 632 633 def _execute_py(self, img): 634 """ 635 Execute method. 636 637 Args: 638 img (PIL Image): Image to be saturation adjusted. 639 640 Returns: 641 PIL Image, saturation adjusted image. 
642 """ 643 return util.adjust_saturation(img, self.saturation_factor) 644 645 646class AdjustSharpness(ImageTensorOperation): 647 """ 648 Adjust the sharpness of the input image. 649 650 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 651 652 Args: 653 sharpness_factor (float): How much to adjust the sharpness, must be 654 non negative. ``0`` gives a blurred image, ``1`` gives the 655 original image while ``2`` increases the sharpness by a factor of 2. 656 657 Raises: 658 TypeError: If `sharpness_factor` is not of type float. 659 ValueError: If `sharpness_factor` is less than 0. 660 RuntimeError: If shape of the input image is not <H, W> or <H, W, C>. 661 662 Supported Platforms: 663 ``CPU`` ``Ascend`` 664 665 Examples: 666 >>> import numpy as np 667 >>> import mindspore.dataset as ds 668 >>> import mindspore.dataset.vision as vision 669 >>> 670 >>> # Use the transform in dataset pipeline mode 671 >>> # create a dataset that reads all files in dataset_dir with 8 threads 672 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 673 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 674 >>> transforms_list = [vision.AdjustSharpness(sharpness_factor=2.0)] 675 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 676 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 677 ... print(item["image"].shape, item["image"].dtype) 678 ... 
break 679 (100, 100, 3) uint8 680 >>> 681 >>> # Use the transform in eager mode 682 >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((3, 4)) 683 >>> output = vision.AdjustSharpness(sharpness_factor=0)(data) 684 >>> print(output.shape, output.dtype) 685 (3, 4) uint8 686 687 Tutorial Examples: 688 - `Illustration of vision transforms 689 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 690 """ 691 692 @check_adjust_sharpness 693 def __init__(self, sharpness_factor): 694 super().__init__() 695 self.sharpness_factor = sharpness_factor 696 self.implementation = Implementation.C 697 698 @check_device_target 699 def device(self, device_target="CPU"): 700 """ 701 Set the device for the current operator execution. 702 703 - When the device is Ascend, input type supports `uint8` or `float32` , input channel supports 1 and 3. 704 The input data has a height limit of [4, 8192] and a width limit of [6, 4096]. 705 706 Args: 707 device_target (str, optional): The operator will be executed on this device. Currently supports 708 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 709 710 Raises: 711 TypeError: If `device_target` is not of type str. 712 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 
713 714 Supported Platforms: 715 ``CPU`` ``Ascend`` 716 717 Examples: 718 >>> import numpy as np 719 >>> import mindspore.dataset as ds 720 >>> import mindspore.dataset.vision as vision 721 >>> 722 >>> # Use the transform in dataset pipeline mode 723 >>> # create a dataset that reads all files in dataset_dir with 8 threads 724 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 725 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 726 >>> transforms_list = [vision.AdjustSharpness(sharpness_factor=2.0).device("Ascend")] 727 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 728 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 729 ... print(item["image"].shape, item["image"].dtype) 730 ... break 731 (100, 100, 3) uint8 732 >>> 733 >>> # Use the transform in eager mode 734 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 735 >>> output = vision.AdjustSharpness(sharpness_factor=0).device("Ascend")(data) 736 >>> print(output.shape, output.dtype) 737 (100, 100, 3) uint8 738 739 Tutorial Examples: 740 - `Illustration of vision transforms 741 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 742 """ 743 self.device_target = device_target 744 return self 745 746 def parse(self): 747 return cde.AdjustSharpnessOperation(self.sharpness_factor, self.device_target) 748 749 750class Affine(ImageTensorOperation): 751 """ 752 Apply Affine transformation to the input image, keeping the center of the image unchanged. 753 754 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 755 756 Args: 757 degrees (float): Rotation angle in degrees between -180 and 180, clockwise direction. 758 translate (Sequence[float, float]): The horizontal and vertical translations, must be a sequence of size 2 759 and value between -1 and 1. 
760 scale (float): Scaling factor, which must be positive. 761 shear (Union[float, Sequence[float, float]]): Shear angle value in degrees between -180 to 180. 762 If float is provided, shear along the x axis with this value, without shearing along the y axis; 763 If Sequence[float, float] is provided, shear along the x axis and y axis with these two values separately. 764 resample (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` . 765 Default: ``Inter.NEAREST``. 766 fill_value (Union[int, tuple[int, int, int]], optional): Optional `fill_value` to fill the area 767 outside the transform in the output image. There must be three elements in tuple and the value 768 of single element is [0, 255]. Default: ``0``. 769 770 Raises: 771 TypeError: If `degrees` is not of type float. 772 TypeError: If `translate` is not of type Sequence[float, float]. 773 TypeError: If `scale` is not of type float. 774 ValueError: If `scale` is non positive. 775 TypeError: If `shear` is not of float or Sequence[float, float]. 776 TypeError: If `resample` is not of type :class:`~.vision.Inter` . 777 TypeError: If `fill_value` is not of type int or tuple[int, int, int]. 778 RuntimeError: If shape of the input image is not <H, W> or <H, W, C>. 779 780 Supported Platforms: 781 ``CPU`` ``Ascend`` 782 783 Examples: 784 >>> import numpy as np 785 >>> import mindspore.dataset as ds 786 >>> import mindspore.dataset.vision as vision 787 >>> from mindspore.dataset.vision import Inter 788 >>> 789 >>> # Use the transform in dataset pipeline mode 790 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 791 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 792 >>> affine_op = vision.Affine(degrees=15, translate=[0.2, 0.2], scale=1.1, shear=[1.0, 1.0], 793 ... 
resample=Inter.BILINEAR) 794 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[affine_op], input_columns=["image"]) 795 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 796 ... print(item["image"].shape, item["image"].dtype) 797 ... break 798 (100, 100, 3) uint8 799 >>> 800 >>> # Use the transform in eager mode 801 >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3)) 802 >>> output = vision.Affine(degrees=15, translate=[0.2, 0.2], scale=1.1, 803 ... shear=[1.0, 1.0], resample=Inter.BILINEAR)(data) 804 >>> print(output.shape, output.dtype) 805 (2, 2, 3) uint8 806 807 Tutorial Examples: 808 - `Illustration of vision transforms 809 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 810 """ 811 812 @check_affine 813 def __init__(self, degrees, translate, scale, shear, resample=Inter.NEAREST, fill_value=0): 814 super().__init__() 815 # Parameter checking 816 if isinstance(shear, numbers.Number): 817 shear = (shear, 0.) 818 819 if isinstance(fill_value, numbers.Number): 820 fill_value = (fill_value, fill_value, fill_value) 821 822 self.degrees = degrees 823 self.translate = translate 824 self.scale_ = scale 825 self.shear = shear 826 self.resample = resample 827 self.fill_value = fill_value 828 self.implementation = Implementation.C 829 830 @check_device_target 831 def device(self, device_target="CPU"): 832 """ 833 Set the device for the current operator execution. 834 835 - When the device is Ascend, input shape should be limited from [4, 6] to [32768, 32768]. 836 837 Args: 838 device_target (str, optional): The operator will be executed on this device. Currently supports 839 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 840 841 Raises: 842 TypeError: If `device_target` is not of type str. 843 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 
844 845 Supported Platforms: 846 ``CPU`` ``Ascend`` 847 848 Examples: 849 >>> import numpy as np 850 >>> import mindspore.dataset as ds 851 >>> import mindspore.dataset.vision as vision 852 >>> from mindspore.dataset.vision import Inter 853 >>> 854 >>> # Use the transform in dataset pipeline mode 855 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 856 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 857 >>> affine_op = vision.Affine(degrees=15, translate=[0.2, 0.2], scale=1.1, 858 ... shear=[1.0, 1.0], resample=Inter.BILINEAR).device("Ascend") 859 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[affine_op], input_columns=["image"]) 860 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 861 ... print(item["image"].shape, item["image"].dtype) 862 ... break 863 (100, 100, 3) uint8 864 >>> 865 >>> # Use the transform in eager mode 866 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 867 >>> output = vision.Affine(degrees=15, translate=[0.2, 0.2], scale=1.1, 868 ... 
class AutoAugment(ImageTensorOperation):
    """
    Apply the AutoAugment data augmentation method described in
    `AutoAugment: Learning Augmentation Strategies from Data <https://arxiv.org/pdf/1805.09501.pdf>`_ .
    This operation works only with 3-channel RGB images.

    Args:
        policy (AutoAugmentPolicy, optional): AutoAugment policies learned on different datasets.
            Default: ``AutoAugmentPolicy.IMAGENET``.
            It can be ``AutoAugmentPolicy.IMAGENET``, ``AutoAugmentPolicy.CIFAR10``, ``AutoAugmentPolicy.SVHN``.
            Randomly apply 2 operations from a candidate set. See auto augmentation details in AutoAugmentPolicy.

            - ``AutoAugmentPolicy.IMAGENET``, means to apply AutoAugment learned on ImageNet dataset.

            - ``AutoAugmentPolicy.CIFAR10``, means to apply AutoAugment learned on Cifar10 dataset.

            - ``AutoAugmentPolicy.SVHN``, means to apply AutoAugment learned on SVHN dataset.

        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.NEAREST``.
        fill_value (Union[int, tuple[int]], optional): Pixel fill value for the area outside the transformed image.
            It can be an int or a 3-tuple. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
            If it is an integer, it is used for all RGB channels. The fill_value values must be in range [0, 255].
            Default: ``0``.

    Raises:
        TypeError: If `policy` is not of type :class:`mindspore.dataset.vision.AutoAugmentPolicy` .
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        TypeError: If `fill_value` is not an integer or a tuple of length 3.
        RuntimeError: If given tensor shape is not <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import AutoAugmentPolicy, Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> transforms_list = [vision.AutoAugment(policy=AutoAugmentPolicy.IMAGENET,
        ...                                       interpolation=Inter.NEAREST,
        ...                                       fill_value=0)]
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.AutoAugment()(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_auto_augment
    def __init__(self, policy=AutoAugmentPolicy.IMAGENET, interpolation=Inter.NEAREST, fill_value=0):
        super().__init__()
        self.policy = policy
        self.interpolation = interpolation
        # A single int is repeated for the three channels; a tuple is stored as given.
        self.fill_value = (fill_value,) * 3 if isinstance(fill_value, int) else fill_value
        self.implementation = Implementation.C

    def parse(self):
        """Build the ``cde.AutoAugmentOperation`` from the stored policy, interpolation and fill value."""
        c_policy = AutoAugmentPolicy.to_c_type(self.policy)
        c_interpolation = Inter.to_c_type(self.interpolation)
        return cde.AutoAugmentOperation(c_policy, c_interpolation, self.fill_value)
@check_auto_contrast
def __init__(self, cutoff=0.0, ignore=None):
    """
    Store `cutoff` and normalise `ignore` to a list.

    ``None`` becomes an empty list and a single int is wrapped into a one-element list;
    any other value is stored unchanged.
    """
    super().__init__()
    if ignore is None:
        ignore_values = []
    elif isinstance(ignore, int):
        ignore_values = [ignore]
    else:
        ignore_values = ignore
    self.cutoff = cutoff
    self.ignore = ignore_values
    self.random = False
def parse(self):
    """Build the ``cde.AutoContrastOperation`` from the stored `cutoff`, `ignore` and `device_target`."""
    op_args = (self.cutoff, self.ignore, self.device_target)
    return cde.AutoContrastOperation(*op_args)
class BoundingBoxAugment(ImageTensorOperation):
    """
    Apply a given image processing operation on a random selection of bounding box regions of a given image.

    Args:
        transform (TensorOperation): Transformation operation to be applied on random selection
            of bounding box regions of a given image.
        ratio (float, optional): Ratio of bounding boxes to apply augmentation on.
            Range: [0.0, 1.0]. Default: ``0.3``.

    Raises:
        TypeError: If `transform` is not an image processing operation in `mindspore.dataset.vision` .
        TypeError: If `ratio` is not of type float.
        ValueError: If `ratio` is not in range [0.0, 1.0].
        RuntimeError: If given bounding box is invalid.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.transforms as transforms
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32))
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func],
        ...                                                 input_columns=["image"],
        ...                                                 output_columns=["image", "bbox"])
        >>> # set bounding box operation with ratio of 1 to apply rotation on all bounding boxes
        >>> bbox_aug_op = vision.BoundingBoxAugment(vision.RandomRotation(90), 1)
        >>> # map to apply ops
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[bbox_aug_op],
        ...                                                 input_columns=["image", "bbox"],
        ...                                                 output_columns=["image", "bbox"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["bbox"].shape, item["bbox"].dtype)
        ...     break
        (100, 100, 3) float32
        (1, 4) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((3, 4))
        >>> data = data.astype(np.float32)
        >>> func = lambda img, bboxes: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(bboxes.dtype))
        >>> func_data, func_bboxes = func(data, data)
        >>> output = vision.BoundingBoxAugment(transforms.Fill(100), 1.0)(func_data, func_bboxes)
        >>> print(output[0].shape, output[0].dtype)
        (3, 4) float32
        >>> print(output[1].shape, output[1].dtype)
        (1, 4) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_bounding_box_augment_cpp
    def __init__(self, transform, ratio=0.3):
        super().__init__()
        self.ratio = ratio
        self.transform = transform
        self.implementation = Implementation.C

    def parse(self):
        """Build the ``cde.BoundingBoxAugmentOperation``, parsing the wrapped transform when it offers ``parse``."""
        inner_op = self.transform
        # A truthy transform exposing a truthy `parse` attribute is converted first;
        # anything else is handed over unchanged.
        if inner_op and getattr(inner_op, 'parse', None):
            inner_op = inner_op.parse()
        return cde.BoundingBoxAugmentOperation(inner_op, self.ratio)
1176 ValueError: If `size` is less than or equal to 0. 1177 RuntimeError: If given tensor shape is not <H, W> or <..., H, W, C>. 1178 1179 Supported Platforms: 1180 ``CPU`` 1181 1182 Examples: 1183 >>> import numpy as np 1184 >>> import mindspore.dataset as ds 1185 >>> import mindspore.dataset.vision as vision 1186 >>> 1187 >>> # Use the transform in dataset pipeline mode 1188 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 1189 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 1190 >>> 1191 >>> # crop image to a square 1192 >>> transforms_list1 = [vision.CenterCrop(50)] 1193 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list1, input_columns=["image"]) 1194 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 1195 ... print(item["image"].shape, item["image"].dtype) 1196 ... break 1197 (50, 50, 3) uint8 1198 >>> 1199 >>> # crop image to portrait style 1200 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 1201 >>> transforms_list2 = [vision.CenterCrop((60, 40))] 1202 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list2, input_columns=["image"]) 1203 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 1204 ... print(item["image"].shape, item["image"].dtype) 1205 ... 
class ConvertColor(ImageTensorOperation):
    """
    Change the color space of the image.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        convert_mode (ConvertMode): The mode of image channel conversion.

            - ConvertMode.COLOR_BGR2BGRA, Convert BGR image to BGRA image.

            - ConvertMode.COLOR_RGB2RGBA, Convert RGB image to RGBA image.

            - ConvertMode.COLOR_BGRA2BGR, Convert BGRA image to BGR image.

            - ConvertMode.COLOR_RGBA2RGB, Convert RGBA image to RGB image.

            - ConvertMode.COLOR_BGR2RGBA, Convert BGR image to RGBA image.

            - ConvertMode.COLOR_RGB2BGRA, Convert RGB image to BGRA image.

            - ConvertMode.COLOR_RGBA2BGR, Convert RGBA image to BGR image.

            - ConvertMode.COLOR_BGRA2RGB, Convert BGRA image to RGB image.

            - ConvertMode.COLOR_BGR2RGB, Convert BGR image to RGB image.

            - ConvertMode.COLOR_RGB2BGR, Convert RGB image to BGR image.

            - ConvertMode.COLOR_BGRA2RGBA, Convert BGRA image to RGBA image.

            - ConvertMode.COLOR_RGBA2BGRA, Convert RGBA image to BGRA image.

            - ConvertMode.COLOR_BGR2GRAY, Convert BGR image to GRAY image.

            - ConvertMode.COLOR_RGB2GRAY, Convert RGB image to GRAY image.

            - ConvertMode.COLOR_GRAY2BGR, Convert GRAY image to BGR image.

            - ConvertMode.COLOR_GRAY2RGB, Convert GRAY image to RGB image.

            - ConvertMode.COLOR_GRAY2BGRA, Convert GRAY image to BGRA image.

            - ConvertMode.COLOR_GRAY2RGBA, Convert GRAY image to RGBA image.

            - ConvertMode.COLOR_BGRA2GRAY, Convert BGRA image to GRAY image.

            - ConvertMode.COLOR_RGBA2GRAY, Convert RGBA image to GRAY image.

    Raises:
        TypeError: If `convert_mode` is not of type :class:`mindspore.dataset.vision.ConvertMode` .
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>>
        >>> # Convert RGB images to GRAY images
        >>> convert_op = vision.ConvertColor(vision.ConvertMode.COLOR_RGB2GRAY)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=convert_op, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100) uint8
        >>> # Convert RGB images to BGR images
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> convert_op = vision.ConvertColor(vision.ConvertMode.COLOR_RGB2BGR)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=convert_op, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.ConvertColor(vision.ConvertMode.COLOR_RGB2GRAY)(data)
        >>> print(output.shape, output.dtype)
        (100, 100) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_convert_color
    def __init__(self, convert_mode):
        super().__init__()
        self.convert_mode = convert_mode
        self.implementation = Implementation.C

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type only supports `uint8` , input channel supports 1 and 3.
          The input data has a height limit of [4, 8192] and a width limit of [6, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> transforms_list = [vision.ConvertColor(vision.ConvertMode.COLOR_RGB2BGR).device("Ascend")]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.ConvertColor(vision.ConvertMode.COLOR_RGB2BGR).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        self.device_target = device_target
        # Returning the instance allows `.device(...)` to be chained, as the examples do.
        return self

    def parse(self):
        """Build the ``cde.ConvertColorOperation`` from the stored conversion mode and device target."""
        c_mode = ConvertMode.to_c_type(self.convert_mode)
        return cde.ConvertColorOperation(c_mode, self.device_target)
1402 If size is an integer, a square crop of size (size, size) is returned. 1403 If size is a sequence of length 2, it should be (height, width). 1404 The size value(s) must be larger than 0. 1405 1406 Raises: 1407 TypeError: If `coordinates` is not of type sequence. 1408 TypeError: If `size` is not of type integer or sequence. 1409 ValueError: If `coordinates` is less than 0. 1410 ValueError: If `size` is less than or equal to 0. 1411 RuntimeError: If given tensor shape is not <H, W> or <H, W, C>. 1412 1413 Supported Platforms: 1414 ``CPU`` ``Ascend`` 1415 1416 Examples: 1417 >>> import numpy as np 1418 >>> import mindspore.dataset as ds 1419 >>> import mindspore.dataset.vision as vision 1420 >>> 1421 >>> # Use the transform in dataset pipeline mode 1422 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 1423 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 1424 >>> crop_op = vision.Crop((0, 0), 32) 1425 >>> transforms_list = [crop_op] 1426 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 1427 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 1428 ... print(item["image"].shape, item["image"].dtype) 1429 ... 
@check_crop
def __init__(self, coordinates, size):
    """
    Store the crop origin and output size on the instance.

    An int `size` is expanded to the square ``(size, size)``; any other value is stored unchanged.
    """
    super().__init__()
    self.coordinates = coordinates
    self.size = (size, size) if isinstance(size, int) else size
    self.implementation = Implementation.C
class CutMixBatch(ImageTensorOperation):
    """
    Apply CutMix transformation on input batch of images and labels.
    Note that you need to make labels into one-hot format and batched before calling this operation.

    Args:
        image_batch_format (ImageBatchFormat): The layout of the image batch. Can be any of
            [ImageBatchFormat.NHWC, ImageBatchFormat.NCHW].
        alpha (float, optional): Hyperparameter of beta distribution, must be larger than 0. Default: ``1.0``.
        prob (float, optional): The probability by which CutMix is applied to each image,
            which must be in range: [0.0, 1.0]. Default: ``1.0``.

    Raises:
        TypeError: If `image_batch_format` is not of type :class:`mindspore.dataset.vision.ImageBatchFormat` .
        TypeError: If `alpha` is not of type float.
        TypeError: If `prob` is not of type float.
        ValueError: If `alpha` is less than or equal 0.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <N, H, W, C> or <N, C, H, W>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.transforms as transforms
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import ImageBatchFormat
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(28, 28, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(
        ...     operations=lambda img: (data, np.random.randint(0, 5, (3, 1))),
        ...     input_columns=["image"],
        ...     output_columns=["image", "label"])
        >>> onehot_op = transforms.OneHot(num_classes=10)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=onehot_op, input_columns=["label"])
        >>> cutmix_batch_op = vision.CutMixBatch(ImageBatchFormat.NHWC, 1.0, 0.5)
        >>> numpy_slices_dataset = numpy_slices_dataset.batch(5)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=cutmix_batch_op,
        ...                                                 input_columns=["image", "label"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["label"].shape, item["label"].dtype)
        ...     break
        (5, 28, 28, 3) uint8
        (5, 3, 10) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, (3, 3, 10, 10)).astype(np.uint8)
        >>> label = np.array([[0, 1], [1, 0], [1, 0]])
        >>> output = vision.CutMixBatch(vision.ImageBatchFormat.NCHW, 1.0, 1.0)(data, label)
        >>> print(output[0].shape, output[0].dtype)
        (3, 3, 10, 10) uint8
        >>> print(output[1].shape, output[1].dtype)
        (3, 2) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_cut_mix_batch_c
    def __init__(self, image_batch_format, alpha=1.0, prob=1.0):
        super().__init__()
        # Only the enum member's `.value` is kept on the instance, not the member itself.
        self.image_batch_format = image_batch_format.value
        self.alpha = alpha
        self.prob = prob
        self.implementation = Implementation.C

    def parse(self):
        """Build the ``cde.CutMixBatchOperation`` from the stored batch format, `alpha` and `prob`."""
        c_format = ImageBatchFormat.to_c_type(self.image_batch_format)
        return cde.CutMixBatchOperation(c_format, self.alpha, self.prob)
@check_cutout_new
def __init__(self, length, num_patches=1, is_hwc=True):
    """
    Record the cut-out configuration on the instance.

    Args:
        length (int): The side length of each square patch.
        num_patches (int, optional): Number of patches to be cut out of an image. Default: ``1``.
        is_hwc (bool, optional): Whether the input image is in HWC format. Default: ``True``.
    """
    super().__init__()
    self.length = length
    self.num_patches = num_patches
    self.is_hwc = is_hwc
    # NOTE(review): the class docstring describes randomly placed patches, yet `random` is
    # set to False just as in the deterministic AutoContrast/CenterCrop - confirm this is intended.
    self.random = False
    self.implementation = Implementation.C
If ``True``, 1648 the image will be decoded to the PIL data type, otherwise it will be decoded to the 1649 NumPy data type. Default: ``False``. 1650 1651 Raises: 1652 RuntimeError: If given tensor is not a 1D sequence. 1653 RuntimeError: If the input is not raw image bytes. 1654 RuntimeError: If the input image is already decoded. 1655 1656 Supported Platforms: 1657 ``CPU`` ``Ascend`` 1658 1659 Examples: 1660 >>> import os 1661 >>> import numpy as np 1662 >>> from PIL import Image, ImageDraw 1663 >>> import mindspore.dataset as ds 1664 >>> import mindspore.dataset.vision as vision 1665 >>> 1666 >>> # Use the transform in dataset pipeline mode 1667 >>> class MyDataset: 1668 ... def __init__(self): 1669 ... self.data = [] 1670 ... img = Image.new("RGB", (300, 300), (255, 255, 255)) 1671 ... draw = ImageDraw.Draw(img) 1672 ... draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5) 1673 ... img.save("./1.jpg") 1674 ... data = np.fromfile("./1.jpg", np.uint8) 1675 ... self.data.append(data) 1676 ... 1677 ... def __getitem__(self, index): 1678 ... return self.data[0] 1679 ... 1680 ... def __len__(self): 1681 ... return 5 1682 >>> 1683 >>> my_dataset = MyDataset() 1684 >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image") 1685 >>> transforms_list = [vision.Decode(), vision.RandomHorizontalFlip()] 1686 >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns=["image"]) 1687 >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 1688 ... print(item["image"].shape, item["image"].dtype) 1689 ... 
def __call__(self, img):
    """
    Validate the encoded image and hand it to the parent class for eager execution.

    Args:
        img (Union[bytes, numpy.ndarray]): The encoded image. A ``bytes`` object is first viewed
            as a one-dimensional uint8 array.

    Returns:
        Whatever the parent class ``__call__`` returns for the validated array.

    Raises:
        TypeError: If `img` is neither bytes nor numpy.ndarray.
        TypeError: If the element type of `img` is numpy.str_.
        TypeError: If `img` is not one-dimensional.
    """
    # Raw bytes are accepted for convenience and reinterpreted as a uint8 buffer.
    encoded = np.frombuffer(img, dtype=np.uint8) if isinstance(img, bytes) else img

    if not isinstance(encoded, np.ndarray):
        raise TypeError("The type of the encoded image should be {0}, but got {1}.".format(np.ndarray,
                                                                                           type(encoded)))

    element_type = encoded.dtype.type
    if element_type is np.str_:
        raise TypeError("The data type of the encoded image can not be {}.".format(element_type))

    dimensions = encoded.ndim
    if dimensions != 1:
        raise TypeError("The number of array dimensions of the encoded image should be 1, "
                        "but got {0}.".format(dimensions))

    return super().__call__(encoded)
1744 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 1745 1746 Supported Platforms: 1747 ``CPU`` ``Ascend`` 1748 1749 Examples: 1750 >>> import os 1751 >>> import numpy as np 1752 >>> from PIL import Image, ImageDraw 1753 >>> import mindspore.dataset as ds 1754 >>> import mindspore.dataset.vision as vision 1755 >>> from mindspore.dataset.vision import Inter 1756 >>> 1757 >>> # Use the transform in dataset pipeline mode 1758 >>> class MyDataset: 1759 ... def __init__(self): 1760 ... self.data = [] 1761 ... img = Image.new("RGB", (300, 300), (255, 255, 255)) 1762 ... draw = ImageDraw.Draw(img) 1763 ... draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5) 1764 ... img.save("./1.jpg") 1765 ... data = np.fromfile("./1.jpg", np.uint8) 1766 ... self.data.append(data) 1767 ... 1768 ... def __getitem__(self, index): 1769 ... return self.data[0] 1770 ... 1771 ... def __len__(self): 1772 ... return 5 1773 >>> 1774 >>> my_dataset = MyDataset() 1775 >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image") 1776 >>> decode_op = vision.Decode().device("Ascend") 1777 >>> resize_op = vision.Resize([100, 75], Inter.BICUBIC) 1778 >>> transforms_list = [decode_op, resize_op] 1779 >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns=["image"]) 1780 >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 1781 ... print(item["image"].shape, item["image"].dtype) 1782 ... 
class DecodeVideo(VideoTensorOperation):
    """
    Decode raw video bytes.

    Supported video formats: AVI, H264, H265, MOV, MP4, WMV.

    Raises:
        RuntimeError: If the input ndarray is not 1D array.
        RuntimeError: If data type of the elements is not uint8.
        RuntimeError: If the input ndarray is empty.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> # Custom class to generate and read video dataset
        >>> class VideoDataset:
        ...     def __init__(self, file_list):
        ...         self.file_list = file_list
        ...
        ...     def __getitem__(self, index):
        ...         filename = self.file_list[index]
        ...         return np.fromfile(filename, np.uint8)
        ...
        ...     def __len__(self):
        ...         return len(self.file_list)
        >>>
        >>> dataset = ds.GeneratorDataset(VideoDataset(["/path/to/video/file"]), ["data"])
        >>> decode_video = vision.DecodeVideo()
        >>> dataset = dataset.map(operations=[decode_video], input_columns=["data"], output_columns=["video", "audio"])
        >>>
        >>> # Use the transform in eager mode
        >>> filename = "/path/to/video/file"
        >>> raw_ndarray = np.fromfile(filename, np.uint8)
        >>> mindspore_output = vision.DecodeVideo()(raw_ndarray)
    """

    def __init__(self):
        super().__init__()
        # This transform only has a C++ backed implementation (see parse()).
        self.implementation = Implementation.C

    def parse(self):
        """
        Build the C++ operation object.

        Returns:
            A new ``cde.DecodeVideoOperation``; the operation takes no arguments.
        """
        decode_video_operation = cde.DecodeVideoOperation()
        return decode_video_operation
def __init__(self):
    """
    Initialize the transform; it takes no arguments.

    Unlike several sibling transforms, no `implementation` is assigned here.
    NOTE(review): presumably the base class selects between the C++ path (`parse`) and the
    Python path (`_execute_py`) at call time - confirm against the base class.
    """
    super().__init__()
    # Histogram equalization is flagged as a non-random operation.
    self.random = False
class Erase(ImageTensorOperation):
    """
    Overwrite a rectangular region of the input image with a given value.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        top (int): Vertical ordinate of the upper left corner of erased region.
        left (int): Horizontal ordinate of the upper left corner of erased region.
        height (int): Height of erased region.
        width (int): Width of erased region.
        value (Union[float, Sequence[float, float, float]], optional): Pixel value used to pad the erased area.
            Default: ``0``. If float is provided, it will be used for all RGB channels.
            If Sequence[float, float, float] is provided, it will be used for R, G, B channels respectively.
        inplace (bool, optional): Whether to apply erasing inplace. Default: ``False``.

    Raises:
        TypeError: If `top` is not of type int.
        ValueError: If `top` is negative.
        TypeError: If `left` is not of type int.
        ValueError: If `left` is negative.
        TypeError: If `height` is not of type int.
        ValueError: If `height` is not positive.
        TypeError: If `width` is not of type int.
        ValueError: If `width` is not positive.
        TypeError: If `value` is not of type float or Sequence[float, float, float].
        ValueError: If `value` is not in range of [0, 255].
        TypeError: If `inplace` is not of type bool.
        RuntimeError: If shape of the input image is not <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.Erase(10,10,10,10)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.Erase(10, 10, 10, 10)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_erase
    def __init__(self, top, left, height, width, value=0, inplace=False):
        super().__init__()
        # Geometry of the erased rectangle, stored as given.
        self.top, self.left = top, left
        self.height, self.width = height, width
        # A scalar fill value is wrapped in a one-element tuple so that `value` is always a
        # sequence when it is handed to the C++ operation; sequences are stored unchanged.
        self.value = (value,) if isinstance(value, (int, float)) else value
        self.inplace = inplace

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type supports `uint8` or `float32` , input channel supports 1 and 3.
          The input data has a height limit of [4, 8192] and a width limit of [6, 4096].
          The inplace parameter is not supported.

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            Erase, the transform itself, so that the call can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> transforms_list = [vision.Erase(10, 10, 10, 10, (100, 100, 100)).device("Ascend")]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.Erase(10, 10, 10, 10, (100, 100, 100)).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        self.device_target = device_target
        return self

    def parse(self):
        """
        Build the C++ operation object from the stored arguments.

        Returns:
            The ``cde.EraseOperation`` created from the rectangle, fill value, inplace flag and device target.
        """
        rectangle = (self.top, self.left, self.height, self.width)
        return cde.EraseOperation(*rectangle, self.value, self.inplace, self.device_target)
2115 ValueError: If `size` is not positive. 2116 2117 Supported Platforms: 2118 ``CPU`` 2119 2120 Examples: 2121 >>> import os 2122 >>> import numpy as np 2123 >>> from PIL import Image, ImageDraw 2124 >>> import mindspore.dataset as ds 2125 >>> import mindspore.dataset.vision as vision 2126 >>> from mindspore.dataset.transforms import Compose 2127 >>> 2128 >>> # Use the transform in dataset pipeline mode 2129 >>> class MyDataset: 2130 ... def __init__(self): 2131 ... self.data = [] 2132 ... img = Image.new("RGB", (300, 300), (255, 255, 255)) 2133 ... draw = ImageDraw.Draw(img) 2134 ... draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5) 2135 ... img.save("./1.jpg") 2136 ... data = np.fromfile("./1.jpg", np.uint8) 2137 ... self.data.append(data) 2138 ... 2139 ... def __getitem__(self, index): 2140 ... return self.data[0] 2141 ... 2142 ... def __len__(self): 2143 ... return 5 2144 >>> 2145 >>> my_dataset = MyDataset() 2146 >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image") 2147 >>> transforms_list = Compose([vision.Decode(to_pil=True), 2148 ... vision.FiveCrop(size=200), 2149 ... # 4D stack of 5 images 2150 ... lambda *images: np.stack([vision.ToTensor()(image) for image in images])]) 2151 >>> # apply the transform to dataset through map function 2152 >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns="image") 2153 >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 2154 ... print(item["image"].shape, item["image"].dtype) 2155 ... 
class GaussianBlur(ImageTensorOperation):
    r"""
    Blur input image with the specified Gaussian kernel.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        kernel_size (Union[int, Sequence[int, int]]): The size of the Gaussian kernel. Must be positive and odd.
            If the input type is int, the value will be used as both the width and height of the Gaussian kernel.
            If the input type is Sequence[int, int], the two elements will be used as the width and height of the
            Gaussian kernel respectively.
        sigma (Union[float, Sequence[float, float]], optional): The standard deviation of the Gaussian kernel.
            Must be positive.
            If the input type is float, the value will be used as the standard deviation of both the width and
            height of the Gaussian kernel.
            If the input type is Sequence[float, float], the two elements will be used as the standard deviation
            of the width and height of the Gaussian kernel respectively.
            Default: ``None`` , the standard deviation of the Gaussian kernel will be obtained by the
            formula :math:`((kernel\_size - 1) * 0.5 - 1) * 0.3 + 0.8` .

    Raises:
        TypeError: If `kernel_size` is not of type int or Sequence[int].
        TypeError: If `sigma` is not of type float or Sequence[float].
        ValueError: If `kernel_size` is not positive and odd.
        ValueError: If `sigma` is not positive.
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.GaussianBlur(3, 3)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.GaussianBlur(3, 3)(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_gaussian_blur
    def __init__(self, kernel_size, sigma=None):
        super().__init__()
        # Scalars are wrapped in one-element tuples so both arguments are always stored as sequences.
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size,)
        if sigma is None:
            # NOTE(review): (0,) presumably tells the backend to derive sigma from the kernel size
            # using the formula in the class docstring - confirm against the C++ operation.
            sigma = (0,)
        elif isinstance(sigma, (int, float)):
            sigma = (float(sigma),)
        self.kernel_size = kernel_size
        self.sigma = sigma
        self.implementation = Implementation.C

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, the parameter `kernel_size` only supports values 1, 3, and 5.
          input shape should be limited from [4, 6] to [8192, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            GaussianBlur, the transform itself, so that the call can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].
            RuntimeError: If `device_target` is ``Ascend`` and an element of `kernel_size` is not 1, 3 or 5.

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> blur_op = vision.GaussianBlur(3, 3).device("Ascend")
            >>> transforms_list = [blur_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.GaussianBlur(3, 3).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Validate before mutating: a rejected request must not leave the operator targeting Ascend
        # with a kernel size that Ascend does not support.
        if device_target == "Ascend":
            ascend_kernel_sizes = (1, 3, 5)
            if any(k not in ascend_kernel_sizes for k in self.kernel_size):
                raise RuntimeError("When target is Ascend, `kernel_size` only supports values 1, 3, and 5.")

        self.device_target = device_target
        return self

    def parse(self):
        """
        Build the C++ operation object from the stored arguments.

        Returns:
            The ``cde.GaussianBlurOperation`` created from `kernel_size`, `sigma` and the device target.
        """
        return cde.GaussianBlurOperation(self.kernel_size, self.sigma, self.device_target)
2330 2331 Raises: 2332 TypeError: If `num_output_channels` is not of type integer. 2333 ValueError: If `num_output_channels` is not ``1`` or ``3``. 2334 2335 Supported Platforms: 2336 ``CPU`` 2337 2338 Examples: 2339 >>> import os 2340 >>> import numpy as np 2341 >>> from PIL import Image, ImageDraw 2342 >>> import mindspore.dataset as ds 2343 >>> import mindspore.dataset.vision as vision 2344 >>> from mindspore.dataset.transforms import Compose 2345 >>> 2346 >>> # Use the transform in dataset pipeline mode 2347 >>> class MyDataset: 2348 ... def __init__(self): 2349 ... self.data = [] 2350 ... img = Image.new("RGB", (300, 300), (255, 255, 255)) 2351 ... draw = ImageDraw.Draw(img) 2352 ... draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5) 2353 ... img.save("./1.jpg") 2354 ... data = np.fromfile("./1.jpg", np.uint8) 2355 ... self.data.append(data) 2356 ... 2357 ... def __getitem__(self, index): 2358 ... return self.data[0] 2359 ... 2360 ... def __len__(self): 2361 ... return 5 2362 >>> 2363 >>> my_dataset = MyDataset() 2364 >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image") 2365 >>> transforms_list = Compose([vision.Decode(to_pil=True), 2366 ... vision.Grayscale(3), 2367 ... vision.ToTensor()]) 2368 >>> # apply the transform to dataset through map function 2369 >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns="image") 2370 >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 2371 ... print(item["image"].shape, item["image"].dtype) 2372 ... 
class HorizontalFlip(ImageTensorOperation):
    """
    Mirror the input image along its horizontal direction.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Raises:
        RuntimeError: If given tensor shape is not <H, W> or <..., H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.HorizontalFlip()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3))
        >>> output = vision.HorizontalFlip()(data)
        >>> print(output.shape, output.dtype)
        (2, 2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    def __init__(self):
        super().__init__()
        # This transform only has a C++ backed implementation (see parse()).
        self.implementation = Implementation.C

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type supports `uint8` and `float32`,
          input channel supports 1 and 3. The input data has a height limit of [4, 8192]
          and a width limit of [6, 4096].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Returns:
            HorizontalFlip, the transform itself, so that the call can be chained.

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> horizontal_flip_op = vision.HorizontalFlip().device("Ascend")
            >>> transforms_list = [horizontal_flip_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 100, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.HorizontalFlip().device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (100, 100, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        self.device_target = device_target
        return self

    def parse(self):
        """
        Build the C++ operation object.

        Returns:
            The ``cde.HorizontalFlipOperation`` created for the configured device target.
        """
        target = self.device_target
        return cde.HorizontalFlipOperation(target)
2517 2518 Supported Platforms: 2519 ``CPU`` 2520 2521 Examples: 2522 >>> import numpy as np 2523 >>> import mindspore.dataset as ds 2524 >>> import mindspore.dataset.vision as vision 2525 >>> from mindspore.dataset.transforms import Compose 2526 >>> 2527 >>> # Use the transform in dataset pipeline mode 2528 >>> transforms_list = Compose([vision.CenterCrop(20), 2529 ... vision.ToTensor(), 2530 ... vision.HsvToRgb()]) 2531 >>> # apply the transform to dataset through map function 2532 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 2533 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 2534 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image") 2535 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 2536 ... print(item["image"].shape, item["image"].dtype) 2537 ... break 2538 (3, 20, 20) float64 2539 >>> 2540 >>> # Use the transform in eager mode 2541 >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3)) 2542 >>> output = vision.HsvToRgb(is_hwc=True)(data) 2543 >>> print(output.shape, output.dtype) 2544 (2, 2, 3) float64 2545 2546 Tutorial Examples: 2547 - `Illustration of vision transforms 2548 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 2549 """ 2550 2551 @check_hsv_to_rgb 2552 def __init__(self, is_hwc=False): 2553 super().__init__() 2554 self.is_hwc = is_hwc 2555 self.random = False 2556 self.implementation = Implementation.PY 2557 2558 def _execute_py(self, hsv_imgs): 2559 """ 2560 Execute method. 2561 2562 Args: 2563 hsv_imgs (numpy.ndarray): HSV images to be converted. 2564 2565 Returns: 2566 numpy.ndarray, converted RGB images. 2567 """ 2568 return util.hsv_to_rgbs(hsv_imgs, self.is_hwc) 2569 2570 2571class HWC2CHW(ImageTensorOperation): 2572 """ 2573 Transpose the input image from shape <H, W, C> to <C, H, W>. 
2574 If the input image is of shape <H, W>, it will remain unchanged. 2575 2576 Note: 2577 This operation is executed on the CPU by default, but it is also supported 2578 to be executed on the GPU or Ascend via heterogeneous acceleration. 2579 2580 Raises: 2581 RuntimeError: If shape of the input image is not <H, W> or <H, W, C>. 2582 2583 Supported Platforms: 2584 ``CPU`` ``GPU`` ``Ascend`` 2585 2586 Examples: 2587 >>> import numpy as np 2588 >>> import mindspore.dataset as ds 2589 >>> import mindspore.dataset.vision as vision 2590 >>> 2591 >>> # Use the transform in dataset pipeline mode 2592 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 2593 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 2594 >>> transforms_list = [vision.RandomHorizontalFlip(0.75), 2595 ... vision.RandomCrop(64), 2596 ... vision.HWC2CHW()] 2597 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 2598 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 2599 ... print(item["image"].shape, item["image"].dtype) 2600 ... break 2601 (3, 64, 64) uint8 2602 >>> 2603 >>> # Use the transform in eager mode 2604 >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3)) 2605 >>> output = vision.HWC2CHW()(data) 2606 >>> print(output.shape, output.dtype) 2607 (3, 2, 2) uint8 2608 2609 Tutorial Examples: 2610 - `Illustration of vision transforms 2611 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 2612 """ 2613 2614 def __init__(self): 2615 super().__init__() 2616 self.implementation = Implementation.C 2617 self.random = False 2618 2619 def parse(self): 2620 return cde.HwcToChwOperation() 2621 2622 2623class Invert(ImageTensorOperation, PyTensorOperation): 2624 """ 2625 Invert the colors of the input RGB image. 
2626 2627 For each pixel in the image, if the original pixel value is `pixel`, 2628 the inverted pixel value will be `255 - pixel`. 2629 2630 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 2631 2632 Raises: 2633 RuntimeError: If the input image is not in shape of <H, W, C>. 2634 2635 Supported Platforms: 2636 ``CPU`` ``Ascend`` 2637 2638 Examples: 2639 >>> import numpy as np 2640 >>> import mindspore.dataset as ds 2641 >>> import mindspore.dataset.vision as vision 2642 >>> 2643 >>> # Use the transform in dataset pipeline mode 2644 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 2645 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 2646 >>> transforms_list = [vision.Invert()] 2647 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 2648 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 2649 ... print(item["image"].shape, item["image"].dtype) 2650 ... break 2651 (100, 100, 3) uint8 2652 >>> 2653 >>> # Use the transform in eager mode 2654 >>> data = np.array([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 2, 3)) 2655 >>> output = vision.Invert()(data) 2656 >>> print(output.shape, output.dtype) 2657 (2, 2, 3) uint8 2658 2659 Tutorial Examples: 2660 - `Illustration of vision transforms 2661 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 2662 """ 2663 2664 def __init__(self): 2665 super().__init__() 2666 self.random = False 2667 2668 @check_device_target 2669 def device(self, device_target="CPU"): 2670 """ 2671 Set the device for the current operator execution. 2672 2673 - When the device is CPU, input type only support `uint8` , input channel support 1/2/3. 2674 - When the device is Ascend, input type supports `uint8`/`float32`, input channel supports 1/3. 
2675 input shape should be limited from [4, 6] to [8192, 4096]. 2676 2677 Args: 2678 device_target (str, optional): The operator will be executed on this device. Currently supports 2679 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 2680 2681 Raises: 2682 TypeError: If `device_target` is not of type str. 2683 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 2684 2685 Supported Platforms: 2686 ``CPU`` ``Ascend`` 2687 2688 Examples: 2689 >>> import numpy as np 2690 >>> import mindspore.dataset as ds 2691 >>> import mindspore.dataset.vision as vision 2692 >>> from mindspore.dataset.vision import Inter 2693 >>> 2694 >>> # Use the transform in dataset pipeline mode 2695 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 2696 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 2697 >>> invert_op = vision.Invert() 2698 >>> transforms_list = [invert_op] 2699 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 2700 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 2701 ... print(item["image"].shape, item["image"].dtype) 2702 ... break 2703 (100, 100, 3) uint8 2704 >>> 2705 >>> # Use the transform in eager mode 2706 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 2707 >>> output = vision.Invert().device("Ascend")(data) 2708 >>> print(output.shape, output.dtype) 2709 (100, 100, 3) uint8 2710 2711 Tutorial Examples: 2712 - `Illustration of vision transforms 2713 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 2714 """ 2715 self.device_target = device_target 2716 return self 2717 2718 def parse(self): 2719 return cde.InvertOperation(self.device_target) 2720 2721 def _execute_py(self, img): 2722 """ 2723 Execute method. 2724 2725 Args: 2726 img (PIL Image): Image to be color inverted. 2727 2728 Returns: 2729 PIL Image, color inverted image. 
2730 """ 2731 2732 return util.invert_color(img) 2733 2734 2735class LinearTransformation(PyTensorOperation): 2736 r""" 2737 Linearly transform the input numpy.ndarray image with a square transformation matrix and a mean vector. 2738 2739 It will first flatten the input image and subtract the mean vector from it, then compute the dot 2740 product with the transformation matrix, finally reshape it back to its original shape. 2741 2742 Args: 2743 transformation_matrix (numpy.ndarray): A square transformation matrix in shape of (D, D), where 2744 :math:`D = C \times H \times W` . 2745 mean_vector (numpy.ndarray): A mean vector in shape of (D,), where :math:`D = C \times H \times W` . 2746 2747 Raises: 2748 TypeError: If `transformation_matrix` is not of type :class:`numpy.ndarray` . 2749 TypeError: If `mean_vector` is not of type :class:`numpy.ndarray` . 2750 2751 Supported Platforms: 2752 ``CPU`` 2753 2754 Examples: 2755 >>> import numpy as np 2756 >>> import mindspore.dataset as ds 2757 >>> import mindspore.dataset.vision as vision 2758 >>> from mindspore.dataset.transforms import Compose 2759 >>> 2760 >>> # Use the transform in dataset pipeline mode 2761 >>> height, width = 32, 32 2762 >>> dim = 3 * height * width 2763 >>> transformation_matrix = np.ones([dim, dim]) 2764 >>> mean_vector = np.zeros(dim) 2765 >>> transforms_list = Compose([vision.Resize((height,width)), 2766 ... vision.ToTensor(), 2767 ... vision.LinearTransformation(transformation_matrix, mean_vector)]) 2768 >>> # apply the transform to dataset through map function 2769 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 2770 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 2771 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image") 2772 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 2773 ... print(item["image"].shape, item["image"].dtype) 2774 ... 
break 2775 (3, 32, 32) float64 2776 >>> 2777 >>> # Use the transform in eager mode 2778 >>> data = np.random.randn(10, 10, 3) 2779 >>> transformation_matrix = np.random.randn(300, 300) 2780 >>> mean_vector = np.random.randn(300,) 2781 >>> output = vision.LinearTransformation(transformation_matrix, mean_vector)(data) 2782 >>> print(output.shape, output.dtype) 2783 (10, 10, 3) float64 2784 2785 Tutorial Examples: 2786 - `Illustration of vision transforms 2787 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 2788 """ 2789 2790 @check_linear_transform 2791 def __init__(self, transformation_matrix, mean_vector): 2792 super().__init__() 2793 self.transformation_matrix = transformation_matrix 2794 self.mean_vector = mean_vector 2795 self.random = False 2796 self.implementation = Implementation.PY 2797 2798 def _execute_py(self, np_img): 2799 """ 2800 Execute method. 2801 2802 Args: 2803 np_img (numpy.ndarray): Image in shape of <C, H, W> to be linearly transformed. 2804 2805 Returns: 2806 numpy.ndarray, linearly transformed image. 2807 """ 2808 return util.linear_transform(np_img, self.transformation_matrix, self.mean_vector) 2809 2810 2811class MixUp(PyTensorOperation): 2812 """ 2813 Randomly mix up a batch of numpy.ndarray images together with its labels. 2814 2815 Each image will be multiplied by a random weight :math:`lambda` generated from the Beta distribution and then added 2816 to another image multiplied by :math:`1 - lambda`. The same transformation will be applied to their labels with the 2817 same value of :math:`lambda`. Make sure that the labels are one-hot encoded in advance. 2818 2819 Args: 2820 batch_size (int): The number of images in a batch. 2821 alpha (float): The alpha and beta parameter for the Beta distribution. 2822 is_single (bool, optional): If ``True``, it will randomly mix up [img0, ..., img(n-1), img(n)] with 2823 [img1, ..., img(n), img0] in each batch. 
Otherwise, it will randomly mix up images with the 2824 output of the previous batch. Default: ``True``. 2825 2826 Raises: 2827 TypeError: If `batch_size` is not of type integer. 2828 TypeError: If `alpha` is not of type float. 2829 TypeError: If `is_single` is not of type boolean. 2830 ValueError: If `batch_size` is not positive. 2831 ValueError: If `alpha` is not positive. 2832 2833 Supported Platforms: 2834 ``CPU`` 2835 2836 Examples: 2837 >>> import numpy as np 2838 >>> import mindspore.dataset as ds 2839 >>> import mindspore.dataset.vision as vision 2840 >>> import mindspore.dataset.transforms as transforms 2841 >>> 2842 >>> # Use the transform in dataset pipeline mode 2843 >>> data = np.random.randint(0, 255, size=(64, 64, 3)).astype(np.uint8) 2844 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 2845 >>> numpy_slices_dataset = numpy_slices_dataset.map( 2846 ... operations=lambda img: (data, np.random.randint(0, 5, (3, 1))), 2847 ... input_columns=["image"], 2848 ... output_columns=["image", "label"]) 2849 >>> # ont hot decode the label 2850 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms.OneHot(10), input_columns="label") 2851 >>> # batch the samples 2852 >>> numpy_slices_dataset = numpy_slices_dataset.batch(batch_size=4) 2853 >>> # finally mix up the images and labels 2854 >>> numpy_slices_dataset = numpy_slices_dataset.map( 2855 ... operations=vision.MixUp(batch_size=1, alpha=0.2), 2856 ... input_columns=["image", "label"]) 2857 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 2858 ... print(item["image"].shape, item["image"].dtype) 2859 ... print(item["label"].shape, item["label"].dtype) 2860 ... 
break 2861 (4, 64, 64, 3) float64 2862 (4, 3, 10) float64 2863 >>> 2864 >>> # Use the transform in eager mode 2865 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 2866 >>> label = np.array([[0, 1]]) 2867 >>> output = vision.MixUp(batch_size=2, alpha=0.2, is_single=False)(data, label) 2868 >>> print(output[0].shape, output[0].dtype) 2869 (2, 100, 100, 3) float64 2870 >>> print(output[1].shape, output[1].dtype) 2871 (2, 2) float64 2872 2873 Tutorial Examples: 2874 - `Illustration of vision transforms 2875 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 2876 """ 2877 2878 @check_mix_up 2879 def __init__(self, batch_size, alpha, is_single=True): 2880 super().__init__() 2881 self.image = 0 2882 self.label = 0 2883 self.is_first = True 2884 self.batch_size = batch_size 2885 self.alpha = alpha 2886 self.is_single = is_single 2887 self.random = False 2888 self.implementation = Implementation.PY 2889 2890 def __call__(self, image, label): 2891 """ 2892 Call method to apply mix up transformation to image and label. 2893 2894 Note: No execute method for MixUp 2895 2896 Args: 2897 image (numpy.ndarray): Images to be mixed up. 2898 label (numpy.ndarray): Labels to be mixed up. 2899 2900 Returns: 2901 numpy.ndarray, images after mixing up. 2902 numpy.ndarray, labels after mixing up. 2903 """ 2904 if self.is_single: 2905 return util.mix_up_single(self.batch_size, image, label, self.alpha) 2906 return util.mix_up_muti(self, self.batch_size, image, label, self.alpha) 2907 2908 2909class MixUpBatch(ImageTensorOperation): 2910 """ 2911 Apply MixUp transformation on input batch of images and labels. Each image is 2912 multiplied by a random weight (lambda) and then added to a randomly selected image from the batch 2913 multiplied by (1 - lambda). The same formula is also applied to the one-hot labels. 2914 2915 The lambda is generated based on the specified alpha value. 
Two coefficients x1, x2 are randomly generated 2916 in the range [alpha, 1], and lambda = (x1 / (x1 + x2)). 2917 2918 Note that you need to make labels into one-hot format and batched before calling this operation. 2919 2920 Args: 2921 alpha (float, optional): Hyperparameter of beta distribution. The value must be positive. 2922 Default: ``1.0``. 2923 2924 Raises: 2925 TypeError: If `alpha` is not of type float. 2926 ValueError: If `alpha` is not positive. 2927 RuntimeError: If given tensor shape is not <N, H, W, C> or <N, C, H, W>. 2928 2929 Supported Platforms: 2930 ``CPU`` 2931 2932 Examples: 2933 >>> import numpy as np 2934 >>> import mindspore.dataset as ds 2935 >>> import mindspore.dataset.vision as vision 2936 >>> import mindspore.dataset.transforms as transforms 2937 >>> 2938 >>> # Use the transform in dataset pipeline mode 2939 >>> data = np.random.randint(0, 255, size=(64, 64, 3)).astype(np.uint8) 2940 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 2941 >>> numpy_slices_dataset = numpy_slices_dataset.map( 2942 ... operations=lambda img: (data, np.random.randint(0, 5, (3, 1))), 2943 ... input_columns=["image"], 2944 ... output_columns=["image", "label"]) 2945 >>> onehot_op = transforms.OneHot(num_classes=10) 2946 >>> numpy_slices_dataset= numpy_slices_dataset.map(operations=onehot_op, 2947 ... input_columns=["label"]) 2948 >>> mixup_batch_op = vision.MixUpBatch(alpha=0.9) 2949 >>> numpy_slices_dataset = numpy_slices_dataset.batch(5) 2950 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=mixup_batch_op, 2951 ... input_columns=["image", "label"]) 2952 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 2953 ... print(item["image"].shape, item["image"].dtype) 2954 ... print(item["label"].shape, item["label"].dtype) 2955 ... 
break 2956 (5, 64, 64, 3) uint8 2957 (5, 3, 10) float32 2958 >>> 2959 >>> # Use the transform in eager mode 2960 >>> data = np.random.randint(0, 255, (2, 10, 10, 3)).astype(np.uint8) 2961 >>> label = np.array([[0, 1], [1, 0]]) 2962 >>> output = vision.MixUpBatch(1)(data, label) 2963 >>> print(output[0].shape, output[0].dtype) 2964 (2, 10, 10, 3) uint8 2965 >>> print(output[1].shape, output[1].dtype) 2966 (2, 2) float32 2967 2968 Tutorial Examples: 2969 - `Illustration of vision transforms 2970 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 2971 """ 2972 2973 @check_mix_up_batch_c 2974 def __init__(self, alpha=1.0): 2975 super().__init__() 2976 self.alpha = alpha 2977 self.implementation = Implementation.C 2978 2979 def parse(self): 2980 return cde.MixUpBatchOperation(self.alpha) 2981 2982 2983class Normalize(ImageTensorOperation): 2984 """ 2985 Normalize the input image with respect to mean and standard deviation. This operation will normalize 2986 the input image with: output[channel] = (input[channel] - mean[channel]) / std[channel], where channel >= 1. 2987 2988 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 2989 2990 Note: 2991 This operation is executed on the CPU by default, but it is also supported 2992 to be executed on the GPU or Ascend via heterogeneous acceleration. 2993 2994 Args: 2995 mean (sequence): List or tuple of mean values for each channel, with respect to channel order. 2996 The mean values must be in range [0.0, 255.0]. 2997 std (sequence): List or tuple of standard deviations for each channel, with respect to channel order. 2998 The standard deviation values must be in range (0.0, 255.0]. 2999 is_hwc (bool, optional): Whether the input image is HWC. 3000 ``True`` - HWC format, ``False`` - CHW format. Default: ``True``. 3001 3002 Raises: 3003 TypeError: If `mean` is not of type sequence. 3004 TypeError: If `std` is not of type sequence. 
3005 TypeError: If `is_hwc` is not of type bool. 3006 ValueError: If `mean` is not in range [0.0, 255.0]. 3007 ValueError: If `std` is not in range (0.0, 255.0]. 3008 RuntimeError: If given tensor format is not <H, W> or <..., H, W, C>. 3009 3010 Supported Platforms: 3011 ``CPU`` ``GPU`` ``Ascend`` 3012 3013 Examples: 3014 >>> import numpy as np 3015 >>> import mindspore.dataset as ds 3016 >>> import mindspore.dataset.vision as vision 3017 >>> 3018 >>> # Use the transform in dataset pipeline mode 3019 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3020 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3021 >>> normalize_op = vision.Normalize(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0], is_hwc=True) 3022 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[normalize_op], 3023 ... input_columns=["image"]) 3024 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3025 ... print(item["image"].shape, item["image"].dtype) 3026 ... break 3027 (100, 100, 3) float32 3028 >>> 3029 >>> # Use the transform in eager mode 3030 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3031 >>> output = vision.Normalize(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0])(data) 3032 >>> print(output.shape, output.dtype) 3033 (100, 100, 3) float32 3034 3035 Tutorial Examples: 3036 - `Illustration of vision transforms 3037 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3038 """ 3039 3040 @check_normalize 3041 def __init__(self, mean, std, is_hwc=True): 3042 super().__init__() 3043 self.mean = mean 3044 self.std = std 3045 self.is_hwc = is_hwc 3046 self.random = False 3047 self.implementation = Implementation.C 3048 3049 @check_device_target 3050 def device(self, device_target="CPU"): 3051 """ 3052 Set the device for the current operator execution. 
3053 3054 - When the device is CPU, input type support `uint8`/`float32`/`float64`, input channel support 1/2/3. 3055 - When the device is Ascend, input type supports `uint8`/`float32`, input channel supports 1/3. 3056 input shape should be limited from [4, 6] to [8192, 4096]. 3057 3058 Args: 3059 device_target (str, optional): The operator will be executed on this device. Currently supports 3060 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 3061 3062 Raises: 3063 TypeError: If `device_target` is not of type str. 3064 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 3065 3066 Supported Platforms: 3067 ``CPU`` ``Ascend`` 3068 3069 Examples: 3070 >>> import numpy as np 3071 >>> import mindspore.dataset as ds 3072 >>> import mindspore.dataset.vision as vision 3073 >>> from mindspore.dataset.vision import Inter 3074 >>> 3075 >>> # Use the transform in dataset pipeline mode 3076 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3077 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3078 >>> resize_op = vision.Resize([100, 75], Inter.BICUBIC) 3079 >>> transforms_list = [resize_op] 3080 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 3081 >>> normalize_op = vision.Normalize(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0]).device("Ascend") 3082 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=normalize_op, input_columns=["image"]) 3083 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3084 ... print(item["image"].shape, item["image"].dtype) 3085 ... 
break 3086 (100, 75, 3) float32 3087 >>> 3088 >>> # Use the transform in eager mode 3089 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3090 >>> output = vision.Normalize(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0]).device("Ascend")(data) 3091 >>> print(output.shape, output.dtype) 3092 (100, 100, 3) float32 3093 3094 Tutorial Examples: 3095 - `Illustration of vision transforms 3096 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3097 """ 3098 self.device_target = device_target 3099 return self 3100 3101 def parse(self): 3102 return cde.NormalizeOperation(self.mean, self.std, self.is_hwc, self.device_target) 3103 3104 3105class NormalizePad(ImageTensorOperation): 3106 """ 3107 Normalize the input image with respect to mean and standard deviation then pad an extra channel with value zero. 3108 3109 Args: 3110 mean (sequence): List or tuple of mean values for each channel, with respect to channel order. 3111 The mean values must be in range (0.0, 255.0]. 3112 std (sequence): List or tuple of standard deviations for each channel, with respect to channel order. 3113 The standard deviation values must be in range (0.0, 255.0]. 3114 dtype (str, optional): Set the output data type of normalized image. Default: ``"float32"``. 3115 is_hwc (bool, optional): Specify the format of input image. 3116 ``True`` - HW(C) format, ``False`` - CHW format. Default: ``True``. 3117 3118 Raises: 3119 TypeError: If `mean` is not of type sequence. 3120 TypeError: If `std` is not of type sequence. 3121 TypeError: If `dtype` is not of type string. 3122 TypeError: If `is_hwc` is not of type bool. 3123 ValueError: If `mean` is not in range [0.0, 255.0]. 3124 ValueError: If `mean` is not in range (0.0, 255.0]. 3125 RuntimeError: If given tensor shape is not <H, W>, <H, W, C> or <C, H, W>. 
3126 3127 Supported Platforms: 3128 ``CPU`` 3129 3130 Examples: 3131 >>> import numpy as np 3132 >>> import mindspore.dataset as ds 3133 >>> import mindspore.dataset.vision as vision 3134 >>> 3135 >>> # Use the transform in dataset pipeline mode 3136 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3137 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3138 >>> normalize_pad_op = vision.NormalizePad(mean=[121.0, 115.0, 100.0], 3139 ... std=[70.0, 68.0, 71.0], 3140 ... dtype="float32") 3141 >>> transforms_list = [normalize_pad_op] 3142 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 3143 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3144 ... print(item["image"].shape, item["image"].dtype) 3145 ... break 3146 (100, 100, 4) float32 3147 >>> 3148 >>> # Use the transform in eager mode 3149 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3150 >>> output = vision.NormalizePad(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0], dtype="float32")(data) 3151 >>> print(output.shape, output.dtype) 3152 (100, 100, 4) float32 3153 """ 3154 3155 @check_normalizepad 3156 def __init__(self, mean, std, dtype="float32", is_hwc=True): 3157 super().__init__() 3158 self.mean = mean 3159 self.std = std 3160 self.dtype = dtype 3161 self.is_hwc = is_hwc 3162 self.random = False 3163 self.implementation = Implementation.C 3164 3165 def parse(self): 3166 return cde.NormalizePadOperation(self.mean, self.std, self.dtype, self.is_hwc) 3167 3168 3169class Pad(ImageTensorOperation, PyTensorOperation): 3170 """ 3171 Pad the image according to padding parameters. 3172 3173 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 3174 3175 Args: 3176 padding (Union[int, Sequence[int, int], Sequence[int, int, int, int]]): The number of pixels 3177 to pad each border of the image. 
3178 If a single number is provided, it pads all borders with this value. 3179 If a tuple or lists of 2 values are provided, it pads the (left and right) 3180 with the first value and (top and bottom) with the second value. 3181 If 4 values are provided as a list or tuple, it pads the left, top, right and bottom respectively. 3182 The pad values must be non-negative. 3183 fill_value (Union[int, tuple[int]], optional): The pixel intensity of the borders, only valid for 3184 `padding_mode` ``Border.CONSTANT``. If it is a 3-tuple, it is used to fill R, G, B channels respectively. 3185 If it is an integer, it is used for all RGB channels. 3186 The fill_value values must be in range [0, 255]. Default: ``0``. 3187 padding_mode (Border, optional): The method of padding. Default: ``Border.CONSTANT``. Can be 3188 ``Border.CONSTANT``, ``Border.EDGE``, ``Border.REFLECT``, ``Border.SYMMETRIC``. 3189 3190 - ``Border.CONSTANT`` , means it fills the border with constant values. 3191 3192 - ``Border.EDGE`` , means it pads with the last value on the edge. 3193 3194 - ``Border.REFLECT`` , means it reflects the values on the edge omitting the last 3195 value of edge. 3196 3197 - ``Border.SYMMETRIC`` , means it reflects the values on the edge repeating the last 3198 value of edge. 3199 3200 Raises: 3201 TypeError: If `padding` is not of type int or Sequence[int, int], Sequence[int, int, int, int]. 3202 TypeError: If `fill_value` is not of type int or tuple[int]. 3203 TypeError: If `padding_mode` is not of type :class:`mindspore.dataset.vision.Border` . 3204 ValueError: If `padding` is negative. 3205 ValueError: If `fill_value` is not in range [0, 255]. 3206 RuntimeError: If given tensor shape is not <H, W> or <H, W, C>. 
3207 3208 Supported Platforms: 3209 ``CPU`` ``Ascend`` 3210 3211 Examples: 3212 >>> import numpy as np 3213 >>> import mindspore.dataset as ds 3214 >>> import mindspore.dataset.vision as vision 3215 >>> 3216 >>> # Use the transform in dataset pipeline mode 3217 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3218 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3219 >>> transforms_list = [vision.Pad([100, 100, 100, 100])] 3220 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 3221 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3222 ... print(item["image"].shape, item["image"].dtype) 3223 ... break 3224 (300, 300, 3) uint8 3225 >>> 3226 >>> # Use the transform in eager mode 3227 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3228 >>> output = vision.Pad([100, 100, 100, 100])(data) 3229 >>> print(output.shape, output.dtype) 3230 (300, 300, 3) uint8 3231 3232 Tutorial Examples: 3233 - `Illustration of vision transforms 3234 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3235 """ 3236 3237 @check_pad 3238 def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT): 3239 super().__init__() 3240 padding = parse_padding(padding) 3241 if isinstance(fill_value, int): 3242 fill_value = tuple([fill_value] * 3) 3243 self.padding = padding 3244 self.fill_value = fill_value 3245 self.random = False 3246 self.padding_mode = padding_mode 3247 3248 @check_device_target 3249 def device(self, device_target="CPU"): 3250 """ 3251 Set the device for the current operator execution. 3252 3253 - When the device is Ascend, input/output shape should be limited from [4, 6] to [32768, 32768]. 3254 3255 Args: 3256 device_target (str, optional): The operator will be executed on this device. Currently supports 3257 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 
3258 3259 Raises: 3260 TypeError: If `device_target` is not of type str. 3261 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 3262 3263 Supported Platforms: 3264 ``CPU`` ``Ascend`` 3265 3266 Examples: 3267 >>> import numpy as np 3268 >>> import mindspore.dataset as ds 3269 >>> import mindspore.dataset.vision as vision 3270 >>> 3271 >>> # Use the transform in dataset pipeline mode 3272 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3273 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3274 >>> pad_op = vision.Pad([100, 100, 100, 100]).device("Ascend") 3275 >>> transforms_list = [pad_op] 3276 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 3277 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3278 ... print(item["image"].shape, item["image"].dtype) 3279 ... break 3280 (300, 300, 3) uint8 3281 >>> 3282 >>> # Use the transform in eager mode 3283 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3284 >>> output = vision.Pad([100, 100, 100, 100]).device("Ascend")(data) 3285 >>> print(output.shape, output.dtype) 3286 (300, 300, 3) uint8 3287 3288 Tutorial Examples: 3289 - `Illustration of vision transforms 3290 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3291 """ 3292 self.device_target = device_target 3293 return self 3294 3295 def parse(self): 3296 return cde.PadOperation(self.padding, self.fill_value, Border.to_c_type(self.padding_mode), self.device_target) 3297 3298 def _execute_py(self, img): 3299 """ 3300 Execute method. 3301 3302 Args: 3303 img (PIL Image): Image to be padded. 3304 3305 Returns: 3306 PIL Image, padded image. 
3307 """ 3308 return util.pad(img, self.padding, self.fill_value, Border.to_python_type(self.padding_mode)) 3309 3310 3311class PadToSize(ImageTensorOperation): 3312 """ 3313 Pad the image to a fixed size. 3314 3315 Args: 3316 size (Union[int, Sequence[int, int]]): The target size to pad. 3317 If int is provided, pad the image to [size, size]. 3318 If Sequence[int, int] is provided, it should be in order of [height, width]. 3319 offset (Union[int, Sequence[int, int]], optional): The lengths to pad on the top and left. 3320 If int is provided, pad both top and left borders with this value. 3321 If Sequence[int, int] is provided, is should be in order of [top, left]. 3322 Default: ``None``, means to pad symmetrically, keeping the original image in center. 3323 fill_value (Union[int, tuple[int, int, int]], optional): Pixel value used to pad the borders, 3324 only valid when `padding_mode` is ``Border.CONSTANT``. 3325 If int is provided, it will be used for all RGB channels. 3326 If tuple[int, int, int] is provided, it will be used for R, G, B channels respectively. Default: 0. 3327 padding_mode (Border, optional): Method of padding. It can be ``Border.CONSTANT``, ``Border.EDGE``, 3328 ``Border.REFLECT`` or Border.SYMMETRIC. Default: ``Border.CONSTANT``. 3329 3330 - ``Border.CONSTANT`` , pads with a constant value. 3331 - ``Border.EDGE`` , pads with the last value at the edge of the image. 3332 - ``Border.REFLECT`` , pads with reflection of the image omitting the last value on the edge. 3333 - ``Border.SYMMETRIC`` , pads with reflection of the image repeating the last value on the edge. 3334 3335 Raises: 3336 TypeError: If `size` is not of type int or Sequence[int, int]. 3337 TypeError: If `offset` is not of type int or Sequence[int, int]. 3338 TypeError: If `fill_value` is not of type int or tuple[int, int, int]. 3339 TypeError: If `padding_mode` is not of type :class:`mindspore.dataset.vision.Border` . 3340 ValueError: If `size` is not positive. 
3341 ValueError: If `offset` is negative. 3342 ValueError: If `fill_value` is not in range of [0, 255]. 3343 RuntimeError: If shape of the input image is not <H, W> or <H, W, C>. 3344 3345 Supported Platforms: 3346 ``CPU`` 3347 3348 Examples: 3349 >>> import numpy as np 3350 >>> import mindspore.dataset as ds 3351 >>> import mindspore.dataset.vision as vision 3352 >>> 3353 >>> # Use the transform in dataset pipeline mode 3354 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3355 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3356 >>> transforms_list = [vision.PadToSize([256, 256])] 3357 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 3358 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3359 ... print(item["image"].shape, item["image"].dtype) 3360 ... break 3361 (256, 256, 3) uint8 3362 >>> 3363 >>> # Use the transform in eager mode 3364 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3365 >>> output = vision.PadToSize([256, 256])(data) 3366 >>> print(output.shape, output.dtype) 3367 (256, 256, 3) uint8 3368 3369 Tutorial Examples: 3370 - `Illustration of vision transforms 3371 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3372 """ 3373 3374 @check_pad_to_size 3375 def __init__(self, size, offset=None, fill_value=0, padding_mode=Border.CONSTANT): 3376 super().__init__() 3377 self.size = [size, size] if isinstance(size, int) else size 3378 if offset is None: 3379 self.offset = [] 3380 else: 3381 self.offset = [offset, offset] if isinstance(offset, int) else offset 3382 self.fill_value = tuple([fill_value] * 3) if isinstance(fill_value, int) else fill_value 3383 self.padding_mode = padding_mode 3384 self.implementation = Implementation.C 3385 3386 def parse(self): 3387 return cde.PadToSizeOperation(self.size, self.offset, self.fill_value, 
Border.to_c_type(self.padding_mode)) 3388 3389 3390class Perspective(ImageTensorOperation, PyTensorOperation): 3391 """ 3392 Apply perspective transformation on input image. 3393 3394 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 3395 3396 Args: 3397 start_points (Sequence[Sequence[int, int]]): Sequence of the starting point coordinates, containing four 3398 two-element subsequences, corresponding to [top-left, top-right, bottom-right, bottom-left] of the 3399 quadrilateral in the original image. 3400 end_points (Sequence[Sequence[int, int]]): Sequence of the ending point coordinates, containing four 3401 two-element subsequences, corresponding to [top-left, top-right, bottom-right, bottom-left] of the 3402 quadrilateral in the target image. 3403 interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` . 3404 Default: ``Inter.BILINEAR``. 3405 3406 Raises: 3407 TypeError: If `start_points` is not of type Sequence[Sequence[int, int]]. 3408 TypeError: If `end_points` is not of type Sequence[Sequence[int, int]]. 3409 TypeError: If `interpolation` is not of type :class:`~.vision.Inter` . 3410 RuntimeError: If shape of the input image is not <H, W> or <H, W, C>. 
3411 3412 Supported Platforms: 3413 ``CPU`` ``Ascend`` 3414 3415 Examples: 3416 >>> import numpy as np 3417 >>> import mindspore.dataset as ds 3418 >>> import mindspore.dataset.vision as vision 3419 >>> from mindspore.dataset.vision import Inter 3420 >>> 3421 >>> # Use the transform in dataset pipeline mode 3422 >>> start_points = [[0, 63], [63, 63], [63, 0], [0, 0]] 3423 >>> end_points = [[0, 32], [32, 32], [32, 0], [0, 0]] 3424 >>> transforms_list = [vision.Perspective(start_points, end_points, Inter.BILINEAR)] 3425 >>> # apply the transform to dataset through map function 3426 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3427 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3428 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image") 3429 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3430 ... print(item["image"].shape, item["image"].dtype) 3431 ... 
break 3432 (100, 100, 3) uint8 3433 >>> 3434 >>> # Use the transform in eager mode 3435 >>> start_points = [[0, 63], [63, 63], [63, 0], [0, 0]] 3436 >>> end_points = [[0, 32], [32, 32], [32, 0], [0, 0]] 3437 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3438 >>> output = vision.Perspective(start_points, end_points, Inter.BILINEAR)(data) 3439 >>> print(output.shape, output.dtype) 3440 (100, 100, 3) uint8 3441 3442 Tutorial Examples: 3443 - `Illustration of vision transforms 3444 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3445 """ 3446 3447 @check_perspective 3448 def __init__(self, start_points, end_points, interpolation=Inter.BILINEAR): 3449 super().__init__() 3450 self.start_points = start_points 3451 self.end_points = end_points 3452 self.interpolation = interpolation 3453 if interpolation in [Inter.AREA, Inter.PILCUBIC]: 3454 self.implementation = Implementation.C 3455 elif interpolation == Inter.ANTIALIAS: 3456 self.implementation = Implementation.PY 3457 self.random = False 3458 3459 @check_device_target 3460 def device(self, device_target="CPU"): 3461 """ 3462 Set the device for the current operator execution. 3463 3464 - When the device is Ascend, input type supports `uint8` and `float32`, 3465 input channel supports 1 and 3. The input data has a height limit of [6, 8192] 3466 and a width limit of [10, 4096]. 3467 3468 Args: 3469 device_target (str, optional): The operator will be executed on this device. Currently supports 3470 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 3471 3472 Raises: 3473 TypeError: If `device_target` is not of type str. 3474 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 
3475 3476 Supported Platforms: 3477 ``CPU`` ``Ascend`` 3478 3479 Examples: 3480 >>> import numpy as np 3481 >>> import mindspore.dataset as ds 3482 >>> import mindspore.dataset.vision as vision 3483 >>> from mindspore.dataset.vision import Inter 3484 >>> 3485 >>> # Use the transform in dataset pipeline mode 3486 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3487 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3488 >>> start_points = [[0, 63], [63, 63], [63, 0], [0, 0]] 3489 >>> end_points = [[0, 32], [32, 32], [32, 0], [0, 0]] 3490 >>> perspective_op = vision.Perspective(start_points, end_points).device("Ascend") 3491 >>> transforms_list = [perspective_op] 3492 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 3493 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3494 ... print(item["image"].shape, item["image"].dtype) 3495 ... break 3496 (100, 100, 3) uint8 3497 >>> 3498 >>> # Use the transform in eager mode 3499 >>> start_points = [[0, 63], [63, 63], [63, 0], [0, 0]] 3500 >>> end_points = [[0, 32], [32, 32], [32, 0], [0, 0]] 3501 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3502 >>> output = vision.Perspective(start_points, end_points, Inter.BILINEAR).device("Ascend")(data) 3503 >>> print(output.shape, output.dtype) 3504 (100, 100, 3) uint8 3505 3506 Tutorial Examples: 3507 - `Illustration of vision transforms 3508 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3509 """ 3510 self.device_target = device_target 3511 if self.interpolation not in [Inter.BILINEAR, Inter.NEAREST] and self.device_target == "Ascend": 3512 raise RuntimeError("Invalid interpolation mode, only support BILINEAR and NEAREST.") 3513 return self 3514 3515 def parse(self): 3516 if self.interpolation == Inter.ANTIALIAS: 3517 raise TypeError("Current Interpolation is not supported 
with NumPy input.") 3518 return cde.PerspectiveOperation(self.start_points, self.end_points, 3519 Inter.to_c_type(self.interpolation), self.device_target) 3520 3521 def _execute_py(self, img): 3522 """ 3523 Execute method. 3524 3525 Args: 3526 img (PIL Image): Image to be perspectived. 3527 3528 Returns: 3529 PIL Image, perspectived image. 3530 """ 3531 if self.interpolation in [Inter.AREA, Inter.PILCUBIC]: 3532 raise TypeError("Current Interpolation is not supported with PIL input.") 3533 return util.perspective(img, self.start_points, self.end_points, Inter.to_python_type(self.interpolation)) 3534 3535 3536class Posterize(ImageTensorOperation): 3537 """ 3538 Reduce the bit depth of the color channels of image to create a high contrast and vivid color effect, 3539 similar to that seen in posters or printed materials. 3540 3541 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 3542 3543 Args: 3544 bits (int): The number of bits to keep for each channel, should be in range of [0, 8]. 3545 3546 Raises: 3547 TypeError: If `bits` is not of type int. 3548 ValueError: If `bits` is not in range [0, 8]. 3549 RuntimeError: If shape of the input image is not <H, W> or <H, W, C>. 3550 3551 Examples: 3552 >>> import numpy as np 3553 >>> import mindspore.dataset as ds 3554 >>> import mindspore.dataset.vision as vision 3555 >>> 3556 >>> # Use the transform in dataset pipeline mode 3557 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3558 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3559 >>> transforms_list = [vision.Posterize(4)] 3560 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 3561 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3562 ... print(item["image"].shape, item["image"].dtype) 3563 ... 
break 3564 (100, 100, 3) uint8 3565 >>> 3566 >>> # Use the transform in eager mode 3567 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3568 >>> output = vision.Posterize(4)(data) 3569 >>> print(output.shape, output.dtype) 3570 (100, 100, 3) uint8 3571 3572 Tutorial Examples: 3573 - `Illustration of vision transforms 3574 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3575 """ 3576 3577 @check_posterize 3578 def __init__(self, bits): 3579 super().__init__() 3580 self.bits = bits 3581 self.implementation = Implementation.C 3582 3583 @check_device_target 3584 def device(self, device_target="CPU"): 3585 """ 3586 Set the device for the current operator execution. 3587 3588 - When the device is Ascend, input type supports `uint8`/`float32`, input channel supports 1 and 3. 3589 The input data has a height limit of [4, 8192] and a width limit of [6, 4096]. 3590 3591 Args: 3592 device_target (str, optional): The operator will be executed on this device. Currently supports 3593 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 3594 3595 Raises: 3596 TypeError: If `device_target` is not of type str. 3597 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 3598 3599 Supported Platforms: 3600 ``CPU`` ``Ascend`` 3601 3602 Examples: 3603 >>> import numpy as np 3604 >>> import mindspore.dataset as ds 3605 >>> import mindspore.dataset.vision as vision 3606 >>> 3607 >>> # Use the transform in dataset pipeline mode 3608 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3609 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3610 >>> posterize_op = vision.Posterize(4).device("Ascend") 3611 >>> transforms_list = [posterize_op] 3612 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 3613 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3614 ... 
print(item["image"].shape, item["image"].dtype) 3615 ... break 3616 (100, 100, 3) uint8 3617 >>> 3618 >>> # Use the transform in eager mode 3619 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3620 >>> output = vision.Posterize(4).device("Ascend")(data) 3621 >>> print(output.shape, output.dtype) 3622 (100, 100, 3) uint8 3623 3624 Tutorial Examples: 3625 - `Illustration of vision transforms 3626 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3627 """ 3628 self.device_target = device_target 3629 return self 3630 3631 def parse(self): 3632 return cde.PosterizeOperation(self.bits, self.device_target) 3633 3634 3635class RandAugment(ImageTensorOperation): 3636 """ 3637 Apply RandAugment data augmentation method on the input image. 3638 3639 Refer to `RandAugment: Learning Augmentation Strategies from Data <https://arxiv.org/pdf/1909.13719.pdf>`_ . 3640 3641 Only support 3-channel RGB image. 3642 3643 Args: 3644 num_ops (int, optional): Number of augmentation transformations to apply sequentially. Default: ``2``. 3645 magnitude (int, optional): Magnitude for all the transformations, must be smaller than 3646 `num_magnitude_bins`. Default: ``9``. 3647 num_magnitude_bins (int, optional): The number of different magnitude values, 3648 must be no less than 2. Default: ``31``. 3649 interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` . 3650 Default: ``Inter.NEAREST``. 3651 fill_value (Union[int, tuple[int, int, int]], optional): Pixel fill value for the area outside the 3652 transformed image, must be in range of [0, 255]. Default: ``0``. 3653 If int is provided, pad all RGB channels with this value. 3654 If tuple[int, int, int] is provided, pad R, G, B channels respectively. 3655 3656 Raises: 3657 TypeError: If `num_ops` is not of type int. 3658 ValueError: If `num_ops` is negative. 3659 TypeError: If `magnitude` is not of type int. 
3660 ValueError: If `magnitude` is not positive. 3661 TypeError: If `num_magnitude_bins` is not of type int. 3662 ValueError: If `num_magnitude_bins` is less than 2. 3663 TypeError: If `interpolation` not of type :class:`~.vision.Inter` . 3664 TypeError: If `fill_value` is not of type int or tuple[int, int, int]. 3665 ValueError: If `fill_value` is not in range of [0, 255]. 3666 RuntimeError: If shape of the input image is not <H, W, C>. 3667 3668 Supported Platforms: 3669 ``CPU`` 3670 3671 Examples: 3672 >>> import numpy as np 3673 >>> import mindspore.dataset as ds 3674 >>> import mindspore.dataset.vision as vision 3675 >>> from mindspore.dataset.vision import Inter 3676 >>> 3677 >>> # Use the transform in dataset pipeline mode 3678 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3679 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3680 >>> transforms_list = [vision.RandAugment()] 3681 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 3682 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3683 ... print(item["image"].shape, item["image"].dtype) 3684 ... 
break 3685 (100, 100, 3) uint8 3686 >>> 3687 >>> # Use the transform in eager mode 3688 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3689 >>> output = vision.RandAugment(interpolation=Inter.BILINEAR, fill_value=255)(data) 3690 >>> print(output.shape, output.dtype) 3691 (100, 100, 3) uint8 3692 3693 Tutorial Examples: 3694 - `Illustration of vision transforms 3695 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3696 """ 3697 3698 @check_rand_augment 3699 def __init__(self, num_ops=2, magnitude=9, num_magnitude_bins=31, interpolation=Inter.NEAREST, fill_value=0): 3700 super().__init__() 3701 self.num_ops = num_ops 3702 self.magnitude = magnitude 3703 self.num_magnitude_bins = num_magnitude_bins 3704 self.interpolation = interpolation 3705 if isinstance(fill_value, int): 3706 fill_value = tuple([fill_value] * 3) 3707 self.fill_value = fill_value 3708 self.implementation = Implementation.C 3709 3710 def parse(self): 3711 return cde.RandAugmentOperation(self.num_ops, self.magnitude, self.num_magnitude_bins, 3712 Inter.to_c_type(self.interpolation), self.fill_value) 3713 3714 3715class RandomAdjustSharpness(ImageTensorOperation): 3716 """ 3717 Randomly adjust the sharpness of the input image with a given probability. 3718 3719 Args: 3720 degree (float): Sharpness adjustment degree, which must be non negative. 3721 Degree of ``0.0`` gives a blurred image, degree of ``1.0`` gives the original image, 3722 and degree of ``2.0`` increases the sharpness by a factor of 2. 3723 prob (float, optional): Probability of the image being sharpness adjusted, which 3724 must be in range of [0.0, 1.0]. Default: ``0.5``. 3725 3726 Raises: 3727 TypeError: If `degree` is not of type float. 3728 TypeError: If `prob` is not of type float. 3729 ValueError: If `degree` is negative. 3730 ValueError: If `prob` is not in range [0.0, 1.0]. 3731 RuntimeError: If given tensor shape is not <H, W> or <H, W, C>. 
3732 3733 Supported Platforms: 3734 ``CPU`` 3735 3736 Examples: 3737 >>> import numpy as np 3738 >>> import mindspore.dataset as ds 3739 >>> import mindspore.dataset.vision as vision 3740 >>> 3741 >>> # Use the transform in dataset pipeline mode 3742 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3743 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3744 >>> transforms_list = [vision.RandomAdjustSharpness(2.0, 0.5)] 3745 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 3746 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3747 ... print(item["image"].shape, item["image"].dtype) 3748 ... break 3749 (100, 100, 3) uint8 3750 >>> 3751 >>> # Use the transform in eager mode 3752 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3753 >>> output = vision.RandomAdjustSharpness(2.0, 1.0)(data) 3754 >>> print(output.shape, output.dtype) 3755 (100, 100, 3) uint8 3756 3757 Tutorial Examples: 3758 - `Illustration of vision transforms 3759 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3760 """ 3761 3762 @check_random_adjust_sharpness 3763 def __init__(self, degree, prob=0.5): 3764 super().__init__() 3765 self.prob = prob 3766 self.degree = degree 3767 self.implementation = Implementation.C 3768 3769 def parse(self): 3770 return cde.RandomAdjustSharpnessOperation(self.degree, self.prob) 3771 3772 3773class RandomAffine(ImageTensorOperation, PyTensorOperation): 3774 """ 3775 Apply Random affine transformation to the input image. 3776 3777 Args: 3778 degrees (Union[int, float, sequence]): Range of the rotation degrees. 3779 If `degrees` is a number, the range will be (-degrees, degrees). 3780 If `degrees` is a sequence, it should be (min, max). 
3781 translate (sequence, optional): Sequence (tx_min, tx_max, ty_min, ty_max) of minimum/maximum translation in 3782 x(horizontal) and y(vertical) directions, range [-1.0, 1.0]. Default: ``None``. 3783 The horizontal and vertical shift is selected randomly from the range: 3784 (tx_min*width, tx_max*width) and (ty_min*height, ty_max*height), respectively. 3785 If a tuple or list of size 2, then a translate parallel to the X axis in the range of 3786 (translate[0], translate[1]) is applied. 3787 If a tuple or list of size 4, then a translate parallel to the X axis in the range of 3788 (translate[0], translate[1]) and a translate parallel to the Y axis in the range of 3789 (translate[2], translate[3]) are applied. 3790 If ``None``, no translation is applied. 3791 scale (sequence, optional): Scaling factor interval, which must be non negative. 3792 Default: ``None``, original scale is used. 3793 shear (Union[float, Sequence[float, float], Sequence[float, float, float, float]], optional): 3794 Range of shear factor to select from. 3795 If float is provided, a shearing parallel to X axis with a factor selected from 3796 ( `-shear` , `shear` ) will be applied. 3797 If Sequence[float, float] is provided, a shearing parallel to X axis with a factor selected 3798 from ( `shear` [0], `shear` [1]) will be applied. 3799 If Sequence[float, float, float, float] is provided, a shearing parallel to X axis with a factor selected 3800 from ( `shear` [0], `shear` [1]) and a shearing parallel to Y axis with a factor selected from 3801 ( `shear` [2], `shear` [3]) will be applied. Default: ``None``, means no shearing. 3802 resample (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` . 3803 Default: ``Inter.NEAREST``. 3804 fill_value (Union[int, tuple[int]], optional): Optional fill_value to fill the area outside the transform 3805 in the output image. There must be three elements in tuple and the value of single element is [0, 255]. 
3806 Default: ``0``, filling is performed. 3807 3808 Raises: 3809 TypeError: If `degrees` is not of type int, float or sequence. 3810 TypeError: If `translate` is not of type sequence. 3811 TypeError: If `scale` is not of type sequence. 3812 TypeError: If `shear` is not of type int, float or sequence. 3813 TypeError: If `resample` is not of type :class:`~.vision.Inter` . 3814 TypeError: If `fill_value` is not of type int or tuple[int]. 3815 ValueError: If `degrees` is negative. 3816 ValueError: If `translate` is not in range [-1.0, 1.0]. 3817 ValueError: If `scale` is negative. 3818 ValueError: If `shear` is not positive. 3819 RuntimeError: If given tensor shape is not <H, W> or <H, W, C>. 3820 3821 Supported Platforms: 3822 ``CPU`` 3823 3824 Examples: 3825 >>> import numpy as np 3826 >>> import mindspore.dataset as ds 3827 >>> import mindspore.dataset.vision as vision 3828 >>> from mindspore.dataset.vision import Inter 3829 >>> 3830 >>> # Use the transform in dataset pipeline mode 3831 >>> random_affine_op = vision.RandomAffine(degrees=15, 3832 ... translate=(-0.1, 0.1, 0, 0), 3833 ... scale=(0.9, 1.1), 3834 ... resample=Inter.NEAREST) 3835 >>> transforms_list = [random_affine_op] 3836 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3837 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3838 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 3839 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3840 ... print(item["image"].shape, item["image"].dtype) 3841 ... break 3842 (100, 100, 3) uint8 3843 >>> 3844 >>> # Use the transform in eager mode 3845 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3846 >>> output = vision.RandomAffine(degrees=15, translate=(-0.1, 0.1, 0, 0), 3847 ... 
scale=(0.9, 1.1), resample=Inter.NEAREST)(data) 3848 >>> print(output.shape, output.dtype) 3849 (100, 100, 3) uint8 3850 3851 Tutorial Examples: 3852 - `Illustration of vision transforms 3853 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3854 """ 3855 3856 @check_random_affine 3857 def __init__(self, degrees, translate=None, scale=None, shear=None, resample=Inter.NEAREST, fill_value=0): 3858 super().__init__() 3859 # Parameter checking 3860 if shear is not None: 3861 if isinstance(shear, numbers.Number): 3862 shear = (-1 * shear, shear, 0., 0.) 3863 else: 3864 if len(shear) == 2: 3865 shear = [shear[0], shear[1], 0., 0.] 3866 elif len(shear) == 4: 3867 shear = [s for s in shear] 3868 3869 if isinstance(degrees, numbers.Number): 3870 degrees = (-1 * degrees, degrees) 3871 3872 if isinstance(fill_value, numbers.Number): 3873 fill_value = (fill_value, fill_value, fill_value) 3874 3875 # translation 3876 if translate is None: 3877 translate = (0.0, 0.0, 0.0, 0.0) 3878 3879 # scale 3880 if scale is None: 3881 scale = (1.0, 1.0) 3882 3883 # shear 3884 if shear is None: 3885 shear = (0.0, 0.0, 0.0, 0.0) 3886 3887 self.degrees = degrees 3888 self.translate = translate 3889 self.scale = scale 3890 self.shear = shear 3891 self.resample = resample 3892 if resample in [Inter.AREA, Inter.PILCUBIC]: 3893 self.implementation = Implementation.C 3894 elif resample == Inter.ANTIALIAS: 3895 self.implementation = Implementation.PY 3896 self.fill_value = fill_value 3897 3898 def parse(self): 3899 if self.resample == Inter.ANTIALIAS: 3900 raise TypeError("Current Interpolation is not supported with NumPy input.") 3901 return cde.RandomAffineOperation(self.degrees, self.translate, self.scale, self.shear, 3902 Inter.to_c_type(self.resample), self.fill_value) 3903 3904 def _execute_py(self, img): 3905 """ 3906 Execute method. 3907 3908 Args: 3909 img (PIL Image): Image to be randomly affine transformed. 
3910 3911 Returns: 3912 PIL Image, randomly affine transformed image. 3913 """ 3914 if self.resample in [Inter.AREA, Inter.PILCUBIC]: 3915 raise TypeError("Current Interpolation is not supported with PIL input.") 3916 return util.random_affine(img, 3917 self.degrees, 3918 self.translate, 3919 self.scale, 3920 self.shear, 3921 Inter.to_python_type(self.resample), 3922 self.fill_value) 3923 3924 3925class RandomAutoContrast(ImageTensorOperation): 3926 """ 3927 Automatically adjust the contrast of the image with a given probability. 3928 3929 Args: 3930 cutoff (float, optional): Percent of the lightest and darkest pixels to be cut off from 3931 the histogram of the input image. The value must be in range of [0.0, 50.0]. Default: ``0.0``. 3932 ignore (Union[int, sequence], optional): The background pixel values to be ignored, each of 3933 which must be in range of [0, 255]. Default: ``None``. 3934 prob (float, optional): Probability of the image being automatically contrasted, which 3935 must be in range of [0.0, 1.0]. Default: ``0.5``. 3936 3937 Raises: 3938 TypeError: If `cutoff` is not of type float. 3939 TypeError: If `ignore` is not of type integer or sequence of integer. 3940 TypeError: If `prob` is not of type float. 3941 ValueError: If `cutoff` is not in range [0.0, 50.0). 3942 ValueError: If `ignore` is not in range [0, 255]. 3943 ValueError: If `prob` is not in range [0.0, 1.0]. 3944 RuntimeError: If given tensor shape is not <H, W> or <H, W, C>. 
3945 3946 Supported Platforms: 3947 ``CPU`` 3948 3949 Examples: 3950 >>> import numpy as np 3951 >>> import mindspore.dataset as ds 3952 >>> import mindspore.dataset.vision as vision 3953 >>> 3954 >>> # Use the transform in dataset pipeline mode 3955 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 3956 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 3957 >>> transforms_list = [vision.RandomAutoContrast(cutoff=0.0, ignore=None, prob=0.5)] 3958 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 3959 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 3960 ... print(item["image"].shape, item["image"].dtype) 3961 ... break 3962 (100, 100, 3) uint8 3963 >>> 3964 >>> # Use the transform in eager mode 3965 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 3966 >>> output = vision.RandomAutoContrast(cutoff=0.0, ignore=None, prob=1.0)(data) 3967 >>> print(output.shape, output.dtype) 3968 (100, 100, 3) uint8 3969 3970 Tutorial Examples: 3971 - `Illustration of vision transforms 3972 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 3973 """ 3974 3975 @check_random_auto_contrast 3976 def __init__(self, cutoff=0.0, ignore=None, prob=0.5): 3977 super().__init__() 3978 if ignore is None: 3979 ignore = [] 3980 if isinstance(ignore, int): 3981 ignore = [ignore] 3982 self.cutoff = cutoff 3983 self.ignore = ignore 3984 self.prob = prob 3985 self.implementation = Implementation.C 3986 3987 def parse(self): 3988 return cde.RandomAutoContrastOperation(self.cutoff, self.ignore, self.prob) 3989 3990 3991class RandomColor(ImageTensorOperation, PyTensorOperation): 3992 """ 3993 Adjust the color of the input image by a fixed or random degree. 3994 This operation works only with 3-channel color images. 
class RandomColor(ImageTensorOperation, PyTensorOperation):
    """
    Adjust the color of the input image by a fixed or random degree.
    This operation works only with 3-channel color images.

    Args:
         degrees (Sequence[float], optional): Range of random color adjustment degrees, which must be non-negative.
            It should be in (min, max) format. If min=max, then it is a
            single fixed magnitude operation. Default: ``(0.1, 1.9)``.

    Raises:
        TypeError: If `degrees` is not of type Sequence[float].
        ValueError: If `degrees` is negative.
        RuntimeError: If given tensor shape is not <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomColor((0.5, 2.0))]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomColor((0.1, 1.9))(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_positive_degrees
    def __init__(self, degrees=(0.1, 1.9)):
        super().__init__()
        # Constructor arguments are stored on the instance to support save() and load().
        self.degrees = degrees

    def parse(self):
        """Create the C++ operation, passing the stored degrees as positional arguments."""
        return cde.RandomColorOperation(*self.degrees)

    def _execute_py(self, img):
        """
        Run the Python implementation of the color adjustment.

        Args:
            img (PIL Image): Image to be color adjusted.

        Returns:
            PIL Image, color adjusted image.
        """
        adjusted = util.random_color(img, self.degrees)
        return adjusted
class RandomColorAdjust(ImageTensorOperation, PyTensorOperation):
    """
    Randomly adjust the brightness, contrast, saturation, and hue of the input image.

    Note:
        This operation is executed on the CPU by default, but it is also supported
        to be executed on the GPU or Ascend via heterogeneous acceleration.

    Args:
        brightness (Union[float, Sequence[float]], optional): Brightness adjustment factor. Default: ``(1, 1)``.
            Cannot be negative.
            If it is a float, the factor is uniformly chosen from the range [max(0, 1-brightness), 1+brightness].
            If it is a sequence, it should be [min, max] for the range.
        contrast (Union[float, Sequence[float]], optional): Contrast adjustment factor. Default: ``(1, 1)``.
            Cannot be negative.
            If it is a float, the factor is uniformly chosen from the range [max(0, 1-contrast), 1+contrast].
            If it is a sequence, it should be [min, max] for the range.
        saturation (Union[float, Sequence[float]], optional): Saturation adjustment factor. Default: ``(1, 1)``.
            Cannot be negative.
            If it is a float, the factor is uniformly chosen from the range [max(0, 1-saturation), 1+saturation].
            If it is a sequence, it should be [min, max] for the range.
        hue (Union[float, Sequence[float]], optional): Hue adjustment factor. Default: ``(0, 0)``.
            If it is a float, the range will be [-hue, hue]. Value should be 0 <= hue <= 0.5.
            If it is a sequence, it should be [min, max] where -0.5 <= min <= max <= 0.5.

    Raises:
        TypeError: If `brightness` is not of type float or Sequence[float].
        TypeError: If `contrast` is not of type float or Sequence[float].
        TypeError: If `saturation` is not of type float or Sequence[float].
        TypeError: If `hue` is not of type float or Sequence[float].
        ValueError: If `brightness` is negative.
        ValueError: If `contrast` is negative.
        ValueError: If `saturation` is negative.
        ValueError: If `hue` is not in range [-0.5, 0.5].
        RuntimeError: If given tensor shape is not <H, W, C>.

    Supported Platforms:
        ``CPU`` ``GPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transform_op = vision.RandomColorAdjust(brightness=(0.5, 1),
        ...                                         contrast=(0.4, 1),
        ...                                         saturation=(0.3, 1))
        >>> transforms_list = [transform_op]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomColorAdjust(brightness=(0.5, 1), contrast=(0.4, 1), saturation=(0.3, 1))(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_color_adjust
    def __init__(self, brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)):
        super().__init__()
        # Each factor is stored as a (min, max) tuple; a scalar is expanded around its center.
        self.brightness = self.__expand_values(brightness)
        self.contrast = self.__expand_values(contrast)
        self.saturation = self.__expand_values(saturation)
        # Hue is centered on 0 and may be negative, so it is not clamped at 0.
        self.hue = self.__expand_values(hue, center=0, bound=(-0.5, 0.5), non_negative=False)

    def parse(self):
        """Create the C++ operation from the four stored (min, max) ranges."""
        return cde.RandomColorAdjustOperation(self.brightness, self.contrast, self.saturation, self.hue)

    def _execute_py(self, img):
        """
        Run the Python implementation of the random color adjustment.

        Args:
            img (PIL image): Image to be randomly color adjusted.

        Returns:
            PIL Image, randomly color adjusted image.
        """
        adjusted = util.random_color_adjust(img, self.brightness, self.contrast, self.saturation, self.hue)
        return adjusted

    def __expand_values(self, value, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True):
        """
        Expand input value for vision adjustment factor.

        A sequence is returned as a tuple of its first two items without further checks here.
        A single number is turned into (center - value, center + value), the lower end is
        clamped to 0 when `non_negative` is set, and the pair is passed to `check_range`
        together with `bound`.
        """
        if not isinstance(value, numbers.Number):
            return (value[0], value[1])

        lower, upper = center - value, center + value
        if non_negative:
            lower = max(0, lower)
        check_range([lower, upper], bound)
        return (lower, upper)
class RandomCrop(ImageTensorOperation, PyTensorOperation):
    """
    Crop the input image at a random location. If input image size is smaller than output size,
    input image will be padded before cropping.

    Note:
        If the input image is more than one, then make sure that the image size is the same.

    Args:
        size (Union[int, Sequence[int]]): The output size of the cropped image. The size value(s) must be positive.
            If size is an integer, a square crop of size (size, size) is returned.
            If size is a sequence of length 2, an image of size (height, width) will be cropped.
        padding (Union[int, Sequence[int]], optional): The number of pixels to pad each border of the image.
            The padding value(s) must be non-negative. Default: ``None``.
            If `padding` is not ``None``, pad image first with padding values.
            If a single number is provided, pad all borders with this value.
            If a tuple or lists of 2 values are provided, pad the (left and right)
            with the first value and (top and bottom) with the second value.
            If 4 values are provided as a list or tuple,
            pad the left, top, right and bottom respectively.
        pad_if_needed (bool, optional): Pad the image if either side is smaller than
            the given output size. Default: ``False``.
        fill_value (Union[int, tuple[int]], optional): The pixel intensity of the borders, only valid for
            padding_mode Border.CONSTANT. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
            If it is an integer, it is used for all RGB channels.
            The fill_value values must be in range [0, 255]. Default: ``0``.
        padding_mode (Border, optional): The method of padding. Default: ``Border.CONSTANT``. It can be any of
            ``Border.CONSTANT``, ``Border.EDGE``, ``Border.REFLECT``, ``Border.SYMMETRIC``.

            - ``Border.CONSTANT`` , means it fills the border with constant values.

            - ``Border.EDGE`` , means it pads with the last value on the edge.

            - ``Border.REFLECT`` , means it reflects the values on the edge omitting the last
              value of edge.

            - ``Border.SYMMETRIC`` , means it reflects the values on the edge repeating the last
              value of edge.

    Raises:
        TypeError: If `size` is not of type int or Sequence[int].
        TypeError: If `padding` is not of type int or Sequence[int].
        TypeError: If `pad_if_needed` is not of type boolean.
        TypeError: If `fill_value` is not of type int or tuple[int].
        TypeError: If `padding_mode` is not of type :class:`mindspore.dataset.vision.Border` .
        ValueError: If `size` is not positive.
        ValueError: If `padding` is negative.
        ValueError: If `fill_value` is not in range [0, 255].
        RuntimeError: If given tensor shape is not <H, W> or <..., H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Border
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> random_crop_op = vision.RandomCrop(64, [16, 16, 16, 16], padding_mode=Border.EDGE)
        >>> transforms_list = [random_crop_op]
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (64, 64, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomCrop(8, [10, 10, 10, 10], padding_mode=Border.EDGE)(data)
        >>> print(output.shape, output.dtype)
        (8, 8, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_crop
    def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT):
        super().__init__()
        # A single int means a square crop.
        self.size = (size, size) if isinstance(size, int) else size
        # No padding is stored as four zeros; anything else goes through parse_padding.
        self.padding = (0, 0, 0, 0) if padding is None else parse_padding(padding)
        self.pad_if_needed = pad_if_needed
        # A single int fill value is repeated three times (one per R, G, B channel).
        self.fill_value = (fill_value,) * 3 if isinstance(fill_value, int) else fill_value
        self.padding_mode = padding_mode

    def parse(self):
        """Create the C++ operation, converting the padding mode to its C type."""
        border_type = Border.to_c_type(self.padding_mode)
        return cde.RandomCropOperation(self.size, self.padding, self.pad_if_needed, self.fill_value, border_type)

    def _execute_py(self, img):
        """
        Run the Python implementation of the random crop.

        Args:
            img (PIL Image): Image to be randomly cropped.

        Returns:
            PIL Image, cropped image.
        """
        border_type = Border.to_python_type(self.padding_mode)
        return util.random_crop(img, self.size, self.padding, self.pad_if_needed, self.fill_value, border_type)
class RandomCropDecodeResize(ImageTensorOperation):
    """
    A combination of `Crop` , `Decode` and `Resize` . It will get better performance for JPEG images. This operation
    will crop the input image at a random location, decode the cropped image in RGB mode, and resize the decoded image.

    Args:
        size (Union[int, Sequence[int]]): The output size of the resized image. The size value(s) must be positive.
            If size is an integer, a square crop of size (size, size) is returned.
            If size is a sequence of length 2, it should be (height, width).
        scale (Union[list, tuple], optional): Range [min, max) of respective size of the
            original size to be cropped, which must be non-negative. Default: ``(0.08, 1.0)``.
        ratio (Union[list, tuple], optional): Range [min, max) of aspect ratio to be
            cropped, which must be non-negative. Default: ``(3. / 4., 4. / 3.)``.
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.BILINEAR``.
        max_attempts (int, optional): The maximum number of attempts to propose a valid crop_area. Default: ``10``.
            If exceeded, fall back to use center_crop instead. The `max_attempts` value must be positive.

    Raises:
        TypeError: If `size` is not of type int or Sequence[int].
        TypeError: If `scale` is not of type tuple.
        TypeError: If `ratio` is not of type tuple.
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        TypeError: If `max_attempts` is not of type integer.
        ValueError: If `size` is not positive.
        ValueError: If `scale` is negative.
        ValueError: If `ratio` is negative.
        ValueError: If `max_attempts` is not positive.
        RuntimeError: If given tensor is not a 1D sequence.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import os
        >>> import numpy as np
        >>> from PIL import Image, ImageDraw
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> class MyDataset:
        ...     def __init__(self):
        ...         self.data = []
        ...         img = Image.new("RGB", (300, 300), (255, 255, 255))
        ...         draw = ImageDraw.Draw(img)
        ...         draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5)
        ...         img.save("./1.jpg")
        ...         data = np.fromfile("./1.jpg", np.uint8)
        ...         self.data.append(data)
        ...
        ...     def __getitem__(self, index):
        ...         return self.data[0]
        ...
        ...     def __len__(self):
        ...         return 5
        >>>
        >>> my_dataset = MyDataset()
        >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image")
        >>> resize_crop_decode_op = vision.RandomCropDecodeResize(size=(50, 75),
        ...                                                       scale=(0.25, 0.5),
        ...                                                       interpolation=Inter.NEAREST,
        ...                                                       max_attempts=5)
        >>> transforms_list = [resize_crop_decode_op]
        >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (50, 75, 3) uint8
        >>> os.remove("./1.jpg")
        >>>
        >>> # Use the transform in eager mode
        >>> img = Image.new("RGB", (300, 300), (255, 255, 255))
        >>> draw = ImageDraw.Draw(img)
        >>> draw.polygon([(50, 50), (150, 50), (100, 150)], fill=(0, 255, 0), outline=(0, 255, 0))
        >>> img.save("./2.jpg")
        >>> data = np.fromfile("./2.jpg", np.uint8)
        >>> output = vision.RandomCropDecodeResize(size=(50, 75), scale=(0, 10.0), ratio=(0.5, 0.5),
        ...                                        interpolation=Inter.BILINEAR, max_attempts=1)(data)
        >>> print(np.array(output).shape, np.array(output).dtype)
        (50, 75, 3) uint8
        >>> os.remove("./2.jpg")

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_resize_crop
    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
                 interpolation=Inter.BILINEAR, max_attempts=10):
        super().__init__()
        # A single int means a square output.
        self.size = (size, size) if isinstance(size, int) else size
        self.scale = scale
        self.ratio = ratio
        self.interpolation = interpolation
        self.max_attempts = max_attempts
        self.implementation = Implementation.C

    def __call__(self, img):
        """
        Validate the eager-mode input, then delegate to the parent `__call__`.

        Args:
            img (numpy.ndarray): Encoded image; must be a 1-D array of uint8.

        Raises:
            TypeError: If `img` is not a numpy.ndarray, is not 1-D, or is not of uint8 type.
        """
        if not isinstance(img, np.ndarray):
            raise TypeError(
                "Input should be an encoded image in 1-D NumPy format, got {}.".format(type(img)))

        one_dimensional = img.ndim == 1
        uint8_typed = img.dtype.type is np.uint8
        if not (one_dimensional and uint8_typed):
            raise TypeError("Input should be an encoded image with uint8 type in 1-D NumPy format, " +
                            "got format:{}, dtype:{}.".format(type(img), img.dtype.type))
        return super().__call__(img)

    def parse(self):
        """Create the C++ operation, converting the interpolation mode to its C type."""
        c_interpolation = Inter.to_c_type(self.interpolation)
        return cde.RandomCropDecodeResizeOperation(self.size, self.scale, self.ratio,
                                                   c_interpolation, self.max_attempts)
class RandomCropWithBBox(ImageTensorOperation):
    """
    Crop the input image at a random location and adjust bounding boxes accordingly.

    Args:
        size (Union[int, Sequence[int]]): The output size of the cropped image. The size value(s) must be positive.
            If size is an integer, a square crop of size (size, size) is returned.
            If size is a sequence of length 2, an image of size (height, width) will be cropped.
        padding (Union[int, Sequence[int]], optional): The number of pixels to pad the image.
            The padding value(s) must be non-negative. Default: ``None``.
            If `padding` is not ``None``, first pad image with padding values.
            If a single number is provided, pad all borders with this value.
            If a tuple or lists of 2 values are provided, pad the (left and right)
            with the first value and (top and bottom) with the second value.
            If 4 values are provided as a list or tuple, pad the left, top, right and bottom respectively.
        pad_if_needed (bool, optional): Pad the image if either side is smaller than
            the given output size. Default: ``False``.
        fill_value (Union[int, tuple[int]], optional): The pixel intensity of the borders, only valid for
            padding_mode Border.CONSTANT. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
            If it is an integer, it is used for all RGB channels.
            The fill_value values must be in range [0, 255]. Default: ``0``.
        padding_mode (Border, optional): The method of padding. Default: ``Border.CONSTANT``. It can be any of
            ``Border.CONSTANT``, ``Border.EDGE``, ``Border.REFLECT``, ``Border.SYMMETRIC``.

            - ``Border.CONSTANT`` , means it fills the border with constant values.

            - ``Border.EDGE`` , means it pads with the last value on the edge.

            - ``Border.REFLECT`` , means it reflects the values on the edge omitting the last
              value of edge.

            - ``Border.SYMMETRIC`` , means it reflects the values on the edge repeating the last
              value of edge.

    Raises:
        TypeError: If `size` is not of type int or Sequence[int].
        TypeError: If `padding` is not of type int or Sequence[int].
        TypeError: If `pad_if_needed` is not of type boolean.
        TypeError: If `fill_value` is not of type int or tuple[int].
        TypeError: If `padding_mode` is not of type :class:`mindspore.dataset.vision.Border` .
        ValueError: If `size` is not positive.
        ValueError: If `padding` is negative.
        ValueError: If `fill_value` is not in range [0, 255].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32))
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func],
        ...                                                 input_columns=["image"],
        ...                                                 output_columns=["image", "bbox"])
        >>> random_crop_with_bbox_op = vision.RandomCropWithBBox([64, 64], [20, 20, 20, 20])
        >>> transforms_list = [random_crop_with_bbox_op]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image", "bbox"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["bbox"].shape, item["bbox"].dtype)
        ...     break
        (64, 64, 3) float32
        (1, 4) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(data.dtype))
        >>> func_data, func_bboxes = func(data)
        >>> output = vision.RandomCropWithBBox([64, 64], [20, 20, 20, 20])(func_data, func_bboxes)
        >>> print(output[0].shape, output[0].dtype)
        (64, 64, 3) float32
        >>> print(output[1].shape, output[1].dtype)
        (1, 4) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_crop
    def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT):
        super().__init__()
        # A single int means a square crop.
        self.size = (size, size) if isinstance(size, int) else size
        # No padding is stored as four zeros; anything else goes through parse_padding.
        self.padding = (0, 0, 0, 0) if padding is None else parse_padding(padding)
        self.pad_if_needed = pad_if_needed
        # A single int fill value is repeated three times (one per R, G, B channel).
        self.fill_value = (fill_value,) * 3 if isinstance(fill_value, int) else fill_value
        self.padding_mode = padding_mode
        self.implementation = Implementation.C

    def parse(self):
        """Create the C++ operation, converting the padding mode to its C type."""
        return cde.RandomCropWithBBoxOperation(self.size, self.padding, self.pad_if_needed, self.fill_value,
                                               Border.to_c_type(self.padding_mode))
class RandomEqualize(ImageTensorOperation):
    """
    Apply histogram equalization on the input image with a given probability.

    Args:
        prob (float, optional): Probability of the image being equalized, which
            must be in range of [0.0, 1.0]. Default: ``0.5``.

    Raises:
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomEqualize(0.5)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomEqualize(1.0)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_prob
    def __init__(self, prob=0.5):
        super().__init__()
        # Constructor arguments are stored on the instance to support save() and load().
        self.prob = prob
        self.implementation = Implementation.C

    def parse(self):
        """Create the C++ operation from the stored probability."""
        operation = cde.RandomEqualizeOperation(self.prob)
        return operation
class RandomErasing(PyTensorOperation):
    """
    Randomly erase pixels within a randomly selected rectangle area on the input numpy.ndarray image.

    See `Random Erasing Data Augmentation <https://arxiv.org/pdf/1708.04896.pdf>`_ .

    Args:
        prob (float, optional): Probability of performing erasing, which
            must be in range of [0.0, 1.0]. Default: ``0.5``.
        scale (Sequence[float, float], optional): Range of area scale of the erased area relative
            to the original image to select from, arranged in order of (min, max).
            Default: ``(0.02, 0.33)``.
        ratio (Sequence[float, float], optional): Range of aspect ratio of the erased area to select
            from, arranged in order of (min, max). Default: ``(0.3, 3.3)``.
        value (Union[int, str, Sequence[int, int, int]], optional): Pixel value used to pad the erased area.
            If a single integer is provided, it will be used for all RGB channels.
            If a sequence of length 3 is provided, it will be used for R, G, B channels respectively.
            If a string of ``'random'`` is provided, each pixel will be erased with a random value obtained
            from a standard normal distribution. Default: ``0``.
        inplace (bool, optional): Whether to apply erasing inplace. Default: ``False``.
        max_attempts (int, optional): The maximum number of attempts to propose a valid
            erased area, beyond which the original image will be returned. Default: ``10``.

    Raises:
        TypeError: If `prob` is not of type float.
        TypeError: If `scale` is not of type sequence.
        TypeError: If `ratio` is not of type sequence.
        TypeError: If `value` is not of type integer, string, or sequence.
        TypeError: If `inplace` is not of type boolean.
        TypeError: If `max_attempts` is not of type integer.
        ValueError: If `prob` is not in range of [0.0, 1.0].
        ValueError: If `scale` is negative.
        ValueError: If `ratio` is negative.
        ValueError: If `value` is not in range of [0, 255].
        ValueError: If `max_attempts` is not positive.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> transforms_list = Compose([vision.ToTensor(),
        ...                            vision.RandomErasing(value='random')])
        >>> # apply the transform to dataset through map function
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 100, 100) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(254, 255, size=(3, 100, 100)).astype(np.uint8)
        >>> output = vision.RandomErasing(prob=1.0, max_attempts=1)(data)
        >>> print(output.shape, output.dtype)
        (3, 100, 100) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_erasing
    def __init__(self, prob=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False, max_attempts=10):
        super().__init__()
        # Constructor arguments are stored on the instance to support save() and load().
        self.prob = prob
        self.scale = scale
        self.ratio = ratio
        self.value = value
        self.inplace = inplace
        self.max_attempts = max_attempts
        self.implementation = Implementation.PY

    def _execute_py(self, np_img):
        """
        Erase a region of the image with probability `prob`; otherwise return the input as is.

        Args:
            np_img (numpy.ndarray): image in shape of <C, H, W> to be randomly erased.

        Returns:
            numpy.ndarray, erased image.
        """
        bounded = True
        # One random draw per call decides whether erasing happens at all.
        if not self.prob > random.random():
            return np_img

        erase_params = util.get_erase_params(np_img, self.scale, self.ratio,
                                             self.value, bounded, self.max_attempts)
        i_pos, j_pos, erase_h, erase_w, erase_value = erase_params
        return util.erase(np_img, i_pos, j_pos, erase_h, erase_w, erase_value, self.inplace)
class RandomGrayscale(PyTensorOperation):
    """
    Randomly convert the input PIL Image to grayscale.

    Args:
        prob (float, optional): Probability of performing grayscale conversion,
            which must be in range of [0.0, 1.0]. Default: ``0.1``.

    Raises:
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range of [0.0, 1.0].

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import os
        >>> import numpy as np
        >>> from PIL import Image, ImageDraw
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> class MyDataset:
        ...     def __init__(self):
        ...         self.data = []
        ...         img = Image.new("RGB", (300, 300), (255, 255, 255))
        ...         draw = ImageDraw.Draw(img)
        ...         draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5)
        ...         img.save("./1.jpg")
        ...         data = np.fromfile("./1.jpg", np.uint8)
        ...         self.data.append(data)
        ...
        ...     def __getitem__(self, index):
        ...         return self.data[0]
        ...
        ...     def __len__(self):
        ...         return 5
        >>>
        >>> my_dataset = MyDataset()
        >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image")
        >>> transforms_list = Compose([vision.Decode(to_pil=True),
        ...                            vision.RandomGrayscale(0.3),
        ...                            vision.ToTensor()])
        >>> # apply the transform to dataset through map function
        >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 300, 300) float32
        >>> os.remove("./1.jpg")
        >>>
        >>> # Use the transform in eager mode
        >>> img = Image.new("RGB", (300, 300), (255, 255, 255))
        >>> draw = ImageDraw.Draw(img)
        >>> draw.polygon([(50, 50), (150, 50), (100, 150)], fill=(0, 255, 0), outline=(0, 255, 0))
        >>> img.save("./2.jpg")
        >>> data = Image.open("./2.jpg")
        >>> output = vision.RandomGrayscale(1.0)(data)
        >>> print(np.array(output).shape, np.array(output).dtype)
        (300, 300, 3) uint8
        >>> os.remove("./2.jpg")

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_prob
    def __init__(self, prob=0.1):
        super().__init__()
        # Constructor arguments are stored on the instance to support save() and load().
        self.prob = prob
        self.implementation = Implementation.PY

    def _execute_py(self, img):
        """
        Convert the image to grayscale with probability `prob`; otherwise return it unchanged.

        Args:
            img (PIL Image): Image to be randomly converted to grayscale.

        Returns:
            PIL Image, randomly converted grayscale image, which has the same number of channels as the input image.
                If input image has 1 channel, the output grayscale image will have 1 channel.
                If input image has 3 channels, the output grayscale image will have 3 identical channels.
        """
        # Mode 'L' requests a single output channel; every other mode requests three.
        num_output_channels = 1 if img.mode == 'L' else 3

        # One random draw per call decides whether the conversion happens.
        if not self.prob > random.random():
            return img
        return util.grayscale(img, num_output_channels=num_output_channels)
class RandomHorizontalFlip(ImageTensorOperation, PyTensorOperation):
    """
    Randomly flip the input image horizontally with a given probability.

    Args:
        prob (float, optional): Probability of the image being flipped,
            which must be in range of [0.0, 1.0]. Default: ``0.5``.

    Raises:
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomHorizontalFlip(0.75)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomHorizontalFlip(1.0)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_prob
    def __init__(self, prob=0.5):
        super().__init__()
        # Constructor arguments are stored on the instance to support save() and load().
        self.prob = prob

    def parse(self):
        """Create the C++ operation from the stored probability."""
        operation = cde.RandomHorizontalFlipOperation(self.prob)
        return operation

    def _execute_py(self, img):
        """
        Run the Python implementation of the random horizontal flip.

        Args:
            img (PIL Image): Image to be horizontally flipped.

        Returns:
            PIL Image, randomly horizontally flipped image.
        """
        flipped = util.random_horizontal_flip(img, self.prob)
        return flipped
4832 4833 Supported Platforms: 4834 ``CPU`` 4835 4836 Examples: 4837 >>> import numpy as np 4838 >>> import mindspore.dataset as ds 4839 >>> import mindspore.dataset.vision as vision 4840 >>> 4841 >>> # Use the transform in dataset pipeline mode 4842 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32) 4843 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 4844 >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32)) 4845 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func], 4846 ... input_columns=["image"], 4847 ... output_columns=["image", "bbox"]) 4848 >>> transforms_list = [vision.RandomHorizontalFlipWithBBox(0.70)] 4849 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, 4850 ... input_columns=["image", "bbox"]) 4851 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 4852 ... print(item["image"].shape, item["image"].dtype) 4853 ... print(item["bbox"].shape, item["bbox"].dtype) 4854 ... 
break 4855 (100, 100, 3) float32 4856 (1, 4) float32 4857 >>> 4858 >>> # Use the transform in eager mode 4859 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32) 4860 >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(data.dtype)) 4861 >>> func_data, func_bboxes = func(data) 4862 >>> output = vision.RandomHorizontalFlipWithBBox(1)(func_data, func_bboxes) 4863 >>> print(output[0].shape, output[0].dtype) 4864 (100, 100, 3) float32 4865 >>> print(output[1].shape, output[1].dtype) 4866 (1, 4) float32 4867 4868 Tutorial Examples: 4869 - `Illustration of vision transforms 4870 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 4871 """ 4872 4873 @check_prob 4874 def __init__(self, prob=0.5): 4875 super().__init__() 4876 self.prob = prob 4877 self.implementation = Implementation.C 4878 4879 def parse(self): 4880 return cde.RandomHorizontalFlipWithBBoxOperation(self.prob) 4881 4882 4883class RandomInvert(ImageTensorOperation): 4884 """ 4885 Randomly invert the colors of image with a given probability. 4886 4887 Args: 4888 prob (float, optional): Probability of the image being inverted, 4889 which must be in range of [0.0, 1.0]. Default: ``0.5``. 4890 4891 Raises: 4892 TypeError: If `prob` is not of type float. 4893 ValueError: If `prob` is not in range [0.0, 1.0]. 4894 RuntimeError: If given tensor shape is not <H, W, C>. 
4895 4896 Supported Platforms: 4897 ``CPU`` 4898 4899 Examples: 4900 >>> import numpy as np 4901 >>> import mindspore.dataset as ds 4902 >>> import mindspore.dataset.vision as vision 4903 >>> 4904 >>> # Use the transform in dataset pipeline mode 4905 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 4906 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 4907 >>> transforms_list = [vision.RandomInvert(0.5)] 4908 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 4909 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 4910 ... print(item["image"].shape, item["image"].dtype) 4911 ... break 4912 (100, 100, 3) uint8 4913 >>> 4914 >>> # Use the transform in eager mode 4915 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 4916 >>> output = vision.RandomInvert(1.0)(data) 4917 >>> print(output.shape, output.dtype) 4918 (100, 100, 3) uint8 4919 4920 Tutorial Examples: 4921 - `Illustration of vision transforms 4922 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 4923 """ 4924 4925 @check_prob 4926 def __init__(self, prob=0.5): 4927 super().__init__() 4928 self.prob = prob 4929 self.implementation = Implementation.C 4930 4931 def parse(self): 4932 return cde.RandomInvertOperation(self.prob) 4933 4934 4935class RandomLighting(ImageTensorOperation, PyTensorOperation): 4936 """ 4937 Add AlexNet-style PCA-based noise to an image. The eigenvalue and eigenvectors for Alexnet's PCA noise is 4938 calculated from the imagenet dataset. 4939 4940 Args: 4941 alpha (float, optional): Intensity of the image, which must be non-negative. Default: ``0.05``. 4942 4943 Raises: 4944 TypeError: If `alpha` is not of type float. 4945 ValueError: If `alpha` is negative. 4946 RuntimeError: If given tensor shape is not <H, W, C>. 
4947 4948 Supported Platforms: 4949 ``CPU`` 4950 4951 Examples: 4952 >>> import numpy as np 4953 >>> import mindspore.dataset as ds 4954 >>> import mindspore.dataset.vision as vision 4955 >>> 4956 >>> # Use the transform in dataset pipeline mode 4957 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 4958 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 4959 >>> transforms_list = [vision.RandomLighting(0.1)] 4960 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 4961 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 4962 ... print(item["image"].shape, item["image"].dtype) 4963 ... break 4964 (100, 100, 3) uint8 4965 >>> 4966 >>> # Use the transform in eager mode 4967 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 4968 >>> output = vision.RandomLighting(0.1)(data) 4969 >>> print(output.shape, output.dtype) 4970 (100, 100, 3) uint8 4971 4972 Tutorial Examples: 4973 - `Illustration of vision transforms 4974 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 4975 """ 4976 4977 @check_alpha 4978 def __init__(self, alpha=0.05): 4979 super().__init__() 4980 self.alpha = alpha 4981 4982 def parse(self): 4983 return cde.RandomLightingOperation(self.alpha) 4984 4985 def _execute_py(self, img): 4986 """ 4987 Execute method. 4988 4989 Args: 4990 img (PIL Image): Image to be added AlexNet-style PCA-based noise. 4991 4992 Returns: 4993 PIL Image, image with noise added. 4994 """ 4995 4996 return util.random_lighting(img, self.alpha) 4997 4998 4999class RandomPerspective(PyTensorOperation): 5000 """ 5001 Randomly apply perspective transformation to the input PIL Image with a given probability. 5002 5003 Args: 5004 distortion_scale (float, optional): Scale of distortion, in range of [0.0, 1.0]. Default: ``0.5``. 
5005 prob (float, optional): Probability of performing perspective transformation, which 5006 must be in range of [0.0, 1.0]. Default: ``0.5``. 5007 interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` . 5008 Default: ``Inter.BICUBIC``. 5009 5010 Raises: 5011 TypeError: If `distortion_scale` is not of type float. 5012 TypeError: If `prob` is not of type float. 5013 TypeError: If `interpolation` is not of type :class:`~.vision.Inter` . 5014 ValueError: If `distortion_scale` is not in range of [0.0, 1.0]. 5015 ValueError: If `prob` is not in range of [0.0, 1.0]. 5016 5017 Supported Platforms: 5018 ``CPU`` 5019 5020 Examples: 5021 >>> import os 5022 >>> import numpy as np 5023 >>> from PIL import Image, ImageDraw 5024 >>> import mindspore.dataset as ds 5025 >>> import mindspore.dataset.vision as vision 5026 >>> from mindspore.dataset.transforms import Compose 5027 >>> 5028 >>> # Use the transform in dataset pipeline mode 5029 >>> class MyDataset: 5030 ... def __init__(self): 5031 ... self.data = [] 5032 ... img = Image.new("RGB", (300, 300), (255, 255, 255)) 5033 ... draw = ImageDraw.Draw(img) 5034 ... draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5) 5035 ... img.save("./1.jpg") 5036 ... data = np.fromfile("./1.jpg", np.uint8) 5037 ... self.data.append(data) 5038 ... 5039 ... def __getitem__(self, index): 5040 ... return self.data[0] 5041 ... 5042 ... def __len__(self): 5043 ... return 5 5044 >>> 5045 >>> my_dataset = MyDataset() 5046 >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image") 5047 >>> transforms_list = Compose([vision.Decode(to_pil=True), 5048 ... vision.RandomPerspective(prob=0.1), 5049 ... 
vision.ToTensor()]) 5050 >>> # apply the transform to dataset through map function 5051 >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns="image") 5052 >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 5053 ... print(item["image"].shape, item["image"].dtype) 5054 ... break 5055 (3, 300, 300) float32 5056 >>> os.remove("./1.jpg") 5057 >>> 5058 >>> # Use the transform in eager mode 5059 >>> img = Image.new("RGB", (300, 300), (255, 255, 255)) 5060 >>> draw = ImageDraw.Draw(img) 5061 >>> draw.polygon([(50, 50), (150, 50), (100, 150)], fill=(0, 255, 0), outline=(0, 255, 0)) 5062 >>> img.save("./2.jpg") 5063 >>> data = Image.open("./2.jpg") 5064 >>> output = vision.RandomPerspective(prob=1.0)(data) 5065 >>> print(np.array(output).shape, np.array(output).dtype) 5066 (300, 300, 3) uint8 5067 >>> os.remove("./2.jpg") 5068 5069 Tutorial Examples: 5070 - `Illustration of vision transforms 5071 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 5072 """ 5073 5074 @check_random_perspective 5075 def __init__(self, distortion_scale=0.5, prob=0.5, interpolation=Inter.BICUBIC): 5076 super().__init__() 5077 self.distortion_scale = distortion_scale 5078 self.prob = prob 5079 self.interpolation = interpolation 5080 self.implementation = Implementation.PY 5081 5082 def _execute_py(self, img): 5083 """ 5084 Execute method. 5085 5086 Args: 5087 img (PIL Image): Image to be applied randomly perspective transformation. 5088 5089 Returns: 5090 PIL Image, image applied randomly perspective transformation. 
5091 """ 5092 if not is_pil(img): 5093 raise ValueError("Input image should be a Pillow image.") 5094 if self.prob > random.random(): 5095 start_points, end_points = util.get_perspective_params( 5096 img, self.distortion_scale) 5097 return util.perspective(img, start_points, end_points, Inter.to_python_type(self.interpolation)) 5098 return img 5099 5100 5101class RandomPosterize(ImageTensorOperation): 5102 """ 5103 Reduce the bit depth of the color channels of image with a given probability 5104 to create a high contrast and vivid color image. 5105 5106 Reduce the number of bits for each color channel to posterize the input image randomly with a given probability. 5107 5108 Args: 5109 bits (Union[int, Sequence[int]], optional): Range of random posterize to compress image. 5110 Bits values must be in range of [1,8], and include at 5111 least one integer value in the given range. It must be in 5112 (min, max) or integer format. If min=max, then it is a single fixed 5113 magnitude operation. Default: ``(8, 8)``. 5114 5115 Raises: 5116 TypeError: If `bits` is not of type integer or sequence of integer. 5117 ValueError: If `bits` is not in range [1, 8]. 5118 RuntimeError: If given tensor shape is not <H, W> or <H, W, C>. 5119 5120 Supported Platforms: 5121 ``CPU`` 5122 5123 Examples: 5124 >>> import numpy as np 5125 >>> import mindspore.dataset as ds 5126 >>> import mindspore.dataset.vision as vision 5127 >>> 5128 >>> # Use the transform in dataset pipeline mode 5129 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 5130 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 5131 >>> transforms_list = [vision.RandomPosterize((6, 8))] 5132 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 5133 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 5134 ... print(item["image"].shape, item["image"].dtype) 5135 ... 
break 5136 (100, 100, 3) uint8 5137 >>> 5138 >>> # Use the transform in eager mode 5139 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 5140 >>> output = vision.RandomPosterize(1)(data) 5141 >>> print(output.shape, output.dtype) 5142 (100, 100, 3) uint8 5143 5144 Tutorial Examples: 5145 - `Illustration of vision transforms 5146 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 5147 """ 5148 5149 @check_random_posterize 5150 def __init__(self, bits=(8, 8)): 5151 super().__init__() 5152 self.bits = bits 5153 self.implementation = Implementation.C 5154 5155 def parse(self): 5156 bits = self.bits 5157 if isinstance(bits, int): 5158 bits = (bits, bits) 5159 return cde.RandomPosterizeOperation(bits) 5160 5161 5162class RandomResizedCrop(ImageTensorOperation, PyTensorOperation): 5163 """ 5164 This operation will crop the input image randomly, 5165 and resize the cropped image using a selected interpolation mode :class:`~.vision.Inter` . 5166 5167 Note: 5168 If the input image is more than one, then make sure that the image size is the same. 5169 5170 Args: 5171 size (Union[int, Sequence[int]]): The output size of the resized image. The size value(s) must be positive. 5172 If size is an integer, a square of size (size, size) will be cropped with this value. 5173 If size is a sequence of length 2, an image of size (height, width) will be cropped. 5174 scale (Union[list, tuple], optional): Range [min, max) of respective size of the original 5175 size to be cropped, which must be non-negative. Default: ``(0.08, 1.0)``. 5176 ratio (Union[list, tuple], optional): Range [min, max) of aspect ratio to be 5177 cropped, which must be non-negative. Default: ``(3. / 4., 4. / 3.)``. 5178 interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` . 5179 Default: ``Inter.BILINEAR``. 5180 max_attempts (int, optional): The maximum number of attempts to propose a valid 5181 crop_area. 
Default: ``10``. If exceeded, fall back to use center_crop instead. 5182 5183 Raises: 5184 TypeError: If `size` is not of type int or Sequence[int]. 5185 TypeError: If `scale` is not of type tuple or list. 5186 TypeError: If `ratio` is not of type tuple or list. 5187 TypeError: If `interpolation` is not of type :class:`~.vision.Inter` . 5188 TypeError: If `max_attempts` is not of type int. 5189 ValueError: If `size` is not positive. 5190 ValueError: If `scale` is negative. 5191 ValueError: If `ratio` is negative. 5192 ValueError: If `max_attempts` is not positive. 5193 5194 Supported Platforms: 5195 ``CPU`` 5196 5197 Examples: 5198 >>> import numpy as np 5199 >>> import mindspore.dataset as ds 5200 >>> import mindspore.dataset.vision as vision 5201 >>> from mindspore.dataset.vision import Inter 5202 >>> 5203 >>> # Use the transform in dataset pipeline mode 5204 >>> resize_crop_op = vision.RandomResizedCrop(size=(50, 75), scale=(0.25, 0.5), 5205 ... interpolation=Inter.BILINEAR) 5206 >>> transforms_list = [resize_crop_op] 5207 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 5208 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 5209 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 5210 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 5211 ... print(item["image"].shape, item["image"].dtype) 5212 ... 
break 5213 (50, 75, 3) uint8 5214 >>> 5215 >>> # Use the transform in eager mode 5216 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 5217 >>> output = vision.RandomResizedCrop(size=(50, 75), scale=(0.25, 0.5), interpolation=Inter.BILINEAR)(data) 5218 >>> print(output.shape, output.dtype) 5219 (50, 75, 3) uint8 5220 5221 Tutorial Examples: 5222 - `Illustration of vision transforms 5223 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 5224 """ 5225 5226 @check_random_resize_crop 5227 def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), 5228 interpolation=Inter.BILINEAR, max_attempts=10): 5229 super().__init__() 5230 if isinstance(size, int): 5231 size = (size, size) 5232 self.size = size 5233 self.scale = scale 5234 self.ratio = ratio 5235 self.interpolation = interpolation 5236 if interpolation in [Inter.AREA, Inter.PILCUBIC]: 5237 self.implementation = Implementation.C 5238 elif interpolation == Inter.ANTIALIAS: 5239 self.implementation = Implementation.PY 5240 self.max_attempts = max_attempts 5241 5242 def parse(self): 5243 if self.interpolation == Inter.ANTIALIAS: 5244 raise TypeError("Current Interpolation is not supported with NumPy input.") 5245 return cde.RandomResizedCropOperation(self.size, self.scale, self.ratio, Inter.to_c_type(self.interpolation), 5246 self.max_attempts) 5247 5248 def _execute_py(self, img): 5249 """ 5250 Execute method. 5251 5252 Args: 5253 img (PIL Image): Image to be randomly cropped and resized. 5254 5255 Returns: 5256 PIL Image, randomly cropped and resized image. 
5257 """ 5258 if self.interpolation in [Inter.AREA, Inter.PILCUBIC]: 5259 raise TypeError("Current Interpolation is not supported with PIL input.") 5260 return util.random_resize_crop(img, self.size, self.scale, self.ratio, 5261 Inter.to_python_type(self.interpolation), self.max_attempts) 5262 5263 5264class RandomResizedCropWithBBox(ImageTensorOperation): 5265 """ 5266 Crop the input image to a random size and aspect ratio and adjust bounding boxes accordingly. 5267 5268 Args: 5269 size (Union[int, Sequence[int]]): The size of the output image. The size value(s) must be positive. 5270 If size is an integer, a square crop of size (size, size) is returned. 5271 If size is a sequence of length 2, it should be (height, width). 5272 scale (Union[list, tuple], optional): Range (min, max) of respective size of the original 5273 size to be cropped, which must be non-negative. Default: ``(0.08, 1.0)``. 5274 ratio (Union[list, tuple], optional): Range (min, max) of aspect ratio to be 5275 cropped, which must be non-negative. Default: ``(3. / 4., 4. / 3.)``. 5276 interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` . 5277 Default: ``Inter.BILINEAR``. 5278 max_attempts (int, optional): The maximum number of attempts to propose a valid 5279 crop area. Default: ``10``. If exceeded, fall back to use center crop instead. 5280 5281 Raises: 5282 TypeError: If `size` is not of type int or Sequence[int]. 5283 TypeError: If `scale` is not of type tuple. 5284 TypeError: If `ratio` is not of type tuple. 5285 TypeError: If `interpolation` is not of type Inter. 5286 TypeError: If `max_attempts` is not of type integer. 5287 ValueError: If `size` is not positive. 5288 ValueError: If `scale` is negative. 5289 ValueError: If `ratio` is negative. 5290 ValueError: If `max_attempts` is not positive. 5291 RuntimeError: If given tensor shape is not <H, W> or <H, W, C>. 
5292 5293 Supported Platforms: 5294 ``CPU`` 5295 5296 Examples: 5297 >>> import numpy as np 5298 >>> import mindspore.dataset as ds 5299 >>> import mindspore.dataset.vision as vision 5300 >>> from mindspore.dataset.vision import Inter 5301 >>> 5302 >>> # Use the transform in dataset pipeline mode 5303 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32) 5304 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 5305 >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32)) 5306 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func], 5307 ... input_columns=["image"], 5308 ... output_columns=["image", "bbox"]) 5309 >>> bbox_op = vision.RandomResizedCropWithBBox(size=50, interpolation=Inter.NEAREST) 5310 >>> transforms_list = [bbox_op] 5311 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, 5312 ... input_columns=["image", "bbox"]) 5313 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 5314 ... print(item["image"].shape, item["image"].dtype) 5315 ... print(item["bbox"].shape, item["bbox"].dtype) 5316 ... 
break 5317 (50, 50, 3) float32 5318 (1, 4) float32 5319 >>> 5320 >>> # Use the transform in eager mode 5321 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32) 5322 >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(data.dtype)) 5323 >>> func_data, func_bboxes = func(data) 5324 >>> output = vision.RandomResizedCropWithBBox((16, 64), (0.5, 0.5), (0.5, 0.5))(func_data, func_bboxes) 5325 >>> print(output[0].shape, output[0].dtype) 5326 (16, 64, 3) float32 5327 >>> print(output[1].shape, output[1].dtype) 5328 (1, 4) float32 5329 5330 Tutorial Examples: 5331 - `Illustration of vision transforms 5332 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 5333 """ 5334 5335 @check_random_resize_crop 5336 def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), 5337 interpolation=Inter.BILINEAR, max_attempts=10): 5338 super().__init__() 5339 if isinstance(size, int): 5340 size = (size, size) 5341 self.size = size 5342 self.scale = scale 5343 self.ratio = ratio 5344 self.interpolation = interpolation 5345 self.max_attempts = max_attempts 5346 self.implementation = Implementation.C 5347 5348 def parse(self): 5349 return cde.RandomResizedCropWithBBoxOperation(self.size, self.scale, self.ratio, 5350 Inter.to_c_type(self.interpolation), self.max_attempts) 5351 5352 5353class RandomResize(ImageTensorOperation): 5354 """ 5355 Resize the input image using :class:`~.vision.Inter` , a randomly selected interpolation mode. 5356 5357 Args: 5358 size (Union[int, Sequence[int]]): The output size of the resized image. The size value(s) must be positive. 5359 If size is an integer, smaller edge of the image will be resized to this value with 5360 the same image aspect ratio. 5361 If size is a sequence of length 2, it should be (height, width). 5362 5363 Raises: 5364 TypeError: If `size` is not of type int or Sequence[int]. 5365 ValueError: If `size` is not positive. 
5366 RuntimeError: If given tensor shape is not <H, W> or <H, W, C>. 5367 5368 Supported Platforms: 5369 ``CPU`` 5370 5371 Examples: 5372 >>> import numpy as np 5373 >>> import mindspore.dataset as ds 5374 >>> import mindspore.dataset.vision as vision 5375 >>> 5376 >>> # Use the transform in dataset pipeline mode 5377 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 5378 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 5379 >>> # 1) randomly resize image, keeping aspect ratio 5380 >>> transforms_list1 = [vision.RandomResize(50)] 5381 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list1, input_columns=["image"]) 5382 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 5383 ... print(item["image"].shape, item["image"].dtype) 5384 ... break 5385 (50, 50, 3) uint8 5386 >>> # 2) randomly resize image to landscape style 5387 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 5388 >>> transforms_list2 = [vision.RandomResize((40, 60))] 5389 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list2, input_columns=["image"]) 5390 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 5391 ... print(item["image"].shape, item["image"].dtype) 5392 ... 
break 5393 (40, 60, 3) uint8 5394 >>> 5395 >>> # Use the transform in eager mode 5396 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 5397 >>> output = vision.RandomResize(10)(data) 5398 >>> print(output.shape, output.dtype) 5399 (10, 10, 3) uint8 5400 5401 Tutorial Examples: 5402 - `Illustration of vision transforms 5403 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 5404 """ 5405 5406 @check_resize 5407 def __init__(self, size): 5408 super().__init__() 5409 self.size = size 5410 self.implementation = Implementation.C 5411 5412 def parse(self): 5413 size = self.size 5414 if isinstance(size, int): 5415 size = (size,) 5416 return cde.RandomResizeOperation(size) 5417 5418 5419class RandomResizeWithBBox(ImageTensorOperation): 5420 """ 5421 Tensor operation to resize the input image 5422 using a randomly selected interpolation mode :class:`~.vision.Inter` and adjust 5423 bounding boxes accordingly. 5424 5425 Args: 5426 size (Union[int, Sequence[int]]): The output size of the resized image. The size value(s) must be positive. 5427 If size is an integer, smaller edge of the image will be resized to this value with 5428 the same image aspect ratio. 5429 If size is a sequence of length 2, it should be (height, width). 5430 5431 Raises: 5432 TypeError: If `size` is not of type int or Sequence[int]. 5433 ValueError: If `size` is not positive. 5434 RuntimeError: If given tensor shape is not <H, W> or <H, W, C>. 
5435 5436 Supported Platforms: 5437 ``CPU`` 5438 5439 Examples: 5440 >>> import copy 5441 >>> import numpy as np 5442 >>> import mindspore.dataset as ds 5443 >>> import mindspore.dataset.vision as vision 5444 >>> 5445 >>> # Use the transform in dataset pipeline mode 5446 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32) 5447 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 5448 >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32)) 5449 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func], 5450 ... input_columns=["image"], 5451 ... output_columns=["image", "bbox"]) 5452 >>> numpy_slices_dataset2 = copy.deepcopy(numpy_slices_dataset) 5453 >>> 5454 >>> # 1) randomly resize image with bounding boxes, keeping aspect ratio 5455 >>> transforms_list1 = [vision.RandomResizeWithBBox(60)] 5456 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list1, 5457 ... input_columns=["image", "bbox"]) 5458 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 5459 ... print(item["image"].shape, item["image"].dtype) 5460 ... print(item["bbox"].shape, item["bbox"].dtype) 5461 ... break 5462 (60, 60, 3) float32 5463 (1, 4) float32 5464 >>> 5465 >>> # 2) randomly resize image with bounding boxes to portrait style 5466 >>> transforms_list2 = [vision.RandomResizeWithBBox((80, 60))] 5467 >>> numpy_slices_dataset2 = numpy_slices_dataset2.map(operations=transforms_list2, 5468 ... input_columns=["image", "bbox"]) 5469 >>> for item in numpy_slices_dataset2.create_dict_iterator(num_epochs=1, output_numpy=True): 5470 ... print(item["image"].shape, item["image"].dtype) 5471 ... print(item["bbox"].shape, item["bbox"].dtype) 5472 ... 
break 5473 (80, 60, 3) float32 5474 (1, 4) float32 5475 >>> 5476 >>> # Use the transform in eager mode 5477 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32) 5478 >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(data.dtype)) 5479 >>> func_data, func_bboxes = func(data) 5480 >>> output = vision.RandomResizeWithBBox(64)(func_data, func_bboxes) 5481 >>> print(output[0].shape, output[0].dtype) 5482 (64, 64, 3) float32 5483 >>> print(output[1].shape, output[1].dtype) 5484 (1, 4) float32 5485 5486 Tutorial Examples: 5487 - `Illustration of vision transforms 5488 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 5489 """ 5490 5491 @check_resize 5492 def __init__(self, size): 5493 super().__init__() 5494 self.size = size 5495 self.implementation = Implementation.C 5496 5497 def parse(self): 5498 size = self.size 5499 if isinstance(size, int): 5500 size = (size,) 5501 return cde.RandomResizeWithBBoxOperation(size) 5502 5503 5504class RandomRotation(ImageTensorOperation, PyTensorOperation): 5505 """ 5506 Rotate the input image randomly within a specified range of degrees. 5507 5508 Args: 5509 degrees (Union[int, float, sequence]): Range of random rotation degrees. 5510 If `degrees` is a number, the range will be converted to (-degrees, degrees). 5511 If `degrees` is a sequence, it should be (min, max). 5512 resample (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` . 5513 Default: ``Inter.NEAREST``. 5514 expand (bool, optional): Optional expansion flag. Default: ``False``. If set to ``True``, 5515 expand the output image to make it large enough to hold the entire rotated image. 5516 If set to ``False`` or omitted, make the output image the same size as the input. 5517 Note that the expand flag assumes rotation around the center and no translation. 5518 center (tuple, optional): Optional center of rotation (a 2-tuple). Default: ``None``. 
5519 Origin is the top left corner. ``None`` sets to the center of the image. 5520 fill_value (Union[int, tuple[int]], optional): Optional fill color for the area outside the rotated image. 5521 If it is a 3-tuple, it is used to fill R, G, B channels respectively. 5522 If it is an integer, it is used for all RGB channels. 5523 The fill_value values must be in range [0, 255]. Default: ``0``. 5524 5525 Raises: 5526 TypeError: If `degrees` is not of type integer, float or sequence. 5527 TypeError: If `resample` is not of type Inter. 5528 TypeError: If `expand` is not of type boolean. 5529 TypeError: If `center` is not of type tuple. 5530 TypeError: If `fill_value` is not of type int or tuple[int]. 5531 ValueError: If `fill_value` is not in range [0, 255]. 5532 RuntimeError: If given tensor shape is not <H, W> or <H, W, C>. 5533 5534 Supported Platforms: 5535 ``CPU`` 5536 5537 Examples: 5538 >>> import numpy as np 5539 >>> import mindspore.dataset as ds 5540 >>> import mindspore.dataset.vision as vision 5541 >>> from mindspore.dataset.vision import Inter 5542 >>> 5543 >>> # Use the transform in dataset pipeline mode 5544 >>> seed = ds.config.get_seed() 5545 >>> ds.config.set_seed(12345) 5546 >>> transforms_list = [vision.RandomRotation(degrees=5.0, resample=Inter.NEAREST, expand=True)] 5547 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 5548 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 5549 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 5550 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 5551 ... print(item["image"].shape, item["image"].dtype) 5552 ... 
break 5553 (107, 107, 3) uint8 5554 >>> 5555 >>> # Use the transform in eager mode 5556 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 5557 >>> output = vision.RandomRotation(degrees=90, resample=Inter.NEAREST, expand=True)(data) 5558 >>> print(output.shape, output.dtype) 5559 (119, 119, 3) uint8 5560 >>> ds.config.set_seed(seed) 5561 5562 Tutorial Examples: 5563 - `Illustration of vision transforms 5564 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 5565 """ 5566 5567 @check_random_rotation 5568 def __init__(self, degrees, resample=Inter.NEAREST, expand=False, center=None, fill_value=0): 5569 super().__init__() 5570 if isinstance(degrees, (int, float)): 5571 degrees = degrees % 360 5572 degrees = [-degrees, degrees] 5573 elif isinstance(degrees, (list, tuple)): 5574 if degrees[1] - degrees[0] >= 360: 5575 degrees = [-180, 180] 5576 else: 5577 degrees = [degrees[0] % 360, degrees[1] % 360] 5578 if degrees[0] > degrees[1]: 5579 degrees[1] += 360 5580 if isinstance(fill_value, int): 5581 fill_value = tuple([fill_value] * 3) 5582 self.degrees = degrees 5583 self.resample = resample 5584 if resample in [Inter.AREA, Inter.PILCUBIC]: 5585 self.implementation = Implementation.C 5586 elif resample == Inter.ANTIALIAS: 5587 self.implementation = Implementation.PY 5588 self.expand = expand 5589 self.py_center = center 5590 self.c_center = center 5591 if center is None: 5592 self.c_center = () 5593 self.fill_value = fill_value 5594 5595 def parse(self): 5596 if self.resample == Inter.ANTIALIAS: 5597 raise TypeError("Current Interpolation is not supported with NumPy input.") 5598 return cde.RandomRotationOperation(self.degrees, Inter.to_c_type(self.resample), self.expand, self.c_center, 5599 self.fill_value) 5600 5601 def _execute_py(self, img): 5602 """ 5603 Execute method. 5604 5605 Args: 5606 img (PIL Image): Image to be randomly rotated. 5607 5608 Returns: 5609 PIL Image, randomly rotated image. 
class RandomSelectSubpolicy(ImageTensorOperation):
    """
    Pick one sub-policy at random from a list of sub-policies and apply it to the input image.

    Args:
        policy (list[list[tuple[TensorOperation, float]]]): The sub-policies to choose from.
            Each sub-policy is a list of tuple[operation, prob], in which operation is a data processing
            operation and prob is the probability of applying that operation. Every prob value must be in
            range [0.0, 1.0]. After a sub-policy has been selected, each of its operations will be applied
            in sequence according to its probability.

    Raises:
        TypeError: If `policy` contains invalid data processing operations.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> policy = [[(vision.RandomRotation((45, 45)), 0.5),
        ...            (vision.RandomVerticalFlip(), 1),
        ...            (vision.RandomColorAdjust(), 0.8)],
        ...           [(vision.RandomRotation((90, 90)), 1),
        ...            (vision.RandomColorAdjust(), 0.2)]]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=vision.RandomSelectSubpolicy(policy),
        ...                                                 input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> policy = [[(vision.RandomRotation((90, 90)), 1), (vision.RandomColorAdjust(), 1)]]
        >>> output = vision.RandomSelectSubpolicy(policy)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_select_subpolicy_op
    def __init__(self, policy):
        super().__init__()
        # Keep the constructor argument as an attribute so the operation can be saved and reloaded.
        self.policy = policy
        self.implementation = Implementation.C

    def parse(self):
        """
        Build the C++ operation from the stored policy.

        Returns:
            The object created by `cde.RandomSelectSubpolicyOperation` for the converted policy.
        """

        def _convert(operation):
            # Entries exposing a parse() method are converted through it; anything else is forwarded as is.
            if operation and getattr(operation, 'parse', None):
                return operation.parse()
            return operation

        converted = [[(_convert(entry[0]), entry[1]) for entry in sub_policy] for sub_policy in self.policy]
        return cde.RandomSelectSubpolicyOperation(converted)
class RandomSharpness(ImageTensorOperation, PyTensorOperation):
    """
    Change the sharpness of the input image by a fixed or randomly chosen degree. A degree of 0.0 yields
    a blurred image, a degree of 1.0 yields the original image, and a degree of 2.0 yields a sharpened image.

    Args:
        degrees (Union[list, tuple], optional): Range from which the sharpness adjustment degree is drawn.
            The values must be non-negative and given in (min, max) format. If min=max, the operation
            uses a single fixed magnitude. Default: ``(0.1, 1.9)``.

    Raises:
        TypeError : If `degrees` is not a list or a tuple.
        ValueError: If `degrees` is negative.
        ValueError: If `degrees` is in (max, min) format instead of (min, max).

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomSharpness(degrees=(0.2, 1.9))]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomSharpness(degrees=(0, 0.6))(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_positive_degrees
    def __init__(self, degrees=(0.1, 1.9)):
        super().__init__()
        # Stored unchanged; both the C++ and the Python execution paths read it from here.
        self.degrees = degrees

    def parse(self):
        """
        Build the C++ operation.

        Returns:
            The object created by `cde.RandomSharpnessOperation` for the stored degrees.
        """
        degree_range = self.degrees
        return cde.RandomSharpnessOperation(degree_range)

    def _execute_py(self, img):
        """
        Run the Python implementation.

        Args:
            img (PIL Image): Image whose sharpness is to be adjusted.

        Returns:
            PIL Image, image with adjusted sharpness.
        """
        degree_range = self.degrees
        return util.random_sharpness(img, degree_range)
class RandomSolarize(ImageTensorOperation):
    """
    Randomly choose a subrange inside the given threshold range and replace every pixel value that lies
    in this subrange with (255 - pixel).

    Args:
        threshold (tuple, optional): Range from which the random solarize threshold is drawn.
            Default: ``(0, 255)``.
            The threshold must always be given in (min, max) format, where min and max are integers
            in the range [0, 255], and min <= max. Pixel values belonging to the [min, max] range
            will be inverted.
            If min=max, then all pixel values greater than or equal to min(max) are inverted.

    Raises:
        TypeError : If `threshold` is not of type tuple.
        ValueError: If `threshold` is not in range of [0, 255].

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomSolarize(threshold=(10,100))]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RandomSolarize(threshold=(1, 10))(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_random_solarize
    def __init__(self, threshold=(0, 255)):
        super().__init__()
        # Keep the constructor argument as an attribute; parse() forwards it to the C++ layer.
        self.threshold = threshold
        self.implementation = Implementation.C

    def parse(self):
        """
        Build the C++ operation.

        Returns:
            The object created by `cde.RandomSolarizeOperation` for the stored threshold.
        """
        threshold_range = self.threshold
        return cde.RandomSolarizeOperation(threshold_range)
class RandomVerticalFlip(ImageTensorOperation, PyTensorOperation):
    """
    Flip the input image vertically, at random, with the given probability.

    Args:
        prob (float, optional): Probability that the image is flipped. It
            must be in range of [0.0, 1.0]. Default: ``0.5``.

    Raises:
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.RandomVerticalFlip(0.25)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array([[0, 1, 2, 3, 4, 5]], dtype=np.uint8).reshape((2, 3))
        >>> output = vision.RandomVerticalFlip(1.0)(data)
        >>> print(output.shape, output.dtype)
        (2, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_prob
    def __init__(self, prob=0.5):
        super().__init__()
        # Stored unchanged; both the C++ and the Python execution paths read it from here.
        self.prob = prob

    def parse(self):
        """
        Build the C++ operation.

        Returns:
            The object created by `cde.RandomVerticalFlipOperation` for the stored probability.
        """
        flip_prob = self.prob
        return cde.RandomVerticalFlipOperation(flip_prob)

    def _execute_py(self, img):
        """
        Run the Python implementation.

        Args:
            img (PIL Image): Image that may be flipped vertically.

        Returns:
            PIL Image, randomly vertically flipped image.
        """
        flip_prob = self.prob
        return util.random_vertical_flip(img, flip_prob)
class RandomVerticalFlipWithBBox(ImageTensorOperation):
    """
    Randomly flip the input image vertically with the given probability and adjust the bounding boxes
    accordingly.

    Args:
        prob (float, optional): Probability that the image is flipped. It
            must be in range of [0.0, 1.0]. Default: ``0.5``.

    Raises:
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range [0.0, 1.0].
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32))
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func],
        ...                                                 input_columns=["image"],
        ...                                                 output_columns=["image", "bbox"])
        >>> transforms_list = [vision.RandomVerticalFlipWithBBox(0.20)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image", "bbox"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["bbox"].shape, item["bbox"].dtype)
        ...     break
        (100, 100, 3) float32
        (1, 4) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(data.dtype))
        >>> func_data, func_bboxes = func(data)
        >>> output = vision.RandomVerticalFlipWithBBox(1)(func_data, func_bboxes)
        >>> print(output[0].shape, output[0].dtype)
        (100, 100, 3) float32
        >>> print(output[1].shape, output[1].dtype)
        (1, 4) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_prob
    def __init__(self, prob=0.5):
        super().__init__()
        # Keep the constructor argument as an attribute; parse() forwards it to the C++ layer.
        self.prob = prob
        self.implementation = Implementation.C

    def parse(self):
        """
        Build the C++ operation.

        Returns:
            The object created by `cde.RandomVerticalFlipWithBBoxOperation` for the stored probability.
        """
        flip_prob = self.prob
        return cde.RandomVerticalFlipWithBBoxOperation(flip_prob)
class Rescale(ImageTensorOperation):
    """
    Rescale the input image with the given rescale factor and shift, computing
    output = image * rescale + shift.

    Note:
        This operation is executed on the CPU by default, but it is also supported
        to be executed on the GPU or Ascend via heterogeneous acceleration.

    Args:
        rescale (float): Rescale factor.
        shift (float): Shift factor.

    Raises:
        TypeError: If `rescale` is not of type float.
        TypeError: If `shift` is not of type float.

    Supported Platforms:
        ``CPU`` ``GPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> transforms_list = [vision.Rescale(1.0 / 255.0, -1.0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 100, 3) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.Rescale(1.0 / 255.0, -1.0)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_rescale
    def __init__(self, rescale, shift):
        super().__init__()
        # Keep both constructor arguments as attributes; parse() forwards them to the C++ layer.
        self.rescale = rescale
        self.shift = shift
        self.implementation = Implementation.C

    def parse(self):
        """
        Build the C++ operation.

        Returns:
            The object created by `cde.RescaleOperation` for the stored rescale factor and shift.
        """
        factor, offset = self.rescale, self.shift
        return cde.RescaleOperation(factor, offset)
class Resize(ImageTensorOperation, PyTensorOperation):
    """
    Resize the input image to the given size with a given interpolation mode :class:`~.vision.Inter` .

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        size (Union[int, Sequence[int]]): The output size of the resized image. The size value(s) must be positive.
            If size is an integer, the smaller edge of the image will be resized to this value with
            the same image aspect ratio.
            If size is a sequence of length 2, it should be (height, width).
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.LINEAR``.

    Raises:
        TypeError: If `size` is not of type int or Sequence[int].
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        ValueError: If `size` is not positive.
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> resize_op = vision.Resize([100, 75], Inter.BICUBIC)
        >>> transforms_list = [resize_op]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 75, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.Resize([5, 5], Inter.BICUBIC)(data)
        >>> print(output.shape, output.dtype)
        (5, 5, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_resize_interpolation
    def __init__(self, size, interpolation=Inter.LINEAR):
        super().__init__()
        # The Python path receives the size exactly as given; the C++ path receives an int wrapped in a 1-tuple.
        self.py_size = size
        if isinstance(size, int):
            size = (size,)
        self.c_size = size
        self.interpolation = interpolation
        # AREA and PILCUBIC are rejected by _execute_py, ANTIALIAS is rejected by parse, so pin the
        # implementation for those modes. For every other mode the attribute is not assigned here.
        if interpolation in [Inter.AREA, Inter.PILCUBIC]:
            self.implementation = Implementation.C
        elif interpolation == Inter.ANTIALIAS:
            self.implementation = Implementation.PY
        self.random = False

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input/output shape should be limited from [4, 6] to [32768, 32768].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].
            RuntimeError: If `device_target` is ``Ascend`` and the interpolation mode is not one of
                BILINEAR, CUBIC and NEAREST. The previously set device is kept in this case.

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>> from mindspore.dataset.vision import Inter
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> resize_op = vision.Resize([100, 75], Inter.BICUBIC).device("Ascend")
            >>> transforms_list = [resize_op]
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 75, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.Resize([25, 25], Inter.BICUBIC).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (25, 25, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Validate before committing the new target: a rejected request must not leave the operation
        # bound to Ascend together with an interpolation mode that Ascend does not support.
        if device_target == "Ascend" and self.interpolation not in [Inter.BILINEAR, Inter.CUBIC, Inter.NEAREST]:
            raise RuntimeError("Invalid interpolation mode, only support BILINEAR, CUBIC and NEAREST.")
        self.device_target = device_target
        return self

    def parse(self):
        """
        Build the C++ operation.

        Returns:
            The object created by `cde.ResizeOperation` for the stored size, interpolation and device target.

        Raises:
            TypeError: If the interpolation mode is ``Inter.ANTIALIAS``.
        """
        if self.interpolation == Inter.ANTIALIAS:
            raise TypeError("The current InterpolationMode is not supported with NumPy input.")
        return cde.ResizeOperation(self.c_size, Inter.to_c_type(self.interpolation), self.device_target)

    def _execute_py(self, img):
        """
        Execute method.

        Args:
            img (PIL Image): Image to be resized.

        Returns:
            PIL Image, resized image.

        Raises:
            TypeError: If the interpolation mode is ``Inter.AREA`` or ``Inter.PILCUBIC``.
        """
        if self.interpolation in [Inter.AREA, Inter.PILCUBIC]:
            raise TypeError("Current Interpolation is not supported with PIL input.")
        return util.resize(img, self.py_size, Inter.to_python_type(self.interpolation))
class ResizedCrop(ImageTensorOperation):
    """
    Crop the input image at a specific region and resize it to desired size.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        top (int): ``top`` ordinate of the upper left corner of the crop region.
        left (int): ``left`` ordinate of the upper left corner of the crop region.
        height (int): Height of the crop region.
        width (int): Width of the crop region.
        size (Union[int, Sequence[int, int]]): The size of the output image.
            If int is provided, it is expanded to (size, size), i.e. the same value is used
            for both entries of the output size.
            If Sequence[int, int] is provided, it should be (height, width).
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.BILINEAR``.

    Raises:
        TypeError: If `top` is not of type int.
        ValueError: If `top` is negative.
        TypeError: If `left` is not of type int.
        ValueError: If `left` is negative.
        TypeError: If `height` is not of type int.
        ValueError: If `height` is not positive.
        TypeError: If `width` is not of type int.
        ValueError: If `width` is not positive.
        TypeError: If `size` is not of type int or Sequence[int, int].
        ValueError: If `size` is not positive.
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        RuntimeError: If shape of the input image is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> transforms_list = [vision.ResizedCrop(0, 0, 64, 64, (100, 75), Inter.BILINEAR)]
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (100, 75, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.ResizedCrop(0, 0, 1, 1, (5, 5), Inter.BILINEAR)(data)
        >>> print(output.shape, output.dtype)
        (5, 5, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_resized_crop
    def __init__(self, top, left, height, width, size, interpolation=Inter.BILINEAR):
        super().__init__()
        # A single int is expanded to a pair so that parse() always forwards a 2-element size.
        if isinstance(size, int):
            size = (size, size)

        self.top = top
        self.left = left
        self.height = height
        self.width = width
        self.size = size
        self.interpolation = interpolation
        self.implementation = Implementation.C

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type supports `uint8` and `float32`,
          input channel supports 1 and 3. The input data has a height limit of [4, 32768]
          and a width limit of [6, 32768].

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].
            RuntimeError: If `device_target` is ``Ascend`` and the interpolation mode is not one of
                BILINEAR, CUBIC and NEAREST. The previously set device is kept in this case.

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>> from mindspore.dataset.vision import Inter
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> resize_crop_op = vision.ResizedCrop(0, 0, 64, 64, (100, 75)).device("Ascend")
            >>> transforms_list = [resize_crop_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (100, 75, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
            >>> output = vision.ResizedCrop(0, 0, 64, 64, (32, 16), Inter.BILINEAR).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (32, 16, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Validate before committing the new target: a rejected request must not leave the operation
        # bound to Ascend together with an interpolation mode that Ascend does not support.
        if device_target == "Ascend" and self.interpolation not in [Inter.BILINEAR, Inter.CUBIC, Inter.NEAREST]:
            raise RuntimeError("Invalid interpolation mode, only support BILINEAR, CUBIC and NEAREST.")
        self.device_target = device_target
        return self

    def parse(self):
        """
        Build the C++ operation.

        Returns:
            The object created by `cde.ResizedCropOperation` for the stored crop region, output size,
            interpolation and device target.
        """
        return cde.ResizedCropOperation(self.top, self.left, self.height,
                                        self.width, self.size, Inter.to_c_type(self.interpolation), self.device_target)
class ResizeWithBBox(ImageTensorOperation):
    """
    Resize the input image to the given size and update the bounding boxes to match.

    Args:
        size (Union[int, Sequence[int]]): The output size of the resized image.
            If size is an integer, the smaller edge of the image will be resized to this value while
            the image aspect ratio stays the same.
            If size is a sequence of length 2, it should be (height, width).
        interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.LINEAR``.

    Raises:
        TypeError: If `size` is not of type int or Sequence[int].
        TypeError: If `interpolation` is not of type :class:`~.vision.Inter` .
        ValueError: If `size` is not positive.
        RuntimeError: If given tensor shape is not <H, W> or <H, W, C>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(np.float32))
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[func],
        ...                                                 input_columns=["image"],
        ...                                                 output_columns=["image", "bbox"])
        >>> bbox_op = vision.ResizeWithBBox(50, Inter.NEAREST)
        >>> transforms_list = [bbox_op]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image", "bbox"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     print(item["bbox"].shape, item["bbox"].dtype)
        ...     break
        (50, 50, 3) float32
        (1, 4) float32
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.float32)
        >>> func = lambda img: (data, np.array([[0, 0, data.shape[1], data.shape[0]]]).astype(data.dtype))
        >>> func_data, func_bboxes = func(data)
        >>> output = vision.ResizeWithBBox(100)(func_data, func_bboxes)
        >>> print(output[0].shape, output[0].dtype)
        (100, 100, 3) float32
        >>> print(output[1].shape, output[1].dtype)
        (1, 4) float32

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_resize_interpolation
    def __init__(self, size, interpolation=Inter.LINEAR):
        super().__init__()
        # The size is stored exactly as given; parse() performs the int-to-tuple conversion.
        self.size = size
        self.interpolation = interpolation
        self.implementation = Implementation.C

    def parse(self):
        """
        Build the C++ operation.

        Returns:
            The object created by `cde.ResizeWithBBoxOperation` for the stored size and interpolation.
        """
        # An int size is handed to the C++ layer as a 1-tuple; sequences are forwarded untouched.
        c_size = (self.size,) if isinstance(self.size, int) else self.size
        c_interpolation = Inter.to_c_type(self.interpolation)
        return cde.ResizeWithBBoxOperation(c_size, c_interpolation)
class RgbToHsv(PyTensorOperation):
    """
    Convert input images given as numpy.ndarray from RGB to HSV.

    Args:
        is_hwc (bool): ``True`` means that the input image is in shape of <H, W, C> or <N, H, W, C>.
            Otherwise, it is in shape of <C, H, W> or <N, C, H, W>. Default: ``False``.

    Raises:
        TypeError: If `is_hwc` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.transforms import Compose
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> transforms_list = Compose([vision.CenterCrop(20),
        ...                            vision.ToTensor(),
        ...                            vision.RgbToHsv()])
        >>> # apply the transform to dataset through map function
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image")
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (3, 20, 20) float64
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.RgbToHsv(is_hwc=True)(data)
        >>> print(output.shape, output.dtype)
        (100, 100, 3) float64

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_rgb_to_hsv
    def __init__(self, is_hwc=False):
        super().__init__()
        # Keep the constructor argument as an attribute; _execute_py forwards it to the helper.
        self.is_hwc = is_hwc
        self.random = False
        # Only a Python execution path is defined for this operation.
        self.implementation = Implementation.PY

    def _execute_py(self, rgb_imgs):
        """
        Run the Python implementation.

        Args:
            rgb_imgs (numpy.ndarray): RGB images to be converted.

        Returns:
            numpy.ndarray, converted HSV images.
        """
        channel_last = self.is_hwc
        return util.rgb_to_hsvs(rgb_imgs, channel_last)
class Rotate(ImageTensorOperation):
    """
    Rotate the input image by specified degrees.

    Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method.

    Args:
        degrees (Union[int, float]): Rotation degrees.
        resample (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` .
            Default: ``Inter.NEAREST``.
        expand (bool, optional): Optional expansion flag. Default: ``False``. If set to ``True``,
            expand the output image to make it large enough to hold the entire rotated image.
            If set to ``False`` or omitted, make the output image the same size as the input.
            Note that the expand flag assumes rotation around the center and no translation.
        center (tuple, optional): Optional center of rotation (a 2-tuple). Default: ``None``.
            Origin is the top left corner. ``None`` sets to the center of the image.
        fill_value (Union[int, tuple[int]], optional): Optional fill color for the area outside the rotated image.
            If it is a 3-tuple, it is used to fill R, G, B channels respectively.
            If it is an integer, it is used for all RGB channels.
            The fill_value values must be in range [0, 255]. Default: ``0``.

    Raises:
        TypeError: If `degrees` is not of type integer, float or sequence.
        TypeError: If `resample` is not of type :class:`~.vision.Inter` .
        TypeError: If `expand` is not of type bool.
        TypeError: If `center` is not of type tuple.
        TypeError: If `fill_value` is not of type int or tuple[int].
        ValueError: If `fill_value` is not in range [0, 255].
        RuntimeError: If given tensor shape is not <H, W> or <..., H, W, C>.

    Supported Platforms:
        ``CPU`` ``Ascend``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import Inter
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> transforms_list = [vision.Rotate(degrees=30.0, resample=Inter.NEAREST, expand=True)]
        >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["image"].shape, item["image"].dtype)
        ...     break
        (137, 137, 3) uint8
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8)
        >>> output = vision.Rotate(degrees=30.0, resample=Inter.NEAREST, expand=True)(data)
        >>> print(output.shape, output.dtype)
        (137, 137, 3) uint8

    Tutorial Examples:
        - `Illustration of vision transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
    """

    @check_rotate
    def __init__(self, degrees, resample=Inter.NEAREST, expand=False, center=None, fill_value=0):
        super().__init__()
        # Numeric angles are reduced modulo 360 before being stored.
        if isinstance(degrees, (int, float)):
            degrees = degrees % 360
        # A missing center is forwarded to the C++ layer as an empty tuple.
        if center is None:
            center = ()
        # A single int fill value is replicated into a 3-tuple.
        if isinstance(fill_value, int):
            fill_value = tuple([fill_value] * 3)
        self.degrees = degrees
        self.resample = resample
        self.expand = expand
        self.center = center
        self.fill_value = fill_value
        self.implementation = Implementation.C

    @check_device_target
    def device(self, device_target="CPU"):
        """
        Set the device for the current operator execution.

        - When the device is Ascend, input type supports `uint8`/`float32`, input channel supports 1 and 3.
          The input data has a height limit of [4, 8192] and a width limit of [6, 4096].
        - When the device is Ascend and `expand` is True, `center` does not take effect
          and the image is rotated according to the center of the image.

        Args:
            device_target (str, optional): The operator will be executed on this device. Currently supports
                ``CPU`` and ``Ascend`` . Default: ``CPU`` .

        Raises:
            TypeError: If `device_target` is not of type str.
            ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend'].
            RuntimeError: If `device_target` is ``Ascend`` and the resample mode is not one of
                BILINEAR and NEAREST. The previously set device is kept in this case.

        Supported Platforms:
            ``CPU`` ``Ascend``

        Examples:
            >>> import numpy as np
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.vision as vision
            >>> from mindspore.dataset.vision import Inter
            >>>
            >>> # Use the transform in dataset pipeline mode
            >>> data = np.random.randint(0, 255, size=(1, 300, 400, 3)).astype(np.uint8)
            >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"])
            >>> rotate_op = vision.Rotate(degrees=90.0, resample=Inter.NEAREST, expand=True).device("Ascend")
            >>> transforms_list = [rotate_op]
            >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"])
            >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            ...     print(item["image"].shape, item["image"].dtype)
            ...     break
            (400, 300, 3) uint8
            >>>
            >>> # Use the transform in eager mode
            >>> data = np.random.randint(0, 255, size=(300, 400, 3)).astype(np.uint8)
            >>> output = vision.Rotate(degrees=90.0, resample=Inter.NEAREST, expand=True).device("Ascend")(data)
            >>> print(output.shape, output.dtype)
            (400, 300, 3) uint8

        Tutorial Examples:
            - `Illustration of vision transforms
              <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_
        """
        # Validate before committing the new target: a rejected request must not leave the operation
        # bound to Ascend together with a resample mode that Ascend does not support.
        if device_target == "Ascend" and self.resample not in [Inter.BILINEAR, Inter.NEAREST]:
            raise RuntimeError("Invalid interpolation mode, only support BILINEAR and NEAREST.")
        self.device_target = device_target
        return self

    def parse(self):
        """
        Build the C++ operation.

        Returns:
            The object created by `cde.RotateOperation` for the stored rotation parameters and device target.
        """
        return cde.RotateOperation(self.degrees, Inter.to_c_type(self.resample), self.expand, self.center,
                                   self.fill_value, self.device_target)
6530 6531 The usage scenario is suitable to large height and width Tensor. The Tensor 6532 will keep the same if set both num_height and num_width to 1. And the 6533 number of output tensors is equal to :math:`num\_height * num\_width`. 6534 6535 Args: 6536 num_height (int, optional): The number of patches in vertical direction, which must be positive. Default: ``1``. 6537 num_width (int, optional): The number of patches in horizontal direction, which must be positive. 6538 Default: ``1``. 6539 slice_mode (SliceMode, optional): A mode represents pad or drop. Default: ``SliceMode.PAD``. 6540 It can be ``SliceMode.PAD``, ``SliceMode.DROP``. 6541 fill_value (int, optional): The border width in number of pixels in 6542 right and bottom direction if slice_mode is set to be SliceMode.PAD. 6543 The `fill_value` must be in range [0, 255]. Default: ``0``. 6544 6545 Raises: 6546 TypeError: If `num_height` is not of type integer. 6547 TypeError: If `num_width` is not of type integer. 6548 TypeError: If `slice_mode` is not of type Inter. 6549 TypeError: If `fill_value` is not of type integer. 6550 ValueError: If `num_height` is not positive. 6551 ValueError: If `num_width` is not positive. 6552 ValueError: If `fill_value` is not in range [0, 255]. 6553 RuntimeError: If given tensor shape is not <H, W> or <H, W, C>. 
6554 6555 Supported Platforms: 6556 ``CPU`` 6557 6558 Examples: 6559 >>> import numpy as np 6560 >>> import mindspore.dataset as ds 6561 >>> import mindspore.dataset.vision as vision 6562 >>> 6563 >>> # Use the transform in dataset pipeline mode 6564 >>> # default padding mode 6565 >>> num_h, num_w = (1, 4) 6566 >>> slice_patches_op = vision.SlicePatches(num_h, num_w) 6567 >>> transforms_list = [slice_patches_op] 6568 >>> cols = ['img' + str(x) for x in range(num_h*num_w)] 6569 >>> 6570 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 6571 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 6572 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, 6573 ... input_columns=["image"], 6574 ... output_columns=cols) 6575 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 6576 ... print(len(item), item["img0"].shape, item["img0"].dtype) 6577 ... break 6578 4 (100, 25, 3) uint8 6579 >>> 6580 >>> # Use the transform in eager mode 6581 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 6582 >>> output = vision.SlicePatches(1, 2)(data) 6583 >>> print(np.array(output).shape, np.array(output).dtype) 6584 (2, 100, 50, 3) uint8 6585 6586 Tutorial Examples: 6587 - `Illustration of vision transforms 6588 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 6589 """ 6590 6591 @check_slice_patches 6592 def __init__(self, num_height=1, num_width=1, slice_mode=SliceMode.PAD, fill_value=0): 6593 super().__init__() 6594 self.num_height = num_height 6595 self.num_width = num_width 6596 self.slice_mode = slice_mode 6597 self.fill_value = fill_value 6598 self.implementation = Implementation.C 6599 6600 def parse(self): 6601 return cde.SlicePatchesOperation(self.num_height, self.num_width, 6602 SliceMode.to_c_type(self.slice_mode), self.fill_value) 6603 6604 6605class Solarize(ImageTensorOperation): 6606 """ 6607 Solarize 
the image by inverting all pixel values within the threshold. 6608 6609 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 6610 6611 Args: 6612 threshold (Union[float, Sequence[float, float]]): Range of solarize threshold, should always 6613 be in (min, max) format, where min and max are integers in range of [0, 255], and min <= max. 6614 The pixel values belonging to the [min, max] range will be inverted. 6615 If a single value is provided or min=max, then invert all pixel values greater than or equal min(max). 6616 6617 Raises: 6618 TypeError: If `threshold` is not of type float or Sequence[float, float]. 6619 ValueError: If `threshold` is not in range of [0, 255]. 6620 6621 Supported Platforms: 6622 ``CPU`` ``Ascend`` 6623 6624 Examples: 6625 >>> import numpy as np 6626 >>> import mindspore.dataset as ds 6627 >>> import mindspore.dataset.vision as vision 6628 >>> 6629 >>> # Use the transform in dataset pipeline mode 6630 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 6631 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 6632 >>> transforms_list = [vision.Solarize(threshold=(10, 100))] 6633 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 6634 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 6635 ... print(item["image"].shape, item["image"].dtype) 6636 ... 
break 6637 (100, 100, 3) uint8 6638 >>> 6639 >>> # Use the transform in eager mode 6640 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 6641 >>> output = vision.Solarize(threshold=(1, 10))(data) 6642 >>> print(output.shape, output.dtype) 6643 (100, 100, 3) uint8 6644 6645 Tutorial Examples: 6646 - `Illustration of vision transforms 6647 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 6648 """ 6649 6650 @check_solarize 6651 def __init__(self, threshold): 6652 super().__init__() 6653 if isinstance(threshold, (float, int)): 6654 threshold = (threshold, threshold) 6655 self.threshold = threshold 6656 self.implementation = Implementation.C 6657 6658 @check_device_target 6659 def device(self, device_target="CPU"): 6660 """ 6661 Set the device for the current operator execution. 6662 6663 - When the device is Ascend, input type only supports `uint8` , input channel supports 1 and 3. 6664 The input data has a height limit of [4, 8192] and a width limit of [6, 4096]. 6665 6666 Args: 6667 device_target (str, optional): The operator will be executed on this device. Currently supports 6668 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 6669 6670 Raises: 6671 TypeError: If `device_target` is not of type str. 6672 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 
6673 6674 Supported Platforms: 6675 ``CPU`` ``Ascend`` 6676 6677 Examples: 6678 >>> import numpy as np 6679 >>> import mindspore.dataset as ds 6680 >>> import mindspore.dataset.vision as vision 6681 >>> 6682 >>> # Use the transform in dataset pipeline mode 6683 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 6684 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 6685 >>> solarize_op = vision.Solarize(threshold=(10, 100)).device("Ascend") 6686 >>> transforms_list = [solarize_op] 6687 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 6688 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 6689 ... print(item["image"].shape, item["image"].dtype) 6690 ... break 6691 (100, 100, 3) uint8 6692 >>> 6693 >>> # Use the transform in eager mode 6694 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 6695 >>> output = vision.Solarize(threshold=(10, 100)).device("Ascend")(data) 6696 >>> print(output.shape, output.dtype) 6697 (100, 100, 3) uint8 6698 6699 Tutorial Examples: 6700 - `Illustration of vision transforms 6701 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 6702 """ 6703 self.device_target = device_target 6704 return self 6705 6706 def parse(self): 6707 return cde.SolarizeOperation(self.threshold, self.device_target) 6708 6709 6710class TenCrop(PyTensorOperation): 6711 """ 6712 Crop the given image into one central crop and four corners with the flipped version of these. 6713 6714 Args: 6715 size (Union[int, Sequence[int, int]]): The size of the cropped image. 6716 If a single integer is provided, a square of size (size, size) will be cropped with this value. 6717 If a sequence of length 2 is provided, an image of size (height, width) will be cropped. 6718 use_vertical_flip (bool, optional): If ``True``, flip the images vertically. 
Otherwise, flip them 6719 horizontally. Default: ``False``. 6720 6721 Raises: 6722 TypeError: If `size` is not of type integer or sequence of integer. 6723 TypeError: If `use_vertical_flip` is not of type boolean. 6724 ValueError: If `size` is not positive. 6725 6726 Supported Platforms: 6727 ``CPU`` 6728 6729 Examples: 6730 >>> import os 6731 >>> import numpy as np 6732 >>> from PIL import Image, ImageDraw 6733 >>> import mindspore.dataset as ds 6734 >>> import mindspore.dataset.vision as vision 6735 >>> from mindspore.dataset.transforms import Compose 6736 >>> 6737 >>> # Use the transform in dataset pipeline mode 6738 >>> class MyDataset: 6739 ... def __init__(self): 6740 ... self.data = [] 6741 ... img = Image.new("RGB", (300, 300), (255, 255, 255)) 6742 ... draw = ImageDraw.Draw(img) 6743 ... draw.ellipse(((0, 0), (100, 100)), fill=(255, 0, 0), outline=(255, 0, 0), width=5) 6744 ... img.save("./1.jpg") 6745 ... data = np.fromfile("./1.jpg", np.uint8) 6746 ... self.data.append(data) 6747 ... 6748 ... def __getitem__(self, index): 6749 ... return self.data[0] 6750 ... 6751 ... def __len__(self): 6752 ... return 5 6753 >>> 6754 >>> my_dataset = MyDataset() 6755 >>> generator_dataset = ds.GeneratorDataset(my_dataset, column_names="image") 6756 >>> transforms_list = Compose([vision.Decode(to_pil=True), 6757 ... vision.TenCrop(size=200), 6758 ... # 4D stack of 10 images 6759 ... lambda *images: np.stack([vision.ToTensor()(image) for image in images])]) 6760 >>> # apply the transform to dataset through map function 6761 >>> generator_dataset = generator_dataset.map(operations=transforms_list, input_columns="image") 6762 >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 6763 ... print(item["image"].shape, item["image"].dtype) 6764 ... 
break 6765 (10, 3, 200, 200) float32 6766 >>> os.remove("./1.jpg") 6767 >>> 6768 >>> # Use the transform in eager mode 6769 >>> img = Image.new("RGB", (300, 300), (255, 255, 255)) 6770 >>> draw = ImageDraw.Draw(img) 6771 >>> draw.polygon([(50, 50), (150, 50), (100, 150)], fill=(0, 255, 0), outline=(0, 255, 0)) 6772 >>> img.save("./2.jpg") 6773 >>> data = Image.open("./2.jpg") 6774 >>> output = vision.TenCrop(size=200)(data) 6775 >>> print(len(output), np.array(output[0]).shape, np.array(output[0]).dtype) 6776 10 (200, 200, 3) uint8 6777 >>> os.remove("./2.jpg") 6778 6779 Tutorial Examples: 6780 - `Illustration of vision transforms 6781 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 6782 """ 6783 6784 @check_ten_crop 6785 def __init__(self, size, use_vertical_flip=False): 6786 super().__init__() 6787 if isinstance(size, int): 6788 size = (size, size) 6789 self.size = size 6790 self.use_vertical_flip = use_vertical_flip 6791 self.random = False 6792 self.implementation = Implementation.PY 6793 6794 def _execute_py(self, img): 6795 """ 6796 Execute method. 6797 6798 Args: 6799 img (PIL Image): Image to be cropped. 6800 6801 Returns: 6802 tuple, a tuple of 10 PIL Image, in order of top_left, top_right, bottom_left, bottom_right, center 6803 of the original image and top_left, top_right, bottom_left, bottom_right, center of the flipped image. 6804 """ 6805 return util.ten_crop(img, self.size, self.use_vertical_flip) 6806 6807 6808class ToNumpy(PyTensorOperation): 6809 """ 6810 Convert the PIL input image to numpy.ndarray image. 
6811 6812 Supported Platforms: 6813 ``CPU`` 6814 6815 Examples: 6816 >>> import numpy as np 6817 >>> import mindspore.dataset as ds 6818 >>> import mindspore.dataset.vision as vision 6819 >>> from mindspore.dataset.transforms import Compose 6820 >>> 6821 >>> # Use the transform in dataset pipeline mode 6822 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 6823 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 6824 >>> # Use ToNumpy to explicitly select C++ implementation of subsequent op 6825 >>> transforms_list = Compose([vision.RandomHorizontalFlip(0.5), 6826 ... vision.ToNumpy(), 6827 ... vision.Resize((50, 60))]) 6828 >>> # apply the transform to dataset through map function 6829 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image") 6830 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 6831 ... print(item["image"].shape, item["image"].dtype) 6832 ... break 6833 (50, 60, 3) uint8 6834 >>> 6835 >>> # Use the transform in eager mode 6836 >>> data = list(np.random.randint(0, 255, size=(32, 32, 3, 3)).astype(np.int32)) 6837 >>> output = vision.ToNumpy()(data) 6838 >>> print(type(output), output.shape, output.dtype) 6839 <class 'numpy.ndarray'> (32, 32, 3, 3) int32 6840 6841 Tutorial Examples: 6842 - `Illustration of vision transforms 6843 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 6844 """ 6845 6846 def __init__(self): 6847 super().__init__() 6848 self.random = False 6849 # Use "Implementation.C" to indicate to select C++ implementation for next op in transforms list 6850 self.implementation = Implementation.C 6851 6852 def _execute_py(self, img): 6853 """ 6854 Execute method. 6855 6856 Args: 6857 img (PIL Image): Image to be converted to numpy.ndarray. 
6858 6859 Returns: 6860 Image converted to numpy.ndarray 6861 """ 6862 return np.array(img) 6863 6864 6865class ToPIL(PyTensorOperation): 6866 """ 6867 Convert the input decoded numpy.ndarray image to PIL Image. 6868 6869 Raises: 6870 TypeError: If the input image is not of type :class:`numpy.ndarray` or `PIL.Image.Image` . 6871 6872 Supported Platforms: 6873 ``CPU`` 6874 6875 Examples: 6876 >>> import numpy as np 6877 >>> import mindspore.dataset as ds 6878 >>> import mindspore.dataset.vision as vision 6879 >>> from mindspore.dataset.transforms import Compose 6880 >>> 6881 >>> # Use the transform in dataset pipeline mode 6882 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 6883 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 6884 >>> # data is already decoded, but not in PIL Image format 6885 >>> transforms_list = Compose([vision.ToPIL(), 6886 ... vision.RandomHorizontalFlip(0.5), 6887 ... vision.ToTensor()]) 6888 >>> # apply the transform to dataset through map function 6889 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image") 6890 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 6891 ... print(item["image"].shape, item["image"].dtype) 6892 ... 
break 6893 (3, 100, 100) float32 6894 >>> 6895 >>> # Use the transform in eager mode 6896 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 6897 >>> output = vision.ToPIL()(data) 6898 >>> print(type(output), np.array(output).shape, np.array(output).dtype) 6899 <class 'PIL.Image.Image'> (100, 100, 3) uint8 6900 6901 Tutorial Examples: 6902 - `Illustration of vision transforms 6903 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 6904 """ 6905 6906 def __init__(self): 6907 super().__init__() 6908 self.random = False 6909 self.implementation = Implementation.PY 6910 6911 def _execute_py(self, img): 6912 """ 6913 Execute method. 6914 6915 Args: 6916 img (numpy.ndarray): Decoded numpy.ndarray image to be converted to PIL Image. 6917 6918 Returns: 6919 PIL Image, converted PIL Image. 6920 """ 6921 return util.to_pil(img) 6922 6923 6924class ToTensor(ImageTensorOperation): 6925 """ 6926 Convert the input PIL Image or numpy.ndarray to numpy.ndarray of the desired dtype, rescale the pixel value 6927 range from [0, 255] to [0.0, 1.0] and change the shape from <H, W, C> to <C, H, W>. 6928 6929 Args: 6930 output_type (Union[mindspore.dtype, numpy.dtype], optional): The desired dtype of the output image. 6931 Default: ``np.float32`` . 6932 6933 Raises: 6934 TypeError: If the input image is not of type `PIL.Image.Image` or :class:`numpy.ndarray` . 6935 TypeError: If dimension of the input image is not 2 or 3. 
6936 6937 Supported Platforms: 6938 ``CPU`` 6939 6940 Examples: 6941 >>> import numpy as np 6942 >>> import mindspore.dataset as ds 6943 >>> import mindspore.dataset.vision as vision 6944 >>> from mindspore.dataset.transforms import Compose 6945 >>> 6946 >>> # Use the transform in dataset pipeline mode 6947 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 6948 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 6949 >>> # create a list of transformations to be applied to the "image" column of each data row 6950 >>> transforms_list = Compose([vision.RandomHorizontalFlip(0.5), 6951 ... vision.ToTensor()]) 6952 >>> # apply the transform to dataset through map function 6953 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image") 6954 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 6955 ... print(item["image"].shape, item["image"].dtype) 6956 ... break 6957 (3, 100, 100) float32 6958 >>> 6959 >>> # Use the transform in eager mode 6960 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 6961 >>> output = vision.ToTensor()(data) 6962 >>> print(output.shape, output.dtype) 6963 (3, 100, 100) float32 6964 6965 Tutorial Examples: 6966 - `Illustration of vision transforms 6967 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 6968 """ 6969 6970 @check_to_tensor 6971 def __init__(self, output_type=np.float32): 6972 super().__init__() 6973 if isinstance(output_type, typing.Type): 6974 output_type = mstype_to_detype(output_type) 6975 else: 6976 output_type = nptype_to_detype(output_type) 6977 self.output_type = str(output_type) 6978 self.random = False 6979 self.implementation = Implementation.C 6980 6981 def parse(self): 6982 return cde.ToTensorOperation(self.output_type) 6983 6984 6985class ToType(TypeCast): 6986 """ 6987 Cast the input to a given MindSpore data type or NumPy data 
type. 6988 6989 It is the same as that of :class:`mindspore.dataset.transforms.TypeCast` . 6990 6991 Note: 6992 This operation is executed on the CPU by default, but it is also supported 6993 to be executed on the GPU or Ascend via heterogeneous acceleration. 6994 6995 Args: 6996 data_type (Union[mindspore.dtype, numpy.dtype]): The desired data type of the output image, 6997 such as ``numpy.float32`` . 6998 6999 Raises: 7000 TypeError: If `data_type` is not of type :class:`mindspore.dtype` or :class:`numpy.dtype` . 7001 7002 Supported Platforms: 7003 ``CPU`` ``GPU`` ``Ascend`` 7004 7005 Examples: 7006 >>> import numpy as np 7007 >>> import mindspore.dataset as ds 7008 >>> import mindspore.dataset.vision as vision 7009 >>> import numpy as np 7010 >>> from mindspore.dataset.transforms import Compose 7011 >>> 7012 >>> # Use the transform in dataset pipeline mode 7013 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 7014 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 7015 >>> transforms_list = Compose([vision.RandomHorizontalFlip(0.5), 7016 ... vision.ToTensor(), 7017 ... vision.ToType(np.float32)]) 7018 >>> # apply the transform to dataset through map function 7019 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image") 7020 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 7021 ... print(item["image"].shape, item["image"].dtype) 7022 ... 
break 7023 (3, 100, 100) float32 7024 >>> 7025 >>> # Use the transform in eager mode 7026 >>> data = np.array([2.71606445312564e-03, 6.3476562564e-03]).astype(np.float64) 7027 >>> output = vision.ToType(np.float32)(data) 7028 >>> print(output, output.dtype) 7029 [0.00271606 0.00634766] float32 7030 7031 Tutorial Examples: 7032 - `Illustration of vision transforms 7033 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 7034 """ 7035 7036 7037class TrivialAugmentWide(ImageTensorOperation): 7038 """ 7039 Apply TrivialAugmentWide data augmentation method on the input image. 7040 7041 Refer to 7042 `TrivialAugmentWide: Tuning-free Yet State-of-the-Art Data Augmentation <https://arxiv.org/abs/2103.10158>`_ . 7043 7044 Only support 3-channel RGB image. 7045 7046 Args: 7047 num_magnitude_bins (int, optional): The number of different magnitude values, 7048 must be greater than or equal to 2. Default: ``31``. 7049 interpolation (Inter, optional): Image interpolation method defined by :class:`~.vision.Inter` . 7050 Default: ``Inter.NEAREST``. 7051 fill_value (Union[int, tuple[int, int, int]], optional): Pixel fill value for the area outside the 7052 transformed image, must be in range of [0, 255]. Default: ``0``. 7053 If int is provided, pad all RGB channels with this value. 7054 If tuple[int, int, int] is provided, pad R, G, B channels respectively. 7055 7056 Raises: 7057 TypeError: If `num_magnitude_bins` is not of type int. 7058 ValueError: If `num_magnitude_bins` is less than 2. 7059 TypeError: If `interpolation` not of type :class:`~.vision.Inter` . 7060 TypeError: If `fill_value` is not of type int or tuple[int, int, int]. 7061 ValueError: If `fill_value` is not in range of [0, 255]. 7062 RuntimeError: If shape of the input image is not <H, W, C>. 
7063 7064 Supported Platforms: 7065 ``CPU`` 7066 7067 Examples: 7068 >>> import numpy as np 7069 >>> import mindspore.dataset as ds 7070 >>> import mindspore.dataset.vision as vision 7071 >>> from mindspore.dataset.vision import Inter 7072 >>> 7073 >>> # Use the transform in dataset pipeline mode 7074 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 7075 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 7076 >>> transforms_list = [vision.TrivialAugmentWide(num_magnitude_bins=31, 7077 ... interpolation=Inter.NEAREST, 7078 ... fill_value=0)] 7079 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 7080 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 7081 ... print(item["image"].shape, item["image"].dtype) 7082 ... break 7083 (100, 100, 3) uint8 7084 >>> 7085 >>> # Use the transform in eager mode 7086 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 7087 >>> output = vision.TrivialAugmentWide()(data) 7088 >>> print(output.shape, output.dtype) 7089 (100, 100, 3) uint8 7090 7091 Tutorial Examples: 7092 - `Illustration of vision transforms 7093 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 7094 """ 7095 7096 @check_trivial_augment_wide 7097 def __init__(self, num_magnitude_bins=31, interpolation=Inter.NEAREST, fill_value=0): 7098 super().__init__() 7099 self.num_magnitude_bins = num_magnitude_bins 7100 self.interpolation = interpolation 7101 if isinstance(fill_value, int): 7102 fill_value = tuple([fill_value] * 3) 7103 self.fill_value = fill_value 7104 self.implementation = Implementation.C 7105 7106 def parse(self): 7107 return cde.TrivialAugmentWideOperation(self.num_magnitude_bins, Inter.to_c_type(self.interpolation), 7108 self.fill_value) 7109 7110 7111class UniformAugment(CompoundOperation): 7112 """ 7113 Uniformly select a number of transformations from a 
sequence and apply them 7114 sequentially and randomly, which means that there is a chance that a chosen 7115 transformation will not be applied. 7116 7117 All transformations in the sequence require the output type to be the same as 7118 the input. Thus, the latter one can deal with the output of the previous one. 7119 7120 Args: 7121 transforms (Sequence): Sequence of transformations to select from. 7122 num_ops (int, optional): Number of transformations to be sequentially and randomly applied. 7123 Default: ``2``. 7124 7125 Raises: 7126 TypeError: If `transforms` is not a sequence of data processing operations. 7127 TypeError: If `num_ops` is not of type integer. 7128 ValueError: If `num_ops` is not positive. 7129 7130 Supported Platforms: 7131 ``CPU`` 7132 7133 Examples: 7134 >>> import numpy as np 7135 >>> import mindspore.dataset as ds 7136 >>> import mindspore.dataset.vision as vision 7137 >>> from mindspore.dataset.transforms import Compose 7138 >>> 7139 >>> # Use the transform in dataset pipeline mode 7140 >>> seed = ds.config.get_seed() 7141 >>> ds.config.set_seed(12345) 7142 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 7143 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 7144 >>> transform = [vision.CenterCrop(64), 7145 ... vision.RandomColor(), 7146 ... vision.RandomSharpness(), 7147 ... vision.RandomRotation(30)] 7148 >>> transforms_list = Compose([vision.UniformAugment(transform), 7149 ... vision.ToTensor()]) 7150 >>> # apply the transform to dataset through map function 7151 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns="image") 7152 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 7153 ... print(item["image"].shape, item["image"].dtype) 7154 ... 
break 7155 (3, 100, 100) float32 7156 >>> 7157 >>> # Use the transform in eager mode 7158 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 7159 >>> transform = [vision.RandomCrop(size=[20, 40], padding=[32, 32, 32, 32]), 7160 ... vision.RandomCrop(size=[20, 40], padding=[32, 32, 32, 32])] 7161 >>> output = vision.UniformAugment(transform)(data) 7162 >>> print(output.shape, output.dtype) 7163 (20, 40, 3) uint8 7164 >>> ds.config.set_seed(seed) 7165 7166 Tutorial Examples: 7167 - `Illustration of vision transforms 7168 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 7169 """ 7170 7171 @check_uniform_augment 7172 def __init__(self, transforms, num_ops=2): 7173 super().__init__(transforms) 7174 self.num_ops = num_ops 7175 self.random = True 7176 7177 def parse(self): 7178 operations = self.parse_transforms() 7179 return cde.UniformAugOperation(operations, self.num_ops) 7180 7181 def _execute_py(self, img): 7182 """ 7183 Execute method. 7184 7185 Args: 7186 img (PIL Image): Image to be transformed. 7187 7188 Returns: 7189 PIL Image, transformed image. 7190 """ 7191 return util.uniform_augment(img, self.transforms.copy(), self.num_ops) 7192 7193 7194class VerticalFlip(ImageTensorOperation): 7195 """ 7196 Flip the input image vertically. 7197 7198 Supports Ascend hardware acceleration and can be enabled through the `.device("Ascend")` method. 7199 7200 Raises: 7201 RuntimeError: If given tensor shape is not <H, W> or <..., H, W, C>. 
7202 7203 Supported Platforms: 7204 ``CPU`` ``Ascend`` 7205 7206 Examples: 7207 >>> import numpy as np 7208 >>> import mindspore.dataset as ds 7209 >>> import mindspore.dataset.vision as vision 7210 >>> 7211 >>> # Use the transform in dataset pipeline mode 7212 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 7213 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 7214 >>> transforms_list = [vision.VerticalFlip()] 7215 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 7216 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 7217 ... print(item["image"].shape, item["image"].dtype) 7218 ... break 7219 (100, 100, 3) uint8 7220 >>> 7221 >>> # Use the transform in eager mode 7222 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 7223 >>> output = vision.VerticalFlip()(data) 7224 >>> print(output.shape, output.dtype) 7225 (100, 100, 3) uint8 7226 7227 Tutorial Examples: 7228 - `Illustration of vision transforms 7229 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 7230 """ 7231 7232 def __init__(self): 7233 super().__init__() 7234 self.implementation = Implementation.C 7235 7236 @check_device_target 7237 def device(self, device_target="CPU"): 7238 """ 7239 Set the device for the current operator execution. 7240 7241 - When the device is Ascend, input type supports `uint8` and `float32`, 7242 input channel supports 1 and 3. The input data has a height limit of [4, 8192] 7243 and a width limit of [6, 4096]. 7244 7245 Args: 7246 device_target (str, optional): The operator will be executed on this device. Currently supports 7247 ``CPU`` and ``Ascend`` . Default: ``CPU`` . 7248 7249 Raises: 7250 TypeError: If `device_target` is not of type str. 7251 ValueError: If `device_target` is not within the valid set of ['CPU', 'Ascend']. 
7252 7253 Supported Platforms: 7254 ``CPU`` ``Ascend`` 7255 7256 Examples: 7257 >>> import numpy as np 7258 >>> import mindspore.dataset as ds 7259 >>> import mindspore.dataset.vision as vision 7260 >>> 7261 >>> # Use the transform in dataset pipeline mode 7262 >>> data = np.random.randint(0, 255, size=(1, 100, 100, 3)).astype(np.uint8) 7263 >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["image"]) 7264 >>> vertical_flip_op = vision.VerticalFlip().device("Ascend") 7265 >>> transforms_list = [vertical_flip_op] 7266 >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms_list, input_columns=["image"]) 7267 >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): 7268 ... print(item["image"].shape, item["image"].dtype) 7269 ... break 7270 (100, 100, 3) uint8 7271 >>> 7272 >>> # Use the transform in eager mode 7273 >>> data = np.random.randint(0, 255, size=(100, 100, 3)).astype(np.uint8) 7274 >>> output = vision.VerticalFlip().device("Ascend")(data) 7275 >>> print(output.shape, output.dtype) 7276 (100, 100, 3) uint8 7277 7278 Tutorial Examples: 7279 - `Illustration of vision transforms 7280 <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/vision_gallery.html>`_ 7281 """ 7282 self.device_target = device_target 7283 return self 7284 7285 def parse(self): 7286 return cde.VerticalFlipOperation(self.device_target) 7287 7288 7289def not_random(func): 7290 """ 7291 Specify the function as "not random", i.e., it produces deterministic result. 7292 A Python function can only be cached after it is specified as "not random". 7293 """ 7294 func.random = False 7295 return func 7296