1# Copyright 2019-2021 Huawei Technologies Co., Ltd 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15""" 16The module vision.c_transforms is inherited from _c_dataengine 17and is implemented based on OpenCV in C++. It's a high performance module to 18process images. Users can apply suitable augmentations on image data 19to improve their training models. 20 21.. Note:: 22 A constructor's arguments for every class in this module must be saved into the 23 class attributes (self.xxx) to support save() and load(). 24 25Examples: 26 >>> from mindspore.dataset.vision import Border, Inter 27 >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory" 28 >>> # create a dataset that reads all files in dataset_dir with 8 threads 29 >>> image_folder_dataset = ds.ImageFolderDataset(image_folder_dataset_dir, 30 ... num_parallel_workers=8) 31 >>> # create a list of transformations to be applied to the image data 32 >>> transforms_list = [c_vision.Decode(), 33 ... c_vision.Resize((256, 256), interpolation=Inter.LINEAR), 34 ... c_vision.RandomCrop(200, padding_mode=Border.EDGE), 35 ... c_vision.RandomRotation((0, 15)), 36 ... c_vision.Normalize((100, 115.0, 121.0), (71.0, 68.0, 70.0)), 37 ... 
c_vision.HWC2CHW()] 38 >>> onehot_op = c_transforms.OneHot(num_classes=10) 39 >>> # apply the transformation to the dataset through data1.map() 40 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 41 ... input_columns="image") 42 >>> image_folder_dataset = image_folder_dataset.map(operations=onehot_op, 43 ... input_columns="label") 44""" 45import numbers 46import numpy as np 47from PIL import Image 48import mindspore._c_dataengine as cde 49 50from .utils import Inter, Border, ImageBatchFormat, ConvertMode, SliceMode 51from .validators import check_prob, check_crop, check_center_crop, check_resize_interpolation, \ 52 check_mix_up_batch_c, check_normalize_c, check_normalizepad_c, check_random_crop, check_random_color_adjust, \ 53 check_random_rotation, check_range, check_resize, check_rescale, check_pad, check_cutout, \ 54 check_uniform_augment_cpp, check_convert_color, check_random_resize_crop, \ 55 check_bounding_box_augment_cpp, check_random_select_subpolicy_op, check_auto_contrast, check_random_affine, \ 56 check_random_solarize, check_soft_dvpp_decode_random_crop_resize_jpeg, check_positive_degrees, FLOAT_MAX_INTEGER, \ 57 check_cut_mix_batch_c, check_posterize, check_gaussian_blur, check_rotate, check_slice_patches, check_adjust_gamma 58from ..transforms.c_transforms import TensorOperation 59 60 61class ImageTensorOperation(TensorOperation): 62 """ 63 Base class of Image Tensor Ops 64 """ 65 66 def __call__(self, *input_tensor_list): 67 for tensor in input_tensor_list: 68 if not isinstance(tensor, (np.ndarray, Image.Image)): 69 raise TypeError( 70 "Input should be NumPy or PIL image, got {}.".format(type(tensor))) 71 return super().__call__(*input_tensor_list) 72 73 def parse(self): 74 raise NotImplementedError( 75 "ImageTensorOperation has to implement parse() method.") 76 77 78DE_C_BORDER_TYPE = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT, 79 Border.EDGE: cde.BorderType.DE_BORDER_EDGE, 80 Border.REFLECT: 
cde.BorderType.DE_BORDER_REFLECT, 81 Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC} 82 83DE_C_IMAGE_BATCH_FORMAT = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC, 84 ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW} 85 86DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR, 87 Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR, 88 Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC, 89 Inter.AREA: cde.InterpolationMode.DE_INTER_AREA, 90 Inter.PILCUBIC: cde.InterpolationMode.DE_INTER_PILCUBIC} 91 92DE_C_SLICE_MODE = {SliceMode.PAD: cde.SliceMode.DE_SLICE_PAD, 93 SliceMode.DROP: cde.SliceMode.DE_SLICE_DROP} 94 95DE_C_CONVERTCOLOR_MODE = {ConvertMode.COLOR_BGR2BGRA: cde.ConvertMode.DE_COLOR_BGR2BGRA, 96 ConvertMode.COLOR_RGB2RGBA: cde.ConvertMode.DE_COLOR_RGB2RGBA, 97 ConvertMode.COLOR_BGRA2BGR: cde.ConvertMode.DE_COLOR_BGRA2BGR, 98 ConvertMode.COLOR_RGBA2RGB: cde.ConvertMode.DE_COLOR_RGBA2RGB, 99 ConvertMode.COLOR_BGR2RGBA: cde.ConvertMode.DE_COLOR_BGR2RGBA, 100 ConvertMode.COLOR_RGB2BGRA: cde.ConvertMode.DE_COLOR_RGB2BGRA, 101 ConvertMode.COLOR_RGBA2BGR: cde.ConvertMode.DE_COLOR_RGBA2BGR, 102 ConvertMode.COLOR_BGRA2RGB: cde.ConvertMode.DE_COLOR_BGRA2RGB, 103 ConvertMode.COLOR_BGR2RGB: cde.ConvertMode.DE_COLOR_BGR2RGB, 104 ConvertMode.COLOR_RGB2BGR: cde.ConvertMode.DE_COLOR_RGB2BGR, 105 ConvertMode.COLOR_BGRA2RGBA: cde.ConvertMode.DE_COLOR_BGRA2RGBA, 106 ConvertMode.COLOR_RGBA2BGRA: cde.ConvertMode.DE_COLOR_RGBA2BGRA, 107 ConvertMode.COLOR_BGR2GRAY: cde.ConvertMode.DE_COLOR_BGR2GRAY, 108 ConvertMode.COLOR_RGB2GRAY: cde.ConvertMode.DE_COLOR_RGB2GRAY, 109 ConvertMode.COLOR_GRAY2BGR: cde.ConvertMode.DE_COLOR_GRAY2BGR, 110 ConvertMode.COLOR_GRAY2RGB: cde.ConvertMode.DE_COLOR_GRAY2RGB, 111 ConvertMode.COLOR_GRAY2BGRA: cde.ConvertMode.DE_COLOR_GRAY2BGRA, 112 ConvertMode.COLOR_GRAY2RGBA: cde.ConvertMode.DE_COLOR_GRAY2RGBA, 113 ConvertMode.COLOR_BGRA2GRAY: cde.ConvertMode.DE_COLOR_BGRA2GRAY, 
114 ConvertMode.COLOR_RGBA2GRAY: cde.ConvertMode.DE_COLOR_RGBA2GRAY, 115 } 116 117 118def parse_padding(padding): 119 """ Parses and prepares the padding tuple""" 120 121 if isinstance(padding, numbers.Number): 122 padding = [padding] * 4 123 if len(padding) == 2: 124 left = top = padding[0] 125 right = bottom = padding[1] 126 padding = (left, top, right, bottom,) 127 if isinstance(padding, list): 128 padding = tuple(padding) 129 return padding 130 131 132class AdjustGamma(ImageTensorOperation): 133 r""" 134 Apply gamma correction on input image. Input image is expected to be in [..., H, W, C] or [H, W] format. 135 .. math:: 136 I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma} 137 138 See `Gamma Correction`_ for more details. 139 140 .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction 141 142 Args: 143 gamma (float): Non negative real number. 144 The output image pixel value is exponentially related to the input image pixel value. 145 gamma larger than 1 make the shadows darker, 146 while gamma smaller than 1 make dark regions lighter. 147 gain (float, optional): The constant multiplier (default=1). 148 149 Examples: 150 >>> transforms_list = [c_vision.Decode(), c_vision.AdjustGamma(gamma=10.0, gain=1.0)] 151 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 152 ... input_columns=["image"]) 153 """ 154 @check_adjust_gamma 155 def __init__(self, gamma, gain=1): 156 self.gamma = gamma 157 self.gain = gain 158 159 def parse(self): 160 return cde.AdjustGammaOperation(self.gamma, self.gain) 161 162 163class AutoContrast(ImageTensorOperation): 164 """ 165 Apply automatic contrast on input image. This operator calculates histogram of image, reassign cutoff percent 166 of lightest pixels from histogram to 255, and reassign cutoff percent of darkest pixels from histogram to 0. 
167 168 Args: 169 cutoff (float, optional): Percent of lightest and darkest pixels to cut off from 170 the histogram of input image. the value must be in the range [0.0, 50.0) (default=0.0). 171 ignore (Union[int, sequence], optional): The background pixel values to ignore (default=None). 172 173 Examples: 174 >>> transforms_list = [c_vision.Decode(), c_vision.AutoContrast(cutoff=10.0, ignore=[10, 20])] 175 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 176 ... input_columns=["image"]) 177 """ 178 179 @check_auto_contrast 180 def __init__(self, cutoff=0.0, ignore=None): 181 if ignore is None: 182 ignore = [] 183 if isinstance(ignore, int): 184 ignore = [ignore] 185 self.cutoff = cutoff 186 self.ignore = ignore 187 188 def parse(self): 189 return cde.AutoContrastOperation(self.cutoff, self.ignore) 190 191 192class BoundingBoxAugment(ImageTensorOperation): 193 """ 194 Apply a given image transform on a random selection of bounding box regions of a given image. 195 196 Args: 197 transform: C++ transformation operator to be applied on random selection 198 of bounding box regions of a given image. 199 ratio (float, optional): Ratio of bounding boxes to apply augmentation on. 200 Range: [0, 1] (default=0.3). 201 202 Examples: 203 >>> # set bounding box operation with ratio of 1 to apply rotation on all bounding boxes 204 >>> bbox_aug_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1) 205 >>> # map to apply ops 206 >>> image_folder_dataset = image_folder_dataset.map(operations=[bbox_aug_op], 207 ... input_columns=["image", "bbox"], 208 ... output_columns=["image", "bbox"], 209 ... 
column_order=["image", "bbox"]) 210 """ 211 212 @check_bounding_box_augment_cpp 213 def __init__(self, transform, ratio=0.3): 214 self.ratio = ratio 215 self.transform = transform 216 217 def parse(self): 218 if self.transform and getattr(self.transform, 'parse', None): 219 transform = self.transform.parse() 220 else: 221 transform = self.transform 222 return cde.BoundingBoxAugmentOperation(transform, self.ratio) 223 224 225class CenterCrop(ImageTensorOperation): 226 """ 227 Crop the input image at the center to the given size. If input image size is smaller than output size, 228 input image will be padded with 0 before cropping. 229 230 Args: 231 size (Union[int, sequence]): The output size of the cropped image. 232 If size is an integer, a square crop of size (size, size) is returned. 233 If size is a sequence of length 2, it should be (height, width). 234 235 Examples: 236 >>> # crop image to a square 237 >>> transforms_list1 = [c_vision.Decode(), c_vision.CenterCrop(50)] 238 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, 239 ... input_columns=["image"]) 240 >>> # crop image to portrait style 241 >>> transforms_list2 = [c_vision.Decode(), c_vision.CenterCrop((60, 40))] 242 >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, 243 ... input_columns=["image"]) 244 """ 245 246 @check_center_crop 247 def __init__(self, size): 248 if isinstance(size, int): 249 size = (size, size) 250 self.size = size 251 252 def parse(self): 253 return cde.CenterCropOperation(self.size) 254 255 256class ConvertColor(ImageTensorOperation): 257 """ 258 Change the color space of the image. 259 260 Args: 261 convert_mode (ConvertMode): The mode of image channel conversion. 262 263 - ConvertMode.COLOR_BGR2BGRA, Add alpha channel to BGR image. 264 265 - ConvertMode.COLOR_RGB2RGBA, Add alpha channel to RGB image. 266 267 - ConvertMode.COLOR_BGRA2BGR, Remove alpha channel to BGR image. 
268 269 - ConvertMode.COLOR_RGBA2RGB, Remove alpha channel to RGB image. 270 271 - ConvertMode.COLOR_BGR2RGBA, Convert BGR image to RGBA image. 272 273 - ConvertMode.COLOR_RGB2BGRA, Convert RGB image to BGRA image. 274 275 - ConvertMode.COLOR_RGBA2BGR, Convert RGBA image to BGR image. 276 277 - ConvertMode.COLOR_BGRA2RGB, Convert BGRA image to RGB image. 278 279 - ConvertMode.COLOR_BGR2RGB, Convert BGR image to RGB image. 280 281 - ConvertMode.COLOR_RGB2BGR, Convert RGB image to BGR image. 282 283 - ConvertMode.COLOR_BGRA2RGBA, Convert BGRA image to RGBA image. 284 285 - ConvertMode.COLOR_RGBA2BGRA, Convert RGBA image to BGRA image. 286 287 - ConvertMode.COLOR_BGR2GRAY, Convert BGR image to GRAY image. 288 289 - ConvertMode.COLOR_RGB2GRAY, Convert RGB image to GRAY image. 290 291 - ConvertMode.COLOR_GRAY2BGR, Convert GRAY image to BGR image. 292 293 - ConvertMode.COLOR_GRAY2RGB, Convert GRAY image to RGB image. 294 295 - ConvertMode.COLOR_GRAY2BGRA, Convert GRAY image to BGRA image. 296 297 - ConvertMode.COLOR_GRAY2RGBA, Convert GRAY image to RGBA image. 298 299 - ConvertMode.COLOR_BGRA2GRAY, Convert BGRA image to GRAY image. 300 301 - ConvertMode.COLOR_RGBA2GRAY, Convert RGBA image to GRAY image. 302 303 Examples: 304 >>> import mindspore.dataset.vision.utils as mode 305 >>> # Convert RGB images to GRAY images 306 >>> convert_op = c_vision.ConvertColor(mode.ConvertMode.COLOR_RGB2GRAY) 307 >>> image_folder_dataset = image_folder_dataset.map(operations=convert_op, 308 ... input_columns=["image"]) 309 >>> # Convert RGB images to BGR images 310 >>> convert_op = c_vision.ConvertColor(mode.ConvertMode.COLOR_RGB2BGR) 311 >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=convert_op, 312 ... 
input_columns=["image"]) 313 """ 314 @check_convert_color 315 def __init__(self, convert_mode): 316 self.convert_mode = convert_mode 317 318 def parse(self): 319 return cde.ConvertColorOperation(DE_C_CONVERTCOLOR_MODE[self.convert_mode]) 320 321 322class Crop(ImageTensorOperation): 323 """ 324 Crop the input image at a specific location. 325 326 Args: 327 coordinates(sequence): Coordinates of the upper left corner of the cropping image. Must be a sequence of two 328 values, in the form of (top, left). 329 size (Union[int, sequence]): The output size of the cropped image. 330 If size is an integer, a square crop of size (size, size) is returned. 331 If size is a sequence of length 2, it should be (height, width). 332 333 Examples: 334 >>> decode_op = c_vision.Decode() 335 >>> crop_op = c_vision.Crop((0, 0), 32) 336 >>> transforms_list = [decode_op, crop_op] 337 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 338 ... input_columns=["image"]) 339 """ 340 341 @check_crop 342 def __init__(self, coordinates, size): 343 if isinstance(size, int): 344 size = (size, size) 345 self.coordinates = coordinates 346 self.size = size 347 348 def parse(self): 349 return cde.CropOperation(self.coordinates, self.size) 350 351 352class CutMixBatch(ImageTensorOperation): 353 """ 354 Apply CutMix transformation on input batch of images and labels. 355 Note that you need to make labels into one-hot format and batched before calling this operator. 356 357 Args: 358 image_batch_format (Image Batch Format): The method of padding. Can be any of 359 [ImageBatchFormat.NHWC, ImageBatchFormat.NCHW]. 360 alpha (float, optional): hyperparameter of beta distribution (default = 1.0). 361 prob (float, optional): The probability by which CutMix is applied to each image (default = 1.0). 
362 363 Examples: 364 >>> from mindspore.dataset.vision import ImageBatchFormat 365 >>> onehot_op = c_transforms.OneHot(num_classes=10) 366 >>> image_folder_dataset= image_folder_dataset.map(operations=onehot_op, 367 ... input_columns=["label"]) 368 >>> cutmix_batch_op = c_vision.CutMixBatch(ImageBatchFormat.NHWC, 1.0, 0.5) 369 >>> image_folder_dataset = image_folder_dataset.batch(5) 370 >>> image_folder_dataset = image_folder_dataset.map(operations=cutmix_batch_op, 371 ... input_columns=["image", "label"]) 372 """ 373 374 @check_cut_mix_batch_c 375 def __init__(self, image_batch_format, alpha=1.0, prob=1.0): 376 self.image_batch_format = image_batch_format.value 377 self.alpha = alpha 378 self.prob = prob 379 380 def parse(self): 381 return cde.CutMixBatchOperation(DE_C_IMAGE_BATCH_FORMAT[self.image_batch_format], self.alpha, self.prob) 382 383 384class CutOut(ImageTensorOperation): 385 """ 386 Randomly cut (mask) out a given number of square patches from the input image array. 387 388 Args: 389 length (int): The side length of each square patch. 390 num_patches (int, optional): Number of patches to be cut out of an image (default=1). 391 392 Examples: 393 >>> transforms_list = [c_vision.Decode(), c_vision.CutOut(80, num_patches=10)] 394 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 395 ... input_columns=["image"]) 396 """ 397 398 @check_cutout 399 def __init__(self, length, num_patches=1): 400 self.length = length 401 self.num_patches = num_patches 402 403 def parse(self): 404 return cde.CutOutOperation(self.length, self.num_patches) 405 406 407class Decode(ImageTensorOperation): 408 """ 409 Decode the input image in RGB mode(default) or BGR mode(deprecated). 410 411 Args: 412 rgb (bool, optional): Mode of decoding input image (default=True). 413 If True means format of decoded image is RGB else BGR(deprecated). 
414 415 Examples: 416 >>> transforms_list = [c_vision.Decode(), c_vision.RandomHorizontalFlip()] 417 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 418 ... input_columns=["image"]) 419 """ 420 421 def __init__(self, rgb=True): 422 self.rgb = rgb 423 424 def __call__(self, img): 425 """ 426 Call method. 427 428 Args: 429 img (NumPy): Image to be decoded. 430 431 Returns: 432 img (NumPy), Decoded image. 433 """ 434 if not isinstance(img, np.ndarray) or img.ndim != 1 or img.dtype.type is np.str_: 435 raise TypeError( 436 "Input should be an encoded image in 1-D NumPy format, got {}.".format(type(img))) 437 return super().__call__(img) 438 439 def parse(self): 440 return cde.DecodeOperation(self.rgb) 441 442 443class Equalize(ImageTensorOperation): 444 """ 445 Apply histogram equalization on input image. 446 447 Examples: 448 >>> transforms_list = [c_vision.Decode(), c_vision.Equalize()] 449 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 450 ... input_columns=["image"]) 451 """ 452 453 def parse(self): 454 return cde.EqualizeOperation() 455 456 457class GaussianBlur(ImageTensorOperation): 458 """ 459 Blur input image with the specified Gaussian kernel. 460 461 Args: 462 kernel_size (Union[int, sequence]): Size of the Gaussian kernel to use. The value must be positive and odd. If 463 only an integer is provided, the kernel size will be (size, size). If a sequence of integer is provided, it 464 must be a sequence of 2 values which represents (width, height). 465 sigma (Union[float, sequence], optional): Standard deviation of the Gaussian kernel to use (default=None). The 466 value must be positive. If only a float is provided, the sigma will be (sigma, sigma). If a sequence of 467 float is provided, it must be a sequence of 2 values which represents the sigma of width and height. If None 468 is provided, the sigma will be calculated as ((kernel_size - 1) * 0.5 - 1) * 0.3 + 0.8. 
469 470 471 Examples: 472 >>> transforms_list = [c_vision.Decode(), c_vision.GaussianBlur(3, 3)] 473 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 474 ... input_columns=["image"]) 475 """ 476 477 @check_gaussian_blur 478 def __init__(self, kernel_size, sigma=None): 479 if isinstance(kernel_size, int): 480 kernel_size = (kernel_size,) 481 if sigma is None: 482 sigma = (0,) 483 elif isinstance(sigma, (int, float)): 484 sigma = (float(sigma),) 485 self.kernel_size = kernel_size 486 self.sigma = sigma 487 488 def parse(self): 489 return cde.GaussianBlurOperation(self.kernel_size, self.sigma) 490 491 492class HorizontalFlip(ImageTensorOperation): 493 """ 494 Flip the input image horizontally. 495 496 Examples: 497 >>> transforms_list = [c_vision.Decode(), c_vision.HorizontalFlip()] 498 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 499 ... input_columns=["image"]) 500 """ 501 502 def parse(self): 503 return cde.HorizontalFlipOperation() 504 505 506class HWC2CHW(ImageTensorOperation): 507 """ 508 Transpose the input image from shape (H, W, C) to shape (C, H, W). The input image should be 3 channels image. 509 510 Examples: 511 >>> transforms_list = [c_vision.Decode(), 512 ... c_vision.RandomHorizontalFlip(0.75), 513 ... c_vision.RandomCrop(512), 514 ... c_vision.HWC2CHW()] 515 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 516 ... input_columns=["image"]) 517 """ 518 519 def parse(self): 520 return cde.HwcToChwOperation() 521 522 523class Invert(ImageTensorOperation): 524 """ 525 Apply invert on input image in RGB mode. This operator will reassign every pixel to (255 - pixel). 526 527 Examples: 528 >>> transforms_list = [c_vision.Decode(), c_vision.Invert()] 529 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 530 ... 
input_columns=["image"]) 531 """ 532 533 def parse(self): 534 return cde.InvertOperation() 535 536 537class MixUpBatch(ImageTensorOperation): 538 """ 539 Apply MixUp transformation on input batch of images and labels. Each image is 540 multiplied by a random weight (lambda) and then added to a randomly selected image from the batch 541 multiplied by (1 - lambda). The same formula is also applied to the one-hot labels. 542 The lambda is generated based on the specified alpha value. Two coefficients x1, x2 are randomly generated 543 in the range [alpha, 1], and lambda = (x1 / (x1 + x2)). 544 Note that you need to make labels into one-hot format and batched before calling this operator. 545 546 Args: 547 alpha (float, optional): Hyperparameter of beta distribution (default = 1.0). 548 549 Examples: 550 >>> onehot_op = c_transforms.OneHot(num_classes=10) 551 >>> image_folder_dataset= image_folder_dataset.map(operations=onehot_op, 552 ... input_columns=["label"]) 553 >>> mixup_batch_op = c_vision.MixUpBatch(alpha=0.9) 554 >>> image_folder_dataset = image_folder_dataset.batch(5) 555 >>> image_folder_dataset = image_folder_dataset.map(operations=mixup_batch_op, 556 ... input_columns=["image", "label"]) 557 """ 558 559 @check_mix_up_batch_c 560 def __init__(self, alpha=1.0): 561 self.alpha = alpha 562 563 def parse(self): 564 return cde.MixUpBatchOperation(self.alpha) 565 566 567class Normalize(ImageTensorOperation): 568 """ 569 Normalize the input image with respect to mean and standard deviation. This operator will normalize 570 the input image with: output[channel] = (input[channel] - mean[channel]) / std[channel], where channel >= 1. 571 572 Args: 573 mean (sequence): List or tuple of mean values for each channel, with respect to channel order. 574 The mean values must be in range [0.0, 255.0]. 575 std (sequence): List or tuple of standard deviations for each channel, with respect to channel order. 576 The standard deviation values must be in range (0.0, 255.0]. 
577 578 Examples: 579 >>> decode_op = c_vision.Decode() 580 >>> normalize_op = c_vision.Normalize(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0]) 581 >>> transforms_list = [decode_op, normalize_op] 582 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 583 ... input_columns=["image"]) 584 """ 585 586 @check_normalize_c 587 def __init__(self, mean, std): 588 self.mean = mean 589 self.std = std 590 591 def parse(self): 592 return cde.NormalizeOperation(self.mean, self.std) 593 594 595class NormalizePad(ImageTensorOperation): 596 """ 597 Normalize the input image with respect to mean and standard deviation then pad an extra channel with value zero. 598 599 Args: 600 mean (sequence): List or tuple of mean values for each channel, with respect to channel order. 601 The mean values must be in range (0.0, 255.0]. 602 std (sequence): List or tuple of standard deviations for each channel, with respect to channel order. 603 The standard deviation values must be in range (0.0, 255.0]. 604 dtype (str): Set the output data type of normalized image (default is "float32"). 605 606 Examples: 607 >>> decode_op = c_vision.Decode() 608 >>> normalize_pad_op = c_vision.NormalizePad(mean=[121.0, 115.0, 100.0], 609 ... std=[70.0, 68.0, 71.0], 610 ... dtype="float32") 611 >>> transforms_list = [decode_op, normalize_pad_op] 612 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 613 ... input_columns=["image"]) 614 """ 615 616 @check_normalizepad_c 617 def __init__(self, mean, std, dtype="float32"): 618 self.mean = mean 619 self.std = std 620 self.dtype = dtype 621 622 def parse(self): 623 return cde.NormalizePadOperation(self.mean, self.std, self.dtype) 624 625 626class Pad(ImageTensorOperation): 627 """ 628 Pad the image according to padding parameters. 629 630 Args: 631 padding (Union[int, sequence]): The number of pixels to pad the image. 632 If a single number is provided, it pads all borders with this value. 
633 If a tuple or lists of 2 values are provided, it pads the (left and top) 634 with the first value and (right and bottom) with the second value. 635 If 4 values are provided as a list or tuple, 636 it pads the left, top, right and bottom respectively. 637 fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for 638 padding_mode Border.CONSTANT. If it is a 3-tuple, it is used to fill R, G, B channels respectively. 639 If it is an integer, it is used for all RGB channels. 640 The fill_value values must be in range [0, 255] (default=0). 641 padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of 642 [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC]. 643 644 - Border.CONSTANT, means it fills the border with constant values. 645 646 - Border.EDGE, means it pads with the last value on the edge. 647 648 - Border.REFLECT, means it reflects the values on the edge omitting the last 649 value of edge. 650 651 - Border.SYMMETRIC, means it reflects the values on the edge repeating the last 652 value of edge. 653 654 Examples: 655 >>> transforms_list = [c_vision.Decode(), c_vision.Pad([100, 100, 100, 100])] 656 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 657 ... input_columns=["image"]) 658 """ 659 660 @check_pad 661 def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT): 662 padding = parse_padding(padding) 663 if isinstance(fill_value, int): 664 fill_value = tuple([fill_value] * 3) 665 self.padding = padding 666 self.fill_value = fill_value 667 self.padding_mode = padding_mode 668 669 def parse(self): 670 return cde.PadOperation(self.padding, self.fill_value, DE_C_BORDER_TYPE[self.padding_mode]) 671 672 673class RandomAffine(ImageTensorOperation): 674 """ 675 Apply Random affine transformation to the input image. 676 677 Args: 678 degrees (int or float or sequence): Range of the rotation degrees. 
679 If `degrees` is a number, the range will be (-degrees, degrees). 680 If `degrees` is a sequence, it should be (min, max). 681 translate (sequence, optional): Sequence (tx_min, tx_max, ty_min, ty_max) of minimum/maximum translation in 682 x(horizontal) and y(vertical) directions (default=None). 683 The horizontal and vertical shift is selected randomly from the range: 684 (tx_min*width, tx_max*width) and (ty_min*height, ty_max*height), respectively. 685 If a tuple or list of size 2, then a translate parallel to the X axis in the range of 686 (translate[0], translate[1]) is applied. 687 If a tuple of list of size 4, then a translate parallel to the X axis in the range of 688 (translate[0], translate[1]) and a translate parallel to the Y axis in the range of 689 (translate[2], translate[3]) are applied. 690 If None, no translation is applied. 691 scale (sequence, optional): Scaling factor interval (default=None, original scale is used). 692 shear (int or float or sequence, optional): Range of shear factor (default=None). 693 If a number, then a shear parallel to the X axis in the range of (-shear, +shear) is applied. 694 If a tuple or list of size 2, then a shear parallel to the X axis in the range of (shear[0], shear[1]) 695 is applied. 696 If a tuple of list of size 4, then a shear parallel to X axis in the range of (shear[0], shear[1]) 697 and a shear parallel to Y axis in the range of (shear[2], shear[3]) is applied. 698 If None, no shear is applied. 699 resample (Inter mode, optional): An optional resampling filter (default=Inter.NEAREST). 700 It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. 701 702 - Inter.BILINEAR, means resample method is bilinear interpolation. 703 704 - Inter.NEAREST, means resample method is nearest-neighbor interpolation. 705 706 - Inter.BICUBIC, means resample method is bicubic interpolation. 707 708 fill_value (tuple or int, optional): Optional fill_value to fill the area outside the transform 709 in the output image. 
There must be three elements in tuple and the value of single element is [0, 255]. 710 (default=0, filling is performed). 711 712 Raises: 713 ValueError: If `degrees` is negative. 714 ValueError: If translation value is not between -1 and 1. 715 ValueError: If scale is not positive. 716 ValueError: If shear is a number but is not positive. 717 TypeError: If `degrees` is not a number or a list or a tuple. 718 If `degrees` is a list or tuple, its length is not 2. 719 TypeError: If translate is specified but is not list or a tuple of length 2 or 4. 720 TypeError: If scale is not a list or tuple of length 2. 721 TypeError: If shear is not a list or tuple of length 2 or 4. 722 TypeError: If fill_value is not a single integer or a 3-tuple. 723 724 Examples: 725 >>> from mindspore.dataset.vision import Inter 726 >>> decode_op = c_vision.Decode() 727 >>> random_affine_op = c_vision.RandomAffine(degrees=15, 728 ... translate=(-0.1, 0.1, 0, 0), 729 ... scale=(0.9, 1.1), 730 ... resample=Inter.NEAREST) 731 >>> transforms_list = [decode_op, random_affine_op] 732 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 733 ... input_columns=["image"]) 734 """ 735 736 @check_random_affine 737 def __init__(self, degrees, translate=None, scale=None, shear=None, resample=Inter.NEAREST, fill_value=0): 738 # Parameter checking 739 if shear is not None: 740 if isinstance(shear, numbers.Number): 741 shear = (-1 * shear, shear, 0., 0.) 742 else: 743 if len(shear) == 2: 744 shear = [shear[0], shear[1], 0., 0.] 
class RandomColorAdjust(ImageTensorOperation):
    """
    Apply a random change of brightness, contrast, saturation and hue to the input image.

    Args:
        brightness (Union[float, list, tuple], optional): Brightness adjustment factor (default=(1, 1)).
            Cannot be negative.
            A float makes the factor be drawn uniformly from [max(0, 1-brightness), 1+brightness].
            A sequence is taken as the range [min, max].
        contrast (Union[float, list, tuple], optional): Contrast adjustment factor (default=(1, 1)).
            Cannot be negative.
            A float makes the factor be drawn uniformly from [max(0, 1-contrast), 1+contrast].
            A sequence is taken as the range [min, max].
        saturation (Union[float, list, tuple], optional): Saturation adjustment factor (default=(1, 1)).
            Cannot be negative.
            A float makes the factor be drawn uniformly from [max(0, 1-saturation), 1+saturation].
            A sequence is taken as the range [min, max].
        hue (Union[float, list, tuple], optional): Hue adjustment factor (default=(0, 0)).
            A float gives the range [-hue, hue]; the value should satisfy 0 <= hue <= 0.5.
            A sequence is taken as [min, max] where -0.5 <= min <= max <= 0.5.

    Examples:
        >>> decode_op = c_vision.Decode()
        >>> transform_op = c_vision.RandomColorAdjust(brightness=(0.5, 1),
        ...                                           contrast=(0.4, 1),
        ...                                           saturation=(0.3, 1))
        >>> transforms_list = [decode_op, transform_op]
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """

    @check_random_color_adjust
    def __init__(self, brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)):
        # Expand all four factors first (in this order), then publish them as attributes,
        # so nothing is stored unless every range passed its check.
        expanded = (
            self.__expand_values(brightness),
            self.__expand_values(contrast),
            self.__expand_values(saturation),
            # Hue is centred on 0 and is the only factor allowed to go negative.
            self.__expand_values(hue, center=0, bound=(-0.5, 0.5), non_negative=False),
        )
        self.brightness, self.contrast, self.saturation, self.hue = expanded

    def __expand_values(self, value, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True):
        """Turn an adjustment factor into a range-checked (min, max) tuple."""
        if isinstance(value, numbers.Number):
            # A scalar describes a symmetric interval around `center`.
            lower = center - value
            if non_negative:
                lower = max(0, lower)
            value = [lower, center + value]
        check_range(value, bound)
        low, high = value[0], value[1]
        return (low, high)

    def parse(self):
        """Return the cde.RandomColorAdjustOperation built from the four stored ranges."""
        ranges = (self.brightness, self.contrast, self.saturation, self.hue)
        return cde.RandomColorAdjustOperation(*ranges)
876 If 4 values are provided as a list or tuple, 877 pad the left, top, right and bottom respectively. 878 pad_if_needed (bool, optional): Pad the image if either side is smaller than 879 the given output size (default=False). 880 fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for 881 padding_mode Border.CONSTANT. If it is a 3-tuple, it is used to fill R, G, B channels respectively. 882 If it is an integer, it is used for all RGB channels. 883 The fill_value values must be in range [0, 255] (default=0). 884 padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). It can be any of 885 [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC]. 886 887 - Border.CONSTANT, means it fills the border with constant values. 888 889 - Border.EDGE, means it pads with the last value on the edge. 890 891 - Border.REFLECT, means it reflects the values on the edge omitting the last 892 value of edge. 893 894 - Border.SYMMETRIC, means it reflects the values on the edge repeating the last 895 value of edge. 896 897 Examples: 898 >>> from mindspore.dataset.vision import Border 899 >>> decode_op = c_vision.Decode() 900 >>> random_crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200], padding_mode=Border.EDGE) 901 >>> transforms_list = [decode_op, random_crop_op] 902 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 903 ... 
class RandomCropDecodeResize(ImageTensorOperation):
    """
    A combination of `Crop`, `Decode` and `Resize`. It will get better performance for JPEG images. The
    operator crops the input image at a random location, decodes the cropped image in RGB mode and then
    resizes the decoded image.

    Args:
        size (Union[int, sequence]): The output size of the resized image.
            An integer yields a square crop of size (size, size).
            A sequence of length 2 is read as (height, width).
        scale (list, tuple, optional): Range [min, max) of respective size of the
            original size to be cropped (default=(0.08, 1.0)).
        ratio (list, tuple, optional): Range [min, max) of aspect ratio to be
            cropped (default=(3. / 4., 4. / 3.)).
        interpolation (Inter mode, optional): Image interpolation mode for resize operator(default=Inter.BILINEAR).
            It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].

            - Inter.BILINEAR, means interpolation method is bilinear interpolation.

            - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation.

            - Inter.BICUBIC, means interpolation method is bicubic interpolation.

        max_attempts (int, optional): The maximum number of attempts to propose a valid crop_area (default=10).
            If exceeded, fall back to use center_crop instead.

    Examples:
        >>> from mindspore.dataset.vision import Inter
        >>> resize_crop_decode_op = c_vision.RandomCropDecodeResize(size=(50, 75),
        ...                                                         scale=(0.25, 0.5),
        ...                                                         interpolation=Inter.NEAREST,
        ...                                                         max_attempts=5)
        >>> transforms_list = [resize_crop_decode_op]
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """

    @check_random_resize_crop
    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
                 interpolation=Inter.BILINEAR, max_attempts=10):
        # An integer size is shorthand for a square (size, size) output.
        self.size = (size, size) if isinstance(size, int) else size
        self.scale = scale
        self.ratio = ratio
        self.interpolation = interpolation
        self.max_attempts = max_attempts

    def parse(self):
        """Return the cde.RandomCropDecodeResizeOperation built from the stored arguments."""
        return cde.RandomCropDecodeResizeOperation(
            self.size,
            self.scale,
            self.ratio,
            DE_C_INTER_MODE[self.interpolation],
            self.max_attempts,
        )

    def __call__(self, img):
        """
        Apply the operation eagerly to one encoded image.

        Args:
            img (numpy.ndarray): Encoded image, a 1-D array of uint8.

        Raises:
            TypeError: If `img` is not a NumPy array, or is not 1-D with uint8 dtype.
        """
        if not isinstance(img, np.ndarray):
            raise TypeError(
                "Input should be an encoded image in 1-D NumPy format, got {}.".format(type(img)))
        # The operation decodes by itself, so only a flat uint8 byte buffer is accepted.
        is_encoded_buffer = img.ndim == 1 and img.dtype.type is np.uint8
        if not is_encoded_buffer:
            details = "got format:{}, dtype:{}.".format(type(img), img.dtype.type)
            raise TypeError("Input should be an encoded image with uint8 type in 1-D NumPy format, " + details)
        return super().__call__(img)
998 padding (Union[int, sequence], optional): The number of pixels to pad the image (default=None). 999 If padding is not None, first pad image with padding values. 1000 If a single number is provided, pad all borders with this value. 1001 If a tuple or lists of 2 values are provided, pad the (left and top) 1002 with the first value and (right and bottom) with the second value. 1003 If 4 values are provided as a list or tuple, pad the left, top, right and bottom respectively. 1004 pad_if_needed (bool, optional): Pad the image if either side is smaller than 1005 the given output size (default=False). 1006 fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for 1007 padding_mode Border.CONSTANT. If it is a 3-tuple, it is used to fill R, G, B channels respectively. 1008 If it is an integer, it is used for all RGB channels. 1009 The fill_value values must be in range [0, 255] (default=0). 1010 padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). It can be any of 1011 [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC]. 1012 1013 - Border.CONSTANT, means it fills the border with constant values. 1014 1015 - Border.EDGE, means it pads with the last value on the edge. 1016 1017 - Border.REFLECT, means it reflects the values on the edge omitting the last 1018 value of edge. 1019 1020 - Border.SYMMETRIC, means it reflects the values on the edge repeating the last 1021 value of edge. 1022 1023 Examples: 1024 >>> decode_op = c_vision.Decode() 1025 >>> random_crop_with_bbox_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200]) 1026 >>> transforms_list = [decode_op, random_crop_with_bbox_op] 1027 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 1028 ... 
class RandomHorizontalFlip(ImageTensorOperation):
    """
    Flip the input image horizontally, at random, with the given probability.

    Args:
        prob (float, optional): Probability that the image gets flipped (default=0.5).

    Examples:
        >>> transforms_list = [c_vision.Decode(), c_vision.RandomHorizontalFlip(0.75)]
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """

    @check_prob
    def __init__(self, prob=0.5):
        # Stored unchanged; parse() reads it back when building the operation.
        self.prob = prob

    def parse(self):
        """Return the cde.RandomHorizontalFlipOperation built from the stored probability."""
        flip_probability = self.prob
        return cde.RandomHorizontalFlipOperation(flip_probability)
class RandomPosterize(ImageTensorOperation):
    """
    Posterize the input image, randomly with a given probability, by reducing the number of bits kept
    for each color channel.

    Args:
        bits (sequence or int, optional): Range of random posterize to compress image.
            Bits values must be in range of [1,8], and include at
            least one integer value in the given range. It must be in
            (min, max) or integer format. If min=max, then it is a single fixed
            magnitude operation (default=(8, 8)).

    Examples:
        >>> transforms_list = [c_vision.Decode(), c_vision.RandomPosterize((6, 8))]
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """

    @check_posterize
    def __init__(self, bits=(8, 8)):
        # Stored exactly as given; an integer is only widened to a pair inside parse().
        self.bits = bits

    def parse(self):
        """Return the cde.RandomPosterizeOperation, passing `bits` as a (min, max) pair."""
        bit_range = (self.bits, self.bits) if isinstance(self.bits, int) else self.bits
        return cde.RandomPosterizeOperation(bit_range)
1139 ratio (list, tuple, optional): Range [min, max) of aspect ratio to be cropped 1140 (default=(3. / 4., 4. / 3.)). 1141 interpolation (Inter mode, optional): Image interpolation mode for resize operator (default=Inter.BILINEAR). 1142 It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC, Inter.PILCUBIC]. 1143 1144 - Inter.BILINEAR, means interpolation method is bilinear interpolation. 1145 1146 - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. 1147 1148 - Inter.BICUBIC, means interpolation method is bicubic interpolation. 1149 1150 - Inter.AREA, means interpolation method is pixel area interpolation. 1151 1152 - Inter.PILCUBIC, means interpolation method is bicubic interpolation like implemented in pillow, input 1153 should be in 3 channels format. 1154 1155 max_attempts (int, optional): The maximum number of attempts to propose a valid 1156 crop_area (default=10). If exceeded, fall back to use center_crop instead. 1157 1158 Examples: 1159 >>> from mindspore.dataset.vision import Inter 1160 >>> decode_op = c_vision.Decode() 1161 >>> resize_crop_op = c_vision.RandomResizedCrop(size=(50, 75), scale=(0.25, 0.5), 1162 ... interpolation=Inter.BILINEAR) 1163 >>> transforms_list = [decode_op, resize_crop_op] 1164 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 1165 ... input_columns=["image"]) 1166 """ 1167 1168 @check_random_resize_crop 1169 def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. 
class RandomResizedCropWithBBox(ImageTensorOperation):
    """
    Crop the input image to a random size and aspect ratio and adjust bounding boxes accordingly.

    Args:
        size (Union[int, sequence]): The size of the output image.
            An integer yields a square crop of size (size, size).
            A sequence of length 2 is read as (height, width).
        scale (list, tuple, optional): Range (min, max) of respective size of the original
            size to be cropped (default=(0.08, 1.0)).
        ratio (list, tuple, optional): Range (min, max) of aspect ratio to be cropped
            (default=(3. / 4., 4. / 3.)).
        interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR).
            It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].

            - Inter.BILINEAR, means interpolation method is bilinear interpolation.

            - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation.

            - Inter.BICUBIC, means interpolation method is bicubic interpolation.

        max_attempts (int, optional): The maximum number of attempts to propose a valid
            crop area (default=10). If exceeded, fall back to use center crop instead.

    Examples:
        >>> from mindspore.dataset.vision import Inter
        >>> decode_op = c_vision.Decode()
        >>> bbox_op = c_vision.RandomResizedCropWithBBox(size=50, interpolation=Inter.NEAREST)
        >>> transforms_list = [decode_op, bbox_op]
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """

    @check_random_resize_crop
    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
                 interpolation=Inter.BILINEAR, max_attempts=10):
        # An integer size is shorthand for a square (size, size) output.
        self.size = (size, size) if isinstance(size, int) else size
        self.scale = scale
        self.ratio = ratio
        self.interpolation = interpolation
        self.max_attempts = max_attempts

    def parse(self):
        """Return the cde.RandomResizedCropWithBBoxOperation built from the stored arguments."""
        return cde.RandomResizedCropWithBBoxOperation(
            self.size,
            self.scale,
            self.ratio,
            DE_C_INTER_MODE[self.interpolation],
            self.max_attempts,
        )
class RandomRotation(ImageTensorOperation):
    """
    Rotate the input image randomly within a specified range of degrees.

    Args:
        degrees (Union[int, float, sequence]): Range of random rotation degrees.
            If `degrees` is a number, the range will be converted to (-degrees, degrees).
            If `degrees` is a sequence, it should be (min, max).
            A range that spans 360 degrees or more (a number >= 180, or a sequence with
            max - min >= 360) is stored as the full turn [-180, 180].
        resample (Inter mode, optional): An optional resampling filter (default=Inter.NEAREST).
            It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].

            - Inter.BILINEAR, means resample method is bilinear interpolation.

            - Inter.NEAREST, means resample method is nearest-neighbor interpolation.

            - Inter.BICUBIC, means resample method is bicubic interpolation.

        expand (bool, optional):  Optional expansion flag (default=False). If set to True, expand the output
            image to make it large enough to hold the entire rotated image.
            If set to False or omitted, make the output image the same size as the input.
            Note that the expand flag assumes rotation around the center and no translation.
        center (tuple, optional): Optional center of rotation (a 2-tuple) (default=None).
            Origin is the top left corner. None sets to the center of the image.
        fill_value (Union[int, tuple], optional): Optional fill color for the area outside the rotated image.
            If it is a 3-tuple, it is used to fill R, G, B channels respectively.
            If it is an integer, it is used for all RGB channels.
            The fill_value values must be in range [0, 255] (default=0).

    Examples:
        >>> from mindspore.dataset.vision import Inter
        >>> transforms_list = [c_vision.Decode(),
        ...                    c_vision.RandomRotation(degrees=5.0,
        ...                                            resample=Inter.NEAREST,
        ...                                            expand=True)]
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """

    @check_random_rotation
    def __init__(self, degrees, resample=Inter.NEAREST, expand=False, center=None, fill_value=0):
        if isinstance(degrees, (int, float)):
            if degrees >= 180:
                # (-degrees, degrees) already covers a whole turn. Reducing the scalar with
                # `% 360` first would shrink the range instead: 360 would become a fixed
                # 0-degree rotation and 400 would become (-40, 40). Use the same full-turn
                # representation as the sequence branch below.
                degrees = [-180, 180]
            else:
                degrees = degrees % 360
                degrees = [-degrees, degrees]
        elif isinstance(degrees, (list, tuple)):
            if degrees[1] - degrees[0] >= 360:
                # A span of a full turn or more is equivalent to any angle.
                degrees = [-180, 180]
            else:
                # Bring both bounds into [0, 360); if that puts min above max, shift max
                # one turn up so the pair is ordered again.
                degrees = [degrees[0] % 360, degrees[1] % 360]
                if degrees[0] > degrees[1]:
                    degrees[1] += 360
        if center is None:
            # An empty tuple is what gets passed on when no explicit center is given.
            center = ()
        if isinstance(fill_value, int):
            # One intensity is replicated for the three channels.
            fill_value = tuple([fill_value] * 3)
        self.degrees = degrees
        self.resample = resample
        self.expand = expand
        self.center = center
        self.fill_value = fill_value

    def parse(self):
        """Return the cde.RandomRotationOperation built from the stored arguments."""
        return cde.RandomRotationOperation(self.degrees, DE_C_INTER_MODE[self.resample], self.expand, self.center,
                                           self.fill_value)
class RandomSharpness(ImageTensorOperation):
    """
    Change the sharpness of the input image by a fixed or random degree. A degree of 0.0 yields a blurred
    image, a degree of 1.0 yields the original image and a degree of 2.0 yields a sharpened image.

    Args:
        degrees (Union[list, tuple], optional): Range of random sharpness adjustment degrees. It should be in
            (min, max) format. If min=max, then it is a single fixed magnitude operation (default = (0.1, 1.9)).

    Raises:
        TypeError : If `degrees` is not a list or tuple.
        ValueError: If `degrees` is negative.
        ValueError: If `degrees` is in (max, min) format instead of (min, max).

    Examples:
        >>> transforms_list = [c_vision.Decode(), c_vision.RandomSharpness(degrees=(0.2, 1.9))]
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """

    @check_positive_degrees
    def __init__(self, degrees=(0.1, 1.9)):
        # Stored unchanged; parse() hands the whole (min, max) pair to the operation.
        self.degrees = degrees

    def parse(self):
        """Return the cde.RandomSharpnessOperation built from the stored degree range."""
        degree_range = self.degrees
        return cde.RandomSharpnessOperation(degree_range)
class RandomVerticalFlip(ImageTensorOperation):
    """
    Flip the input image vertically, at random, with the given probability.

    Args:
        prob (float, optional): Probability that the image gets flipped (default=0.5).

    Examples:
        >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlip(0.25)]
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """

    @check_prob
    def __init__(self, prob=0.5):
        # Stored unchanged; parse() reads it back when building the operation.
        self.prob = prob

    def parse(self):
        """Return the cde.RandomVerticalFlipOperation built from the stored probability."""
        flip_probability = self.prob
        return cde.RandomVerticalFlipOperation(flip_probability)
class Rescale(ImageTensorOperation):
    """
    Rescale the input image with the given rescale and shift factors. The result is computed as:
    output = image * rescale + shift.

    Args:
        rescale (float): Rescale factor.
        shift (float): Shift factor.

    Examples:
        >>> transforms_list = [c_vision.Decode(), c_vision.Rescale(1.0 / 255.0, -1.0)]
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """

    @check_rescale
    def __init__(self, rescale, shift):
        # Both factors are stored unchanged and read back by parse().
        self.rescale, self.shift = rescale, shift

    def parse(self):
        """Return the cde.RescaleOperation built from the stored rescale and shift factors."""
        factor, offset = self.rescale, self.shift
        return cde.RescaleOperation(factor, offset)
class ResizeWithBBox(ImageTensorOperation):
    """
    Resize the input image to the given size and adjust bounding boxes accordingly.

    Args:
        size (Union[int, sequence]): The output size of the resized image.
            An integer resizes the smaller edge of the image to this value while keeping
            the same image aspect ratio.
            A sequence of length 2 is read as (height, width).
        interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR).
            It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC].

            - Inter.LINEAR, means interpolation method is bilinear interpolation.

            - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation.

            - Inter.BICUBIC, means interpolation method is bicubic interpolation.

    Examples:
        >>> from mindspore.dataset.vision import Inter
        >>> decode_op = c_vision.Decode()
        >>> bbox_op = c_vision.ResizeWithBBox(50, Inter.NEAREST)
        >>> transforms_list = [decode_op, bbox_op]
        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
        ...                                                 input_columns=["image"])
    """

    @check_resize_interpolation
    def __init__(self, size, interpolation=Inter.LINEAR):
        # Stored exactly as given; an integer size is only wrapped inside parse().
        self.size, self.interpolation = size, interpolation

    def parse(self):
        """Return the cde.ResizeWithBBoxOperation, passing an integer size as a 1-tuple."""
        target_size = (self.size,) if isinstance(self.size, int) else self.size
        return cde.ResizeWithBBoxOperation(target_size, DE_C_INTER_MODE[self.interpolation])
1641 fill_value (Union[int, tuple], optional): Optional fill color for the area outside the rotated image. 1642 If it is a 3-tuple, it is used to fill R, G, B channels respectively. 1643 If it is an integer, it is used for all RGB channels. 1644 The fill_value values must be in range [0, 255] (default=0). 1645 1646 Examples: 1647 >>> from mindspore.dataset.vision import Inter 1648 >>> transforms_list = [c_vision.Decode(), 1649 ... c_vision.Rotate(degrees=30.0, 1650 ... resample=Inter.NEAREST, 1651 ... expand=True)] 1652 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 1653 ... input_columns=["image"]) 1654 """ 1655 1656 @check_rotate 1657 def __init__(self, degrees, resample=Inter.NEAREST, expand=False, center=None, fill_value=0): 1658 if isinstance(degrees, (int, float)): 1659 degrees = degrees % 360 1660 if center is None: 1661 center = () 1662 if isinstance(fill_value, int): 1663 fill_value = tuple([fill_value] * 3) 1664 self.degrees = degrees 1665 self.resample = resample 1666 self.expand = expand 1667 self.center = center 1668 self.fill_value = fill_value 1669 1670 def parse(self): 1671 return cde.RotateOperation(self.degrees, DE_C_INTER_MODE[self.resample], self.expand, self.center, 1672 self.fill_value) 1673 1674 1675class SlicePatches(ImageTensorOperation): 1676 """ 1677 Slice Tensor to multiple patches in horizontal and vertical directions. 1678 1679 The usage scenario is suitable to large height and width Tensor. The Tensor 1680 will keep the same if set both num_height and num_width to 1. And the 1681 number of output tensors is equal to num_height*num_width. 1682 1683 Args: 1684 num_height (int, optional): The number of patches in vertical direction (default=1). 1685 num_width (int, optional): The number of patches in horizontal direction (default=1). 1686 slice_mode (Inter mode, optional): A mode represents pad or drop (default=SliceMode.PAD). 1687 It can be any of [SliceMode.PAD, SliceMode.DROP]. 
1688 fill_value (int, optional): The border width in number of pixels in 1689 right and bottom direction if slice_mode is set to be SliceMode.PAD (default=0). 1690 1691 Examples: 1692 >>> # default padding mode 1693 >>> decode_op = c_vision.Decode() 1694 >>> num_h, num_w = (1, 4) 1695 >>> slice_patches_op = c_vision.SlicePatches(num_h, num_w) 1696 >>> transforms_list = [decode_op, slice_patches_op] 1697 >>> cols = ['img' + str(x) for x in range(num_h*num_w)] 1698 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 1699 ... input_columns=["image"], 1700 ... output_columns=cols, column_order=cols) 1701 """ 1702 1703 @check_slice_patches 1704 def __init__(self, num_height=1, num_width=1, slice_mode=SliceMode.PAD, fill_value=0): 1705 self.num_height = num_height 1706 self.num_width = num_width 1707 self.slice_mode = slice_mode 1708 self.fill_value = fill_value 1709 1710 def parse(self): 1711 return cde.SlicePatchesOperation(self.num_height, self.num_width, 1712 DE_C_SLICE_MODE[self.slice_mode], self.fill_value) 1713 1714 1715class SoftDvppDecodeRandomCropResizeJpeg(ImageTensorOperation): 1716 """ 1717 A combination of `Crop`, `Decode` and `Resize` using the simulation algorithm of Ascend series chip DVPP module. 1718 1719 The usage scenario is consistent with SoftDvppDecodeResizeJpeg. 1720 The input image size should be in range [32*32, 8192*8192]. 1721 The zoom-out and zoom-in multiples of the image length and width should in the range [1/32, 16]. 1722 Only images with an even resolution can be output. The output of odd resolution is not supported. 1723 1724 Args: 1725 size (Union[int, sequence]): The size of the output image. 1726 If size is an integer, a square crop of size (size, size) is returned. 1727 If size is a sequence of length 2, it should be (height, width). 1728 scale (list, tuple, optional): Range [min, max) of respective size of the 1729 original size to be cropped (default=(0.08, 1.0)). 
1730 ratio (list, tuple, optional): Range [min, max) of aspect ratio to be 1731 cropped (default=(3. / 4., 4. / 3.)). 1732 max_attempts (int, optional): The maximum number of attempts to propose a valid crop_area (default=10). 1733 If exceeded, fall back to use center_crop instead. 1734 1735 Examples: 1736 >>> # decode, randomly crop and resize image, keeping aspect ratio 1737 >>> transforms_list1 = [c_vision.SoftDvppDecodeRandomCropResizeJpeg(90)] 1738 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, 1739 ... input_columns=["image"]) 1740 >>> # decode, randomly crop and resize to landscape style 1741 >>> transforms_list2 = [c_vision.SoftDvppDecodeRandomCropResizeJpeg((80, 100))] 1742 >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, 1743 ... input_columns=["image"]) 1744 """ 1745 1746 @check_soft_dvpp_decode_random_crop_resize_jpeg 1747 def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), max_attempts=10): 1748 if isinstance(size, int): 1749 size = (size, size) 1750 self.size = size 1751 self.scale = scale 1752 self.ratio = ratio 1753 self.max_attempts = max_attempts 1754 1755 def parse(self): 1756 return cde.SoftDvppDecodeRandomCropResizeJpegOperation(self.size, self.scale, self.ratio, self.max_attempts) 1757 1758 1759class SoftDvppDecodeResizeJpeg(ImageTensorOperation): 1760 """ 1761 Decode and resize JPEG image using the simulation algorithm of Ascend series chip DVPP module. 1762 1763 It is recommended to use this algorithm in the following scenarios: 1764 When training, the DVPP of the Ascend chip is not used, 1765 and the DVPP of the Ascend chip is used during inference, 1766 and the accuracy of inference is lower than the accuracy of training; 1767 and the input image size should be in range [32*32, 8192*8192]. 1768 The zoom-out and zoom-in multiples of the image length and width should in the range [1/32, 16]. 1769 Only images with an even resolution can be output. 
The output of odd resolution is not supported. 1770 1771 Args: 1772 size (Union[int, sequence]): The output size of the resized image. 1773 If size is an integer, smaller edge of the image will be resized to this value with 1774 the same image aspect ratio. 1775 If size is a sequence of length 2, it should be (height, width). 1776 1777 Examples: 1778 >>> # decode and resize image, keeping aspect ratio 1779 >>> transforms_list1 = [c_vision.SoftDvppDecodeResizeJpeg(70)] 1780 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, 1781 ... input_columns=["image"]) 1782 >>> # decode and resize to portrait style 1783 >>> transforms_list2 = [c_vision.SoftDvppDecodeResizeJpeg((80, 60))] 1784 >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, 1785 ... input_columns=["image"]) 1786 """ 1787 1788 @check_resize 1789 def __init__(self, size): 1790 if isinstance(size, int): 1791 size = (size,) 1792 self.size = size 1793 1794 def parse(self): 1795 return cde.SoftDvppDecodeResizeJpegOperation(self.size) 1796 1797 1798class UniformAugment(ImageTensorOperation): 1799 """ 1800 Perform randomly selected augmentation on input image. 1801 1802 Args: 1803 transforms: List of C++ operations (Python operations are not accepted). 1804 num_ops (int, optional): Number of operations to be selected and applied (default=2). 1805 1806 Examples: 1807 >>> import mindspore.dataset.vision.py_transforms as py_vision 1808 >>> transforms_list = [c_vision.RandomHorizontalFlip(), 1809 ... c_vision.RandomVerticalFlip(), 1810 ... c_vision.RandomColorAdjust(), 1811 ... c_vision.RandomRotation(degrees=45)] 1812 >>> uni_aug_op = c_vision.UniformAugment(transforms=transforms_list, num_ops=2) 1813 >>> transforms_all = [c_vision.Decode(), c_vision.Resize(size=[224, 224]), 1814 ... uni_aug_op] 1815 >>> image_folder_dataset_1 = image_folder_dataset.map(operations=transforms_all, 1816 ... input_columns="image", 1817 ... 
num_parallel_workers=1) 1818 """ 1819 1820 @check_uniform_augment_cpp 1821 def __init__(self, transforms, num_ops=2): 1822 self.transforms = transforms 1823 self.num_ops = num_ops 1824 1825 def parse(self): 1826 transforms = [] 1827 for op in self.transforms: 1828 if op and getattr(op, 'parse', None): 1829 transforms.append(op.parse()) 1830 else: 1831 transforms.append(op) 1832 return cde.UniformAugOperation(transforms, self.num_ops) 1833 1834 1835class VerticalFlip(ImageTensorOperation): 1836 """ 1837 Flip the input image vertically. 1838 1839 Examples: 1840 >>> transforms_list = [c_vision.Decode(), c_vision.VerticalFlip()] 1841 >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, 1842 ... input_columns=["image"]) 1843 """ 1844 1845 def parse(self): 1846 return cde.VerticalFlipOperation() 1847