1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Implementation of image ops.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import functools 22import numpy as np 23 24from tensorflow.python.eager import def_function 25from tensorflow.python.framework import constant_op 26from tensorflow.python.framework import dtypes 27from tensorflow.python.framework import ops 28from tensorflow.python.framework import random_seed 29from tensorflow.python.framework import tensor_shape 30from tensorflow.python.framework import tensor_util 31from tensorflow.python.ops import array_ops 32from tensorflow.python.ops import check_ops 33from tensorflow.python.ops import control_flow_ops 34from tensorflow.python.ops import gen_image_ops 35from tensorflow.python.ops import math_ops 36from tensorflow.python.ops import nn 37from tensorflow.python.ops import nn_ops 38from tensorflow.python.ops import random_ops 39from tensorflow.python.ops import sort_ops 40from tensorflow.python.ops import stateless_random_ops 41from tensorflow.python.ops import string_ops 42from tensorflow.python.ops import variables 43from tensorflow.python.util import deprecation 44from tensorflow.python.util import dispatch 45from tensorflow.python.util.tf_export import tf_export 46 
47ops.NotDifferentiable('RandomCrop') 48# TODO(b/31222613): This op may be differentiable, and there may be 49# latent bugs here. 50ops.NotDifferentiable('HSVToRGB') 51ops.NotDifferentiable('DrawBoundingBoxes') 52ops.NotDifferentiable('SampleDistortedBoundingBox') 53ops.NotDifferentiable('SampleDistortedBoundingBoxV2') 54# TODO(bsteiner): Implement the gradient function for extract_glimpse 55# TODO(b/31222613): This op may be differentiable, and there may be 56# latent bugs here. 57ops.NotDifferentiable('ExtractGlimpse') 58ops.NotDifferentiable('NonMaxSuppression') 59ops.NotDifferentiable('NonMaxSuppressionV2') 60ops.NotDifferentiable('NonMaxSuppressionWithOverlaps') 61ops.NotDifferentiable('GenerateBoundingBoxProposals') 62 63 64# pylint: disable=invalid-name 65def _assert(cond, ex_type, msg): 66 """A polymorphic assert, works with tensors and boolean expressions. 67 68 If `cond` is not a tensor, behave like an ordinary assert statement, except 69 that a empty list is returned. If `cond` is a tensor, return a list 70 containing a single TensorFlow assert op. 71 72 Args: 73 cond: Something evaluates to a boolean value. May be a tensor. 74 ex_type: The exception class to use. 75 msg: The error message. 76 77 Returns: 78 A list, containing at most one assert op. 79 """ 80 if _is_tensor(cond): 81 return [control_flow_ops.Assert(cond, [msg])] 82 else: 83 if not cond: 84 raise ex_type(msg) 85 else: 86 return [] 87 88 89def _is_tensor(x): 90 """Returns `True` if `x` is a symbolic tensor-like object. 91 92 Args: 93 x: A python object to check. 94 95 Returns: 96 `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`. 97 """ 98 return isinstance(x, (ops.Tensor, variables.Variable)) 99 100 101def _ImageDimensions(image, rank): 102 """Returns the dimensions of an image tensor. 103 104 Args: 105 image: A rank-D Tensor. For 3-D of shape: `[height, width, channels]`. 
106 rank: The expected rank of the image 107 108 Returns: 109 A list of corresponding to the dimensions of the 110 input image. Dimensions that are statically known are python integers, 111 otherwise, they are integer scalar tensors. 112 """ 113 if image.get_shape().is_fully_defined(): 114 return image.get_shape().as_list() 115 else: 116 static_shape = image.get_shape().with_rank(rank).as_list() 117 dynamic_shape = array_ops.unstack(array_ops.shape(image), rank) 118 return [ 119 s if s is not None else d for s, d in zip(static_shape, dynamic_shape) 120 ] 121 122 123def _Check3DImage(image, require_static=True): 124 """Assert that we are working with a properly shaped image. 125 126 Args: 127 image: 3-D Tensor of shape [height, width, channels] 128 require_static: If `True`, requires that all dimensions of `image` are known 129 and non-zero. 130 131 Raises: 132 ValueError: if `image.shape` is not a 3-vector. 133 134 Returns: 135 An empty list, if `image` has fully defined dimensions. Otherwise, a list 136 containing an assert op is returned. 137 """ 138 try: 139 image_shape = image.get_shape().with_rank(3) 140 except ValueError: 141 raise ValueError("'image' (shape %s) must be three-dimensional." % 142 image.shape) 143 if require_static and not image_shape.is_fully_defined(): 144 raise ValueError("'image' (shape %s) must be fully defined." % image_shape) 145 if any(x == 0 for x in image_shape): 146 raise ValueError("all dims of 'image.shape' must be > 0: %s" % image_shape) 147 if not image_shape.is_fully_defined(): 148 return [ 149 check_ops.assert_positive( 150 array_ops.shape(image), 151 ["all dims of 'image.shape' " 152 'must be > 0.']) 153 ] 154 else: 155 return [] 156 157 158def _Assert3DImage(image): 159 """Assert that we are working with a properly shaped image. 160 161 Performs the check statically if possible (i.e. if the shape 162 is statically known). Otherwise adds a control dependency 163 to an assert op that checks the dynamic shape. 
164 165 Args: 166 image: 3-D Tensor of shape [height, width, channels] 167 168 Raises: 169 ValueError: if `image.shape` is not a 3-vector. 170 171 Returns: 172 If the shape of `image` could be verified statically, `image` is 173 returned unchanged, otherwise there will be a control dependency 174 added that asserts the correct dynamic shape. 175 """ 176 return control_flow_ops.with_dependencies( 177 _Check3DImage(image, require_static=False), image) 178 179 180def _AssertAtLeast3DImage(image): 181 """Assert that we are working with a properly shaped image. 182 183 Performs the check statically if possible (i.e. if the shape 184 is statically known). Otherwise adds a control dependency 185 to an assert op that checks the dynamic shape. 186 187 Args: 188 image: >= 3-D Tensor of size [*, height, width, depth] 189 190 Raises: 191 ValueError: if image.shape is not a [>= 3] vector. 192 193 Returns: 194 If the shape of `image` could be verified statically, `image` is 195 returned unchanged, otherwise there will be a control dependency 196 added that asserts the correct dynamic shape. 197 """ 198 return control_flow_ops.with_dependencies( 199 _CheckAtLeast3DImage(image, require_static=False), image) 200 201 202def _CheckAtLeast3DImage(image, require_static=True): 203 """Assert that we are working with a properly shaped image. 204 205 Args: 206 image: >= 3-D Tensor of size [*, height, width, depth] 207 require_static: If `True`, requires that all dimensions of `image` are known 208 and non-zero. 209 210 Raises: 211 ValueError: if image.shape is not a [>= 3] vector. 212 213 Returns: 214 An empty list, if `image` has fully defined dimensions. Otherwise, a list 215 containing an assert op is returned. 216 """ 217 try: 218 if image.get_shape().ndims is None: 219 image_shape = image.get_shape().with_rank(3) 220 else: 221 image_shape = image.get_shape().with_rank_at_least(3) 222 except ValueError: 223 raise ValueError("'image' (shape %s) must be at least three-dimensional." 
% 224 image.shape) 225 if require_static and not image_shape.is_fully_defined(): 226 raise ValueError('\'image\' must be fully defined.') 227 if any(x == 0 for x in image_shape[-3:]): 228 raise ValueError('inner 3 dims of \'image.shape\' must be > 0: %s' % 229 image_shape) 230 if not image_shape[-3:].is_fully_defined(): 231 return [ 232 check_ops.assert_positive( 233 array_ops.shape(image)[-3:], 234 ["inner 3 dims of 'image.shape' " 235 'must be > 0.']), 236 check_ops.assert_greater_equal( 237 array_ops.rank(image), 238 3, 239 message="'image' must be at least three-dimensional.") 240 ] 241 else: 242 return [] 243 244 245def _AssertGrayscaleImage(image): 246 """Assert that we are working with a properly shaped grayscale image. 247 248 Performs the check statically if possible (i.e. if the shape 249 is statically known). Otherwise adds a control dependency 250 to an assert op that checks the dynamic shape. 251 252 Args: 253 image: >= 2-D Tensor of size [*, 1] 254 255 Raises: 256 ValueError: if image.shape is not a [>= 2] vector or if 257 last dimension is not size 1. 258 259 Returns: 260 If the shape of `image` could be verified statically, `image` is 261 returned unchanged, otherwise there will be a control dependency 262 added that asserts the correct dynamic shape. 263 """ 264 return control_flow_ops.with_dependencies( 265 _CheckGrayscaleImage(image, require_static=False), image) 266 267 268def _CheckGrayscaleImage(image, require_static=True): 269 """Assert that we are working with properly shaped grayscale image. 270 271 Args: 272 image: >= 2-D Tensor of size [*, 1] 273 require_static: Boolean, whether static shape is required. 274 275 Raises: 276 ValueError: if image.shape is not a [>= 2] vector or if 277 last dimension is not size 1. 278 279 Returns: 280 An empty list, if `image` has fully defined dimensions. Otherwise, a list 281 containing an assert op is returned. 
282 """ 283 try: 284 if image.get_shape().ndims is None: 285 image_shape = image.get_shape().with_rank(2) 286 else: 287 image_shape = image.get_shape().with_rank_at_least(2) 288 except ValueError: 289 raise ValueError('A grayscale image (shape %s) must be at least ' 290 'two-dimensional.' % image.shape) 291 if require_static and not image_shape.is_fully_defined(): 292 raise ValueError('\'image\' must be fully defined.') 293 if image_shape.is_fully_defined(): 294 if image_shape[-1] != 1: 295 raise ValueError('Last dimension of a grayscale image should be size 1.') 296 if not image_shape.is_fully_defined(): 297 return [ 298 check_ops.assert_equal( 299 array_ops.shape(image)[-1], 300 1, 301 message='Last dimension of a grayscale image should be size 1.'), 302 check_ops.assert_greater_equal( 303 array_ops.rank(image), 304 3, 305 message='A grayscale image must be at least two-dimensional.') 306 ] 307 else: 308 return [] 309 310 311def fix_image_flip_shape(image, result): 312 """Set the shape to 3 dimensional if we don't know anything else. 313 314 Args: 315 image: original image size 316 result: flipped or transformed image 317 318 Returns: 319 An image whose shape is at least (None, None, None). 320 """ 321 322 image_shape = image.get_shape() 323 if image_shape == tensor_shape.unknown_shape(): 324 result.set_shape([None, None, None]) 325 else: 326 result.set_shape(image_shape) 327 return result 328 329 330@tf_export('image.random_flip_up_down') 331@dispatch.add_dispatch_support 332def random_flip_up_down(image, seed=None): 333 """Randomly flips an image vertically (upside down). 334 335 With a 1 in 2 chance, outputs the contents of `image` flipped along the first 336 dimension, which is `height`. Otherwise, output the image as-is. 337 When passing a batch of images, each image will be randomly flipped 338 independent of other images. 
339 340 Example usage: 341 342 >>> image = np.array([[[1], [2]], [[3], [4]]]) 343 >>> tf.image.random_flip_up_down(image, 3).numpy().tolist() 344 [[[3], [4]], [[1], [2]]] 345 346 Randomly flip multiple images. 347 348 >>> images = np.array( 349 ... [ 350 ... [[[1], [2]], [[3], [4]]], 351 ... [[[5], [6]], [[7], [8]]] 352 ... ]) 353 >>> tf.image.random_flip_up_down(images, 4).numpy().tolist() 354 [[[[3], [4]], [[1], [2]]], [[[5], [6]], [[7], [8]]]] 355 356 For producing deterministic results given a `seed` value, use 357 `tf.image.stateless_random_flip_up_down`. Unlike using the `seed` param 358 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 359 same results given the same seed independent of how many times the function is 360 called, and independent of global seed settings (e.g. tf.random.set_seed). 361 362 Args: 363 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 364 of shape `[height, width, channels]`. 365 seed: A Python integer. Used to create a random seed. See 366 `tf.compat.v1.set_random_seed` for behavior. 367 368 Returns: 369 A tensor of the same type and shape as `image`. 370 Raises: 371 ValueError: if the shape of `image` not supported. 372 """ 373 random_func = functools.partial(random_ops.random_uniform, seed=seed) 374 return _random_flip(image, 0, random_func, 'random_flip_up_down') 375 376 377@tf_export('image.random_flip_left_right') 378@dispatch.add_dispatch_support 379def random_flip_left_right(image, seed=None): 380 """Randomly flip an image horizontally (left to right). 381 382 With a 1 in 2 chance, outputs the contents of `image` flipped along the 383 second dimension, which is `width`. Otherwise output the image as-is. 384 When passing a batch of images, each image will be randomly flipped 385 independent of other images. 
386 387 Example usage: 388 389 >>> image = np.array([[[1], [2]], [[3], [4]]]) 390 >>> tf.image.random_flip_left_right(image, 5).numpy().tolist() 391 [[[2], [1]], [[4], [3]]] 392 393 Randomly flip multiple images. 394 395 >>> images = np.array( 396 ... [ 397 ... [[[1], [2]], [[3], [4]]], 398 ... [[[5], [6]], [[7], [8]]] 399 ... ]) 400 >>> tf.image.random_flip_left_right(images, 6).numpy().tolist() 401 [[[[2], [1]], [[4], [3]]], [[[5], [6]], [[7], [8]]]] 402 403 For producing deterministic results given a `seed` value, use 404 `tf.image.stateless_random_flip_left_right`. Unlike using the `seed` param 405 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 406 same results given the same seed independent of how many times the function is 407 called, and independent of global seed settings (e.g. tf.random.set_seed). 408 409 Args: 410 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 411 of shape `[height, width, channels]`. 412 seed: A Python integer. Used to create a random seed. See 413 `tf.compat.v1.set_random_seed` for behavior. 414 415 Returns: 416 A tensor of the same type and shape as `image`. 417 418 Raises: 419 ValueError: if the shape of `image` not supported. 420 """ 421 random_func = functools.partial(random_ops.random_uniform, seed=seed) 422 return _random_flip(image, 1, random_func, 'random_flip_left_right') 423 424 425@tf_export('image.stateless_random_flip_left_right', v1=[]) 426@dispatch.add_dispatch_support 427def stateless_random_flip_left_right(image, seed): 428 """Randomly flip an image horizontally (left to right) deterministically. 429 430 Guarantees the same results given the same `seed` independent of how many 431 times the function is called, and independent of global seed settings (e.g. 432 `tf.random.set_seed`). 
433 434 Example usage: 435 436 >>> image = np.array([[[1], [2]], [[3], [4]]]) 437 >>> seed = (2, 3) 438 >>> tf.image.stateless_random_flip_left_right(image, seed).numpy().tolist() 439 [[[2], [1]], [[4], [3]]] 440 441 Args: 442 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 443 of shape `[height, width, channels]`. 444 seed: A shape [2] Tensor, the seed to the random number generator. Must have 445 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 446 447 Returns: 448 A tensor of the same type and shape as `image`. 449 """ 450 random_func = functools.partial( 451 stateless_random_ops.stateless_random_uniform, seed=seed) 452 return _random_flip( 453 image, 1, random_func, 'stateless_random_flip_left_right') 454 455 456@tf_export('image.stateless_random_flip_up_down', v1=[]) 457@dispatch.add_dispatch_support 458def stateless_random_flip_up_down(image, seed): 459 """Randomly flip an image vertically (upside down) deterministically. 460 461 Guarantees the same results given the same `seed` independent of how many 462 times the function is called, and independent of global seed settings (e.g. 463 `tf.random.set_seed`). 464 465 Example usage: 466 467 >>> image = np.array([[[1], [2]], [[3], [4]]]) 468 >>> seed = (2, 3) 469 >>> tf.image.stateless_random_flip_up_down(image, seed).numpy().tolist() 470 [[[3], [4]], [[1], [2]]] 471 472 Args: 473 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 474 of shape `[height, width, channels]`. 475 seed: A shape [2] Tensor, the seed to the random number generator. Must have 476 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 477 478 Returns: 479 A tensor of the same type and shape as `image`. 
480 """ 481 random_func = functools.partial( 482 stateless_random_ops.stateless_random_uniform, seed=seed) 483 return _random_flip( 484 image, 0, random_func, 'stateless_random_flip_up_down') 485 486 487def _random_flip(image, flip_index, random_func, scope_name): 488 """Randomly (50% chance) flip an image along axis `flip_index`. 489 490 Args: 491 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 492 of shape `[height, width, channels]`. 493 flip_index: Dimension along which to flip the image. 494 Vertical is 0, Horizontal is 1. 495 random_func: partial function for calling either stateful or stateless 496 random ops with `seed` parameter specified. 497 scope_name: Name of the scope in which the ops are added. 498 499 Returns: 500 A tensor of the same type and shape as `image`. 501 502 Raises: 503 ValueError: if the shape of `image` not supported. 504 """ 505 with ops.name_scope(None, scope_name, [image]) as scope: 506 image = ops.convert_to_tensor(image, name='image') 507 image = _AssertAtLeast3DImage(image) 508 shape = image.get_shape() 509 510 def f_rank3(): 511 uniform_random = random_func(shape=[], minval=0, maxval=1.0) 512 mirror_cond = math_ops.less(uniform_random, .5) 513 result = control_flow_ops.cond( 514 mirror_cond, 515 lambda: array_ops.reverse(image, [flip_index]), 516 lambda: image, 517 name=scope) 518 return fix_image_flip_shape(image, result) 519 520 def f_rank4(): 521 batch_size = array_ops.shape(image)[0] 522 uniform_random = random_func(shape=[batch_size], minval=0, maxval=1.0) 523 flips = math_ops.round( 524 array_ops.reshape(uniform_random, [batch_size, 1, 1, 1])) 525 flips = math_ops.cast(flips, image.dtype) 526 flipped_input = array_ops.reverse(image, [flip_index + 1]) 527 return flips * flipped_input + (1 - flips) * image 528 529 if shape.ndims is None: 530 rank = array_ops.rank(image) 531 return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 532 if shape.ndims == 3: 533 return f_rank3() 534 elif 
shape.ndims == 4: 535 return f_rank4() 536 else: 537 raise ValueError( 538 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) 539 540 541@tf_export('image.flip_left_right') 542@dispatch.add_dispatch_support 543def flip_left_right(image): 544 """Flip an image horizontally (left to right). 545 546 Outputs the contents of `image` flipped along the width dimension. 547 548 See also `tf.reverse`. 549 550 Usage Example: 551 552 >>> x = [[[1.0, 2.0, 3.0], 553 ... [4.0, 5.0, 6.0]], 554 ... [[7.0, 8.0, 9.0], 555 ... [10.0, 11.0, 12.0]]] 556 >>> tf.image.flip_left_right(x) 557 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 558 array([[[ 4., 5., 6.], 559 [ 1., 2., 3.]], 560 [[10., 11., 12.], 561 [ 7., 8., 9.]]], dtype=float32)> 562 563 Args: 564 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 565 of shape `[height, width, channels]`. 566 567 Returns: 568 A tensor of the same type and shape as `image`. 569 570 Raises: 571 ValueError: if the shape of `image` not supported. 572 """ 573 return _flip(image, 1, 'flip_left_right') 574 575 576@tf_export('image.flip_up_down') 577@dispatch.add_dispatch_support 578def flip_up_down(image): 579 """Flip an image vertically (upside down). 580 581 Outputs the contents of `image` flipped along the height dimension. 582 583 See also `reverse()`. 584 585 Usage Example: 586 587 >>> x = [[[1.0, 2.0, 3.0], 588 ... [4.0, 5.0, 6.0]], 589 ... [[7.0, 8.0, 9.0], 590 ... [10.0, 11.0, 12.0]]] 591 >>> tf.image.flip_up_down(x) 592 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 593 array([[[ 7., 8., 9.], 594 [10., 11., 12.]], 595 [[ 1., 2., 3.], 596 [ 4., 5., 6.]]], dtype=float32)> 597 598 Args: 599 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 600 of shape `[height, width, channels]`. 601 602 Returns: 603 A `Tensor` of the same type and shape as `image`. 604 605 Raises: 606 ValueError: if the shape of `image` not supported. 
607 """ 608 return _flip(image, 0, 'flip_up_down') 609 610 611def _flip(image, flip_index, scope_name): 612 """Flip an image either horizontally or vertically. 613 614 Outputs the contents of `image` flipped along the dimension `flip_index`. 615 616 See also `reverse()`. 617 618 Args: 619 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 620 of shape `[height, width, channels]`. 621 flip_index: 0 For vertical, 1 for horizontal. 622 scope_name: string, scope name. 623 624 Returns: 625 A `Tensor` of the same type and shape as `image`. 626 627 Raises: 628 ValueError: if the shape of `image` not supported. 629 """ 630 with ops.name_scope(None, scope_name, [image]): 631 image = ops.convert_to_tensor(image, name='image') 632 image = _AssertAtLeast3DImage(image) 633 shape = image.get_shape() 634 635 def f_rank3(): 636 return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index])) 637 638 def f_rank4(): 639 return array_ops.reverse(image, [flip_index + 1]) 640 641 if shape.ndims is None: 642 rank = array_ops.rank(image) 643 return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 644 elif shape.ndims == 3: 645 return f_rank3() 646 elif shape.ndims == 4: 647 return f_rank4() 648 else: 649 raise ValueError( 650 '\'image\' (shape %s)must have either 3 or 4 dimensions.' % shape) 651 652 653@tf_export('image.rot90') 654@dispatch.add_dispatch_support 655def rot90(image, k=1, name=None): 656 """Rotate image(s) counter-clockwise by 90 degrees. 657 658 659 For example: 660 661 >>> a=tf.constant([[[1],[2]], 662 ... 
[[3],[4]]]) 663 >>> # rotating `a` counter clockwise by 90 degrees 664 >>> a_rot=tf.image.rot90(a) 665 >>> print(a_rot[...,0].numpy()) 666 [[2 4] 667 [1 3]] 668 >>> # rotating `a` counter clockwise by 270 degrees 669 >>> a_rot=tf.image.rot90(a, k=3) 670 >>> print(a_rot[...,0].numpy()) 671 [[3 1] 672 [4 2]] 673 674 Args: 675 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 676 of shape `[height, width, channels]`. 677 k: A scalar integer. The number of times the image is rotated by 90 degrees. 678 name: A name for this operation (optional). 679 680 Returns: 681 A rotated tensor of the same type and shape as `image`. 682 683 Raises: 684 ValueError: if the shape of `image` not supported. 685 """ 686 with ops.name_scope(name, 'rot90', [image, k]) as scope: 687 image = ops.convert_to_tensor(image, name='image') 688 image = _AssertAtLeast3DImage(image) 689 k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k') 690 k.get_shape().assert_has_rank(0) 691 k = math_ops.mod(k, 4) 692 693 shape = image.get_shape() 694 if shape.ndims is None: 695 rank = array_ops.rank(image) 696 697 def f_rank3(): 698 return _rot90_3D(image, k, scope) 699 700 def f_rank4(): 701 return _rot90_4D(image, k, scope) 702 703 return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 704 elif shape.ndims == 3: 705 return _rot90_3D(image, k, scope) 706 elif shape.ndims == 4: 707 return _rot90_4D(image, k, scope) 708 else: 709 raise ValueError( 710 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) 711 712 713def _rot90_3D(image, k, name_scope): 714 """Rotate image counter-clockwise by 90 degrees `k` times. 715 716 Args: 717 image: 3-D Tensor of shape `[height, width, channels]`. 718 k: A scalar integer. The number of times the image is rotated by 90 degrees. 719 name_scope: A valid TensorFlow name scope. 720 721 Returns: 722 A 3-D tensor of the same type and shape as `image`. 
723 724 """ 725 726 def _rot90(): 727 return array_ops.transpose(array_ops.reverse_v2(image, [1]), [1, 0, 2]) 728 729 def _rot180(): 730 return array_ops.reverse_v2(image, [0, 1]) 731 732 def _rot270(): 733 return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), [1]) 734 735 cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180), 736 (math_ops.equal(k, 3), _rot270)] 737 738 result = control_flow_ops.case( 739 cases, default=lambda: image, exclusive=True, name=name_scope) 740 result.set_shape([None, None, image.get_shape()[2]]) 741 return result 742 743 744def _rot90_4D(images, k, name_scope): 745 """Rotate batch of images counter-clockwise by 90 degrees `k` times. 746 747 Args: 748 images: 4-D Tensor of shape `[height, width, channels]`. 749 k: A scalar integer. The number of times the images are rotated by 90 750 degrees. 751 name_scope: A valid TensorFlow name scope. 752 753 Returns: 754 A 4-D `Tensor` of the same type and shape as `images`. 755 """ 756 757 def _rot90(): 758 return array_ops.transpose(array_ops.reverse_v2(images, [2]), [0, 2, 1, 3]) 759 760 def _rot180(): 761 return array_ops.reverse_v2(images, [1, 2]) 762 763 def _rot270(): 764 return array_ops.reverse_v2(array_ops.transpose(images, [0, 2, 1, 3]), [2]) 765 766 cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180), 767 (math_ops.equal(k, 3), _rot270)] 768 769 result = control_flow_ops.case( 770 cases, default=lambda: images, exclusive=True, name=name_scope) 771 shape = result.get_shape() 772 result.set_shape([shape[0], None, None, shape[3]]) 773 return result 774 775 776@tf_export('image.transpose', v1=['image.transpose', 'image.transpose_image']) 777@dispatch.add_dispatch_support 778def transpose(image, name=None): 779 """Transpose image(s) by swapping the height and width dimension. 780 781 Usage Example: 782 783 >>> x = [[[1.0, 2.0, 3.0], 784 ... [4.0, 5.0, 6.0]], 785 ... [[7.0, 8.0, 9.0], 786 ... 
[10.0, 11.0, 12.0]]] 787 >>> tf.image.transpose(x) 788 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 789 array([[[ 1., 2., 3.], 790 [ 7., 8., 9.]], 791 [[ 4., 5., 6.], 792 [10., 11., 12.]]], dtype=float32)> 793 794 Args: 795 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 796 of shape `[height, width, channels]`. 797 name: A name for this operation (optional). 798 799 Returns: 800 If `image` was 4-D, a 4-D float Tensor of shape 801 `[batch, width, height, channels]` 802 If `image` was 3-D, a 3-D float Tensor of shape 803 `[width, height, channels]` 804 805 Raises: 806 ValueError: if the shape of `image` not supported. 807 808 Usage Example: 809 810 >>> image = [[[1, 2], [3, 4]], 811 ... [[5, 6], [7, 8]], 812 ... [[9, 10], [11, 12]]] 813 >>> image = tf.constant(image) 814 >>> tf.image.transpose(image) 815 <tf.Tensor: shape=(2, 3, 2), dtype=int32, numpy= 816 array([[[ 1, 2], 817 [ 5, 6], 818 [ 9, 10]], 819 [[ 3, 4], 820 [ 7, 8], 821 [11, 12]]], dtype=int32)> 822 """ 823 with ops.name_scope(name, 'transpose', [image]): 824 image = ops.convert_to_tensor(image, name='image') 825 image = _AssertAtLeast3DImage(image) 826 shape = image.get_shape() 827 if shape.ndims is None: 828 rank = array_ops.rank(image) 829 830 def f_rank3(): 831 return array_ops.transpose(image, [1, 0, 2], name=name) 832 833 def f_rank4(): 834 return array_ops.transpose(image, [0, 2, 1, 3], name=name) 835 836 return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 837 elif shape.ndims == 3: 838 return array_ops.transpose(image, [1, 0, 2], name=name) 839 elif shape.ndims == 4: 840 return array_ops.transpose(image, [0, 2, 1, 3], name=name) 841 else: 842 raise ValueError( 843 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) 844 845 846@tf_export('image.central_crop') 847@dispatch.add_dispatch_support 848def central_crop(image, central_fraction): 849 """Crop the central region of the image(s). 
850 851 Remove the outer parts of an image but retain the central region of the image 852 along each dimension. If we specify central_fraction = 0.5, this function 853 returns the region marked with "X" in the below diagram. 854 855 -------- 856 | | 857 | XXXX | 858 | XXXX | 859 | | where "X" is the central 50% of the image. 860 -------- 861 862 This function works on either a single image (`image` is a 3-D Tensor), or a 863 batch of images (`image` is a 4-D Tensor). 864 865 Usage Example: 866 867 >>> x = [[[1.0, 2.0, 3.0], 868 ... [4.0, 5.0, 6.0], 869 ... [7.0, 8.0, 9.0], 870 ... [10.0, 11.0, 12.0]], 871 ... [[13.0, 14.0, 15.0], 872 ... [16.0, 17.0, 18.0], 873 ... [19.0, 20.0, 21.0], 874 ... [22.0, 23.0, 24.0]], 875 ... [[25.0, 26.0, 27.0], 876 ... [28.0, 29.0, 30.0], 877 ... [31.0, 32.0, 33.0], 878 ... [34.0, 35.0, 36.0]], 879 ... [[37.0, 38.0, 39.0], 880 ... [40.0, 41.0, 42.0], 881 ... [43.0, 44.0, 45.0], 882 ... [46.0, 47.0, 48.0]]] 883 >>> tf.image.central_crop(x, 0.5) 884 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 885 array([[[16., 17., 18.], 886 [19., 20., 21.]], 887 [[28., 29., 30.], 888 [31., 32., 33.]]], dtype=float32)> 889 890 Args: 891 image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D 892 Tensor of shape [batch_size, height, width, depth]. 893 central_fraction: float (0, 1], fraction of size to crop 894 895 Raises: 896 ValueError: if central_crop_fraction is not within (0, 1]. 897 898 Returns: 899 3-D / 4-D float Tensor, as per the input. 
900 """ 901 with ops.name_scope(None, 'central_crop', [image]): 902 image = ops.convert_to_tensor(image, name='image') 903 central_fraction_static = tensor_util.constant_value(central_fraction) 904 if central_fraction_static is not None: 905 if central_fraction_static <= 0.0 or central_fraction_static > 1.0: 906 raise ValueError('central_fraction must be within (0, 1]') 907 if central_fraction_static == 1.0: 908 return image 909 else: 910 assert_ops = _assert( 911 math_ops.logical_or(central_fraction > 0.0, central_fraction <= 1.0), 912 ValueError, 'central_fraction must be within (0, 1]') 913 image = control_flow_ops.with_dependencies(assert_ops, image) 914 915 _AssertAtLeast3DImage(image) 916 rank = image.get_shape().ndims 917 if rank != 3 and rank != 4: 918 raise ValueError('`image` should either be a Tensor with rank = 3 or ' 919 'rank = 4. Had rank = {}.'.format(rank)) 920 921 # Helper method to return the `idx`-th dimension of `tensor`, along with 922 # a boolean signifying if the dimension is dynamic. 923 def _get_dim(tensor, idx): 924 static_shape = tensor.get_shape().dims[idx].value 925 if static_shape is not None: 926 return static_shape, False 927 return array_ops.shape(tensor)[idx], True 928 929 # Get the height, width, depth (and batch size, if the image is a 4-D 930 # tensor). 931 if rank == 3: 932 img_h, dynamic_h = _get_dim(image, 0) 933 img_w, dynamic_w = _get_dim(image, 1) 934 img_d = image.get_shape()[2] 935 else: 936 img_bs = image.get_shape()[0] 937 img_h, dynamic_h = _get_dim(image, 1) 938 img_w, dynamic_w = _get_dim(image, 2) 939 img_d = image.get_shape()[3] 940 941 dynamic_h = dynamic_h or (central_fraction_static is None) 942 dynamic_w = dynamic_w or (central_fraction_static is None) 943 944 # Compute the bounding boxes for the crop. The type and value of the 945 # bounding boxes depend on the `image` tensor's rank and whether / not the 946 # dimensions are statically defined. 
947 if dynamic_h: 948 img_hd = math_ops.cast(img_h, dtypes.float64) 949 bbox_h_start = math_ops.cast( 950 (img_hd - img_hd * math_ops.cast(central_fraction, dtypes.float64)) / 951 2, dtypes.int32) 952 else: 953 img_hd = float(img_h) 954 bbox_h_start = int((img_hd - img_hd * central_fraction_static) / 2) 955 956 if dynamic_w: 957 img_wd = math_ops.cast(img_w, dtypes.float64) 958 bbox_w_start = math_ops.cast( 959 (img_wd - img_wd * math_ops.cast(central_fraction, dtypes.float64)) / 960 2, dtypes.int32) 961 else: 962 img_wd = float(img_w) 963 bbox_w_start = int((img_wd - img_wd * central_fraction_static) / 2) 964 965 bbox_h_size = img_h - bbox_h_start * 2 966 bbox_w_size = img_w - bbox_w_start * 2 967 968 if rank == 3: 969 bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0]) 970 bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1]) 971 else: 972 bbox_begin = array_ops.stack([0, bbox_h_start, bbox_w_start, 0]) 973 bbox_size = array_ops.stack([-1, bbox_h_size, bbox_w_size, -1]) 974 975 image = array_ops.slice(image, bbox_begin, bbox_size) 976 977 # Reshape the `image` tensor to the desired size. 978 if rank == 3: 979 image.set_shape([ 980 None if dynamic_h else bbox_h_size, 981 None if dynamic_w else bbox_w_size, img_d 982 ]) 983 else: 984 image.set_shape([ 985 img_bs, None if dynamic_h else bbox_h_size, 986 None if dynamic_w else bbox_w_size, img_d 987 ]) 988 return image 989 990 991@tf_export('image.pad_to_bounding_box') 992@dispatch.add_dispatch_support 993def pad_to_bounding_box(image, offset_height, offset_width, target_height, 994 target_width): 995 """Pad `image` with zeros to the specified `height` and `width`. 996 997 Adds `offset_height` rows of zeros on top, `offset_width` columns of 998 zeros on the left, and then pads the image on the bottom and right 999 with zeros until it has dimensions `target_height`, `target_width`. 
1000 1001 This op does nothing if `offset_*` is zero and the image already has size 1002 `target_height` by `target_width`. 1003 1004 Usage Example: 1005 1006 >>> x = [[[1., 2., 3.], 1007 ... [4., 5., 6.]], 1008 ... [[7., 8., 9.], 1009 ... [10., 11., 12.]]] 1010 >>> padded_image = tf.image.pad_to_bounding_box(x, 1, 1, 4, 4) 1011 >>> padded_image 1012 <tf.Tensor: shape=(4, 4, 3), dtype=float32, numpy= 1013 array([[[ 0., 0., 0.], 1014 [ 0., 0., 0.], 1015 [ 0., 0., 0.], 1016 [ 0., 0., 0.]], 1017 [[ 0., 0., 0.], 1018 [ 1., 2., 3.], 1019 [ 4., 5., 6.], 1020 [ 0., 0., 0.]], 1021 [[ 0., 0., 0.], 1022 [ 7., 8., 9.], 1023 [10., 11., 12.], 1024 [ 0., 0., 0.]], 1025 [[ 0., 0., 0.], 1026 [ 0., 0., 0.], 1027 [ 0., 0., 0.], 1028 [ 0., 0., 0.]]], dtype=float32)> 1029 1030 Args: 1031 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1032 of shape `[height, width, channels]`. 1033 offset_height: Number of rows of zeros to add on top. 1034 offset_width: Number of columns of zeros to add on the left. 1035 target_height: Height of output image. 1036 target_width: Width of output image. 1037 1038 Returns: 1039 If `image` was 4-D, a 4-D Tensor of shape 1040 `[batch, target_height, target_width, channels]`. 1041 If `image` was 3-D, a 3-D Tensor of shape 1042 `[target_height, target_width, channels]`. 1043 1044 Raises: 1045 ValueError: If the shape of `image` is incompatible with the `offset_*` or 1046 `target_*` arguments, or either `offset_height` or `offset_width` is 1047 negative.
1048 """ 1049 with ops.name_scope(None, 'pad_to_bounding_box', [image]): 1050 image = ops.convert_to_tensor(image, name='image') 1051 1052 is_batch = True 1053 image_shape = image.get_shape() 1054 if image_shape.ndims == 3: 1055 is_batch = False 1056 image = array_ops.expand_dims(image, 0) 1057 elif image_shape.ndims is None: 1058 is_batch = False 1059 image = array_ops.expand_dims(image, 0) 1060 image.set_shape([None] * 4) 1061 elif image_shape.ndims != 4: 1062 raise ValueError( 1063 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 1064 image_shape) 1065 1066 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 1067 batch, height, width, depth = _ImageDimensions(image, rank=4) 1068 1069 after_padding_width = target_width - offset_width - width 1070 1071 after_padding_height = target_height - offset_height - height 1072 1073 assert_ops += _assert(offset_height >= 0, ValueError, 1074 'offset_height must be >= 0') 1075 assert_ops += _assert(offset_width >= 0, ValueError, 1076 'offset_width must be >= 0') 1077 assert_ops += _assert(after_padding_width >= 0, ValueError, 1078 'width must be <= target - offset') 1079 assert_ops += _assert(after_padding_height >= 0, ValueError, 1080 'height must be <= target - offset') 1081 image = control_flow_ops.with_dependencies(assert_ops, image) 1082 1083 # Do not pad on the depth dimensions. 
1084 paddings = array_ops.reshape( 1085 array_ops.stack([ 1086 0, 0, offset_height, after_padding_height, offset_width, 1087 after_padding_width, 0, 0 1088 ]), [4, 2]) 1089 padded = array_ops.pad(image, paddings) 1090 1091 padded_shape = [ 1092 None if _is_tensor(i) else i 1093 for i in [batch, target_height, target_width, depth] 1094 ] 1095 padded.set_shape(padded_shape) 1096 1097 if not is_batch: 1098 padded = array_ops.squeeze(padded, axis=[0]) 1099 1100 return padded 1101 1102 1103@tf_export('image.crop_to_bounding_box') 1104@dispatch.add_dispatch_support 1105def crop_to_bounding_box(image, offset_height, offset_width, target_height, 1106 target_width): 1107 """Crops an `image` to a specified bounding box. 1108 1109 This op cuts a rectangular bounding box out of `image`. The top-left corner 1110 of the bounding box is at `offset_height, offset_width` in `image`, and the 1111 lower-right corner is at 1112 `offset_height + target_height, offset_width + target_width`. 1113 1114 Example Usage: 1115 1116 >>> image = tf.constant(np.arange(1, 28, dtype=np.float32), shape=[3, 3, 3]) 1117 >>> image[:,:,0] # print the first channel of the 3-D tensor 1118 <tf.Tensor: shape=(3, 3), dtype=float32, numpy= 1119 array([[ 1., 4., 7.], 1120 [10., 13., 16.], 1121 [19., 22., 25.]], dtype=float32)> 1122 >>> cropped_image = tf.image.crop_to_bounding_box(image, 0, 0, 2, 2) 1123 >>> cropped_image[:,:,0] # print the first channel of the cropped 3-D tensor 1124 <tf.Tensor: shape=(2, 2), dtype=float32, numpy= 1125 array([[ 1., 4.], 1126 [10., 13.]], dtype=float32)> 1127 1128 Args: 1129 image: 4-D `Tensor` of shape `[batch, height, width, channels]` or 3-D 1130 `Tensor` of shape `[height, width, channels]`. 1131 offset_height: Vertical coordinate of the top-left corner of the bounding 1132 box in `image`. 1133 offset_width: Horizontal coordinate of the top-left corner of the bounding 1134 box in `image`. 1135 target_height: Height of the bounding box. 
1136 target_width: Width of the bounding box. 1137 1138 Returns: 1139 If `image` was 4-D, a 4-D `Tensor` of shape 1140 `[batch, target_height, target_width, channels]`. 1141 If `image` was 3-D, a 3-D `Tensor` of shape 1142 `[target_height, target_width, channels]`. 1143 It has the same dtype as `image`. 1144 1145 Raises: 1146 ValueError: `image` is not a 3-D or 4-D `Tensor`. 1147 ValueError: `offset_width < 0` or `offset_height < 0`. 1148 ValueError: `target_height <= 0` or `target_width <= 0`. 1149 ValueError: `width < offset_width + target_width` or 1150 `height < offset_height + target_height`. 1151 """ 1152 with ops.name_scope(None, 'crop_to_bounding_box', [image]): 1153 image = ops.convert_to_tensor(image, name='image') 1154 1155 is_batch = True 1156 image_shape = image.get_shape() 1157 if image_shape.ndims == 3: 1158 is_batch = False 1159 image = array_ops.expand_dims(image, 0) 1160 elif image_shape.ndims is None: 1161 is_batch = False 1162 image = array_ops.expand_dims(image, 0) 1163 image.set_shape([None] * 4) 1164 elif image_shape.ndims != 4: 1165 raise ValueError( 1166 '\'image\' (shape %s) must have either 3 or 4 dimensions.' 
% 1167 image_shape) 1168 1169 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 1170 1171 batch, height, width, depth = _ImageDimensions(image, rank=4) 1172 1173 assert_ops += _assert(offset_width >= 0, ValueError, 1174 'offset_width must be >= 0.') 1175 assert_ops += _assert(offset_height >= 0, ValueError, 1176 'offset_height must be >= 0.') 1177 assert_ops += _assert(target_width > 0, ValueError, 1178 'target_width must be > 0.') 1179 assert_ops += _assert(target_height > 0, ValueError, 1180 'target_height must be > 0.') 1181 assert_ops += _assert(width >= (target_width + offset_width), ValueError, 1182 'width must be >= target + offset.') 1183 assert_ops += _assert(height >= (target_height + offset_height), ValueError, 1184 'height must be >= target + offset.') 1185 image = control_flow_ops.with_dependencies(assert_ops, image) 1186 1187 cropped = array_ops.slice( 1188 image, array_ops.stack([0, offset_height, offset_width, 0]), 1189 array_ops.stack([array_ops.shape(image)[0], target_height, target_width, 1190 array_ops.shape(image)[3]])) 1191 1192 cropped_shape = [ 1193 None if _is_tensor(i) else i 1194 for i in [batch, target_height, target_width, depth] 1195 ] 1196 cropped.set_shape(cropped_shape) 1197 1198 if not is_batch: 1199 cropped = array_ops.squeeze(cropped, axis=[0]) 1200 1201 return cropped 1202 1203 1204@tf_export( 1205 'image.resize_with_crop_or_pad', 1206 v1=['image.resize_with_crop_or_pad', 'image.resize_image_with_crop_or_pad']) 1207@dispatch.add_dispatch_support 1208def resize_image_with_crop_or_pad(image, target_height, target_width): 1209 """Crops and/or pads an image to a target width and height. 1210 1211 Resizes an image to a target width and height by either centrally 1212 cropping the image or padding it evenly with zeros. 1213 1214 If `width` or `height` is greater than the specified `target_width` or 1215 `target_height` respectively, this op centrally crops along that dimension. 
1216 1217 For example: 1218 1219 >>> image = np.arange(75).reshape(5, 5, 3) # create 3-D image input 1220 >>> image[:,:,0] # print first channel just for demo purposes 1221 array([[ 0, 3, 6, 9, 12], 1222 [15, 18, 21, 24, 27], 1223 [30, 33, 36, 39, 42], 1224 [45, 48, 51, 54, 57], 1225 [60, 63, 66, 69, 72]]) 1226 >>> image = tf.image.resize_with_crop_or_pad(image, 3, 3) # crop 1227 >>> # print first channel for demo purposes; centrally cropped output 1228 >>> image[:,:,0] 1229 <tf.Tensor: shape=(3, 3), dtype=int64, numpy= 1230 array([[18, 21, 24], 1231 [33, 36, 39], 1232 [48, 51, 54]])> 1233 1234 If `width` or `height` is smaller than the specified `target_width` or 1235 `target_height` respectively, this op centrally pads with 0 along that 1236 dimension. 1237 1238 For example: 1239 1240 >>> image = np.arange(1, 28).reshape(3, 3, 3) # create 3-D image input 1241 >>> image[:,:,0] # print first channel just for demo purposes 1242 array([[ 1, 4, 7], 1243 [10, 13, 16], 1244 [19, 22, 25]]) 1245 >>> image = tf.image.resize_with_crop_or_pad(image, 5, 5) # pad 1246 >>> # print first channel for demo purposes; we should see 0 paddings 1247 >>> image[:,:,0] 1248 <tf.Tensor: shape=(5, 5), dtype=int64, numpy= 1249 array([[ 0, 0, 0, 0, 0], 1250 [ 0, 1, 4, 7, 0], 1251 [ 0, 10, 13, 16, 0], 1252 [ 0, 19, 22, 25, 0], 1253 [ 0, 0, 0, 0, 0]])> 1254 1255 Args: 1256 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1257 of shape `[height, width, channels]`. 1258 target_height: Target height. 1259 target_width: Target width. 1260 1261 Raises: 1262 ValueError: if `target_height` or `target_width` are zero or negative. 1263 1264 Returns: 1265 Cropped and/or padded image. 1266 If `image` was 4-D, a 4-D Tensor of shape 1267 `[batch, new_height, new_width, channels]`. 1268 If `image` was 3-D, a 3-D Tensor of shape 1269 `[new_height, new_width, channels]`.
1270 """ 1271 with ops.name_scope(None, 'resize_image_with_crop_or_pad', [image]): 1272 image = ops.convert_to_tensor(image, name='image') 1273 image_shape = image.get_shape() 1274 is_batch = True 1275 if image_shape.ndims == 3: 1276 is_batch = False 1277 image = array_ops.expand_dims(image, 0) 1278 elif image_shape.ndims is None: 1279 is_batch = False 1280 image = array_ops.expand_dims(image, 0) 1281 image.set_shape([None] * 4) 1282 elif image_shape.ndims != 4: 1283 raise ValueError( 1284 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 1285 image_shape) 1286 1287 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 1288 assert_ops += _assert(target_width > 0, ValueError, 1289 'target_width must be > 0.') 1290 assert_ops += _assert(target_height > 0, ValueError, 1291 'target_height must be > 0.') 1292 1293 image = control_flow_ops.with_dependencies(assert_ops, image) 1294 # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks. 1295 # Make sure our checks come first, so that error messages are clearer. 
1296 if _is_tensor(target_height): 1297 target_height = control_flow_ops.with_dependencies( 1298 assert_ops, target_height) 1299 if _is_tensor(target_width): 1300 target_width = control_flow_ops.with_dependencies(assert_ops, 1301 target_width) 1302 1303 def max_(x, y): 1304 if _is_tensor(x) or _is_tensor(y): 1305 return math_ops.maximum(x, y) 1306 else: 1307 return max(x, y) 1308 1309 def min_(x, y): 1310 if _is_tensor(x) or _is_tensor(y): 1311 return math_ops.minimum(x, y) 1312 else: 1313 return min(x, y) 1314 1315 def equal_(x, y): 1316 if _is_tensor(x) or _is_tensor(y): 1317 return math_ops.equal(x, y) 1318 else: 1319 return x == y 1320 1321 _, height, width, _ = _ImageDimensions(image, rank=4) 1322 width_diff = target_width - width 1323 offset_crop_width = max_(-width_diff // 2, 0) 1324 offset_pad_width = max_(width_diff // 2, 0) 1325 1326 height_diff = target_height - height 1327 offset_crop_height = max_(-height_diff // 2, 0) 1328 offset_pad_height = max_(height_diff // 2, 0) 1329 1330 # Maybe crop if needed. 1331 cropped = crop_to_bounding_box(image, offset_crop_height, offset_crop_width, 1332 min_(target_height, height), 1333 min_(target_width, width)) 1334 1335 # Maybe pad if needed. 1336 resized = pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width, 1337 target_height, target_width) 1338 1339 # In theory all the checks below are redundant. 
1340 if resized.get_shape().ndims is None: 1341 raise ValueError('resized contains no shape.') 1342 1343 _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4) 1344 1345 assert_ops = [] 1346 assert_ops += _assert( 1347 equal_(resized_height, target_height), ValueError, 1348 'resized height is not correct.') 1349 assert_ops += _assert( 1350 equal_(resized_width, target_width), ValueError, 1351 'resized width is not correct.') 1352 1353 resized = control_flow_ops.with_dependencies(assert_ops, resized) 1354 1355 if not is_batch: 1356 resized = array_ops.squeeze(resized, axis=[0]) 1357 1358 return resized 1359 1360 1361@tf_export(v1=['image.ResizeMethod']) 1362class ResizeMethodV1(object): 1363 """See `v1.image.resize` for details.""" 1364 BILINEAR = 0 1365 NEAREST_NEIGHBOR = 1 1366 BICUBIC = 2 1367 AREA = 3 1368 1369 1370@tf_export('image.ResizeMethod', v1=[]) 1371class ResizeMethod(object): 1372 """See `tf.image.resize` for details.""" 1373 BILINEAR = 'bilinear' 1374 NEAREST_NEIGHBOR = 'nearest' 1375 BICUBIC = 'bicubic' 1376 AREA = 'area' 1377 LANCZOS3 = 'lanczos3' 1378 LANCZOS5 = 'lanczos5' 1379 GAUSSIAN = 'gaussian' 1380 MITCHELLCUBIC = 'mitchellcubic' 1381 1382 1383def _resize_images_common(images, resizer_fn, size, preserve_aspect_ratio, name, 1384 skip_resize_if_same): 1385 """Core functionality for v1 and v2 resize functions.""" 1386 with ops.name_scope(name, 'resize', [images, size]): 1387 images = ops.convert_to_tensor(images, name='images') 1388 if images.get_shape().ndims is None: 1389 raise ValueError('\'images\' contains no shape.') 1390 # TODO(shlens): Migrate this functionality to the underlying Op's. 
1391 is_batch = True 1392 if images.get_shape().ndims == 3: 1393 is_batch = False 1394 images = array_ops.expand_dims(images, 0) 1395 elif images.get_shape().ndims != 4: 1396 raise ValueError('\'images\' must have either 3 or 4 dimensions.') 1397 1398 _, height, width, _ = images.get_shape().as_list() 1399 1400 try: 1401 size = ops.convert_to_tensor(size, dtypes.int32, name='size') 1402 except (TypeError, ValueError): 1403 raise ValueError('\'size\' must be a 1-D int32 Tensor') 1404 if not size.get_shape().is_compatible_with([2]): 1405 raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: ' 1406 'new_height, new_width') 1407 1408 if preserve_aspect_ratio: 1409 # Get the current shapes of the image, even if dynamic. 1410 _, current_height, current_width, _ = _ImageDimensions(images, rank=4) 1411 1412 # do the computation to find the right scale and height/width. 1413 scale_factor_height = ( 1414 math_ops.cast(size[0], dtypes.float32) / 1415 math_ops.cast(current_height, dtypes.float32)) 1416 scale_factor_width = ( 1417 math_ops.cast(size[1], dtypes.float32) / 1418 math_ops.cast(current_width, dtypes.float32)) 1419 scale_factor = math_ops.minimum(scale_factor_height, scale_factor_width) 1420 scaled_height_const = math_ops.cast( 1421 math_ops.round(scale_factor * 1422 math_ops.cast(current_height, dtypes.float32)), 1423 dtypes.int32) 1424 scaled_width_const = math_ops.cast( 1425 math_ops.round(scale_factor * 1426 math_ops.cast(current_width, dtypes.float32)), 1427 dtypes.int32) 1428 1429 # NOTE: Reset the size and other constants used later. 
1430 size = ops.convert_to_tensor([scaled_height_const, scaled_width_const], 1431 dtypes.int32, 1432 name='size') 1433 1434 size_const_as_shape = tensor_util.constant_value_as_shape(size) 1435 new_height_const = tensor_shape.dimension_at_index(size_const_as_shape, 1436 0).value 1437 new_width_const = tensor_shape.dimension_at_index(size_const_as_shape, 1438 1).value 1439 1440 # If we can determine that the height and width will be unmodified by this 1441 # transformation, we avoid performing the resize. 1442 if skip_resize_if_same and all( 1443 x is not None 1444 for x in [new_width_const, width, new_height_const, height]) and ( 1445 width == new_width_const and height == new_height_const): 1446 if not is_batch: 1447 images = array_ops.squeeze(images, axis=[0]) 1448 return images 1449 1450 images = resizer_fn(images, size) 1451 1452 # NOTE(mrry): The shape functions for the resize ops cannot unpack 1453 # the packed values in `new_size`, so set the shape here. 1454 images.set_shape([None, new_height_const, new_width_const, None]) 1455 1456 if not is_batch: 1457 images = array_ops.squeeze(images, axis=[0]) 1458 return images 1459 1460 1461@tf_export(v1=['image.resize_images', 'image.resize']) 1462@dispatch.add_dispatch_support 1463def resize_images(images, 1464 size, 1465 method=ResizeMethodV1.BILINEAR, 1466 align_corners=False, 1467 preserve_aspect_ratio=False, 1468 name=None): 1469 """Resize `images` to `size` using the specified `method`. 1470 1471 Resized images will be distorted if their original aspect ratio is not 1472 the same as `size`. To avoid distortions see 1473 `tf.image.resize_with_pad` or `tf.image.resize_with_crop_or_pad`. 
1474 1475 The `method` can be one of: 1476 1477 * <b>`tf.image.ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.]( 1478 https://en.wikipedia.org/wiki/Bilinear_interpolation) 1479 * <b>`tf.image.ResizeMethod.NEAREST_NEIGHBOR`</b>: [ 1480 Nearest neighbor interpolation.]( 1481 https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) 1482 * <b>`tf.image.ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.]( 1483 https://en.wikipedia.org/wiki/Bicubic_interpolation) 1484 * <b>`tf.image.ResizeMethod.AREA`</b>: Area interpolation. 1485 1486 The return value has the same type as `images` if `method` is 1487 `tf.image.ResizeMethod.NEAREST_NEIGHBOR`. It will also have the same type 1488 as `images` if the size of `images` can be statically determined to be the 1489 same as `size`, because `images` is returned in this case. Otherwise, the 1490 return value has type `float32`. 1491 1492 Args: 1493 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1494 of shape `[height, width, channels]`. 1495 size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new 1496 size for the images. 1497 method: ResizeMethod. Defaults to `tf.image.ResizeMethod.BILINEAR`. 1498 align_corners: bool. If True, the centers of the 4 corner pixels of the 1499 input and output tensors are aligned, preserving the values at the corner 1500 pixels. Defaults to `False`. 1501 preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set, 1502 then `images` will be resized to a size that fits in `size` while 1503 preserving the aspect ratio of the original image. Scales up the image if 1504 `size` is bigger than the current size of the `image`. Defaults to False. 1505 name: A name for this operation (optional). 1506 1507 Raises: 1508 ValueError: if the shape of `images` is incompatible with the 1509 shape arguments to this function 1510 ValueError: if `size` has invalid shape or type. 1511 ValueError: if an unsupported resize method is specified. 
1512 1513 Returns: 1514 If `images` was 4-D, a 4-D float Tensor of shape 1515 `[batch, new_height, new_width, channels]`. 1516 If `images` was 3-D, a 3-D float Tensor of shape 1517 `[new_height, new_width, channels]`. 1518 """ 1519 1520 def resize_fn(images_t, new_size): 1521 """Legacy resize core function, passed to _resize_images_common.""" 1522 if method == ResizeMethodV1.BILINEAR or method == ResizeMethod.BILINEAR: 1523 return gen_image_ops.resize_bilinear( 1524 images_t, new_size, align_corners=align_corners) 1525 elif (method == ResizeMethodV1.NEAREST_NEIGHBOR or 1526 method == ResizeMethod.NEAREST_NEIGHBOR): 1527 return gen_image_ops.resize_nearest_neighbor( 1528 images_t, new_size, align_corners=align_corners) 1529 elif method == ResizeMethodV1.BICUBIC or method == ResizeMethod.BICUBIC: 1530 return gen_image_ops.resize_bicubic( 1531 images_t, new_size, align_corners=align_corners) 1532 elif method == ResizeMethodV1.AREA or method == ResizeMethod.AREA: 1533 return gen_image_ops.resize_area( 1534 images_t, new_size, align_corners=align_corners) 1535 else: 1536 raise ValueError('Resize method is not implemented: {}'.format(method)) 1537 1538 return _resize_images_common( 1539 images, 1540 resize_fn, 1541 size, 1542 preserve_aspect_ratio=preserve_aspect_ratio, 1543 name=name, 1544 skip_resize_if_same=True) 1545 1546 1547@tf_export('image.resize', v1=[]) 1548@dispatch.add_dispatch_support 1549def resize_images_v2(images, 1550 size, 1551 method=ResizeMethod.BILINEAR, 1552 preserve_aspect_ratio=False, 1553 antialias=False, 1554 name=None): 1555 """Resize `images` to `size` using the specified `method`. 1556 1557 Resized images will be distorted if their original aspect ratio is not 1558 the same as `size`. To avoid distortions see 1559 `tf.image.resize_with_pad`. 1560 1561 >>> image = tf.constant([ 1562 ... [1,0,0,0,0], 1563 ... [0,1,0,0,0], 1564 ... [0,0,1,0,0], 1565 ... [0,0,0,1,0], 1566 ... [0,0,0,0,1], 1567 ... 
]) 1568 >>> # Add "batch" and "channels" dimensions 1569 >>> image = image[tf.newaxis, ..., tf.newaxis] 1570 >>> image.shape.as_list() # [batch, height, width, channels] 1571 [1, 5, 5, 1] 1572 >>> tf.image.resize(image, [3,5])[0,...,0].numpy() 1573 array([[0.6666667, 0.3333333, 0. , 0. , 0. ], 1574 [0. , 0. , 1. , 0. , 0. ], 1575 [0. , 0. , 0. , 0.3333335, 0.6666665]], 1576 dtype=float32) 1577 1578 It works equally well with a single image instead of a batch of images: 1579 1580 >>> tf.image.resize(image[0], [3,5]).shape.as_list() 1581 [3, 5, 1] 1582 1583 When `antialias` is true, the sampling filter will anti-alias the input image 1584 as well as interpolate. When downsampling an image with [anti-aliasing]( 1585 https://en.wikipedia.org/wiki/Spatial_anti-aliasing) the sampling filter 1586 kernel is scaled in order to properly anti-alias the input image signal. 1587 `antialias` has no effect when upsampling an image: 1588 1589 >>> a = tf.image.resize(image, [5,10]) 1590 >>> b = tf.image.resize(image, [5,10], antialias=True) 1591 >>> tf.reduce_max(abs(a - b)).numpy() 1592 0.0 1593 1594 The `method` argument expects an item from the `image.ResizeMethod` enum, or 1595 the string equivalent. The options are: 1596 1597 * <b>`bilinear`</b>: [Bilinear interpolation.]( 1598 https://en.wikipedia.org/wiki/Bilinear_interpolation) If `antialias` is 1599 true, becomes a hat/tent filter function with radius 1 when downsampling. 1600 * <b>`lanczos3`</b>: [Lanczos kernel]( 1601 https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 3. 1602 High-quality practical filter but may have some ringing, especially on 1603 synthetic images. 1604 * <b>`lanczos5`</b>: [Lanczos kernel]( 1605 https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 5. 1606 Very-high-quality filter but may have stronger ringing. 1607 * <b>`bicubic`</b>: [Cubic interpolant]( 1608 https://en.wikipedia.org/wiki/Bicubic_interpolation) of Keys. Equivalent to 1609 Catmull-Rom kernel. 
Reasonably good quality and faster than Lanczos3Kernel, 1610 particularly when upsampling. 1611 * <b>`gaussian`</b>: [Gaussian kernel]( 1612 https://en.wikipedia.org/wiki/Gaussian_filter) with radius 3, 1613 sigma = 1.5 / 3.0. 1614 * <b>`nearest`</b>: [Nearest neighbor interpolation.]( 1615 https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) 1616 `antialias` has no effect when used with nearest neighbor interpolation. 1617 * <b>`area`</b>: Anti-aliased resampling with area interpolation. 1618 `antialias` has no effect when used with area interpolation; it 1619 always anti-aliases. 1620 * <b>`mitchellcubic`</b>: Mitchell-Netravali Cubic non-interpolating filter. 1621 For synthetic images (especially those lacking proper prefiltering), less 1622 ringing than Keys cubic kernel but less sharp. 1623 1624 Note: Near image edges the filtering kernel may be partially outside the 1625 image boundaries. For these pixels, only input pixels inside the image will be 1626 included in the filter sum, and the output value will be appropriately 1627 normalized. 1628 1629 The return value has type `float32`, unless the `method` is 1630 `ResizeMethod.NEAREST_NEIGHBOR`, then the return dtype is the dtype 1631 of `images`: 1632 1633 >>> nn = tf.image.resize(image, [5,7], method='nearest') 1634 >>> nn[0,...,0].numpy() 1635 array([[1, 0, 0, 0, 0, 0, 0], 1636 [0, 1, 1, 0, 0, 0, 0], 1637 [0, 0, 0, 1, 0, 0, 0], 1638 [0, 0, 0, 0, 1, 1, 0], 1639 [0, 0, 0, 0, 0, 0, 1]], dtype=int32) 1640 1641 With `preserve_aspect_ratio=True`, the aspect ratio is preserved, so `size` 1642 is the maximum for each dimension: 1643 1644 >>> max_10_20 = tf.image.resize(image, [10,20], preserve_aspect_ratio=True) 1645 >>> max_10_20.shape.as_list() 1646 [1, 10, 10, 1] 1647 1648 Args: 1649 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1650 of shape `[height, width, channels]`. 1651 size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. 
The new 1652 size for the images. 1653 method: An `image.ResizeMethod`, or string equivalent. Defaults to 1654 `bilinear`. 1655 preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set, 1656 then `images` will be resized to a size that fits in `size` while 1657 preserving the aspect ratio of the original image. Scales up the image if 1658 `size` is bigger than the current size of the `images`. Defaults to False. 1659 antialias: Whether to use an anti-aliasing filter when downsampling an 1660 image. Defaults to False. 1661 name: A name for this operation (optional). 1662 1663 Raises: 1664 ValueError: if the shape of `images` is incompatible with the 1665 shape arguments to this function. 1666 ValueError: if `size` has an invalid shape or type. 1667 ValueError: if an unsupported resize method is specified. 1668 1669 Returns: 1670 If `images` was 4-D, a 4-D float Tensor of shape 1671 `[batch, new_height, new_width, channels]`. 1672 If `images` was 3-D, a 3-D float Tensor of shape 1673 `[new_height, new_width, channels]`.
1674 """ 1675 1676 def resize_fn(images_t, new_size): 1677 """Resize core function, passed to _resize_images_common.""" 1678 scale_and_translate_methods = [ 1679 ResizeMethod.LANCZOS3, ResizeMethod.LANCZOS5, ResizeMethod.GAUSSIAN, 1680 ResizeMethod.MITCHELLCUBIC 1681 ] 1682 1683 def resize_with_scale_and_translate(method): 1684 scale = ( 1685 math_ops.cast(new_size, dtype=dtypes.float32) / 1686 math_ops.cast(array_ops.shape(images_t)[1:3], dtype=dtypes.float32)) 1687 return gen_image_ops.scale_and_translate( 1688 images_t, 1689 new_size, 1690 scale, 1691 array_ops.zeros([2]), 1692 kernel_type=method, 1693 antialias=antialias) 1694 1695 if method == ResizeMethod.BILINEAR: 1696 if antialias: 1697 return resize_with_scale_and_translate('triangle') 1698 else: 1699 return gen_image_ops.resize_bilinear( 1700 images_t, new_size, half_pixel_centers=True) 1701 elif method == ResizeMethod.NEAREST_NEIGHBOR: 1702 return gen_image_ops.resize_nearest_neighbor( 1703 images_t, new_size, half_pixel_centers=True) 1704 elif method == ResizeMethod.BICUBIC: 1705 if antialias: 1706 return resize_with_scale_and_translate('keyscubic') 1707 else: 1708 return gen_image_ops.resize_bicubic( 1709 images_t, new_size, half_pixel_centers=True) 1710 elif method == ResizeMethod.AREA: 1711 return gen_image_ops.resize_area(images_t, new_size) 1712 elif method in scale_and_translate_methods: 1713 return resize_with_scale_and_translate(method) 1714 else: 1715 raise ValueError('Resize method is not implemented: {}'.format(method)) 1716 1717 return _resize_images_common( 1718 images, 1719 resize_fn, 1720 size, 1721 preserve_aspect_ratio=preserve_aspect_ratio, 1722 name=name, 1723 skip_resize_if_same=False) 1724 1725 1726def _resize_image_with_pad_common(image, target_height, target_width, 1727 resize_fn): 1728 """Core functionality for v1 and v2 resize_image_with_pad functions.""" 1729 with ops.name_scope(None, 'resize_image_with_pad', [image]): 1730 image = ops.convert_to_tensor(image, name='image') 
1731 image_shape = image.get_shape() 1732 is_batch = True 1733 if image_shape.ndims == 3: 1734 is_batch = False 1735 image = array_ops.expand_dims(image, 0) 1736 elif image_shape.ndims is None: 1737 is_batch = False 1738 image = array_ops.expand_dims(image, 0) 1739 image.set_shape([None] * 4) 1740 elif image_shape.ndims != 4: 1741 raise ValueError( 1742 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 1743 image_shape) 1744 1745 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 1746 assert_ops += _assert(target_width > 0, ValueError, 1747 'target_width must be > 0.') 1748 assert_ops += _assert(target_height > 0, ValueError, 1749 'target_height must be > 0.') 1750 1751 image = control_flow_ops.with_dependencies(assert_ops, image) 1752 1753 def max_(x, y): 1754 if _is_tensor(x) or _is_tensor(y): 1755 return math_ops.maximum(x, y) 1756 else: 1757 return max(x, y) 1758 1759 _, height, width, _ = _ImageDimensions(image, rank=4) 1760 1761 # convert values to float, to ease divisions 1762 f_height = math_ops.cast(height, dtype=dtypes.float32) 1763 f_width = math_ops.cast(width, dtype=dtypes.float32) 1764 f_target_height = math_ops.cast(target_height, dtype=dtypes.float32) 1765 f_target_width = math_ops.cast(target_width, dtype=dtypes.float32) 1766 1767 # Find the ratio by which the image must be adjusted 1768 # to fit within the target 1769 ratio = max_(f_width / f_target_width, f_height / f_target_height) 1770 resized_height_float = f_height / ratio 1771 resized_width_float = f_width / ratio 1772 resized_height = math_ops.cast( 1773 math_ops.floor(resized_height_float), dtype=dtypes.int32) 1774 resized_width = math_ops.cast( 1775 math_ops.floor(resized_width_float), dtype=dtypes.int32) 1776 1777 padding_height = (f_target_height - resized_height_float) / 2 1778 padding_width = (f_target_width - resized_width_float) / 2 1779 f_padding_height = math_ops.floor(padding_height) 1780 f_padding_width = math_ops.floor(padding_width) 1781 p_height = 
max_(0, math_ops.cast(f_padding_height, dtype=dtypes.int32)) 1782 p_width = max_(0, math_ops.cast(f_padding_width, dtype=dtypes.int32)) 1783 1784 # Resize first, then pad to meet requested dimensions 1785 resized = resize_fn(image, [resized_height, resized_width]) 1786 1787 padded = pad_to_bounding_box(resized, p_height, p_width, target_height, 1788 target_width) 1789 1790 if padded.get_shape().ndims is None: 1791 raise ValueError('padded contains no shape.') 1792 1793 _ImageDimensions(padded, rank=4) 1794 1795 if not is_batch: 1796 padded = array_ops.squeeze(padded, axis=[0]) 1797 1798 return padded 1799 1800 1801@tf_export(v1=['image.resize_image_with_pad']) 1802@dispatch.add_dispatch_support 1803def resize_image_with_pad_v1(image, 1804 target_height, 1805 target_width, 1806 method=ResizeMethodV1.BILINEAR, 1807 align_corners=False): 1808 """Resizes and pads an image to a target width and height. 1809 1810 Resizes an image to a target width and height by keeping 1811 the aspect ratio the same without distortion. If the target 1812 dimensions don't match the image dimensions, the image 1813 is resized and then padded with zeroes to match requested 1814 dimensions. 1815 1816 Args: 1817 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1818 of shape `[height, width, channels]`. 1819 target_height: Target height. 1820 target_width: Target width. 1821 method: Method to use for resizing image. See `resize_images()` 1822 align_corners: bool. If True, the centers of the 4 corner pixels of the 1823 input and output tensors are aligned, preserving the values at the corner 1824 pixels. Defaults to `False`. 1825 1826 Raises: 1827 ValueError: if `target_height` or `target_width` are zero or negative. 1828 1829 Returns: 1830 Resized and padded image. 1831 If `images` was 4-D, a 4-D float Tensor of shape 1832 `[batch, new_height, new_width, channels]`. 1833 If `images` was 3-D, a 3-D float Tensor of shape 1834 `[new_height, new_width, channels]`. 
1835 """ 1836 1837 def _resize_fn(im, new_size): 1838 return resize_images(im, new_size, method, align_corners=align_corners) 1839 1840 return _resize_image_with_pad_common(image, target_height, target_width, 1841 _resize_fn) 1842 1843 1844@tf_export('image.resize_with_pad', v1=[]) 1845@dispatch.add_dispatch_support 1846def resize_image_with_pad_v2(image, 1847 target_height, 1848 target_width, 1849 method=ResizeMethod.BILINEAR, 1850 antialias=False): 1851 """Resizes and pads an image to a target width and height. 1852 1853 Resizes an image to a target width and height by keeping 1854 the aspect ratio the same without distortion. If the target 1855 dimensions don't match the image dimensions, the image 1856 is resized and then padded with zeroes to match requested 1857 dimensions. 1858 1859 Args: 1860 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1861 of shape `[height, width, channels]`. 1862 target_height: Target height. 1863 target_width: Target width. 1864 method: Method to use for resizing image. See `image.resize()` 1865 antialias: Whether to use anti-aliasing when resizing. See 'image.resize()'. 1866 1867 Raises: 1868 ValueError: if `target_height` or `target_width` are zero or negative. 1869 1870 Returns: 1871 Resized and padded image. 1872 If `images` was 4-D, a 4-D float Tensor of shape 1873 `[batch, new_height, new_width, channels]`. 1874 If `images` was 3-D, a 3-D float Tensor of shape 1875 `[new_height, new_width, channels]`. 1876 """ 1877 1878 def _resize_fn(im, new_size): 1879 return resize_images_v2(im, new_size, method, antialias=antialias) 1880 1881 return _resize_image_with_pad_common(image, target_height, target_width, 1882 _resize_fn) 1883 1884 1885@tf_export('image.per_image_standardization') 1886@dispatch.add_dispatch_support 1887def per_image_standardization(image): 1888 """Linearly scales each image in `image` to have mean 0 and variance 1. 
1889 1890 For each 3-D image `x` in `image`, computes `(x - mean) / adjusted_stddev`, 1891 where 1892 1893 - `mean` is the average of all values in `x` 1894 - `adjusted_stddev = max(stddev, 1.0/sqrt(N))` is capped away from 0 to 1895 protect against division by 0 when handling uniform images 1896 - `N` is the number of elements in `x` 1897 - `stddev` is the standard deviation of all values in `x` 1898 1899 Example Usage: 1900 1901 >>> image = tf.constant(np.arange(1, 13, dtype=np.int32), shape=[2, 2, 3]) 1902 >>> image # 3-D tensor 1903 <tf.Tensor: shape=(2, 2, 3), dtype=int32, numpy= 1904 array([[[ 1, 2, 3], 1905 [ 4, 5, 6]], 1906 [[ 7, 8, 9], 1907 [10, 11, 12]]], dtype=int32)> 1908 >>> new_image = tf.image.per_image_standardization(image) 1909 >>> new_image # 3-D tensor with mean ~= 0 and variance ~= 1 1910 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 1911 array([[[-1.593255 , -1.3035723 , -1.0138896 ], 1912 [-0.7242068 , -0.4345241 , -0.14484136]], 1913 [[ 0.14484136, 0.4345241 , 0.7242068 ], 1914 [ 1.0138896 , 1.3035723 , 1.593255 ]]], dtype=float32)> 1915 1916 Args: 1917 image: An n-D `Tensor` with at least 3 dimensions, the last 3 of which are 1918 the dimensions of each image. 1919 1920 Returns: 1921 A `Tensor` with the same shape as `image` and its dtype is `float32`. 1922 1923 Raises: 1924 ValueError: The shape of `image` has fewer than 3 dimensions. 1925 """ 1926 with ops.name_scope(None, 'per_image_standardization', [image]) as scope: 1927 image = ops.convert_to_tensor(image, name='image') 1928 image = _AssertAtLeast3DImage(image) 1929 1930 image = math_ops.cast(image, dtype=dtypes.float32) 1931 num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:]) 1932 image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True) 1933 1934 # Apply a minimum normalization that protects us against uniform images. 
1935 stddev = math_ops.reduce_std(image, axis=[-1, -2, -3], keepdims=True) 1936 min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32)) 1937 adjusted_stddev = math_ops.maximum(stddev, min_stddev) 1938 1939 image -= image_mean 1940 image = math_ops.divide(image, adjusted_stddev, name=scope) 1941 return image 1942 1943 1944@tf_export('image.random_brightness') 1945@dispatch.add_dispatch_support 1946def random_brightness(image, max_delta, seed=None): 1947 """Adjust the brightness of images by a random factor. 1948 1949 Equivalent to `adjust_brightness()` using a `delta` randomly picked in the 1950 interval `[-max_delta, max_delta)`. 1951 1952 For producing deterministic results given a `seed` value, use 1953 `tf.image.stateless_random_brightness`. Unlike using the `seed` param 1954 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 1955 same results given the same seed independent of how many times the function is 1956 called, and independent of global seed settings (e.g. tf.random.set_seed). 1957 1958 Args: 1959 image: An image or images to adjust. 1960 max_delta: float, must be non-negative. 1961 seed: A Python integer. Used to create a random seed. See 1962 `tf.compat.v1.set_random_seed` for behavior. 1963 1964 Usage Example: 1965 1966 >>> x = [[[1.0, 2.0, 3.0], 1967 ... [4.0, 5.0, 6.0]], 1968 ... [[7.0, 8.0, 9.0], 1969 ... [10.0, 11.0, 12.0]]] 1970 >>> tf.image.random_brightness(x, 0.2) 1971 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 1972 1973 Returns: 1974 The brightness-adjusted image(s). 1975 1976 Raises: 1977 ValueError: if `max_delta` is negative. 
1978 """ 1979 if max_delta < 0: 1980 raise ValueError('max_delta must be non-negative.') 1981 1982 delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed) 1983 return adjust_brightness(image, delta) 1984 1985 1986@tf_export('image.stateless_random_brightness', v1=[]) 1987@dispatch.add_dispatch_support 1988def stateless_random_brightness(image, max_delta, seed): 1989 """Adjust the brightness of images by a random factor deterministically. 1990 1991 Equivalent to `adjust_brightness()` using a `delta` randomly picked in the 1992 interval `[-max_delta, max_delta)`. 1993 1994 Guarantees the same results given the same `seed` independent of how many 1995 times the function is called, and independent of global seed settings (e.g. 1996 `tf.random.set_seed`). 1997 1998 Usage Example: 1999 2000 >>> x = [[[1.0, 2.0, 3.0], 2001 ... [4.0, 5.0, 6.0]], 2002 ... [[7.0, 8.0, 9.0], 2003 ... [10.0, 11.0, 12.0]]] 2004 >>> seed = (1, 2) 2005 >>> tf.image.stateless_random_brightness(x, 0.2, seed) 2006 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2007 array([[[ 1.1376241, 2.1376243, 3.1376243], 2008 [ 4.1376243, 5.1376243, 6.1376243]], 2009 [[ 7.1376243, 8.137624 , 9.137624 ], 2010 [10.137624 , 11.137624 , 12.137624 ]]], dtype=float32)> 2011 2012 Args: 2013 image: An image or images to adjust. 2014 max_delta: float, must be non-negative. 2015 seed: A shape [2] Tensor, the seed to the random number generator. Must have 2016 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 2017 2018 Returns: 2019 The brightness-adjusted image(s). 2020 2021 Raises: 2022 ValueError: if `max_delta` is negative. 
2023 """ 2024 if max_delta < 0: 2025 raise ValueError('max_delta must be non-negative.') 2026 2027 delta = stateless_random_ops.stateless_random_uniform( 2028 shape=[], minval=-max_delta, maxval=max_delta, seed=seed) 2029 return adjust_brightness(image, delta) 2030 2031 2032@tf_export('image.random_contrast') 2033@dispatch.add_dispatch_support 2034def random_contrast(image, lower, upper, seed=None): 2035 """Adjust the contrast of an image or images by a random factor. 2036 2037 Equivalent to `adjust_contrast()` but uses a `contrast_factor` randomly 2038 picked in the interval `[lower, upper)`. 2039 2040 For producing deterministic results given a `seed` value, use 2041 `tf.image.stateless_random_contrast`. Unlike using the `seed` param 2042 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 2043 same results given the same seed independent of how many times the function is 2044 called, and independent of global seed settings (e.g. tf.random.set_seed). 2045 2046 Args: 2047 image: An image tensor with 3 or more dimensions. 2048 lower: float. Lower bound for the random contrast factor. 2049 upper: float. Upper bound for the random contrast factor. 2050 seed: A Python integer. Used to create a random seed. See 2051 `tf.compat.v1.set_random_seed` for behavior. 2052 2053 Usage Example: 2054 2055 >>> x = [[[1.0, 2.0, 3.0], 2056 ... [4.0, 5.0, 6.0]], 2057 ... [[7.0, 8.0, 9.0], 2058 ... [10.0, 11.0, 12.0]]] 2059 >>> tf.image.random_contrast(x, 0.2, 0.5) 2060 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 2061 2062 Returns: 2063 The contrast-adjusted image(s). 2064 2065 Raises: 2066 ValueError: if `upper <= lower` or if `lower < 0`. 
2067 """ 2068 if upper <= lower: 2069 raise ValueError('upper must be > lower.') 2070 2071 if lower < 0: 2072 raise ValueError('lower must be non-negative.') 2073 2074 contrast_factor = random_ops.random_uniform([], lower, upper, seed=seed) 2075 return adjust_contrast(image, contrast_factor) 2076 2077 2078@tf_export('image.stateless_random_contrast', v1=[]) 2079@dispatch.add_dispatch_support 2080def stateless_random_contrast(image, lower, upper, seed): 2081 """Adjust the contrast of images by a random factor deterministically. 2082 2083 Guarantees the same results given the same `seed` independent of how many 2084 times the function is called, and independent of global seed settings (e.g. 2085 `tf.random.set_seed`). 2086 2087 Args: 2088 image: An image tensor with 3 or more dimensions. 2089 lower: float. Lower bound for the random contrast factor. 2090 upper: float. Upper bound for the random contrast factor. 2091 seed: A shape [2] Tensor, the seed to the random number generator. Must have 2092 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 2093 2094 Usage Example: 2095 2096 >>> x = [[[1.0, 2.0, 3.0], 2097 ... [4.0, 5.0, 6.0]], 2098 ... [[7.0, 8.0, 9.0], 2099 ... [10.0, 11.0, 12.0]]] 2100 >>> seed = (1, 2) 2101 >>> tf.image.stateless_random_contrast(x, 0.2, 0.5, seed) 2102 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2103 array([[[3.4605184, 4.4605184, 5.4605184], 2104 [4.820173 , 5.820173 , 6.820173 ]], 2105 [[6.179827 , 7.179827 , 8.179828 ], 2106 [7.5394816, 8.539482 , 9.539482 ]]], dtype=float32)> 2107 2108 Returns: 2109 The contrast-adjusted image(s). 2110 2111 Raises: 2112 ValueError: if `upper <= lower` or if `lower < 0`. 
2113 """ 2114 if upper <= lower: 2115 raise ValueError('upper must be > lower.') 2116 2117 if lower < 0: 2118 raise ValueError('lower must be non-negative.') 2119 2120 contrast_factor = stateless_random_ops.stateless_random_uniform( 2121 shape=[], minval=lower, maxval=upper, seed=seed) 2122 return adjust_contrast(image, contrast_factor) 2123 2124 2125@tf_export('image.adjust_brightness') 2126@dispatch.add_dispatch_support 2127def adjust_brightness(image, delta): 2128 """Adjust the brightness of RGB or Grayscale images. 2129 2130 This is a convenience method that converts RGB images to float 2131 representation, adjusts their brightness, and then converts them back to the 2132 original data type. If several adjustments are chained, it is advisable to 2133 minimize the number of redundant conversions. 2134 2135 The value `delta` is added to all components of the tensor `image`. `image` is 2136 converted to `float` and scaled appropriately if it is in fixed-point 2137 representation, and `delta` is converted to the same data type. For regular 2138 images, `delta` should be in the range `(-1,1)`, as it is added to the image 2139 in floating point representation, where pixel values are in the `[0,1)` range. 2140 2141 Usage Example: 2142 2143 >>> x = [[[1.0, 2.0, 3.0], 2144 ... [4.0, 5.0, 6.0]], 2145 ... [[7.0, 8.0, 9.0], 2146 ... [10.0, 11.0, 12.0]]] 2147 >>> tf.image.adjust_brightness(x, delta=0.1) 2148 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2149 array([[[ 1.1, 2.1, 3.1], 2150 [ 4.1, 5.1, 6.1]], 2151 [[ 7.1, 8.1, 9.1], 2152 [10.1, 11.1, 12.1]]], dtype=float32)> 2153 2154 Args: 2155 image: RGB image or images to adjust. 2156 delta: A scalar. Amount to add to the pixel values. 2157 2158 Returns: 2159 A brightness-adjusted tensor of the same shape and type as `image`. 
2160 """ 2161 with ops.name_scope(None, 'adjust_brightness', [image, delta]) as name: 2162 image = ops.convert_to_tensor(image, name='image') 2163 # Remember original dtype to so we can convert back if needed 2164 orig_dtype = image.dtype 2165 2166 if orig_dtype in [dtypes.float16, dtypes.float32]: 2167 flt_image = image 2168 else: 2169 flt_image = convert_image_dtype(image, dtypes.float32) 2170 2171 adjusted = math_ops.add( 2172 flt_image, math_ops.cast(delta, flt_image.dtype), name=name) 2173 2174 return convert_image_dtype(adjusted, orig_dtype, saturate=True) 2175 2176 2177@tf_export('image.adjust_contrast') 2178@dispatch.add_dispatch_support 2179def adjust_contrast(images, contrast_factor): 2180 """Adjust contrast of RGB or grayscale images. 2181 2182 This is a convenience method that converts RGB images to float 2183 representation, adjusts their contrast, and then converts them back to the 2184 original data type. If several adjustments are chained, it is advisable to 2185 minimize the number of redundant conversions. 2186 2187 `images` is a tensor of at least 3 dimensions. The last 3 dimensions are 2188 interpreted as `[height, width, channels]`. The other dimensions only 2189 represent a collection of images, such as `[batch, height, width, channels].` 2190 2191 Contrast is adjusted independently for each channel of each image. 2192 2193 For each channel, this Op computes the mean of the image pixels in the 2194 channel and then adjusts each component `x` of each pixel to 2195 `(x - mean) * contrast_factor + mean`. 2196 2197 Usage Example: 2198 2199 >>> x = [[[1.0, 2.0, 3.0], 2200 ... [4.0, 5.0, 6.0]], 2201 ... [[7.0, 8.0, 9.0], 2202 ... [10.0, 11.0, 12.0]]] 2203 >>> tf.image.adjust_contrast(x, 2) 2204 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2205 array([[[-3.5, -2.5, -1.5], 2206 [ 2.5, 3.5, 4.5]], 2207 [[ 8.5, 9.5, 10.5], 2208 [14.5, 15.5, 16.5]]], dtype=float32)> 2209 2210 Args: 2211 images: Images to adjust. At least 3-D. 
2212 contrast_factor: A float multiplier for adjusting contrast. 2213 2214 Returns: 2215 The contrast-adjusted image or images. 2216 """ 2217 with ops.name_scope(None, 'adjust_contrast', 2218 [images, contrast_factor]) as name: 2219 images = ops.convert_to_tensor(images, name='images') 2220 # Remember original dtype to so we can convert back if needed 2221 orig_dtype = images.dtype 2222 2223 if orig_dtype in (dtypes.float16, dtypes.float32): 2224 flt_images = images 2225 else: 2226 flt_images = convert_image_dtype(images, dtypes.float32) 2227 2228 adjusted = gen_image_ops.adjust_contrastv2( 2229 flt_images, contrast_factor=contrast_factor, name=name) 2230 2231 return convert_image_dtype(adjusted, orig_dtype, saturate=True) 2232 2233 2234@tf_export('image.adjust_gamma') 2235@dispatch.add_dispatch_support 2236def adjust_gamma(image, gamma=1, gain=1): 2237 """Performs [Gamma Correction](http://en.wikipedia.org/wiki/Gamma_correction). 2238 2239 on the input image. 2240 2241 Also known as Power Law Transform. This function converts the 2242 input images at first to float representation, then transforms them 2243 pixelwise according to the equation `Out = gain * In**gamma`, 2244 and then converts the back to the original data type. 2245 2246 Usage Example: 2247 2248 >>> x = [[[1.0, 2.0, 3.0], 2249 ... [4.0, 5.0, 6.0]], 2250 ... [[7.0, 8.0, 9.0], 2251 ... [10.0, 11.0, 12.0]]] 2252 >>> tf.image.adjust_gamma(x, 0.2) 2253 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2254 array([[[1. , 1.1486983, 1.2457309], 2255 [1.319508 , 1.3797297, 1.4309691]], 2256 [[1.4757731, 1.5157166, 1.5518456], 2257 [1.5848932, 1.6153942, 1.6437519]]], dtype=float32)> 2258 2259 Args: 2260 image : RGB image or images to adjust. 2261 gamma : A scalar or tensor. Non-negative real number. 2262 gain : A scalar or tensor. The constant multiplier. 2263 2264 Returns: 2265 A Tensor. A Gamma-adjusted tensor of the same shape and type as `image`. 2266 2267 Raises: 2268 ValueError: If gamma is negative. 
2269 Notes: 2270 For gamma greater than 1, the histogram will shift towards left and 2271 the output image will be darker than the input image. 2272 For gamma less than 1, the histogram will shift towards right and 2273 the output image will be brighter than the input image. 2274 References: 2275 [Wikipedia](http://en.wikipedia.org/wiki/Gamma_correction) 2276 """ 2277 2278 with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]) as name: 2279 image = ops.convert_to_tensor(image, name='image') 2280 # Remember original dtype to so we can convert back if needed 2281 orig_dtype = image.dtype 2282 2283 if orig_dtype in [dtypes.float16, dtypes.float32]: 2284 flt_image = image 2285 else: 2286 flt_image = convert_image_dtype(image, dtypes.float32) 2287 2288 assert_op = _assert(gamma >= 0, ValueError, 2289 'Gamma should be a non-negative real number.') 2290 if assert_op: 2291 gamma = control_flow_ops.with_dependencies(assert_op, gamma) 2292 2293 # According to the definition of gamma correction. 2294 adjusted_img = gain * flt_image**gamma 2295 2296 return convert_image_dtype(adjusted_img, orig_dtype, saturate=True) 2297 2298 2299@tf_export('image.convert_image_dtype') 2300@dispatch.add_dispatch_support 2301def convert_image_dtype(image, dtype, saturate=False, name=None): 2302 """Convert `image` to `dtype`, scaling its values if needed. 2303 2304 The operation supports data types (for `image` and `dtype`) of 2305 `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`, 2306 `float16`, `float32`, `float64`, `bfloat16`. 2307 2308 Images that are represented using floating point values are expected to have 2309 values in the range [0,1). Image data stored in integer data types are 2310 expected to have values in the range `[0,MAX]`, where `MAX` is the largest 2311 positive representable number for the data type. 2312 2313 This op converts between data types, scaling the values appropriately before 2314 casting. 
2315 2316 Usage Example: 2317 2318 >>> x = [[[1, 2, 3], [4, 5, 6]], 2319 ... [[7, 8, 9], [10, 11, 12]]] 2320 >>> x_int8 = tf.convert_to_tensor(x, dtype=tf.int8) 2321 >>> tf.image.convert_image_dtype(x_int8, dtype=tf.float16, saturate=False) 2322 <tf.Tensor: shape=(2, 2, 3), dtype=float16, numpy= 2323 array([[[0.00787, 0.01575, 0.02362], 2324 [0.0315 , 0.03937, 0.04724]], 2325 [[0.0551 , 0.063 , 0.07086], 2326 [0.07874, 0.0866 , 0.0945 ]]], dtype=float16)> 2327 2328 Converting integer types to floating point types returns normalized floating 2329 point values in the range [0, 1); the values are normalized by the `MAX` value 2330 of the input dtype. Consider the following two examples: 2331 2332 >>> a = [[[1], [2]], [[3], [4]]] 2333 >>> a_int8 = tf.convert_to_tensor(a, dtype=tf.int8) 2334 >>> tf.image.convert_image_dtype(a_int8, dtype=tf.float32) 2335 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy= 2336 array([[[0.00787402], 2337 [0.01574803]], 2338 [[0.02362205], 2339 [0.03149606]]], dtype=float32)> 2340 2341 >>> a_int32 = tf.convert_to_tensor(a, dtype=tf.int32) 2342 >>> tf.image.convert_image_dtype(a_int32, dtype=tf.float32) 2343 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy= 2344 array([[[4.6566129e-10], 2345 [9.3132257e-10]], 2346 [[1.3969839e-09], 2347 [1.8626451e-09]]], dtype=float32)> 2348 2349 Despite having identical values of `a` and output dtype of `float32`, the 2350 outputs differ due to the different input dtypes (`int8` vs. `int32`). This 2351 is, again, because the values are normalized by the `MAX` value of the input 2352 dtype. 2353 2354 Note that converting floating point values to integer type may lose precision. 2355 In the example below, an image tensor `b` of dtype `float32` is converted to 2356 `int8` and back to `float32`. The final output, however, is different from 2357 the original input `b` due to precision loss. 
2358 2359 >>> b = [[[0.12], [0.34]], [[0.56], [0.78]]] 2360 >>> b_float32 = tf.convert_to_tensor(b, dtype=tf.float32) 2361 >>> b_int8 = tf.image.convert_image_dtype(b_float32, dtype=tf.int8) 2362 >>> tf.image.convert_image_dtype(b_int8, dtype=tf.float32) 2363 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy= 2364 array([[[0.11811024], 2365 [0.33858266]], 2366 [[0.5590551 ], 2367 [0.77952754]]], dtype=float32)> 2368 2369 Scaling up from an integer type (input dtype) to another integer type (output 2370 dtype) will not map input dtype's `MAX` to output dtype's `MAX` but converting 2371 back and forth should result in no change. For example, as shown below, the 2372 `MAX` value of int8 (=127) is not mapped to the `MAX` value of int16 (=32,767) 2373 but, when scaled back, we get the same, original values of `c`. 2374 2375 >>> c = [[[1], [2]], [[127], [127]]] 2376 >>> c_int8 = tf.convert_to_tensor(c, dtype=tf.int8) 2377 >>> c_int16 = tf.image.convert_image_dtype(c_int8, dtype=tf.int16) 2378 >>> print(c_int16) 2379 tf.Tensor( 2380 [[[ 256] 2381 [ 512]] 2382 [[32512] 2383 [32512]]], shape=(2, 2, 1), dtype=int16) 2384 >>> c_int8_back = tf.image.convert_image_dtype(c_int16, dtype=tf.int8) 2385 >>> print(c_int8_back) 2386 tf.Tensor( 2387 [[[ 1] 2388 [ 2]] 2389 [[127] 2390 [127]]], shape=(2, 2, 1), dtype=int8) 2391 2392 Scaling down from an integer type to another integer type can be a lossy 2393 conversion. Notice in the example below that converting `int16` to `uint8` and 2394 back to `int16` has lost precision. 
2395 2396 >>> d = [[[1000], [2000]], [[3000], [4000]]] 2397 >>> d_int16 = tf.convert_to_tensor(d, dtype=tf.int16) 2398 >>> d_uint8 = tf.image.convert_image_dtype(d_int16, dtype=tf.uint8) 2399 >>> d_int16_back = tf.image.convert_image_dtype(d_uint8, dtype=tf.int16) 2400 >>> print(d_int16_back) 2401 tf.Tensor( 2402 [[[ 896] 2403 [1920]] 2404 [[2944] 2405 [3968]]], shape=(2, 2, 1), dtype=int16) 2406 2407 Note that converting from floating point inputs to integer types may lead to 2408 over/underflow problems. Set saturate to `True` to avoid such problem in 2409 problematic conversions. If enabled, saturation will clip the output into the 2410 allowed range before performing a potentially dangerous cast (and only before 2411 performing such a cast, i.e., when casting from a floating point to an integer 2412 type, and when casting from a signed to an unsigned type; `saturate` has no 2413 effect on casts between floats, or on casts that increase the type's range). 2414 2415 Args: 2416 image: An image. 2417 dtype: A `DType` to convert `image` to. 2418 saturate: If `True`, clip the input before casting (if necessary). 2419 name: A name for this operation (optional). 2420 2421 Returns: 2422 `image`, converted to `dtype`. 2423 2424 Raises: 2425 AttributeError: Raises an attribute error when dtype is neither 2426 float nor integer 2427 """ 2428 image = ops.convert_to_tensor(image, name='image') 2429 dtype = dtypes.as_dtype(dtype) 2430 if not dtype.is_floating and not dtype.is_integer: 2431 raise AttributeError('dtype must be either floating point or integer') 2432 if dtype == image.dtype: 2433 return array_ops.identity(image, name=name) 2434 2435 with ops.name_scope(name, 'convert_image', [image]) as name: 2436 # Both integer: use integer multiplication in the larger range 2437 if image.dtype.is_integer and dtype.is_integer: 2438 scale_in = image.dtype.max 2439 scale_out = dtype.max 2440 if scale_in > scale_out: 2441 # Scaling down, scale first, then cast. 
The scaling factor will 2442 # cause in.max to be mapped to above out.max but below out.max+1, 2443 # so that the output is safely in the supported range. 2444 scale = (scale_in + 1) // (scale_out + 1) 2445 scaled = math_ops.floordiv(image, scale) 2446 2447 if saturate: 2448 return math_ops.saturate_cast(scaled, dtype, name=name) 2449 else: 2450 return math_ops.cast(scaled, dtype, name=name) 2451 else: 2452 # Scaling up, cast first, then scale. The scale will not map in.max to 2453 # out.max, but converting back and forth should result in no change. 2454 if saturate: 2455 cast = math_ops.saturate_cast(image, dtype) 2456 else: 2457 cast = math_ops.cast(image, dtype) 2458 scale = (scale_out + 1) // (scale_in + 1) 2459 return math_ops.multiply(cast, scale, name=name) 2460 elif image.dtype.is_floating and dtype.is_floating: 2461 # Both float: Just cast, no possible overflows in the allowed ranges. 2462 # Note: We're ignoring float overflows. If your image dynamic range 2463 # exceeds float range, you're on your own. 2464 return math_ops.cast(image, dtype, name=name) 2465 else: 2466 if image.dtype.is_integer: 2467 # Converting to float: first cast, then scale. No saturation possible. 2468 cast = math_ops.cast(image, dtype) 2469 scale = 1. / image.dtype.max 2470 return math_ops.multiply(cast, scale, name=name) 2471 else: 2472 # Converting from float: first scale, then cast 2473 scale = dtype.max + 0.5 # avoid rounding problems in the cast 2474 scaled = math_ops.multiply(image, scale) 2475 if saturate: 2476 return math_ops.saturate_cast(scaled, dtype, name=name) 2477 else: 2478 return math_ops.cast(scaled, dtype, name=name) 2479 2480 2481@tf_export('image.rgb_to_grayscale') 2482@dispatch.add_dispatch_support 2483def rgb_to_grayscale(images, name=None): 2484 """Converts one or more images from RGB to Grayscale. 2485 2486 Outputs a tensor of the same `DType` and rank as `images`. 
The size of the 2487 last dimension of the output is 1, containing the Grayscale value of the 2488 pixels. 2489 2490 >>> original = tf.constant([[[1.0, 2.0, 3.0]]]) 2491 >>> converted = tf.image.rgb_to_grayscale(original) 2492 >>> print(converted.numpy()) 2493 [[[1.81...]]] 2494 2495 Args: 2496 images: The RGB tensor to convert. The last dimension must have size 3 and 2497 should contain RGB values. 2498 name: A name for the operation (optional). 2499 2500 Returns: 2501 The converted grayscale image(s). 2502 """ 2503 with ops.name_scope(name, 'rgb_to_grayscale', [images]) as name: 2504 images = ops.convert_to_tensor(images, name='images') 2505 # Remember original dtype to so we can convert back if needed 2506 orig_dtype = images.dtype 2507 flt_image = convert_image_dtype(images, dtypes.float32) 2508 2509 # Reference for converting between RGB and grayscale. 2510 # https://en.wikipedia.org/wiki/Luma_%28video%29 2511 rgb_weights = [0.2989, 0.5870, 0.1140] 2512 gray_float = math_ops.tensordot(flt_image, rgb_weights, [-1, -1]) 2513 gray_float = array_ops.expand_dims(gray_float, -1) 2514 return convert_image_dtype(gray_float, orig_dtype, name=name) 2515 2516 2517@tf_export('image.grayscale_to_rgb') 2518@dispatch.add_dispatch_support 2519def grayscale_to_rgb(images, name=None): 2520 """Converts one or more images from Grayscale to RGB. 2521 2522 Outputs a tensor of the same `DType` and rank as `images`. The size of the 2523 last dimension of the output is 3, containing the RGB value of the pixels. 2524 The input images' last dimension must be size 1. 2525 2526 >>> original = tf.constant([[[1.0], [2.0], [3.0]]]) 2527 >>> converted = tf.image.grayscale_to_rgb(original) 2528 >>> print(converted.numpy()) 2529 [[[1. 1. 1.] 2530 [2. 2. 2.] 2531 [3. 3. 3.]]] 2532 2533 Args: 2534 images: The Grayscale tensor to convert. The last dimension must be size 1. 2535 name: A name for the operation (optional). 2536 2537 Returns: 2538 The converted grayscale image(s). 
2539 """ 2540 with ops.name_scope(name, 'grayscale_to_rgb', [images]) as name: 2541 images = _AssertGrayscaleImage(images) 2542 2543 images = ops.convert_to_tensor(images, name='images') 2544 rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0) 2545 shape_list = ([array_ops.ones(rank_1, dtype=dtypes.int32)] + 2546 [array_ops.expand_dims(3, 0)]) 2547 multiples = array_ops.concat(shape_list, 0) 2548 rgb = array_ops.tile(images, multiples, name=name) 2549 rgb.set_shape(images.get_shape()[:-1].concatenate([3])) 2550 return rgb 2551 2552 2553# pylint: disable=invalid-name 2554@tf_export('image.random_hue') 2555@dispatch.add_dispatch_support 2556def random_hue(image, max_delta, seed=None): 2557 """Adjust the hue of RGB images by a random factor. 2558 2559 Equivalent to `adjust_hue()` but uses a `delta` randomly 2560 picked in the interval `[-max_delta, max_delta)`. 2561 2562 `max_delta` must be in the interval `[0, 0.5]`. 2563 2564 Usage Example: 2565 2566 >>> x = [[[1.0, 2.0, 3.0], 2567 ... [4.0, 5.0, 6.0]], 2568 ... [[7.0, 8.0, 9.0], 2569 ... [10.0, 11.0, 12.0]]] 2570 >>> tf.image.random_hue(x, 0.2) 2571 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 2572 2573 For producing deterministic results given a `seed` value, use 2574 `tf.image.stateless_random_hue`. Unlike using the `seed` param with 2575 `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the same 2576 results given the same seed independent of how many times the function is 2577 called, and independent of global seed settings (e.g. tf.random.set_seed). 2578 2579 Args: 2580 image: RGB image or images. The size of the last dimension must be 3. 2581 max_delta: float. The maximum value for the random delta. 2582 seed: An operation-specific seed. It will be used in conjunction with the 2583 graph-level seed to determine the real seeds that will be used in this 2584 operation. 
Please see the documentation of set_random_seed for its 2585 interaction with the graph-level random seed. 2586 2587 Returns: 2588 Adjusted image(s), same shape and DType as `image`. 2589 2590 Raises: 2591 ValueError: if `max_delta` is invalid. 2592 """ 2593 if max_delta > 0.5: 2594 raise ValueError('max_delta must be <= 0.5.') 2595 2596 if max_delta < 0: 2597 raise ValueError('max_delta must be non-negative.') 2598 2599 delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed) 2600 return adjust_hue(image, delta) 2601 2602 2603@tf_export('image.stateless_random_hue', v1=[]) 2604@dispatch.add_dispatch_support 2605def stateless_random_hue(image, max_delta, seed): 2606 """Adjust the hue of RGB images by a random factor deterministically. 2607 2608 Equivalent to `adjust_hue()` but uses a `delta` randomly picked in the 2609 interval `[-max_delta, max_delta)`. 2610 2611 Guarantees the same results given the same `seed` independent of how many 2612 times the function is called, and independent of global seed settings (e.g. 2613 `tf.random.set_seed`). 2614 2615 `max_delta` must be in the interval `[0, 0.5]`. 2616 2617 Usage Example: 2618 2619 >>> x = [[[1.0, 2.0, 3.0], 2620 ... [4.0, 5.0, 6.0]], 2621 ... [[7.0, 8.0, 9.0], 2622 ... [10.0, 11.0, 12.0]]] 2623 >>> seed = (1, 2) 2624 >>> tf.image.stateless_random_hue(x, 0.2, seed) 2625 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2626 array([[[ 1.6514902, 1. , 3. ], 2627 [ 4.65149 , 4. , 6. ]], 2628 [[ 7.65149 , 7. , 9. ], 2629 [10.65149 , 10. , 12. ]]], dtype=float32)> 2630 2631 Args: 2632 image: RGB image or images. The size of the last dimension must be 3. 2633 max_delta: float. The maximum value for the random delta. 2634 seed: A shape [2] Tensor, the seed to the random number generator. Must have 2635 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 2636 2637 Returns: 2638 Adjusted image(s), same shape and DType as `image`. 
2639 2640 Raises: 2641 ValueError: if `max_delta` is invalid. 2642 """ 2643 if max_delta > 0.5: 2644 raise ValueError('max_delta must be <= 0.5.') 2645 2646 if max_delta < 0: 2647 raise ValueError('max_delta must be non-negative.') 2648 2649 delta = stateless_random_ops.stateless_random_uniform( 2650 shape=[], minval=-max_delta, maxval=max_delta, seed=seed) 2651 return adjust_hue(image, delta) 2652 2653 2654@tf_export('image.adjust_hue') 2655@dispatch.add_dispatch_support 2656def adjust_hue(image, delta, name=None): 2657 """Adjust hue of RGB images. 2658 2659 This is a convenience method that converts an RGB image to float 2660 representation, converts it to HSV, adds an offset to the 2661 hue channel, converts back to RGB and then back to the original 2662 data type. If several adjustments are chained it is advisable to minimize 2663 the number of redundant conversions. 2664 2665 `image` is an RGB image. The image hue is adjusted by converting the 2666 image(s) to HSV and rotating the hue channel (H) by 2667 `delta`. The image is then converted back to RGB. 2668 2669 `delta` must be in the interval `[-1, 1]`. 2670 2671 Usage Example: 2672 2673 >>> x = [[[1.0, 2.0, 3.0], 2674 ... [4.0, 5.0, 6.0]], 2675 ... [[7.0, 8.0, 9.0], 2676 ... [10.0, 11.0, 12.0]]] 2677 >>> tf.image.adjust_hue(x, 0.2) 2678 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2679 array([[[ 2.3999996, 1. , 3. ], 2680 [ 5.3999996, 4. , 6. ]], 2681 [[ 8.4 , 7. , 9. ], 2682 [11.4 , 10. , 12. ]]], dtype=float32)> 2683 2684 Args: 2685 image: RGB image or images. The size of the last dimension must be 3. 2686 delta: float. How much to add to the hue channel. 2687 name: A name for this operation (optional). 2688 2689 Returns: 2690 Adjusted image(s), same shape and DType as `image`. 2691 2692 Usage Example: 2693 2694 >>> image = [[[1, 2, 3], [4, 5, 6]], 2695 ... [[7, 8, 9], [10, 11, 12]], 2696 ... 
# pylint: disable=invalid-name
@tf_export('image.random_jpeg_quality')
@dispatch.add_dispatch_support
def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None):
  """Re-encodes an image at a randomly chosen jpeg quality to add jpeg noise.

  An integer quality is drawn with `random_uniform` using
  `minval=min_jpeg_quality` and `maxval=max_jpeg_quality`, and the image is then
  passed through `adjust_jpeg_quality` with that value.

  `min_jpeg_quality` must be in the interval `[0, 100]` and less than
  `max_jpeg_quality`.
  `max_jpeg_quality` must be in the interval `[0, 100]`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.random_jpeg_quality(x, 75, 95)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_jpeg_quality`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function is
  called, and independent of global seed settings (e.g. tf.random.set_seed).

  Args:
    image: 3D image. Size of the last dimension must be 1 or 3.
    min_jpeg_quality: Minimum jpeg encoding quality to use.
    max_jpeg_quality: Maximum jpeg encoding quality to use.
    seed: An operation-specific seed. It will be used in conjunction with the
      graph-level seed to determine the real seeds that will be used in this
      operation. Please see the documentation of set_random_seed for its
      interaction with the graph-level random seed.

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
  """
  # Both bounds have to be valid jpeg quality values.
  quality_out_of_bounds = (
      min_jpeg_quality < 0 or max_jpeg_quality < 0 or
      min_jpeg_quality > 100 or max_jpeg_quality > 100)
  if quality_out_of_bounds:
    raise ValueError('jpeg encoding range must be between 0 and 100.')

  # An empty or inverted range leaves nothing to sample from.
  if min_jpeg_quality >= max_jpeg_quality:
    raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')

  return adjust_jpeg_quality(
      image,
      random_ops.random_uniform(
          shape=[],
          minval=min_jpeg_quality,
          maxval=max_jpeg_quality,
          dtype=dtypes.int32,
          seed=seed))
@tf_export('image.adjust_jpeg_quality')
@dispatch.add_dispatch_support
def adjust_jpeg_quality(image, jpeg_quality, name=None):
  """Round-trips an image through jpeg encoding at a given quality.

  The image is converted to uint8, encoded to jpeg with `jpeg_quality`, decoded
  again, and finally converted back to its original data type.

  `jpeg_quality` must be in the interval `[0, 100]`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.adjust_jpeg_quality(x, 75)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[1., 1., 1.],
          [1., 1., 1.]],
         [[1., 1., 1.],
          [1., 1., 1.]]], dtype=float32)>

  Args:
    image: 3D image. The size of the last dimension must be None, 1 or 3.
    jpeg_quality: Python int or Tensor of type int32. jpeg encoding quality.
    name: A name for this operation (optional).

  Returns:
    Adjusted image, same shape and DType as `image`.

  Raises:
    InvalidArgumentError: quality must be in [0,100]
    InvalidArgumentError: image must have 1 or 3 channels
  """
  with ops.name_scope(name, 'adjust_jpeg_quality', [image]):
    source = ops.convert_to_tensor(image, name='image')
    # Static channel count of the input; handed to the decoder below.
    num_channels = source.shape.as_list()[-1]
    # Kept so the result can be converted back to the caller's dtype.
    input_dtype = source.dtype
    as_uint8 = convert_image_dtype(source, dtypes.uint8, saturate=True)

    quality = jpeg_quality
    if not _is_tensor(quality):
      # A plain Python value is wrapped as an int32 tensor.
      quality = ops.convert_to_tensor(quality, dtype=dtypes.int32)

    encoded = gen_image_ops.encode_jpeg_variable_quality(as_uint8, quality)
    decoded = gen_image_ops.decode_jpeg(encoded, channels=num_channels)
    return convert_image_dtype(decoded, input_dtype, saturate=True)
@tf_export('image.stateless_random_saturation', v1=[])
@dispatch.add_dispatch_support
def stateless_random_saturation(image, lower, upper, seed=None):
  """Adjust the saturation of RGB images by a random factor deterministically.

  Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly
  picked in the interval `[lower, upper)`.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> seed = (1, 2)
  >>> tf.image.stateless_random_saturation(x, 0.5, 1.0, seed)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.1559395,  2.0779698,  3.       ],
          [ 4.1559396,  5.07797  ,  6.       ]],
         [[ 7.1559396,  8.07797  ,  9.       ],
          [10.155939 , 11.07797  , 12.       ]]], dtype=float32)>

  Args:
    image: RGB image or images. The size of the last dimension must be 3.
    lower: float.  Lower bound for the random saturation factor.
    upper: float.  Upper bound for the random saturation factor.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
      Required: the `None` default only exists for signature compatibility and
      is rejected.

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `upper <= lower`, if `lower < 0`, or if `seed` is `None`.
  """
  if upper <= lower:
    raise ValueError('upper must be > lower.')

  if lower < 0:
    raise ValueError('lower must be non-negative.')

  # A stateless op cannot pick a seed on its own. Fail here with a clear
  # message instead of letting `None` reach tensor conversion.
  if seed is None:
    raise ValueError(
        'seed must be provided: expected a shape [2] Tensor of dtype int32 or '
        'int64.')

  saturation_factor = stateless_random_ops.stateless_random_uniform(
      shape=[], minval=lower, maxval=upper, seed=seed)
  return adjust_saturation(image, saturation_factor)
@tf_export('io.is_jpeg', 'image.is_jpeg', v1=['io.is_jpeg', 'image.is_jpeg'])
def is_jpeg(contents, name=None):
  r"""Checks whether `contents` looks like an encoded JPEG image.

  Only the first three bytes are compared, so the result can be a false
  positive.

  Args:
    contents: 0-D `string`. The encoded image bytes.
    name: A name for the operation (optional)

  Returns:
     A scalar boolean tensor indicating if 'contents' may be a JPEG image.
  """
  # A plain JPEG begins with \xff\xd8\xff\xe0 and one carrying EXIF data with
  # \xff\xd8\xff\xe1; comparing against the shared \xff\xd8\xff prefix accepts
  # both.
  jpeg_magic = b'\xff\xd8\xff'
  with ops.name_scope(name, 'is_jpeg'):
    header = string_ops.substr(contents, 0, 3)
    return math_ops.equal(header, jpeg_magic, name=name)


def _is_png(contents, name=None):
  r"""Checks whether `contents` looks like an encoded PNG image.

  Only the first three bytes are compared, so the result can be a false
  positive.

  Args:
    contents: 0-D `string`. The encoded image bytes.
    name: A name for the operation (optional)

  Returns:
     A scalar boolean tensor indicating if 'contents' may be a PNG image.
  """
  png_magic = b'\211PN'
  with ops.name_scope(name, 'is_png'):
    header = string_ops.substr(contents, 0, 3)
    return math_ops.equal(header, png_magic, name=name)


# The generated decode/encode kernels below are exported unchanged under both
# the `tf.io` and `tf.image` namespaces, with dispatch support added.
tf_export(
    'io.decode_and_crop_jpeg',
    'image.decode_and_crop_jpeg',
    v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_and_crop_jpeg))

tf_export(
    'io.decode_bmp',
    'image.decode_bmp',
    v1=['io.decode_bmp', 'image.decode_bmp'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_bmp))
tf_export(
    'io.decode_gif',
    'image.decode_gif',
    v1=['io.decode_gif', 'image.decode_gif'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_gif))
tf_export(
    'io.decode_jpeg',
    'image.decode_jpeg',
    v1=['io.decode_jpeg', 'image.decode_jpeg'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_jpeg))
tf_export(
    'io.decode_png',
    'image.decode_png',
    v1=['io.decode_png', 'image.decode_png'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_png))

tf_export(
    'io.encode_jpeg',
    'image.encode_jpeg',
    v1=['io.encode_jpeg', 'image.encode_jpeg'])(
        dispatch.add_dispatch_support(gen_image_ops.encode_jpeg))
tf_export(
    'io.extract_jpeg_shape',
    'image.extract_jpeg_shape',
    v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])(
        dispatch.add_dispatch_support(gen_image_ops.extract_jpeg_shape))


@tf_export('io.encode_png', 'image.encode_png')
@dispatch.add_dispatch_support
def encode_png(image, compression=-1, name=None):
  r"""Encodes an image as PNG.

  `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
  where `channels` is:

  *   1: for grayscale.
  *   2: for grayscale + alpha.
  *   3: for RGB.
  *   4: for RGBA.

  The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
  default or a value from 0 to 9.  9 is the highest compression level,
  generating the smallest output, but is slower.

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `uint16`.
      3-D with shape `[height, width, channels]`.
    compression: An optional `int`. Defaults to `-1`. Compression level.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  # Convert first so array-like inputs reach the kernel as tensors.
  image_tensor = ops.convert_to_tensor(image)
  return gen_image_ops.encode_png(image_tensor, compression, name)


@tf_export(
    'io.decode_image',
    'image.decode_image',
    v1=['io.decode_image', 'image.decode_image'])
@dispatch.add_dispatch_support
def decode_image(contents,
                 channels=None,
                 dtype=dtypes.uint8,
                 name=None,
                 expand_animations=True):
  """Function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`.

  Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the
  appropriate operation to convert the input bytes `string` into a `Tensor`
  of type `dtype`.

  Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as
  opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D
  arrays `[height, width, num_channels]`. Make sure to take this into account
  when constructing your graph if you are intermixing GIF files with BMP, JPEG,
  and/or PNG files. Alternately, set the `expand_animations` argument of this
  function to `False`, in which case the op will return 3-dimensional tensors
  and will truncate animated GIF files to the first frame.

  NOTE: If the first frame of an animated GIF does not occupy the entire
  canvas (maximum frame width x maximum frame height), then it fills the
  unoccupied areas (in the first frame) with zeros (black). For frames after the
  first frame that does not occupy the entire canvas, it uses the previous
  frame to fill the unoccupied areas.

  Args:
    contents: A `Tensor` of type `string`. 0-D. The encoded image bytes.
    channels: An optional `int`. Defaults to `0`. Number of color channels for
      the decoded image.
    dtype: The desired DType of the returned `Tensor`.
    name: A name for the operation (optional)
    expand_animations: An optional `bool`. Defaults to `True`. Controls the
      shape of the returned op's output. If `True`, the returned op will produce
      a 3-D tensor for PNG, JPEG, and BMP files; and a 4-D tensor for all GIFs,
      whether animated or not. If, `False`, the returned op will produce a 3-D
      tensor for all file types and will truncate animated GIFs to the first
      frame.

  Returns:
    `Tensor` with type `dtype` and a 3- or 4-dimensional shape, depending on
    the file type and the value of the `expand_animations` parameter.

  Raises:
    ValueError: On incorrect number of channels.
  """
  with ops.name_scope(name, 'decode_image'):
    if channels is None:
      channels = 0

    # Only these dtypes are requested from the kernel directly. Any other
    # target is decoded as uint16 and converted afterwards.
    kernel_dtypes = [dtypes.float32, dtypes.uint8, dtypes.uint16]
    decodes_directly = dtype in kernel_dtypes
    decode_dtype = dtype if decodes_directly else dtypes.uint16

    decoded = gen_image_ops.decode_image(
        contents=contents,
        channels=channels,
        expand_animations=expand_animations,
        dtype=decode_dtype)
    if decodes_directly:
      return decoded
    return convert_image_dtype(decoded, dtype)
@tf_export('image.sample_distorted_bounding_box', v1=[])
@dispatch.add_dispatch_support
def sample_distorted_bounding_box_v2(image_size,
                                     bounding_boxes,
                                     seed=0,
                                     min_object_covered=0.1,
                                     aspect_ratio_range=None,
                                     area_range=None,
                                     max_attempts=None,
                                     use_image_if_no_bounding_boxes=None,
                                     name=None):
  """Generate a single randomly distorted bounding box for an image.

  Image recognition and object localization tasks often come with bounding box
  annotations next to the ground-truth labels. A common way to train such
  systems is *data augmentation*: randomly distorting an image while keeping its
  content. Given an `image_size`, `bounding_boxes` and a series of constraints,
  this Op outputs a randomly distorted localization of an object, i.e. a
  bounding box.

  The result is a single bounding box that may be used to crop the original
  image, returned as 3 tensors: `begin`, `size` and `bboxes`. `begin` and `size`
  can be fed directly into `tf.slice` to crop the image; `bboxes` may be
  supplied to `tf.image.draw_bounding_boxes` to visualize what the bounding box
  looks like.

  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
  and the height of the underlying image.

  For example,

  ```python
      # Generate a single distorted bounding box.
      begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
          tf.shape(image),
          bounding_boxes=bounding_boxes,
          min_object_covered=0.1)

      # Draw the bounding box in an image summary.
      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                    bbox_for_draw)
      tf.compat.v1.summary.image('images_with_box', image_with_box)

      # Employ the bounding box to distort the image.
      distorted_image = tf.slice(image, begin, size)
  ```

  Note that if no bounding box information is available, setting
  `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
  false and no bounding boxes are supplied, an error is raised.

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_sample_distorted_bounding_box`. Unlike using the `seed`
  param with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops
  guarantee the same results given the same seed independent of how many times
  the function is called, and independent of global seed settings
  (e.g. tf.random.set_seed).

  Args:
    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
      describing the N bounding boxes associated with the image.
    seed: An optional `int`. Defaults to `0`. If `seed` is set to non-zero, the
      random number generator is seeded by the given `seed`.  Otherwise, it is
      seeded by a random seed.
    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
      cropped area of the image must contain at least this fraction of any
      bounding box supplied. The value of this parameter should be non-negative.
      In the case of 0, the cropped area does not need to overlap any of the
      bounding boxes supplied.
    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
      1.33]`. The cropped area of the image must have an aspect `ratio = width /
      height` within this range.
    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
      cropped area of the image must contain a fraction of the supplied image
      within this range.
    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
      generating a cropped region of the image of the specified constraints.
      After `max_attempts` failures, return the entire image.
    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
      Controls behavior if no bounding boxes supplied. If true, assume an
      implicit bounding box covering the whole input. If false, raise an error.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (begin, size, bboxes).

    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
    `[offset_height, offset_width, 0]`. Provide as input to
      `tf.slice`.
    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
    `[target_height, target_width, -1]`. Provide as input to
      `tf.slice`.
    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
    the distorted bounding box.
    Provide as input to `tf.image.draw_bounding_boxes`.
  """
  # A falsy seed (0 or None) is forwarded as the pair (0, 0); any other value
  # is expanded into two op seeds by `random_seed.get_seed`.
  if seed:
    op_seed, op_seed2 = random_seed.get_seed(seed)
  else:
    op_seed, op_seed2 = 0, 0

  with ops.name_scope(name, 'sample_distorted_bounding_box'):
    return gen_image_ops.sample_distorted_bounding_box_v2(
        image_size,
        bounding_boxes,
        seed=op_seed,
        seed2=op_seed2,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
        name=name)
This Op, given the same `seed`, 3417 deterministically outputs a randomly distorted localization of an object, i.e. 3418 bounding box, given an `image_size`, `bounding_boxes` and a series of 3419 constraints. 3420 3421 The output of this Op is a single bounding box that may be used to crop the 3422 original image. The output is returned as 3 tensors: `begin`, `size` and 3423 `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the 3424 image. The latter may be supplied to `tf.image.draw_bounding_boxes` to 3425 visualize what the bounding box looks like. 3426 3427 Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. 3428 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 3429 and the height of the underlying image. 3430 3431 The output of this Op is guaranteed to be the same given the same `seed` and 3432 is independent of how many times the function is called, and independent of 3433 global seed settings (e.g. `tf.random.set_seed`). 3434 3435 Example usage: 3436 3437 >>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]]) 3438 >>> bbox = tf.constant( 3439 ... [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) 3440 >>> seed = (1, 2) 3441 >>> # Generate a single distorted bounding box. 3442 >>> bbox_begin, bbox_size, bbox_draw = ( 3443 ... tf.image.stateless_sample_distorted_bounding_box( 3444 ... tf.shape(image), bounding_boxes=bbox, seed=seed)) 3445 >>> # Employ the bounding box to distort the image. 3446 >>> tf.slice(image, bbox_begin, bbox_size) 3447 <tf.Tensor: shape=(2, 2, 1), dtype=int64, numpy= 3448 array([[[1], 3449 [2]], 3450 [[4], 3451 [5]]])> 3452 >>> # Draw the bounding box in an image summary. 3453 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) 3454 >>> tf.image.draw_bounding_boxes( 3455 ... 
tf.expand_dims(tf.cast(image, tf.float32),0), bbox_draw, colors) 3456 <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy= 3457 array([[[[1.], 3458 [1.], 3459 [3.]], 3460 [[1.], 3461 [1.], 3462 [6.]], 3463 [[7.], 3464 [8.], 3465 [9.]]]], dtype=float32)> 3466 3467 Note that if no bounding box information is available, setting 3468 `use_image_if_no_bounding_boxes = true` will assume there is a single implicit 3469 bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is 3470 false and no bounding boxes are supplied, an error is raised. 3471 3472 Args: 3473 image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`, 3474 `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`. 3475 bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]` 3476 describing the N bounding boxes associated with the image. 3477 seed: A shape [2] Tensor, the seed to the random number generator. Must have 3478 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 3479 min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The 3480 cropped area of the image must contain at least this fraction of any 3481 bounding box supplied. The value of this parameter should be non-negative. 3482 In the case of 0, the cropped area does not need to overlap any of the 3483 bounding boxes supplied. 3484 aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75, 3485 1.33]`. The cropped area of the image must have an aspect `ratio = width / 3486 height` within this range. 3487 area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The 3488 cropped area of the image must contain a fraction of the supplied image 3489 within this range. 3490 max_attempts: An optional `int`. Defaults to `100`. Number of attempts at 3491 generating a cropped region of the image of the specified constraints. 3492 After `max_attempts` failures, return the entire image. 
@tf_export(v1=['image.sample_distorted_bounding_box'])
@dispatch.add_dispatch_support
@deprecation.deprecated(
    date=None,
    # The two literals are concatenated by the parser; the trailing space keeps
    # the sentences from running together in the emitted warning.
    instructions='`seed2` arg is deprecated. '
    'Use sample_distorted_bounding_box_v2 instead.')
def sample_distorted_bounding_box(image_size,
                                  bounding_boxes,
                                  seed=None,
                                  seed2=None,
                                  min_object_covered=0.1,
                                  aspect_ratio_range=None,
                                  area_range=None,
                                  max_attempts=None,
                                  use_image_if_no_bounding_boxes=None,
                                  name=None):
  """Generate a single randomly distorted bounding box for an image.

  Bounding box annotations are often supplied in addition to ground-truth labels
  in image recognition or object localization tasks. A common technique for
  training such a system is to randomly distort an image while preserving
  its content, i.e. *data augmentation*. This Op outputs a randomly distorted
  localization of an object, i.e. bounding box, given an `image_size`,
  `bounding_boxes` and a series of constraints.

  The output of this Op is a single bounding box that may be used to crop the
  original image. The output is returned as 3 tensors: `begin`, `size` and
  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
  visualize what the bounding box looks like.

  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
  and height of the underlying image.

  For example,

  ```python
      # Generate a single distorted bounding box.
      begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
          tf.shape(image),
          bounding_boxes=bounding_boxes,
          min_object_covered=0.1)

      # Draw the bounding box in an image summary.
      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                    bbox_for_draw)
      tf.compat.v1.summary.image('images_with_box', image_with_box)

      # Employ the bounding box to distort the image.
      distorted_image = tf.slice(image, begin, size)
  ```

  Note that if no bounding box information is available, setting
  `use_image_if_no_bounding_boxes = True` will assume there is a single implicit
  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
  false and no bounding boxes are supplied, an error is raised.

  Args:
    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
      describing the N bounding boxes associated with the image.
    seed: An optional `int`. Defaults to `0`. If either `seed` or `seed2` are
      set to non-zero, the random number generator is seeded by the given
      `seed`. Otherwise, it is seeded by a random seed.
    seed2: An optional `int`. Defaults to `0`. A second seed to avoid seed
      collision.
    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
      cropped area of the image must contain at least this fraction of any
      bounding box supplied. The value of this parameter should be non-negative.
      In the case of 0, the cropped area does not need to overlap any of the
      bounding boxes supplied.
    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
      1.33]`. The cropped area of the image must have an aspect ratio = width /
      height within this range.
    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
      cropped area of the image must contain a fraction of the supplied image
      within this range.
    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
      generating a cropped region of the image of the specified constraints.
      After `max_attempts` failures, return the entire image.
    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
      Controls behavior if no bounding boxes supplied. If true, assume an
      implicit bounding box covering the whole input. If false, raise an error.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (begin, size, bboxes).

    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[offset_height, offset_width, 0]`. Provide as input to
      `tf.slice`.
    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[target_height, target_width, -1]`. Provide as input to
      `tf.slice`.
    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
      the distorted bounding box.
      Provide as input to `tf.image.draw_bounding_boxes`.
  """
  with ops.name_scope(name, 'sample_distorted_bounding_box'):
    # This v1 endpoint forwards to the V2 kernel, which still accepts the
    # deprecated `seed2` argument.
    return gen_image_ops.sample_distorted_bounding_box_v2(
        image_size,
        bounding_boxes,
        seed=seed,
        seed2=seed2,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
        name=name)
@tf_export('image.non_max_suppression_with_scores')
@dispatch.add_dispatch_support
def non_max_suppression_with_scores(boxes,
                                    scores,
                                    max_output_size,
                                    iou_threshold=0.5,
                                    score_threshold=float('-inf'),
                                    soft_nms_sigma=0.0,
                                    name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Prunes away boxes that have high intersection-over-union (IOU) overlap
  with previously selected boxes.  Bounding boxes are supplied as
  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
  diagonal pair of box corners and the coordinates can be provided as normalized
  (i.e., lying in the interval `[0, 1]`) or absolute.  Note that this algorithm
  is agnostic to where the origin is in the coordinate system.  Note that this
  algorithm is invariant to orthogonal transformations and translations
  of the coordinate system; thus translations or reflections of the coordinate
  system result in the same boxes being selected by the algorithm.
  The output of this operation is a set of integers indexing into the input
  collection of bounding boxes representing the selected boxes.  The bounding
  box coordinates corresponding to the selected indices can then be obtained
  using the `tf.gather` operation.  For example:
    ```python
    selected_indices, selected_scores = (
        tf.image.non_max_suppression_with_scores(
            boxes, scores, max_output_size, iou_threshold=1.0,
            score_threshold=0.1, soft_nms_sigma=0.5))
    selected_boxes = tf.gather(boxes, selected_indices)
    ```

  This function generalizes the `tf.image.non_max_suppression` op by also
  supporting a Soft-NMS (with Gaussian weighting) mode (c.f.
  Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score
  of other overlapping boxes instead of directly causing them to be pruned.
  Consequently, in contrast to `tf.image.non_max_suppression`,
  `tf.image.non_max_suppression_with_scores` returns the new scores of each
  selected box in the second output, `selected_scores`.

  To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be
  larger than 0.  When `soft_nms_sigma` equals 0, the behavior of
  `tf.image.non_max_suppression_with_scores` is identical to that of
  `tf.image.non_max_suppression` (except for the extra output) both in function
  and in running time.

  Args:
    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
      score corresponding to each box (each row of boxes).
    max_output_size: A scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non-max suppression.
    iou_threshold: A 0-D float tensor representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    score_threshold: A 0-D float tensor representing the threshold for deciding
      when to remove boxes based on score.
    soft_nms_sigma: A 0-D float tensor representing the sigma parameter for Soft
      NMS; see Bodla et al (c.f. https://arxiv.org/abs/1704.04503).  When
      `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard)
      NMS.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
      selected indices from the boxes tensor, where `M <= max_output_size`.
    selected_scores: A 1-D float tensor of shape `[M]` representing the
      corresponding scores for each selected box, where `M <= max_output_size`.
      Scores only differ from corresponding input scores when using Soft NMS
      (i.e. when `soft_nms_sigma>0`)
  """
  with ops.name_scope(name, 'non_max_suppression_with_scores'):
    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
    score_threshold = ops.convert_to_tensor(
        score_threshold, name='score_threshold')
    soft_nms_sigma = ops.convert_to_tensor(
        soft_nms_sigma, name='soft_nms_sigma')
    # The V5 kernel yields three outputs; only the indices and the (possibly
    # rescored) scores are part of this function's contract, so the third
    # output is discarded. Padding is disabled so the outputs have length M.
    selected_indices, selected_scores, _ = (
        gen_image_ops.non_max_suppression_v5(
            boxes,
            scores,
            max_output_size,
            iou_threshold,
            score_threshold,
            soft_nms_sigma,
            pad_to_max_output_size=False))
    return selected_indices, selected_scores
# Column j of this matrix holds the RGB weights of output channel j (Y, I, Q);
# it is contracted against the last axis of the image tensor.
_rgb_to_yiq_kernel = [[0.299, 0.59590059, 0.2115],
                      [0.587, -0.27455667, -0.52273617],
                      [0.114, -0.32134392, 0.31119955]]


@tf_export('image.rgb_to_yiq')
@dispatch.add_dispatch_support
def rgb_to_yiq(images):
  """Converts one or more images from RGB to YIQ.

  Outputs a tensor of the same shape as the `images` tensor, containing the YIQ
  value of the pixels.
  The output is only well defined if the values in images are in [0,1].

  Usage Example:

  >>> x = tf.constant([[[1.0, 2.0, 3.0]]])
  >>> tf.image.rgb_to_yiq(x)
  <tf.Tensor: shape=(1, 1, 3), dtype=float32,
  numpy=array([[[ 1.815     , -0.91724455,  0.09962624]]], dtype=float32)>

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  rgb = ops.convert_to_tensor(images, name='images')
  # Build the conversion matrix in the image's own dtype so no cast is needed.
  yiq_matrix = ops.convert_to_tensor(
      _rgb_to_yiq_kernel, dtype=rgb.dtype, name='kernel')
  # Contract the channel (last) axis of the image with the matrix rows.
  channel_axis = rgb.get_shape().ndims - 1
  return math_ops.tensordot(rgb, yiq_matrix, axes=[[channel_axis], [0]])


# Inverse of the RGB -> YIQ matrix; rows are indexed by Y, I, Q.
_yiq_to_rgb_kernel = [[1, 1, 1], [0.95598634, -0.27201283, -1.10674021],
                      [0.6208248, -0.64720424, 1.70423049]]


@tf_export('image.yiq_to_rgb')
@dispatch.add_dispatch_support
def yiq_to_rgb(images):
  """Converts one or more images from YIQ to RGB.

  Outputs a tensor of the same shape as the `images` tensor, containing the RGB
  value of the pixels.
  The output is only well defined if the Y values in images are in [0,1],
  the I values are in [-0.5957,0.5957] and the Q values are in
  [-0.5226,0.5226].

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  yiq = ops.convert_to_tensor(images, name='images')
  rgb_matrix = ops.convert_to_tensor(
      _yiq_to_rgb_kernel, dtype=yiq.dtype, name='kernel')
  # Contract the channel (last) axis of the image with the matrix rows.
  channel_axis = yiq.get_shape().ndims - 1
  return math_ops.tensordot(yiq, rgb_matrix, axes=[[channel_axis], [0]])


_rgb_to_yuv_kernel = [[0.299, -0.14714119, 0.61497538],
                      [0.587, -0.28886916, -0.51496512],
                      [0.114, 0.43601035, -0.10001026]]
# Rows are indexed by the input channels Y, U, V; columns by the output
# channels R, G, B.
_yuv_to_rgb_kernel = [[1, 1, 1], [0, -0.394642334, 2.03206185],
                      [1.13988303, -0.58062185, 0]]


@tf_export('image.yuv_to_rgb')
@dispatch.add_dispatch_support
def yuv_to_rgb(images):
  """Converts one or more images from YUV to RGB.

  Outputs a tensor of the same shape as the `images` tensor, containing the RGB
  value of the pixels.
  The output is only well defined if the Y values in images are in [0,1] and
  the U and V values are in [-0.5,0.5].

  As per the above description, you need to scale your YUV images if their
  pixel values are not in the required range. The example below illustrates
  preprocessing of each channel of images before feeding them to `yuv_to_rgb`.

  ```python
  yuv_images = tf.random.uniform(shape=[100, 64, 64, 3], maxval=255)
  last_dimension_axis = len(yuv_images.shape) - 1
  yuv_tensor_images = tf.truediv(
      tf.subtract(
          yuv_images,
          tf.reduce_min(yuv_images)
      ),
      tf.subtract(
          tf.reduce_max(yuv_images),
          tf.reduce_min(yuv_images)
       )
  )
  y, u, v = tf.split(yuv_tensor_images, 3, axis=last_dimension_axis)
  target_uv_min, target_uv_max = -0.5, 0.5
  u = u * (target_uv_max - target_uv_min) + target_uv_min
  v = v * (target_uv_max - target_uv_min) + target_uv_min
  preprocessed_yuv_images = tf.concat([y, u, v], axis=last_dimension_axis)
  rgb_tensor_images = tf.image.yuv_to_rgb(preprocessed_yuv_images)
  ```

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  yuv = ops.convert_to_tensor(images, name='images')
  # Build the conversion matrix in the image's own dtype so no cast is needed.
  rgb_matrix = ops.convert_to_tensor(
      _yuv_to_rgb_kernel, dtype=yuv.dtype, name='kernel')
  # Contract the channel (last) axis of the image with the matrix rows.
  channel_axis = yuv.get_shape().ndims - 1
  return math_ops.tensordot(yuv, rgb_matrix, axes=[[channel_axis], [0]])
@tf_export('image.psnr')
@dispatch.add_dispatch_support
def psnr(a, b, max_val, name=None):
  """Returns the Peak Signal-to-Noise Ratio between a and b.

  This is intended to be used on signals (or images). Produces a PSNR value for
  each image in batch.

  The last three dimensions of input are expected to be [height, width, depth].

  Example:

  ```python
      # Read images from file.
      im1 = tf.image.decode_png(tf.io.read_file('path/to/im1.png'))
      im2 = tf.image.decode_png(tf.io.read_file('path/to/im2.png'))
      # Compute PSNR over tf.uint8 Tensors.
      psnr1 = tf.image.psnr(im1, im2, max_val=255)

      # Compute PSNR over tf.float32 Tensors.
      im1 = tf.image.convert_image_dtype(im1, tf.float32)
      im2 = tf.image.convert_image_dtype(im2, tf.float32)
      psnr2 = tf.image.psnr(im1, im2, max_val=1.0)
      # psnr1 and psnr2 both have type tf.float32 and are almost equal.
  ```

  Args:
    a: First set of images. Anything convertible to a `Tensor` is accepted.
    b: Second set of images. Anything convertible to a `Tensor` is accepted.
    max_val: The dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).
    name: Namespace to embed the computation in.

  Returns:
    The PSNR between a and b. The returned tensor has type `tf.float32`; the
    mean squared error is reduced over the last three axes, so the result
    keeps only the leading (batch) dimensions of the inputs, e.g. shape
    `[batch_size]` for 4-D inputs.
  """
  with ops.name_scope(name, 'PSNR', [a, b]):
    # `a.dtype` is read below, so convert first; this mirrors `ssim` and lets
    # callers pass Python lists or NumPy arrays. It is a no-op for tensors.
    a = ops.convert_to_tensor(a, name='a')
    b = ops.convert_to_tensor(b, name='b')

    # Need to convert the images to float32.  Scale max_val accordingly so that
    # PSNR is computed correctly.
    max_val = math_ops.cast(max_val, a.dtype)
    max_val = convert_image_dtype(max_val, dtypes.float32)
    a = convert_image_dtype(a, dtypes.float32)
    b = convert_image_dtype(b, dtypes.float32)
    mse = math_ops.reduce_mean(math_ops.squared_difference(a, b), [-3, -2, -1])
    # PSNR = 20 * log10(max_val) - 10 * log10(mse), written with natural logs.
    psnr_val = math_ops.subtract(
        20 * math_ops.log(max_val) / math_ops.log(10.0),
        np.float32(10 / np.log(10)) * math_ops.log(mse),
        name='psnr')

    # Attach the runtime shape assertions to the returned value so they are
    # evaluated whenever the result is.
    _, _, checks = _verify_compatible_image_shapes(a, b)
    with ops.control_dependencies(checks):
      return array_ops.identity(psnr_val)
def _fspecial_gauss(size, sigma):
  """Function to mimic the 'fspecial' gaussian MATLAB function.

  Args:
    size: Side length of the square kernel; converted to an `int32` tensor.
    sigma: Width of the Gaussian; its dtype determines the kernel's dtype.

  Returns:
    A tensor of shape `[size, size, 1, 1]` holding the kernel weights. The
    weights come out of a softmax over all `size * size` entries, so they are
    normalized to sum to one.
  """
  size = ops.convert_to_tensor(size, dtypes.int32)
  sigma = ops.convert_to_tensor(sigma)

  # Sample positions 0..size-1, shifted so they are centred on zero.
  offsets = math_ops.cast(math_ops.range(size), sigma.dtype)
  offsets = offsets - math_ops.cast(size - 1, sigma.dtype) / 2.0

  # Per-axis exponent of the Gaussian: -x^2 / (2 * sigma^2).
  exponent_1d = math_ops.square(offsets)
  exponent_1d = exponent_1d * (-0.5 / math_ops.square(sigma))

  # Broadcasting a row against a column sums the two 1-D exponents into the
  # 2-D exponent grid.
  as_row = array_ops.reshape(exponent_1d, shape=[1, -1])
  as_column = array_ops.reshape(exponent_1d, shape=[-1, 1])
  exponent_2d = as_row + as_column

  # Flatten so that softmax exponentiates and normalizes over every entry.
  flat = array_ops.reshape(exponent_2d, shape=[1, -1])  # For tf.nn.softmax().
  weights = nn_ops.softmax(flat)
  return array_ops.reshape(weights, shape=[size, size, 1, 1])
@tf_export('image.ssim')
@dispatch.add_dispatch_support
def ssim(img1,
         img2,
         max_val,
         filter_size=11,
         filter_sigma=1.5,
         k1=0.01,
         k2=0.03):
  """Computes SSIM index between img1 and img2.

  This function is based on the standard SSIM implementation from:
  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
  quality assessment: from error visibility to structural similarity. IEEE
  transactions on image processing.

  Note: The true SSIM is only defined on grayscale.  This function does not
  perform any colorspace transform.  (If the input is already YUV, then it will
  compute YUV SSIM average.)

  Details (with the default arguments):
    - An 11x11 Gaussian filter of width 1.5 is used.
    - k1 = 0.01, k2 = 0.03 as in the original paper.

  The image sizes must be at least `filter_size` x `filter_size` (11x11 by
  default) because of the filter size.

  Example:

  ```python
      # Read images (of size 255 x 255) from file.
      im1 = tf.image.decode_image(tf.io.read_file('path/to/im1.png'))
      im2 = tf.image.decode_image(tf.io.read_file('path/to/im2.png'))
      tf.shape(im1)  # `im1.png` has 3 channels; shape is `(255, 255, 3)`
      tf.shape(im2)  # `im2.png` has 3 channels; shape is `(255, 255, 3)`
      # Add an outer batch for each image.
      im1 = tf.expand_dims(im1, axis=0)
      im2 = tf.expand_dims(im2, axis=0)
      # Compute SSIM over tf.uint8 Tensors.
      ssim1 = tf.image.ssim(im1, im2, max_val=255, filter_size=11,
                            filter_sigma=1.5, k1=0.01, k2=0.03)

      # Compute SSIM over tf.float32 Tensors.
      im1 = tf.image.convert_image_dtype(im1, tf.float32)
      im2 = tf.image.convert_image_dtype(im2, tf.float32)
      ssim2 = tf.image.ssim(im1, im2, max_val=1.0, filter_size=11,
                            filter_sigma=1.5, k1=0.01, k2=0.03)
      # ssim1 and ssim2 both have type tf.float32 and are almost equal.
  ```

  Args:
    img1: First image batch. 4-D Tensor of shape `[batch, height, width,
      channels]` with only Positive Pixel Values.
    img2: Second image batch. 4-D Tensor of shape `[batch, height, width,
      channels]` with only Positive Pixel Values.
    max_val: The dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).
    filter_size: Default value 11 (size of gaussian filter).
    filter_sigma: Default value 1.5 (width of gaussian filter).
    k1: Default value 0.01
    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
      it would be better if we took the values in the range of 0 < K2 < 0.4).

  Returns:
    A tensor containing an SSIM value for each image in batch.  Returned SSIM
    values are in range (-1, 1], when pixel values are non-negative. Returns
    a tensor with shape: broadcast(img1.shape[:-3], img2.shape[:-3]).
  """
  with ops.name_scope(None, 'SSIM', [img1, img2]):
    # Accept anything convertible to a tensor.
    img1 = ops.convert_to_tensor(img1, name='img1')
    img2 = ops.convert_to_tensor(img2, name='img2')

    # Only the assertion ops are needed here; the two shape tensors returned
    # alongside them are ignored.
    shape_checks = _verify_compatible_image_shapes(img1, img2)[2]
    with ops.control_dependencies(shape_checks):
      img1 = array_ops.identity(img1)

    # The images are converted to float32 below, so max_val is first cast to
    # the image dtype and then pushed through the same conversion to keep it
    # on the same scale as the pixels.
    max_val = convert_image_dtype(
        math_ops.cast(max_val, img1.dtype), dtypes.float32)
    img1 = convert_image_dtype(img1, dtypes.float32)
    img2 = convert_image_dtype(img2, dtypes.float32)

    per_channel = _ssim_per_channel(img1, img2, max_val, filter_size,
                                    filter_sigma, k1, k2)[0]
    # Compute average over color channels.
    return math_ops.reduce_mean(per_channel, [-1])
# Default values obtained by Wang et al.
_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)


@tf_export('image.ssim_multiscale')
@dispatch.add_dispatch_support
def ssim_multiscale(img1,
                    img2,
                    max_val,
                    power_factors=_MSSSIM_WEIGHTS,
                    filter_size=11,
                    filter_sigma=1.5,
                    k1=0.01,
                    k2=0.03):
  """Computes the MS-SSIM between img1 and img2.

  This function assumes that `img1` and `img2` are image batches, i.e. the last
  three dimensions are [height, width, channels].

  Note: The true SSIM is only defined on grayscale.  This function does not
  perform any colorspace transform.  (If the input is already YUV, then it will
  compute YUV SSIM average.)

  Original paper: Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale
  structural similarity for image quality assessment." Signals, Systems and
  Computers, 2004.

  Args:
    img1: First image batch with only Positive Pixel Values.
    img2: Second image batch with only Positive Pixel Values. Must have the
      same rank as img1.
    max_val: The dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).
    power_factors: Sequence of weights for each of the scales. The number of
      scales used is the length of the sequence. Index 0 is the unscaled
      resolution's weight and each increasing scale corresponds to the image
      being downsampled by 2.  Defaults to (0.0448, 0.2856, 0.3001, 0.2363,
      0.1333), which are the values obtained in the original paper.
    filter_size: Default value 11 (size of gaussian filter).
    filter_sigma: Default value 1.5 (width of gaussian filter).
    k1: Default value 0.01
    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
      it would be better if we took the values in the range of 0 < K2 < 0.4).

  Returns:
    A tensor containing an MS-SSIM value for each image in batch.  The values
    are in range [0, 1].  Returns a tensor with shape:
    broadcast(img1.shape[:-3], img2.shape[:-3]).
  """
  with ops.name_scope(None, 'MS-SSIM', [img1, img2]):
    # Convert to tensor if needed.
    img1 = ops.convert_to_tensor(img1, name='img1')
    img2 = ops.convert_to_tensor(img2, name='img2')
    # Shape checking.
    shape1, shape2, checks = _verify_compatible_image_shapes(img1, img2)
    with ops.control_dependencies(checks):
      img1 = array_ops.identity(img1)

    # Need to convert the images to float32.  Scale max_val accordingly so that
    # SSIM is computed correctly.
    max_val = math_ops.cast(max_val, img1.dtype)
    max_val = convert_image_dtype(max_val, dtypes.float32)
    img1 = convert_image_dtype(img1, dtypes.float32)
    img2 = convert_image_dtype(img2, dtypes.float32)

    imgs = [img1, img2]
    shapes = [shape1, shape2]

    # img1 and img2 are assumed to be a (multi-dimensional) batch of
    # 3-dimensional images (height, width, channels). `heads` contain the batch
    # dimensions, and `tails` contain the image dimensions.
    heads = [s[:-3] for s in shapes]
    tails = [s[-3:] for s in shapes]

    # `divisor` is used as both ksize and strides of the average pool on the
    # flattened rank-4 images; `divisor[1:]` ([2, 2, 1]) lines up with the
    # [height, width, channels] entries of `tails`.
    divisor = [1, 2, 2, 1]
    divisor_tensor = constant_op.constant(divisor[1:], dtype=dtypes.int32)

    def do_pad(images, remainder):
      # Turn the length-3 `remainder` into a [4, 2] paddings matrix: a zero
      # row is prepended for the flattened batch dimension and a zero column
      # for the "before" side, so padding is only added after each of
      # height/width/channels.
      padding = array_ops.expand_dims(remainder, -1)
      padding = array_ops.pad(padding, [[1, 0], [1, 0]])
      return [array_ops.pad(x, padding, mode='SYMMETRIC') for x in images]

    # Contrast-structure (cs) term per scale, clamped to be non-negative.
    mcs = []
    for k in range(len(power_factors)):
      with ops.name_scope(None, 'Scale%d' % k, imgs):
        if k > 0:
          # Avg pool takes rank 4 tensors. Flatten leading dimensions.
          flat_imgs = [
              array_ops.reshape(x, array_ops.concat([[-1], t], 0))
              for x, t in zip(imgs, tails)
          ]

          # Only the first image's dimensions are inspected; the shape check
          # above is presumably what guarantees both images agree -- confirm
          # against _verify_compatible_image_shapes.
          remainder = tails[0] % divisor_tensor
          need_padding = math_ops.reduce_any(math_ops.not_equal(remainder, 0))
          # pylint: disable=cell-var-from-loop
          padded = control_flow_ops.cond(need_padding,
                                         lambda: do_pad(flat_imgs, remainder),
                                         lambda: flat_imgs)
          # pylint: enable=cell-var-from-loop

          downscaled = [
              nn_ops.avg_pool(
                  x, ksize=divisor, strides=divisor, padding='VALID')
              for x in padded
          ]
          # Refresh the image dimensions and restore the original batch
          # dimensions for the next scale.
          tails = [x[1:] for x in array_ops.shape_n(downscaled)]
          imgs = [
              array_ops.reshape(x, array_ops.concat([h, t], 0))
              for x, h, t in zip(downscaled, heads, tails)
          ]

        # Overwrite previous ssim value since we only need the last one.
        ssim_per_channel, cs = _ssim_per_channel(
            *imgs,
            max_val=max_val,
            filter_size=filter_size,
            filter_sigma=filter_sigma,
            k1=k1,
            k2=k2)
        mcs.append(nn_ops.relu(cs))

    # Remove the cs score for the last scale. In the MS-SSIM calculation,
    # we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p).
    mcs.pop()  # Remove the cs score for the last scale.
    mcs_and_ssim = array_ops.stack(
        mcs + [nn_ops.relu(ssim_per_channel)], axis=-1)
    # Take weighted geometric mean across the scale axis.
    ms_ssim = math_ops.reduce_prod(
        math_ops.pow(mcs_and_ssim, power_factors), [-1])

    return math_ops.reduce_mean(ms_ssim, [-1])  # Avg over color channels.
@tf_export('image.image_gradients')
@dispatch.add_dispatch_support
def image_gradients(image):
  """Returns image gradients (dy, dx) for each color channel.

  Both output tensors have the same shape as the input: [batch_size, h, w,
  d]. The gradient values are organized so that [I(x+1, y) - I(x, y)] is in
  location (x, y). That means that dy will always have zeros in the last row,
  and dx will always have zeros in the last column.

  Usage Example:
    ```python
    BATCH_SIZE = 1
    IMAGE_HEIGHT = 5
    IMAGE_WIDTH = 5
    CHANNELS = 1
    image = tf.reshape(tf.range(IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS,
      delta=1, dtype=tf.float32),
      shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
    dy, dx = tf.image.image_gradients(image)
    print(image[0, :,:,0])
    tf.Tensor(
      [[ 0.  1.  2.  3.  4.]
      [ 5.  6.  7.  8.  9.]
      [10. 11. 12. 13. 14.]
      [15. 16. 17. 18. 19.]
      [20. 21. 22. 23. 24.]], shape=(5, 5), dtype=float32)
    print(dy[0, :,:,0])
    tf.Tensor(
      [[5. 5. 5. 5. 5.]
      [5. 5. 5. 5. 5.]
      [5. 5. 5. 5. 5.]
      [5. 5. 5. 5. 5.]
      [0. 0. 0. 0. 0.]], shape=(5, 5), dtype=float32)
    print(dx[0, :,:,0])
    tf.Tensor(
      [[1. 1. 1. 1. 0.]
      [1. 1. 1. 1. 0.]
      [1. 1. 1. 1. 0.]
      [1. 1. 1. 1. 0.]
      [1. 1. 1. 1. 0.]], shape=(5, 5), dtype=float32)
    ```

  Args:
    image: Tensor with shape [batch_size, h, w, d].

  Returns:
    Pair of tensors (dy, dx) holding the vertical and horizontal image
    gradients (1-step finite difference).

  Raises:
    ValueError: If `image` is not a 4D tensor.
  """
  static_shape = image.get_shape()
  if static_shape.ndims != 4:
    raise ValueError('image_gradients expects a 4D tensor '
                     '[batch_size, h, w, d], not {}.'.format(static_shape))

  full_shape = array_ops.shape(image)
  batch_size, height, width, depth = array_ops.unstack(full_shape)

  # Forward differences along the height axis and the width axis.
  row_diff = image[:, 1:, :, :] - image[:, :-1, :, :]
  col_diff = image[:, :, 1:, :] - image[:, :, :-1, :]

  # Return tensors with same size as original image by concatenating
  # zeros. Place the gradient [I(x+1,y) - I(x,y)] on the base pixel (x, y).
  zero_row = array_ops.zeros(
      array_ops.stack([batch_size, 1, width, depth]), image.dtype)
  dy = array_ops.reshape(
      array_ops.concat([row_diff, zero_row], 1), full_shape)

  zero_col = array_ops.zeros(
      array_ops.stack([batch_size, height, 1, depth]), image.dtype)
  dx = array_ops.reshape(
      array_ops.concat([col_diff, zero_col], 2), full_shape)

  return dy, dx
@tf_export('image.sobel_edges')
@dispatch.add_dispatch_support
def sobel_edges(image):
  """Returns a tensor holding Sobel edge maps.

  Example usage:

  For general usage, `image` would be loaded from a file as below:

  ```python
  image_bytes = tf.io.read_file(path_to_image_file)
  image = tf.image.decode_image(image_bytes)
  image = tf.cast(image, tf.float32)
  image = tf.expand_dims(image, 0)
  ```
  But for demo purposes, we are using randomly generated values for `image`:

  >>> image = tf.random.uniform(
  ...   maxval=255, shape=[1, 28, 28, 3], dtype=tf.float32)
  >>> sobel = tf.image.sobel_edges(image)
  >>> sobel_y = np.asarray(sobel[0, :, :, :, 0]) # sobel in y-direction
  >>> sobel_x = np.asarray(sobel[0, :, :, :, 1]) # sobel in x-direction

  For displaying the sobel results, PIL's [Image Module](
  https://pillow.readthedocs.io/en/stable/reference/Image.html) can be used:

  ```python
  # Display edge maps for the first channel (at index 0)
  Image.fromarray(sobel_y[..., 0] / 4 + 0.5).show()
  Image.fromarray(sobel_x[..., 0] / 4 + 0.5).show()
  ```

  Args:
    image: Image tensor with shape [batch_size, h, w, d] and type float32 or
      float64.  The image(s) must be 2x2 or larger.

  Returns:
    Tensor holding edge maps for each channel. Returns a tensor with shape
    [batch_size, h, w, d, 2] where the last two dimensions hold [[dy[0], dx[0]],
    [dy[1], dx[1]], ..., [dy[d-1], dx[d-1]]] calculated using the Sobel filter.
  """
  static_shape = image.get_shape()
  dynamic_shape = array_ops.shape(image)

  # Vertical (dy) and horizontal (dx) Sobel filters, in that order.
  sobel_dy = [[-1, -2, -1], [0, 0, 0], [1, 2, 1]]
  sobel_dx = [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]
  filter_bank = [sobel_dy, sobel_dx]
  num_kernels = len(filter_bank)

  # [num_kernels, 3, 3] -> [3, 3, num_kernels] -> [3, 3, 1, num_kernels].
  filters_np = np.expand_dims(
      np.transpose(np.asarray(filter_bank), (1, 2, 0)), -2)
  # Replicate the filter pair once per input channel.
  filters = array_ops.tile(
      constant_op.constant(filters_np, dtype=image.dtype),
      [1, 1, dynamic_shape[-1], 1],
      name='sobel_filters')

  # Use depth-wise convolution to calculate edge maps per channel.
  # Reflect-pad height and width by one pixel so the 3x3 'VALID' convolution
  # keeps the spatial size.
  padded = array_ops.pad(
      image, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='REFLECT')

  # Output tensor has shape [batch_size, h, w, d * num_kernels].
  edges = nn.depthwise_conv2d(padded, filters, [1, 1, 1, 1], 'VALID')

  # Reshape to [batch_size, h, w, d, num_kernels].
  edges = array_ops.reshape(
      edges, shape=array_ops.concat([dynamic_shape, [num_kernels]], 0))
  edges.set_shape(static_shape.concatenate([num_kernels]))
  return edges
def resize_bicubic(images,
                   size,
                   align_corners=False,
                   name=None,
                   half_pixel_centers=False):
  """Forwards all arguments by keyword to `gen_image_ops.resize_bicubic`."""
  return gen_image_ops.resize_bicubic(
      images=images,
      size=size,
      align_corners=align_corners,
      half_pixel_centers=half_pixel_centers,
      name=name)


def resize_bilinear(images,
                    size,
                    align_corners=False,
                    name=None,
                    half_pixel_centers=False):
  """Forwards all arguments by keyword to `gen_image_ops.resize_bilinear`."""
  return gen_image_ops.resize_bilinear(
      images=images,
      size=size,
      align_corners=align_corners,
      half_pixel_centers=half_pixel_centers,
      name=name)


def resize_nearest_neighbor(images,
                            size,
                            align_corners=False,
                            name=None,
                            half_pixel_centers=False):
  """Forwards all arguments by keyword to the generated nearest-neighbor op."""
  return gen_image_ops.resize_nearest_neighbor(
      images=images,
      size=size,
      align_corners=align_corners,
      half_pixel_centers=half_pixel_centers,
      name=name)


# The statements below register the four resize functions under their
# `tf.compat.v1.image.*` names, each wrapped with a deprecation notice that
# points users at `tf.image.resize`.
# Note that resize_area wraps the generated op directly, and applies the
# deprecation wrapper outside the dispatch wrapper, whereas the other three
# apply dispatch support outside the deprecation wrapper.
resize_area_deprecation = deprecation.deprecated(
    date=None,
    instructions=(
        'Use `tf.image.resize(...method=ResizeMethod.AREA...)` instead.'))
tf_export(v1=['image.resize_area'])(
    resize_area_deprecation(
        dispatch.add_dispatch_support(gen_image_ops.resize_area)))

resize_bicubic_deprecation = deprecation.deprecated(
    date=None,
    instructions=(
        'Use `tf.image.resize(...method=ResizeMethod.BICUBIC...)` instead.'))
tf_export(v1=['image.resize_bicubic'])(
    dispatch.add_dispatch_support(resize_bicubic_deprecation(resize_bicubic)))

resize_bilinear_deprecation = deprecation.deprecated(
    date=None,
    instructions=(
        'Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.'))
tf_export(v1=['image.resize_bilinear'])(
    dispatch.add_dispatch_support(resize_bilinear_deprecation(resize_bilinear)))

resize_nearest_neighbor_deprecation = deprecation.deprecated(
    date=None,
    instructions=(
        'Use `tf.image.resize(...method=ResizeMethod.NEAREST_NEIGHBOR...)` '
        'instead.'))
tf_export(v1=['image.resize_nearest_neighbor'])(
    dispatch.add_dispatch_support(
        resize_nearest_neighbor_deprecation(resize_nearest_neighbor)))
@tf_export('image.crop_and_resize', v1=[])
@dispatch.add_dispatch_support
def crop_and_resize_v2(image,
                       boxes,
                       box_indices,
                       crop_size,
                       method='bilinear',
                       extrapolation_value=0,
                       name=None):
  """Extracts crops from the input image tensor and resizes them.

  Extracts crops from the input image tensor and resizes them using bilinear
  sampling or nearest neighbor sampling (possibly with aspect ratio change) to a
  common output size specified by `crop_size`. This is more general than the
  `crop_to_bounding_box` op which extracts a fixed size slice from the input
  image and does not allow resizing or aspect ratio change.

  Returns a tensor with `crops` from the input `image` at positions defined at
  the bounding box locations in `boxes`. The cropped boxes are all resized (with
  bilinear or nearest neighbor interpolation) to a fixed
  `size = [crop_height, crop_width]`. The result is a 4-D tensor
  `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned.
  In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical
  results to using `tf.compat.v1.image.resize_bilinear()` or
  `tf.compat.v1.image.resize_nearest_neighbor()` (depending on the `method`
  argument) with `align_corners=True`.

  Args:
    image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
      specifies the coordinates of a box in the `box_indices[i]` image and is
      specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
      coordinate value of `y` is mapped to the image coordinate at `y *
      (image_height - 1)`, so the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1]` in image height coordinates.
      We do allow `y1` > `y2`, in which case the sampled crop is an up-down
      flipped version of the original image. The width dimension is treated
      similarly. Normalized coordinates outside the `[0, 1]` range are allowed,
      in which case we use `extrapolation_value` to extrapolate the input image
      values.
    box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0,
      batch)`. The value of `box_indices[i]` specifies the image that the
      `i`-th box refers to.
    crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`.
      All cropped image patches are resized to this size. The aspect ratio of
      the image content is not preserved. Both `crop_height` and `crop_width`
      need to be positive.
    method: An optional string specifying the sampling method for resizing. It
      can be either `"bilinear"` or `"nearest"` and defaults to `"bilinear"`.
      Currently two sampling methods are supported: Bilinear and Nearest
        Neighbor.
    extrapolation_value: An optional `float`. Defaults to `0`. Value used for
      extrapolation, when applicable.
    name: A name for the operation (optional).

  Returns:
    A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.

  Example:

  ```python
  import tensorflow as tf
  BATCH_SIZE = 1
  NUM_BOXES = 5
  IMAGE_HEIGHT = 256
  IMAGE_WIDTH = 256
  CHANNELS = 3
  CROP_SIZE = (24, 24)

  image = tf.random.normal(shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH,
  CHANNELS) )
  boxes = tf.random.uniform(shape=(NUM_BOXES, 4))
  box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0,
  maxval=BATCH_SIZE, dtype=tf.int32)
  output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE)
  output.shape  #=> (5, 24, 24, 3)
  ```
  """
  # All arguments are forwarded positionally to the generated op, so their
  # order here must match the op's own parameter order.
  return gen_image_ops.crop_and_resize(image, boxes, box_indices, crop_size,
                                       method, extrapolation_value, name)
# TF1 endpoint for crop_and_resize. It accepts the box-to-image mapping under
# either the deprecated name `box_ind` or the newer name `box_indices`, and
# therefore gives `crop_size` a default of None so that `box_ind` can be
# omitted positionally.
@tf_export(v1=['image.crop_and_resize'])
@dispatch.add_dispatch_support
@deprecation.deprecated_args(None,
                             'box_ind is deprecated, use box_indices instead',
                             'box_ind')
def crop_and_resize_v1(  # pylint: disable=missing-docstring
    image,
    boxes,
    box_ind=None,
    crop_size=None,
    method='bilinear',
    extrapolation_value=0,
    name=None,
    box_indices=None):
  # Collapse the two alternative argument names into one value.
  # NOTE(review): presumably this raises when both names are supplied --
  # confirm against deprecation.deprecated_argument_lookup.
  box_ind = deprecation.deprecated_argument_lookup('box_indices', box_indices,
                                                   'box_ind', box_ind)
  return gen_image_ops.crop_and_resize(image, boxes, box_ind, crop_size, method,
                                       extrapolation_value, name)


# The function has no docstring of its own; it reuses the generated op's.
crop_and_resize_v1.__doc__ = gen_image_ops.crop_and_resize.__doc__
@tf_export(v1=['image.extract_glimpse'])
@dispatch.add_dispatch_support
def extract_glimpse(
    input,  # pylint: disable=redefined-builtin
    size,
    offsets,
    centered=True,
    normalized=True,
    uniform_noise=True,
    name=None):
  """Extracts a glimpse from the input tensor.

  Returns a set of windows called glimpses extracted at location
  `offsets` from the input tensor. If the windows only partially
  overlap the inputs, the non-overlapping areas will be filled with
  random noise.

  The result is a 4-D tensor of shape `[batch_size, glimpse_height,
  glimpse_width, channels]`. The channels and batch dimensions are the
  same as that of the input tensor. The height and width of the output
  windows are specified in the `size` parameter.

  The arguments `normalized` and `centered` control how the windows are built:

  * If the coordinates are normalized but not centered, 0.0 and 1.0
    correspond to the minimum and maximum of each height and width
    dimension.
  * If the coordinates are both normalized and centered, they range from
    -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
    left corner, the lower right corner is located at (1.0, 1.0) and the
    center is at (0, 0).
  * If the coordinates are not normalized they are interpreted as
    numbers of pixels.

  Usage Example:

  >>> x = [[[[0.0],
  ...           [1.0],
  ...           [2.0]],
  ...          [[3.0],
  ...           [4.0],
  ...           [5.0]],
  ...          [[6.0],
  ...           [7.0],
  ...           [8.0]]]]
  >>> tf.compat.v1.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
  ...                                    centered=False, normalized=False)
  <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
  array([[[[0.],
           [1.]],
          [[3.],
           [4.]]]], dtype=float32)>

  Args:
    input: A `Tensor` of type `float32`. A 4-D float tensor of shape
      `[batch_size, height, width, channels]`.
    size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
      size of the glimpses to extract.  The glimpse height must be specified
      first, followed by the glimpse width.
    offsets: A `Tensor` of type `float32`. A 2-D tensor of shape
      `[batch_size, 2]` containing the y, x locations of the center of each
      window.
    centered: An optional `bool`. Defaults to `True`. Indicates if the offset
      coordinates are centered relative to the image, in which case the (0, 0)
      offset is relative to the center of the input images. If false, the (0,0)
      offset corresponds to the upper left corner of the input images.
    normalized: An optional `bool`. Defaults to `True`. Indicates if the offset
      coordinates are normalized.
    uniform_noise: An optional `bool`. Defaults to `True`. Indicates if the
      noise should be generated using a uniform distribution or a Gaussian
      distribution.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `float32`.
  """
  # Pure pass-through to the generated op; every argument is forwarded by
  # keyword under the same name.
  return gen_image_ops.extract_glimpse(
      input=input,
      size=size,
      offsets=offsets,
      centered=centered,
      normalized=normalized,
      uniform_noise=uniform_noise,
      name=name)
@tf_export('image.extract_glimpse', v1=[])
@dispatch.add_dispatch_support
def extract_glimpse_v2(
    input,  # pylint: disable=redefined-builtin
    size,
    offsets,
    centered=True,
    normalized=True,
    noise='uniform',
    name=None):
  """Extracts a glimpse from the input tensor.

  Returns a set of windows called glimpses extracted at location
  `offsets` from the input tensor. If the windows only partially
  overlap the inputs, the non-overlapping areas will be filled according to
  `noise`.

  The result is a 4-D tensor of shape `[batch_size, glimpse_height,
  glimpse_width, channels]`. The channels and batch dimensions are the
  same as that of the input tensor. The height and width of the output
  windows are specified in the `size` parameter.

  The arguments `normalized` and `centered` control how the windows are built:

  * If the coordinates are normalized but not centered, 0.0 and 1.0
    correspond to the minimum and maximum of each height and width
    dimension.
  * If the coordinates are both normalized and centered, they range from
    -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
    left corner, the lower right corner is located at (1.0, 1.0) and the
    center is at (0, 0).
  * If the coordinates are not normalized they are interpreted as
    numbers of pixels.

  Usage Example:

  >>> x = [[[[0.0],
  ...           [1.0],
  ...           [2.0]],
  ...          [[3.0],
  ...           [4.0],
  ...           [5.0]],
  ...          [[6.0],
  ...           [7.0],
  ...           [8.0]]]]
  >>> tf.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
  ...                         centered=False, normalized=False)
  <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
  array([[[[4.],
           [5.]],
          [[7.],
           [8.]]]], dtype=float32)>

  Args:
    input: A `Tensor` of type `float32`. A 4-D float tensor of shape
      `[batch_size, height, width, channels]`.
    size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
      size of the glimpses to extract.  The glimpse height must be specified
      first, followed by the glimpse width.
    offsets: A `Tensor` of type `float32`. A 2-D tensor of shape
      `[batch_size, 2]` containing the y, x locations of the center of each
      window.
    centered: An optional `bool`. Defaults to `True`. Indicates if the offset
      coordinates are centered relative to the image, in which case the (0, 0)
      offset is relative to the center of the input images. If false, the (0,0)
      offset corresponds to the upper left corner of the input images.
    normalized: An optional `bool`. Defaults to `True`. Indicates if the offset
      coordinates are normalized.
    noise: An optional `string`. Defaults to `uniform`. Indicates if the noise
      should be `uniform` (uniform distribution), `gaussian` (gaussian
      distribution), or `zero` (zero padding).
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `float32`.
  """
  # The fill behaviour is selected through `noise`; the op's boolean
  # `uniform_noise` attribute is always passed as False here.
  # NOTE(review): presumably this keeps the legacy flag from overriding
  # `noise` -- confirm against the ExtractGlimpseV2 op definition.
  return gen_image_ops.extract_glimpse_v2(
      input=input,
      size=size,
      offsets=offsets,
      centered=centered,
      normalized=normalized,
      noise=noise,
      uniform_noise=False,
      name=name)
@tf_export('image.combined_non_max_suppression')
@dispatch.add_dispatch_support
def combined_non_max_suppression(boxes,
                                 scores,
                                 max_output_size_per_class,
                                 max_total_size,
                                 iou_threshold=0.5,
                                 score_threshold=float('-inf'),
                                 pad_per_class=False,
                                 clip_boxes=True,
                                 name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  This operation performs non_max_suppression on the inputs per batch, across
  all classes.
  Prunes away boxes that have high intersection-over-union (IOU) overlap
  with previously selected boxes.  Bounding boxes are supplied as
  [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
  diagonal pair of box corners and the coordinates can be provided as normalized
  (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
  is agnostic to where the origin is in the coordinate system. Also note that
  this algorithm is invariant to orthogonal transformations and translations
  of the coordinate system; thus translations or reflections of the coordinate
  system result in the same boxes being selected by the algorithm.
  The output of this operation is the final boxes, scores and classes tensor
  returned after performing non_max_suppression.

  Args:
    boxes: A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q`
      is 1 then the same boxes are used for all classes; otherwise, if `q` is
      equal to the number of classes, class-specific boxes are used.
    scores: A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]`
      representing a single score corresponding to each box (each row of boxes).
    max_output_size_per_class: A scalar integer `Tensor` representing the
      maximum number of boxes to be selected by non-max suppression per class
    max_total_size: An int32 scalar representing the maximum number of boxes
      retained over all classes. Note that setting this value to a large number
      may result in OOM error depending on the system workload.
    iou_threshold: A float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    score_threshold: A float representing the threshold for deciding when to
      remove boxes based on score.
    pad_per_class: If false, the output nmsed boxes, scores and classes are
      padded/clipped to `max_total_size`. If true, the output nmsed boxes,
      scores and classes are padded to be of length
      `max_output_size_per_class`*`num_classes`, unless it exceeds
      `max_total_size` in which case it is clipped to `max_total_size`.
      Defaults to false.
    clip_boxes: If true, the coordinates of output nmsed boxes will be clipped
      to [0, 1]. If false, output the box coordinates as they are. Defaults to
      true.
    name: A name for the operation (optional).

  Returns:
    'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
      containing the non-max suppressed boxes.
    'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
      the scores for the boxes.
    'nmsed_classes': A [batch_size, max_detections] float32 tensor
      containing the class for boxes.
    'valid_detections': A [batch_size] int32 tensor indicating the number of
      valid detections per batch item. Only the top valid_detections[i] entries
      in nmsed_boxes[i], nmsed_scores[i] and nmsed_classes[i] are valid. The
      rest of the entries are zero paddings.
  """
  with ops.name_scope(name, 'combined_non_max_suppression'):
    iou_threshold_t = ops.convert_to_tensor(
        iou_threshold, dtype=dtypes.float32, name='iou_threshold')
    score_threshold_t = ops.convert_to_tensor(
        score_threshold, dtype=dtypes.float32, name='score_threshold')

    # Convert `max_total_size` to tensor *without* setting the `dtype` param.
    # This allows us to catch `int32` overflow case with `max_total_size`
    # whose expected dtype is `int32` by the op registration. Any number within
    # `int32` will get converted to `int32` tensor. Anything larger will get
    # converted to `int64`. Passing in `int64` for `max_total_size` to the op
    # will throw dtype mismatch exception.
    # TODO(b/173251596): Once there is a more general solution to warn against
    # int overflow conversions, revisit this check.
    max_total_size_t = ops.convert_to_tensor(max_total_size)

    return gen_image_ops.combined_non_max_suppression(
        boxes, scores, max_output_size_per_class, max_total_size_t,
        iou_threshold_t, score_threshold_t, pad_per_class, clip_boxes)
def _bbox_overlap(boxes_a, boxes_b):
  """Calculates the pairwise IoU between boxes_a and boxes_b.

  IoU is the intersection area of two boxes divided by the area of their union.

  Args:
    boxes_a: a tensor with a shape of [batch_size, N, 4]. N is the number of
      boxes per image. The last dimension is the pixel coordinates in
      [ymin, xmin, ymax, xmax] form.
    boxes_b: a tensor with a shape of [batch_size, M, 4]. M is the number of
      boxes. The last dimension is the pixel coordinates in
      [ymin, xmin, ymax, xmax] form.

  Returns:
    intersection_over_union: a tensor with a shape of [batch_size, N, M],
    representing the ratio of intersection area over union area (IoU) between
    two boxes
  """

  def _as_row(coords):
    # [batch_size, M, 1] -> [batch_size, 1, M], so that it broadcasts against
    # the [batch_size, N, 1] coordinates of boxes_a.
    return array_ops.transpose(coords, [0, 2, 1])

  with ops.name_scope('bbox_overlap'):
    a_y_min, a_x_min, a_y_max, a_x_max = array_ops.split(
        value=boxes_a, num_or_size_splits=4, axis=2)
    b_y_min, b_x_min, b_y_max, b_x_max = array_ops.split(
        value=boxes_b, num_or_size_splits=4, axis=2)

    # Corners of the intersection rectangle for every (a, b) pair.
    left = math_ops.maximum(a_x_min, _as_row(b_x_min))
    right = math_ops.minimum(a_x_max, _as_row(b_x_max))
    top = math_ops.maximum(a_y_min, _as_row(b_y_min))
    bottom = math_ops.minimum(a_y_max, _as_row(b_y_max))

    # Non-overlapping pairs give negative extents, which are clamped to zero.
    overlap_width = math_ops.maximum((right - left), 0)
    overlap_height = math_ops.maximum((bottom - top), 0)
    i_area = overlap_width * overlap_height

    # Calculates the union area.
    a_area = (a_y_max - a_y_min) * (a_x_max - a_x_min)
    b_area = (b_y_max - b_y_min) * (b_x_max - b_x_min)
    EPSILON = 1e-8
    # Adds a small epsilon to avoid divide-by-zero.
    u_area = a_area + _as_row(b_area) - i_area + EPSILON

    # Calculates IoU.
    return i_area / u_area
5042 Returns: 5043 intersection_over_union: a tensor with as a shape of [batch_size, N, M], 5044 representing the ratio of intersection area over union area (IoU) between 5045 two boxes 5046 """ 5047 with ops.name_scope('bbox_overlap'): 5048 a_y_min, a_x_min, a_y_max, a_x_max = array_ops.split( 5049 value=boxes_a, num_or_size_splits=4, axis=2) 5050 b_y_min, b_x_min, b_y_max, b_x_max = array_ops.split( 5051 value=boxes_b, num_or_size_splits=4, axis=2) 5052 5053 # Calculates the intersection area. 5054 i_xmin = math_ops.maximum( 5055 a_x_min, array_ops.transpose(b_x_min, [0, 2, 1])) 5056 i_xmax = math_ops.minimum( 5057 a_x_max, array_ops.transpose(b_x_max, [0, 2, 1])) 5058 i_ymin = math_ops.maximum( 5059 a_y_min, array_ops.transpose(b_y_min, [0, 2, 1])) 5060 i_ymax = math_ops.minimum( 5061 a_y_max, array_ops.transpose(b_y_max, [0, 2, 1])) 5062 i_area = math_ops.maximum( 5063 (i_xmax - i_xmin), 0) * math_ops.maximum((i_ymax - i_ymin), 0) 5064 5065 # Calculates the union area. 5066 a_area = (a_y_max - a_y_min) * (a_x_max - a_x_min) 5067 b_area = (b_y_max - b_y_min) * (b_x_max - b_x_min) 5068 EPSILON = 1e-8 5069 # Adds a small epsilon to avoid divide-by-zero. 5070 u_area = a_area + array_ops.transpose(b_area, [0, 2, 1]) - i_area + EPSILON 5071 5072 # Calculates IoU. 5073 intersection_over_union = i_area / u_area 5074 5075 return intersection_over_union 5076 5077 5078def _self_suppression(iou, _, iou_sum, iou_threshold): 5079 """Suppress boxes in the same tile. 5080 5081 Compute boxes that cannot be suppressed by others (i.e., 5082 can_suppress_others), and then use them to suppress boxes in the same tile. 5083 5084 Args: 5085 iou: a tensor of shape [batch_size, num_boxes_with_padding] representing 5086 intersection over union. 5087 iou_sum: a scalar tensor. 5088 iou_threshold: a scalar tensor. 5089 5090 Returns: 5091 iou_suppressed: a tensor of shape [batch_size, num_boxes_with_padding]. 
5092 iou_diff: a scalar tensor representing whether any box is supressed in 5093 this step. 5094 iou_sum_new: a scalar tensor of shape [batch_size] that represents 5095 the iou sum after suppression. 5096 iou_threshold: a scalar tensor. 5097 """ 5098 batch_size = array_ops.shape(iou)[0] 5099 can_suppress_others = math_ops.cast( 5100 array_ops.reshape( 5101 math_ops.reduce_max(iou, 1) < iou_threshold, [batch_size, -1, 1]), 5102 iou.dtype) 5103 iou_after_suppression = array_ops.reshape( 5104 math_ops.cast( 5105 math_ops.reduce_max(can_suppress_others * iou, 1) < iou_threshold, 5106 iou.dtype), 5107 [batch_size, -1, 1]) * iou 5108 iou_sum_new = math_ops.reduce_sum(iou_after_suppression, [1, 2]) 5109 return [ 5110 iou_after_suppression, 5111 math_ops.reduce_any(iou_sum - iou_sum_new > iou_threshold), iou_sum_new, 5112 iou_threshold 5113 ] 5114 5115 5116def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, tile_size): 5117 """Suppress boxes between different tiles. 5118 5119 Args: 5120 boxes: a tensor of shape [batch_size, num_boxes_with_padding, 4] 5121 box_slice: a tensor of shape [batch_size, tile_size, 4] 5122 iou_threshold: a scalar tensor 5123 inner_idx: a scalar tensor representing the tile index of the tile 5124 that is used to supress box_slice 5125 tile_size: an integer representing the number of boxes in a tile 5126 5127 Returns: 5128 boxes: unchanged boxes as input 5129 box_slice_after_suppression: box_slice after suppression 5130 iou_threshold: unchanged 5131 """ 5132 batch_size = array_ops.shape(boxes)[0] 5133 new_slice = array_ops.slice( 5134 boxes, [0, inner_idx * tile_size, 0], 5135 [batch_size, tile_size, 4]) 5136 iou = _bbox_overlap(new_slice, box_slice) 5137 box_slice_after_suppression = array_ops.expand_dims( 5138 math_ops.cast(math_ops.reduce_all(iou < iou_threshold, [1]), 5139 box_slice.dtype), 5140 2) * box_slice 5141 return boxes, box_slice_after_suppression, iou_threshold, inner_idx + 1 5142 5143 5144def 
_suppression_loop_body(boxes, iou_threshold, output_size, idx, tile_size): 5145 """Process boxes in the range [idx*tile_size, (idx+1)*tile_size). 5146 5147 Args: 5148 boxes: a tensor with a shape of [batch_size, anchors, 4]. 5149 iou_threshold: a float representing the threshold for deciding whether boxes 5150 overlap too much with respect to IOU. 5151 output_size: an int32 tensor of size [batch_size]. Representing the number 5152 of selected boxes for each batch. 5153 idx: an integer scalar representing induction variable. 5154 tile_size: an integer representing the number of boxes in a tile 5155 5156 Returns: 5157 boxes: updated boxes. 5158 iou_threshold: pass down iou_threshold to the next iteration. 5159 output_size: the updated output_size. 5160 idx: the updated induction variable. 5161 """ 5162 with ops.name_scope('suppression_loop_body'): 5163 num_tiles = array_ops.shape(boxes)[1] // tile_size 5164 batch_size = array_ops.shape(boxes)[0] 5165 5166 def cross_suppression_func(boxes, box_slice, iou_threshold, inner_idx): 5167 return _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, 5168 tile_size) 5169 5170 # Iterates over tiles that can possibly suppress the current tile. 5171 box_slice = array_ops.slice(boxes, [0, idx * tile_size, 0], 5172 [batch_size, tile_size, 4]) 5173 _, box_slice, _, _ = control_flow_ops.while_loop( 5174 lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx, 5175 cross_suppression_func, 5176 [boxes, box_slice, iou_threshold, constant_op.constant(0)]) 5177 5178 # Iterates over the current tile to compute self-suppression. 
5179 iou = _bbox_overlap(box_slice, box_slice) 5180 mask = array_ops.expand_dims( 5181 array_ops.reshape( 5182 math_ops.range(tile_size), [1, -1]) > array_ops.reshape( 5183 math_ops.range(tile_size), [-1, 1]), 0) 5184 iou *= math_ops.cast( 5185 math_ops.logical_and(mask, iou >= iou_threshold), iou.dtype) 5186 suppressed_iou, _, _, _ = control_flow_ops.while_loop( 5187 lambda _iou, loop_condition, _iou_sum, _: loop_condition, 5188 _self_suppression, 5189 [iou, constant_op.constant(True), math_ops.reduce_sum(iou, [1, 2]), 5190 iou_threshold]) 5191 suppressed_box = math_ops.reduce_sum(suppressed_iou, 1) > 0 5192 box_slice *= array_ops.expand_dims( 5193 1.0 - math_ops.cast(suppressed_box, box_slice.dtype), 2) 5194 5195 # Uses box_slice to update the input boxes. 5196 mask = array_ops.reshape( 5197 math_ops.cast( 5198 math_ops.equal(math_ops.range(num_tiles), idx), boxes.dtype), 5199 [1, -1, 1, 1]) 5200 boxes = array_ops.tile(array_ops.expand_dims( 5201 box_slice, [1]), [1, num_tiles, 1, 1]) * mask + array_ops.reshape( 5202 boxes, [batch_size, num_tiles, tile_size, 4]) * (1 - mask) 5203 boxes = array_ops.reshape(boxes, [batch_size, -1, 4]) 5204 5205 # Updates output_size. 5206 output_size += math_ops.reduce_sum( 5207 math_ops.cast( 5208 math_ops.reduce_any(box_slice > 0, [2]), dtypes.int32), [1]) 5209 return boxes, iou_threshold, output_size, idx + 1 5210 5211 5212@tf_export('image.non_max_suppression_padded') 5213@dispatch.add_dispatch_support 5214def non_max_suppression_padded(boxes, 5215 scores, 5216 max_output_size, 5217 iou_threshold=0.5, 5218 score_threshold=float('-inf'), 5219 pad_to_max_output_size=False, 5220 name=None, 5221 sorted_input=False, 5222 canonicalized_coordinates=False, 5223 tile_size=512): 5224 """Greedily selects a subset of bounding boxes in descending order of score. 
5225 5226 Performs algorithmically equivalent operation to tf.image.non_max_suppression, 5227 with the addition of an optional parameter which zero-pads the output to 5228 be of size `max_output_size`. 5229 The output of this operation is a tuple containing the set of integers 5230 indexing into the input collection of bounding boxes representing the selected 5231 boxes and the number of valid indices in the index set. The bounding box 5232 coordinates corresponding to the selected indices can then be obtained using 5233 the `tf.slice` and `tf.gather` operations. For example: 5234 ```python 5235 selected_indices_padded, num_valid = tf.image.non_max_suppression_padded( 5236 boxes, scores, max_output_size, iou_threshold, 5237 score_threshold, pad_to_max_output_size=True) 5238 selected_indices = tf.slice( 5239 selected_indices_padded, tf.constant([0]), num_valid) 5240 selected_boxes = tf.gather(boxes, selected_indices) 5241 ``` 5242 5243 Args: 5244 boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4]. 5245 Dimensions except the last two are batch dimensions. 5246 scores: a tensor of rank 1 or higher with a shape of [..., num_boxes]. 5247 max_output_size: a scalar integer `Tensor` representing the maximum number 5248 of boxes to be selected by non max suppression. Note that setting this 5249 value to a large number may result in OOM error depending on the system 5250 workload. 5251 iou_threshold: a float representing the threshold for deciding whether boxes 5252 overlap too much with respect to IoU (intersection over union). 5253 score_threshold: a float representing the threshold for box scores. Boxes 5254 with a score that is not larger than this threshold will be suppressed. 5255 pad_to_max_output_size: whether to pad the output idx to max_output_size. 5256 Must be set to True when the input is a batch of images. 5257 name: name of operation. 
5258 sorted_input: a boolean indicating whether the input boxes and scores 5259 are sorted in descending order by the score. 5260 canonicalized_coordinates: if box coordinates are given as 5261 `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant 5262 computation to canonicalize box coordinates. 5263 tile_size: an integer representing the number of boxes in a tile, i.e., 5264 the maximum number of boxes per image that can be used to suppress other 5265 boxes in parallel; larger tile_size means larger parallelism and 5266 potentially more redundant work. 5267 Returns: 5268 idx: a tensor with a shape of [..., num_boxes] representing the 5269 indices selected by non-max suppression. The leading dimensions 5270 are the batch dimensions of the input boxes. All numbers are within 5271 [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i] 5272 indices (i.e., idx[i][:num_valid[i]]) are valid. 5273 num_valid: a tensor of rank 0 or higher with a shape of [...] 5274 representing the number of valid indices in idx. Its dimensions are the 5275 batch dimensions of the input boxes. 5276 Raises: 5277 ValueError: When set pad_to_max_output_size to False for batched input. 5278 """ 5279 with ops.name_scope(name, 'non_max_suppression_padded'): 5280 if not pad_to_max_output_size: 5281 # pad_to_max_output_size may be set to False only when the shape of 5282 # boxes is [num_boxes, 4], i.e., a single image. We make best effort to 5283 # detect violations at compile time. If `boxes` does not have a static 5284 # rank, the check allows computation to proceed. 
5285 if boxes.get_shape().rank is not None and boxes.get_shape().rank > 2: 5286 raise ValueError("'pad_to_max_output_size' (value {}) must be True for " 5287 'batched input'.format(pad_to_max_output_size)) 5288 if name is None: 5289 name = '' 5290 idx, num_valid = non_max_suppression_padded_v2( 5291 boxes, scores, max_output_size, iou_threshold, score_threshold, 5292 sorted_input, canonicalized_coordinates, tile_size) 5293 # def_function.function seems to lose shape information, so set it here. 5294 if not pad_to_max_output_size: 5295 idx = idx[0, :num_valid] 5296 else: 5297 batch_dims = array_ops.concat([ 5298 array_ops.shape(boxes)[:-2], 5299 array_ops.expand_dims(max_output_size, 0) 5300 ], 0) 5301 idx = array_ops.reshape(idx, batch_dims) 5302 return idx, num_valid 5303 5304 5305# TODO(b/158709815): Improve performance regression due to 5306# def_function.function. 5307@def_function.function( 5308 experimental_implements='non_max_suppression_padded_v2') 5309def non_max_suppression_padded_v2(boxes, 5310 scores, 5311 max_output_size, 5312 iou_threshold=0.5, 5313 score_threshold=float('-inf'), 5314 sorted_input=False, 5315 canonicalized_coordinates=False, 5316 tile_size=512): 5317 """Non-maximum suppression. 5318 5319 Prunes away boxes that have high intersection-over-union (IOU) overlap 5320 with previously selected boxes. Bounding boxes are supplied as 5321 `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any 5322 diagonal pair of box corners and the coordinates can be provided as normalized 5323 (i.e., lying in the interval `[0, 1]`) or absolute. The bounding box 5324 coordinates are cannonicalized to `[y_min, x_min, y_max, x_max]`, 5325 where `(y_min, x_min)` and `(y_max, x_mas)` are the coordinates of the lower 5326 left and upper right corner. User may indiciate the input box coordinates are 5327 already canonicalized to eliminate redundant work by setting 5328 canonicalized_coordinates to `True`. 
Note that this algorithm is agnostic to 5329 where the origin is in the coordinate system. Note that this algorithm is 5330 invariant to orthogonal transformations and translations of the coordinate 5331 system; thus translating or reflections of the coordinate system result in the 5332 same boxes being selected by the algorithm. 5333 5334 Similar to tf.image.non_max_suppression, non_max_suppression_padded 5335 implements hard NMS but can operate on a batch of images and improves 5336 performance by titling the bounding boxes. Non_max_suppression_padded should 5337 be preferred over tf.image_non_max_suppression when running on devices with 5338 abundant parallelsim for higher computation speed. For soft NMS, refer to 5339 tf.image.non_max_suppression_with_scores. 5340 5341 While a serial NMS algorithm iteratively uses the highest-scored unprocessed 5342 box to suppress boxes, this algorithm uses many boxes to suppress other boxes 5343 in parallel. The key idea is to partition boxes into tiles based on their 5344 score and suppresses boxes tile by tile, thus achieving parallelism within a 5345 tile. The tile size determines the degree of parallelism. 5346 5347 In cross suppression (using boxes of tile A to suppress boxes of tile B), 5348 all boxes in A can independently suppress boxes in B. 5349 5350 Self suppression (suppressing boxes of the same tile) needs to be iteratively 5351 applied until there's no more suppression. In each iteration, boxes that 5352 cannot be suppressed are used to suppress boxes in the same tile. 
5353 5354 boxes = boxes.pad_to_multiply_of(tile_size) 5355 num_tiles = len(boxes) // tile_size 5356 output_boxes = [] 5357 for i in range(num_tiles): 5358 box_tile = boxes[i*tile_size : (i+1)*tile_size] 5359 for j in range(i - 1): 5360 # in parallel suppress boxes in box_tile using boxes from suppressing_tile 5361 suppressing_tile = boxes[j*tile_size : (j+1)*tile_size] 5362 iou = _bbox_overlap(box_tile, suppressing_tile) 5363 # if the box is suppressed in iou, clear it to a dot 5364 box_tile *= _update_boxes(iou) 5365 # Iteratively handle the diagnal tile. 5366 iou = _box_overlap(box_tile, box_tile) 5367 iou_changed = True 5368 while iou_changed: 5369 # boxes that are not suppressed by anything else 5370 suppressing_boxes = _get_suppressing_boxes(iou) 5371 # boxes that are suppressed by suppressing_boxes 5372 suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes) 5373 # clear iou to 0 for boxes that are suppressed, as they cannot be used 5374 # to suppress other boxes any more 5375 new_iou = _clear_iou(iou, suppressed_boxes) 5376 iou_changed = (new_iou != iou) 5377 iou = new_iou 5378 # remaining boxes that can still suppress others, are selected boxes. 5379 output_boxes.append(_get_suppressing_boxes(iou)) 5380 if len(output_boxes) >= max_output_size: 5381 break 5382 5383 Args: 5384 boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4]. 5385 Dimensions except the last two are batch dimensions. The last dimension 5386 represents box coordinates, given as [y_1, x_1, y_2, x_2]. The coordinates 5387 on each dimension can be given in any order 5388 (see also `canonicalized_coordinates`) but must describe a box with 5389 a positive area. 5390 scores: a tensor of rank 1 or higher with a shape of [..., num_boxes]. 5391 max_output_size: a scalar integer `Tensor` representing the maximum number 5392 of boxes to be selected by non max suppression. 
5393 iou_threshold: a float representing the threshold for deciding whether boxes 5394 overlap too much with respect to IoU (intersection over union). 5395 score_threshold: a float representing the threshold for box scores. Boxes 5396 with a score that is not larger than this threshold will be suppressed. 5397 sorted_input: a boolean indicating whether the input boxes and scores 5398 are sorted in descending order by the score. 5399 canonicalized_coordinates: if box coordinates are given as 5400 `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant 5401 computation to canonicalize box coordinates. 5402 tile_size: an integer representing the number of boxes in a tile, i.e., 5403 the maximum number of boxes per image that can be used to suppress other 5404 boxes in parallel; larger tile_size means larger parallelism and 5405 potentially more redundant work. 5406 Returns: 5407 idx: a tensor with a shape of [..., num_boxes] representing the 5408 indices selected by non-max suppression. The leading dimensions 5409 are the batch dimensions of the input boxes. All numbers are within 5410 [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i] 5411 indices (i.e., idx[i][:num_valid[i]]) are valid. 5412 num_valid: a tensor of rank 0 or higher with a shape of [...] 5413 representing the number of valid indices in idx. Its dimensions are the 5414 batch dimensions of the input boxes. 5415 Raises: 5416 ValueError: When set pad_to_max_output_size to False for batched input. 5417 """ 5418 def _sort_scores_and_boxes(scores, boxes): 5419 """Sort boxes based their score from highest to lowest. 5420 5421 Args: 5422 scores: a tensor with a shape of [batch_size, num_boxes] representing 5423 the scores of boxes. 5424 boxes: a tensor with a shape of [batch_size, num_boxes, 4] representing 5425 the boxes. 5426 Returns: 5427 sorted_scores: a tensor with a shape of [batch_size, num_boxes] 5428 representing the sorted scores. 
5429 sorted_boxes: a tensor representing the sorted boxes. 5430 sorted_scores_indices: a tensor with a shape of [batch_size, num_boxes] 5431 representing the index of the scores in a sorted descending order. 5432 """ 5433 with ops.name_scope('sort_scores_and_boxes'): 5434 batch_size = array_ops.shape(boxes)[0] 5435 num_boxes = array_ops.shape(boxes)[1] 5436 sorted_scores_indices = sort_ops.argsort( 5437 scores, axis=1, direction='DESCENDING') 5438 index_offsets = math_ops.range(batch_size) * num_boxes 5439 indices = array_ops.reshape( 5440 sorted_scores_indices + array_ops.expand_dims(index_offsets, 1), [-1]) 5441 sorted_scores = array_ops.reshape( 5442 array_ops.gather(array_ops.reshape(scores, [-1]), indices), 5443 [batch_size, -1]) 5444 sorted_boxes = array_ops.reshape( 5445 array_ops.gather(array_ops.reshape(boxes, [-1, 4]), indices), 5446 [batch_size, -1, 4]) 5447 return sorted_scores, sorted_boxes, sorted_scores_indices 5448 5449 batch_dims = array_ops.shape(boxes)[:-2] 5450 num_boxes = array_ops.shape(boxes)[-2] 5451 boxes = array_ops.reshape(boxes, [-1, num_boxes, 4]) 5452 scores = array_ops.reshape(scores, [-1, num_boxes]) 5453 batch_size = array_ops.shape(boxes)[0] 5454 if score_threshold != float('-inf'): 5455 with ops.name_scope('filter_by_score'): 5456 score_mask = math_ops.cast(scores > score_threshold, scores.dtype) 5457 scores *= score_mask 5458 box_mask = array_ops.expand_dims( 5459 math_ops.cast(score_mask, boxes.dtype), 2) 5460 boxes *= box_mask 5461 5462 if not canonicalized_coordinates: 5463 with ops.name_scope('canonicalize_coordinates'): 5464 y_1, x_1, y_2, x_2 = array_ops.split( 5465 value=boxes, num_or_size_splits=4, axis=2) 5466 y_1_is_min = math_ops.reduce_all( 5467 math_ops.less_equal(y_1[0, 0, 0], y_2[0, 0, 0])) 5468 y_min, y_max = control_flow_ops.cond( 5469 y_1_is_min, lambda: (y_1, y_2), lambda: (y_2, y_1)) 5470 x_1_is_min = math_ops.reduce_all( 5471 math_ops.less_equal(x_1[0, 0, 0], x_2[0, 0, 0])) 5472 x_min, x_max = 
control_flow_ops.cond( 5473 x_1_is_min, lambda: (x_1, x_2), lambda: (x_2, x_1)) 5474 boxes = array_ops.concat([y_min, x_min, y_max, x_max], axis=2) 5475 5476 if not sorted_input: 5477 scores, boxes, sorted_indices = _sort_scores_and_boxes(scores, boxes) 5478 else: 5479 # Default value required for Autograph. 5480 sorted_indices = array_ops.zeros_like(scores, dtype=dtypes.int32) 5481 5482 pad = math_ops.cast( 5483 math_ops.ceil( 5484 math_ops.cast( 5485 math_ops.maximum(num_boxes, max_output_size), dtypes.float32) / 5486 math_ops.cast(tile_size, dtypes.float32)), 5487 dtypes.int32) * tile_size - num_boxes 5488 boxes = array_ops.pad( 5489 math_ops.cast(boxes, dtypes.float32), [[0, 0], [0, pad], [0, 0]]) 5490 scores = array_ops.pad( 5491 math_ops.cast(scores, dtypes.float32), [[0, 0], [0, pad]]) 5492 num_boxes_after_padding = num_boxes + pad 5493 num_iterations = num_boxes_after_padding // tile_size 5494 def _loop_cond(unused_boxes, unused_threshold, output_size, idx): 5495 return math_ops.logical_and( 5496 math_ops.reduce_min(output_size) < max_output_size, 5497 idx < num_iterations) 5498 5499 def suppression_loop_body(boxes, iou_threshold, output_size, idx): 5500 return _suppression_loop_body( 5501 boxes, iou_threshold, output_size, idx, tile_size) 5502 5503 selected_boxes, _, output_size, _ = control_flow_ops.while_loop( 5504 _loop_cond, 5505 suppression_loop_body, 5506 [ 5507 boxes, iou_threshold, 5508 array_ops.zeros([batch_size], dtypes.int32), 5509 constant_op.constant(0) 5510 ], 5511 shape_invariants=[ 5512 tensor_shape.TensorShape([None, None, 4]), 5513 tensor_shape.TensorShape([]), 5514 tensor_shape.TensorShape([None]), 5515 tensor_shape.TensorShape([]), 5516 ], 5517 ) 5518 num_valid = math_ops.minimum(output_size, max_output_size) 5519 idx = num_boxes_after_padding - math_ops.cast( 5520 nn_ops.top_k( 5521 math_ops.cast(math_ops.reduce_any( 5522 selected_boxes > 0, [2]), dtypes.int32) * 5523 array_ops.expand_dims( 5524 math_ops.range(num_boxes_after_padding, 
0, -1), 0), 5525 max_output_size)[0], dtypes.int32) 5526 idx = math_ops.minimum(idx, num_boxes - 1) 5527 5528 if not sorted_input: 5529 index_offsets = math_ops.range(batch_size) * num_boxes 5530 gather_idx = array_ops.reshape( 5531 idx + array_ops.expand_dims(index_offsets, 1), [-1]) 5532 idx = array_ops.reshape( 5533 array_ops.gather(array_ops.reshape(sorted_indices, [-1]), 5534 gather_idx), 5535 [batch_size, -1]) 5536 invalid_index = array_ops.zeros([batch_size, max_output_size], 5537 dtype=dtypes.int32) 5538 idx_index = array_ops.expand_dims(math_ops.range(max_output_size), 0) 5539 num_valid_expanded = array_ops.expand_dims(num_valid, 1) 5540 idx = array_ops.where(idx_index < num_valid_expanded, 5541 idx, invalid_index) 5542 5543 num_valid = array_ops.reshape(num_valid, batch_dims) 5544 return idx, num_valid 5545 5546 5547def non_max_suppression_padded_v1(boxes, 5548 scores, 5549 max_output_size, 5550 iou_threshold=0.5, 5551 score_threshold=float('-inf'), 5552 pad_to_max_output_size=False, 5553 name=None): 5554 """Greedily selects a subset of bounding boxes in descending order of score. 5555 5556 Performs algorithmically equivalent operation to tf.image.non_max_suppression, 5557 with the addition of an optional parameter which zero-pads the output to 5558 be of size `max_output_size`. 5559 The output of this operation is a tuple containing the set of integers 5560 indexing into the input collection of bounding boxes representing the selected 5561 boxes and the number of valid indices in the index set. The bounding box 5562 coordinates corresponding to the selected indices can then be obtained using 5563 the `tf.slice` and `tf.gather` operations. 
For example: 5564 ```python 5565 selected_indices_padded, num_valid = tf.image.non_max_suppression_padded( 5566 boxes, scores, max_output_size, iou_threshold, 5567 score_threshold, pad_to_max_output_size=True) 5568 selected_indices = tf.slice( 5569 selected_indices_padded, tf.constant([0]), num_valid) 5570 selected_boxes = tf.gather(boxes, selected_indices) 5571 ``` 5572 5573 Args: 5574 boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`. 5575 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 5576 score corresponding to each box (each row of boxes). 5577 max_output_size: A scalar integer `Tensor` representing the maximum number 5578 of boxes to be selected by non-max suppression. 5579 iou_threshold: A float representing the threshold for deciding whether boxes 5580 overlap too much with respect to IOU. 5581 score_threshold: A float representing the threshold for deciding when to 5582 remove boxes based on score. 5583 pad_to_max_output_size: bool. If True, size of `selected_indices` output is 5584 padded to `max_output_size`. 5585 name: A name for the operation (optional). 5586 5587 Returns: 5588 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 5589 selected indices from the boxes tensor, where `M <= max_output_size`. 5590 valid_outputs: A scalar integer `Tensor` denoting how many elements in 5591 `selected_indices` are valid. Valid elements occur first, then padding. 
5592 """ 5593 with ops.name_scope(name, 'non_max_suppression_padded'): 5594 iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') 5595 score_threshold = ops.convert_to_tensor( 5596 score_threshold, name='score_threshold') 5597 return gen_image_ops.non_max_suppression_v4(boxes, scores, max_output_size, 5598 iou_threshold, score_threshold, 5599 pad_to_max_output_size) 5600 5601 5602@tf_export('image.draw_bounding_boxes', v1=[]) 5603@dispatch.add_dispatch_support 5604def draw_bounding_boxes_v2(images, boxes, colors, name=None): 5605 """Draw bounding boxes on a batch of images. 5606 5607 Outputs a copy of `images` but draws on top of the pixels zero or more 5608 bounding boxes specified by the locations in `boxes`. The coordinates of the 5609 each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. 5610 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 5611 and the height of the underlying image. 5612 5613 For example, if an image is 100 x 200 pixels (height x width) and the bounding 5614 box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of 5615 the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). 5616 5617 Parts of the bounding box may fall outside the image. 5618 5619 Args: 5620 images: A `Tensor`. Must be one of the following types: `float32`, `half`. 5621 4-D with shape `[batch, height, width, depth]`. A batch of images. 5622 boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, 5623 num_bounding_boxes, 4]` containing bounding boxes. 5624 colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle 5625 through for the boxes. 5626 name: A name for the operation (optional). 5627 5628 Returns: 5629 A `Tensor`. Has the same type as `images`. 
5630 5631 Usage Example: 5632 5633 >>> # create an empty image 5634 >>> img = tf.zeros([1, 3, 3, 3]) 5635 >>> # draw a box around the image 5636 >>> box = np.array([0, 0, 1, 1]) 5637 >>> boxes = box.reshape([1, 1, 4]) 5638 >>> # alternate between red and blue 5639 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) 5640 >>> tf.image.draw_bounding_boxes(img, boxes, colors) 5641 <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy= 5642 array([[[[1., 0., 0.], 5643 [1., 0., 0.], 5644 [1., 0., 0.]], 5645 [[1., 0., 0.], 5646 [0., 0., 0.], 5647 [1., 0., 0.]], 5648 [[1., 0., 0.], 5649 [1., 0., 0.], 5650 [1., 0., 0.]]]], dtype=float32)> 5651 """ 5652 if colors is None: 5653 return gen_image_ops.draw_bounding_boxes(images, boxes, name) 5654 return gen_image_ops.draw_bounding_boxes_v2(images, boxes, colors, name) 5655 5656 5657@tf_export(v1=['image.draw_bounding_boxes']) 5658@dispatch.add_dispatch_support 5659def draw_bounding_boxes(images, boxes, name=None, colors=None): 5660 """Draw bounding boxes on a batch of images. 5661 5662 Outputs a copy of `images` but draws on top of the pixels zero or more 5663 bounding boxes specified by the locations in `boxes`. The coordinates of the 5664 each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. 5665 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 5666 and the height of the underlying image. 5667 5668 For example, if an image is 100 x 200 pixels (height x width) and the bounding 5669 box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of 5670 the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). 5671 5672 Parts of the bounding box may fall outside the image. 5673 5674 Args: 5675 images: A `Tensor`. Must be one of the following types: `float32`, `half`. 5676 4-D with shape `[batch, height, width, depth]`. A batch of images. 5677 boxes: A `Tensor` of type `float32`. 
3-D with shape `[batch, 5678 num_bounding_boxes, 4]` containing bounding boxes. 5679 name: A name for the operation (optional). 5680 colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle 5681 through for the boxes. 5682 5683 Returns: 5684 A `Tensor`. Has the same type as `images`. 5685 5686 Usage Example: 5687 5688 >>> # create an empty image 5689 >>> img = tf.zeros([1, 3, 3, 3]) 5690 >>> # draw a box around the image 5691 >>> box = np.array([0, 0, 1, 1]) 5692 >>> boxes = box.reshape([1, 1, 4]) 5693 >>> # alternate between red and blue 5694 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) 5695 >>> tf.image.draw_bounding_boxes(img, boxes, colors) 5696 <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy= 5697 array([[[[1., 0., 0.], 5698 [1., 0., 0.], 5699 [1., 0., 0.]], 5700 [[1., 0., 0.], 5701 [0., 0., 0.], 5702 [1., 0., 0.]], 5703 [[1., 0., 0.], 5704 [1., 0., 0.], 5705 [1., 0., 0.]]]], dtype=float32)> 5706 """ 5707 return draw_bounding_boxes_v2(images, boxes, colors, name) 5708 5709 5710@tf_export('image.generate_bounding_box_proposals') 5711@dispatch.add_dispatch_support 5712def generate_bounding_box_proposals(scores, 5713 bbox_deltas, 5714 image_info, 5715 anchors, 5716 nms_threshold=0.7, 5717 pre_nms_topn=6000, 5718 min_size=16, 5719 post_nms_topn=300, 5720 name=None): 5721 """Generate bounding box proposals from encoded bounding boxes. 5722 5723 Args: 5724 scores: A 4-D float `Tensor` of shape 5725 `[num_images, height, width, num_achors]` containing scores of 5726 the boxes for given anchors, can be unsorted. 5727 bbox_deltas: A 4-D float `Tensor` of shape 5728 `[num_images, height, width, 4 x num_anchors]` encoding boxes 5729 with respect to each anchor. Coordinates are given 5730 in the form `[dy, dx, dh, dw]`. 5731 image_info: A 2-D float `Tensor` of shape `[num_images, 5]` 5732 containing image information Height, Width, Scale. 5733 anchors: A 2-D float `Tensor` of shape `[num_anchors, 4]` 5734 describing the anchor boxes. 
5735 Boxes are formatted in the form `[y1, x1, y2, x2]`. 5736 nms_threshold: A scalar float `Tensor` for non-maximal-suppression 5737 threshold. Defaults to 0.7. 5738 pre_nms_topn: A scalar int `Tensor` for the number of 5739 top scoring boxes to be used as input. Defaults to 6000. 5740 min_size: A scalar float `Tensor`. Any box that has a smaller size 5741 than min_size will be discarded. Defaults to 16. 5742 post_nms_topn: An integer. Maximum number of rois in the output. 5743 name: A name for this operation (optional). 5744 5745 Returns: 5746 rois: Region of interest boxes sorted by their scores. 5747 roi_probabilities: scores of the ROI boxes in the ROIs' `Tensor`. 5748 """ 5749 return gen_image_ops.generate_bounding_box_proposals( 5750 scores=scores, 5751 bbox_deltas=bbox_deltas, 5752 image_info=image_info, 5753 anchors=anchors, 5754 nms_threshold=nms_threshold, 5755 pre_nms_topn=pre_nms_topn, 5756 min_size=min_size, 5757 post_nms_topn=post_nms_topn, 5758 name=name) 5759