# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of image ops."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools
import numpy as np

from tensorflow.python.eager import def_function
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_image_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import sort_ops
from tensorflow.python.ops import stateless_random_ops
from tensorflow.python.ops import string_ops
from tensorflow.python.ops import variables
from tensorflow.python.util import deprecation
from tensorflow.python.util import dispatch
from tensorflow.python.util.tf_export import tf_export

ops.NotDifferentiable('RandomCrop')
# TODO(b/31222613): This op may be differentiable, and there may be
# latent bugs here.
ops.NotDifferentiable('HSVToRGB')
ops.NotDifferentiable('DrawBoundingBoxes')
ops.NotDifferentiable('SampleDistortedBoundingBox')
ops.NotDifferentiable('SampleDistortedBoundingBoxV2')
# TODO(bsteiner): Implement the gradient function for extract_glimpse
# TODO(b/31222613): This op may be differentiable, and there may be
# latent bugs here.
ops.NotDifferentiable('ExtractGlimpse')
ops.NotDifferentiable('NonMaxSuppression')
ops.NotDifferentiable('NonMaxSuppressionV2')
ops.NotDifferentiable('NonMaxSuppressionWithOverlaps')
ops.NotDifferentiable('GenerateBoundingBoxProposals')


# pylint: disable=invalid-name
def _assert(cond, ex_type, msg):
  """A polymorphic assert that works with tensors and boolean expressions.

  If `cond` is not a tensor, behave like an ordinary assert statement, except
  that an empty list is returned. If `cond` is a tensor, return a list
  containing a single TensorFlow assert op.

  Args:
    cond: Something that evaluates to a boolean value. May be a tensor.
    ex_type: The exception class to use.
    msg: The error message.

  Returns:
    A list, containing at most one assert op.
  """
  if _is_tensor(cond):
    return [control_flow_ops.Assert(cond, [msg])]
  else:
    if not cond:
      raise ex_type(msg)
    else:
      return []


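# Illustrative sketch (not part of the original module): how `_assert`
# behaves for its two kinds of input. `_demo_assert_usage` is a hypothetical
# helper, assuming `height` is a scalar int32 Tensor.
def _demo_assert_usage(height):
  # A plain Python condition is checked eagerly: failure raises ValueError
  # immediately, success yields an empty list.
  eager_checks = _assert(224 > 0, ValueError, 'never raised')
  # A tensor condition is deferred: a one-element list holding a TensorFlow
  # Assert op is returned, for use as a control dependency.
  deferred_checks = _assert(height > 0, ValueError, 'height must be positive')
  return eager_checks + deferred_checks

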
def _is_tensor(x):
  """Returns `True` if `x` is a symbolic tensor-like object.

  Args:
    x: A python object to check.

  Returns:
    `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`.
  """
  return isinstance(x, (ops.Tensor, variables.Variable))


def _ImageDimensions(image, rank):
  """Returns the dimensions of an image tensor.

  Args:
    image: A rank-D Tensor. For 3-D images, the shape is
      `[height, width, channels]`.
    rank: The expected rank of the image.

  Returns:
    A list containing the dimensions of the input image. Dimensions that are
    statically known are python integers, otherwise, they are integer scalar
    tensors.
  """
  if image.get_shape().is_fully_defined():
    return image.get_shape().as_list()
  else:
    static_shape = image.get_shape().with_rank(rank).as_list()
    dynamic_shape = array_ops.unstack(array_ops.shape(image), rank)
    return [
        s if s is not None else d for s, d in zip(static_shape, dynamic_shape)
    ]


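# Illustrative sketch (not part of the original module): `_ImageDimensions`
# mixes static and dynamic sizes. `_demo_image_dimensions` is a hypothetical
# helper.
def _demo_image_dimensions(image):
  # For a 3-D image whose height is statically known but whose width is not,
  # this returns e.g. [224, <scalar int32 Tensor>, 3]: known dimensions come
  # back as Python ints, unknown ones as scalar tensors.
  height, width, channels = _ImageDimensions(image, rank=3)
  return height, width, channels

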
def _Check3DImage(image, require_static=True):
  """Assert that we are working with a properly shaped image.

  Args:
    image: 3-D Tensor of shape [height, width, channels]
    require_static: If `True`, requires that all dimensions of `image` are known
      and non-zero.

  Raises:
    ValueError: if `image.shape` is not a 3-vector.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    image_shape = image.get_shape().with_rank(3)
  except ValueError:
    raise ValueError("'image' (shape %s) must be three-dimensional." %
                     image.shape)
  if require_static and not image_shape.is_fully_defined():
    raise ValueError("'image' (shape %s) must be fully defined." % image_shape)
  if any(x == 0 for x in image_shape):
    raise ValueError("all dims of 'image.shape' must be > 0: %s" % image_shape)
  if not image_shape.is_fully_defined():
    return [
        check_ops.assert_positive(
            array_ops.shape(image),
            ["all dims of 'image.shape' "
             'must be > 0.'])
    ]
  else:
    return []


def _Assert3DImage(image):
  """Assert that we are working with a properly shaped image.

  Performs the check statically if possible (i.e. if the shape
  is statically known). Otherwise adds a control dependency
  to an assert op that checks the dynamic shape.

  Args:
    image: 3-D Tensor of shape [height, width, channels]

  Raises:
    ValueError: if `image.shape` is not a 3-vector.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  return control_flow_ops.with_dependencies(
      _Check3DImage(image, require_static=False), image)


def _AssertAtLeast3DImage(image):
  """Assert that we are working with a properly shaped image.

  Performs the check statically if possible (i.e. if the shape
  is statically known). Otherwise adds a control dependency
  to an assert op that checks the dynamic shape.

  Args:
    image: >= 3-D Tensor of size [*, height, width, depth]

  Raises:
    ValueError: if image.shape is not a [>= 3] vector.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  return control_flow_ops.with_dependencies(
      _CheckAtLeast3DImage(image, require_static=False), image)


def _CheckAtLeast3DImage(image, require_static=True):
  """Assert that we are working with a properly shaped image.

  Args:
    image: >= 3-D Tensor of size [*, height, width, depth]
    require_static: If `True`, requires that all dimensions of `image` are known
      and non-zero.

  Raises:
    ValueError: if image.shape is not a [>= 3] vector.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    if image.get_shape().ndims is None:
      image_shape = image.get_shape().with_rank(3)
    else:
      image_shape = image.get_shape().with_rank_at_least(3)
  except ValueError:
    raise ValueError("'image' (shape %s) must be at least three-dimensional." %
                     image.shape)
  if require_static and not image_shape.is_fully_defined():
    raise ValueError('\'image\' must be fully defined.')
  if any(x == 0 for x in image_shape[-3:]):
    raise ValueError('inner 3 dims of \'image.shape\' must be > 0: %s' %
                     image_shape)
  if not image_shape[-3:].is_fully_defined():
    return [
        check_ops.assert_positive(
            array_ops.shape(image)[-3:],
            ["inner 3 dims of 'image.shape' "
             'must be > 0.']),
        check_ops.assert_greater_equal(
            array_ops.rank(image),
            3,
            message="'image' must be at least three-dimensional.")
    ]
  else:
    return []


def _AssertGrayscaleImage(image):
  """Assert that we are working with a properly shaped grayscale image.

  Performs the check statically if possible (i.e. if the shape
  is statically known). Otherwise adds a control dependency
  to an assert op that checks the dynamic shape.

  Args:
    image: >= 2-D Tensor of size [*, 1]

  Raises:
    ValueError: if image.shape is not a [>= 2] vector or if
              last dimension is not size 1.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  return control_flow_ops.with_dependencies(
      _CheckGrayscaleImage(image, require_static=False), image)


def _CheckGrayscaleImage(image, require_static=True):
  """Assert that we are working with a properly shaped grayscale image.

  Args:
    image: >= 2-D Tensor of size [*, 1]
    require_static: Boolean, whether static shape is required.

  Raises:
    ValueError: if image.shape is not a [>= 2] vector or if
              last dimension is not size 1.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    if image.get_shape().ndims is None:
      image_shape = image.get_shape().with_rank(2)
    else:
      image_shape = image.get_shape().with_rank_at_least(2)
  except ValueError:
    raise ValueError('A grayscale image (shape %s) must be at least '
                     'two-dimensional.' % image.shape)
  if require_static and not image_shape.is_fully_defined():
    raise ValueError('\'image\' must be fully defined.')
  if image_shape.is_fully_defined():
    if image_shape[-1] != 1:
      raise ValueError('Last dimension of a grayscale image should be size 1.')
  if not image_shape.is_fully_defined():
    return [
        check_ops.assert_equal(
            array_ops.shape(image)[-1],
            1,
            message='Last dimension of a grayscale image should be size 1.'),
        check_ops.assert_greater_equal(
            array_ops.rank(image),
            2,
            message='A grayscale image must be at least two-dimensional.')
    ]
  else:
    return []


def fix_image_flip_shape(image, result):
  """Set the shape to 3 dimensional if we don't know anything else.

  Args:
    image: original image
    result: flipped or transformed image

  Returns:
    An image whose shape is at least (None, None, None).
  """

  image_shape = image.get_shape()
  if image_shape == tensor_shape.unknown_shape():
    result.set_shape([None, None, None])
  else:
    result.set_shape(image_shape)
  return result


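# Illustrative sketch (not part of the original module): how
# `fix_image_flip_shape` pins an otherwise shapeless result to rank 3.
# `_demo_fix_image_flip_shape` is a hypothetical helper.
def _demo_fix_image_flip_shape(image):
  flipped = array_ops.reverse(image, [1])
  # If `image.get_shape()` is fully unknown, `flipped` is constrained to
  # (None, None, None); otherwise it inherits the static shape of `image`.
  return fix_image_flip_shape(image, flipped)

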
@tf_export('image.random_flip_up_down')
@dispatch.add_dispatch_support
def random_flip_up_down(image, seed=None):
  """Randomly flips an image vertically (upside down).

  With a 1 in 2 chance, outputs the contents of `image` flipped along the first
  dimension, which is `height`.  Otherwise, output the image as-is.
  When passing a batch of images, each image will be randomly flipped
  independently of the other images.

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> tf.image.random_flip_up_down(image, 3).numpy().tolist()
  [[[3], [4]], [[1], [2]]]

  Randomly flip multiple images.

  >>> images = np.array(
  ... [
  ...     [[[1], [2]], [[3], [4]]],
  ...     [[[5], [6]], [[7], [8]]]
  ... ])
  >>> tf.image.random_flip_up_down(images, 4).numpy().tolist()
  [[[[3], [4]], [[1], [2]]], [[[5], [6]], [[7], [8]]]]

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_flip_up_down`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function
  is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  random_func = functools.partial(random_ops.random_uniform, seed=seed)
  return _random_flip(image, 0, random_func, 'random_flip_up_down')


@tf_export('image.random_flip_left_right')
@dispatch.add_dispatch_support
def random_flip_left_right(image, seed=None):
  """Randomly flip an image horizontally (left to right).

  With a 1 in 2 chance, outputs the contents of `image` flipped along the
  second dimension, which is `width`.  Otherwise output the image as-is.
  When passing a batch of images, each image will be randomly flipped
  independently of the other images.

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> tf.image.random_flip_left_right(image, 5).numpy().tolist()
  [[[2], [1]], [[4], [3]]]

  Randomly flip multiple images.

  >>> images = np.array(
  ... [
  ...     [[[1], [2]], [[3], [4]]],
  ...     [[[5], [6]], [[7], [8]]]
  ... ])
  >>> tf.image.random_flip_left_right(images, 6).numpy().tolist()
  [[[[2], [1]], [[4], [3]]], [[[5], [6]], [[7], [8]]]]

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_flip_left_right`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function
  is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  random_func = functools.partial(random_ops.random_uniform, seed=seed)
  return _random_flip(image, 1, random_func, 'random_flip_left_right')


@tf_export('image.stateless_random_flip_left_right', v1=[])
@dispatch.add_dispatch_support
def stateless_random_flip_left_right(image, seed):
  """Randomly flip an image horizontally (left to right) deterministically.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> seed = (2, 3)
  >>> tf.image.stateless_random_flip_left_right(image, seed).numpy().tolist()
  [[[2], [1]], [[4], [3]]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    A tensor of the same type and shape as `image`.
  """
  random_func = functools.partial(
      stateless_random_ops.stateless_random_uniform, seed=seed)
  return _random_flip(
      image, 1, random_func, 'stateless_random_flip_left_right')


@tf_export('image.stateless_random_flip_up_down', v1=[])
@dispatch.add_dispatch_support
def stateless_random_flip_up_down(image, seed):
  """Randomly flip an image vertically (upside down) deterministically.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> seed = (2, 3)
  >>> tf.image.stateless_random_flip_up_down(image, seed).numpy().tolist()
  [[[3], [4]], [[1], [2]]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    A tensor of the same type and shape as `image`.
  """
  random_func = functools.partial(
      stateless_random_ops.stateless_random_uniform, seed=seed)
  return _random_flip(
      image, 0, random_func, 'stateless_random_flip_up_down')


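# Illustrative sketch (not part of the original module): the stateless flips
# are pure functions of (image, seed), so repeated calls with the same seed
# agree. `_demo_stateless_flip_determinism` is a hypothetical helper.
def _demo_stateless_flip_determinism(image):
  seed = constant_op.constant([2, 3], dtype=dtypes.int32)
  first = stateless_random_flip_left_right(image, seed)
  second = stateless_random_flip_left_right(image, seed)
  # `first` and `second` are element-wise equal for any fixed seed.
  return first, second

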
def _random_flip(image, flip_index, random_func, scope_name):
  """Randomly (50% chance) flip an image along axis `flip_index`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    flip_index: Dimension along which to flip the image.
      Vertical is 0, Horizontal is 1.
    random_func: partial function for calling either stateful or stateless
      random ops with `seed` parameter specified.
    scope_name: Name of the scope in which the ops are added.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  with ops.name_scope(None, scope_name, [image]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()

    def f_rank3():
      uniform_random = random_func(shape=[], minval=0, maxval=1.0)
      mirror_cond = math_ops.less(uniform_random, .5)
      result = control_flow_ops.cond(
          mirror_cond,
          lambda: array_ops.reverse(image, [flip_index]),
          lambda: image,
          name=scope)
      return fix_image_flip_shape(image, result)

    def f_rank4():
      batch_size = array_ops.shape(image)[0]
      uniform_random = random_func(shape=[batch_size], minval=0, maxval=1.0)
      flips = math_ops.round(
          array_ops.reshape(uniform_random, [batch_size, 1, 1, 1]))
      flips = math_ops.cast(flips, image.dtype)
      flipped_input = array_ops.reverse(image, [flip_index + 1])
      return flips * flipped_input + (1 - flips) * image

    if shape.ndims is None:
      rank = array_ops.rank(image)
      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    if shape.ndims == 3:
      return f_rank3()
    elif shape.ndims == 4:
      return f_rank4()
    else:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)


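# Illustrative sketch (not part of the original module): the rank-4 branch of
# `_random_flip` avoids a per-image cond by blending arithmetically. With the
# per-image draws rounded to 0.0 or 1.0, `flips * flipped + (1 - flips) *
# image` selects the flipped image exactly where the draw rounded to 1.
# `_demo_batch_flip_blend` is a hypothetical standalone version.
def _demo_batch_flip_blend(images, flip_index):
  batch_size = array_ops.shape(images)[0]
  draws = random_ops.random_uniform([batch_size], minval=0, maxval=1.0)
  flips = math_ops.round(array_ops.reshape(draws, [batch_size, 1, 1, 1]))
  flips = math_ops.cast(flips, images.dtype)
  flipped = array_ops.reverse(images, [flip_index + 1])
  return flips * flipped + (1 - flips) * images

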
@tf_export('image.flip_left_right')
@dispatch.add_dispatch_support
def flip_left_right(image):
  """Flip an image horizontally (left to right).

  Outputs the contents of `image` flipped along the width dimension.

  See also `tf.reverse`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...     [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.flip_left_right(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 4.,  5.,  6.],
          [ 1.,  2.,  3.]],
         [[10., 11., 12.],
          [ 7.,  8.,  9.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  return _flip(image, 1, 'flip_left_right')


@tf_export('image.flip_up_down')
@dispatch.add_dispatch_support
def flip_up_down(image):
  """Flip an image vertically (upside down).

  Outputs the contents of `image` flipped along the height dimension.

  See also `tf.reverse`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...     [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.flip_up_down(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 7.,  8.,  9.],
          [10., 11., 12.]],
         [[ 1.,  2.,  3.],
          [ 4.,  5.,  6.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.

  Returns:
    A `Tensor` of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  return _flip(image, 0, 'flip_up_down')


def _flip(image, flip_index, scope_name):
  """Flip an image either horizontally or vertically.

  Outputs the contents of `image` flipped along the dimension `flip_index`.

  See also `tf.reverse`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    flip_index: 0 for vertical, 1 for horizontal.
    scope_name: string, scope name.

  Returns:
    A `Tensor` of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  with ops.name_scope(None, scope_name, [image]):
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()

    def f_rank3():
      return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index]))

    def f_rank4():
      return array_ops.reverse(image, [flip_index + 1])

    if shape.ndims is None:
      rank = array_ops.rank(image)
      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return f_rank3()
    elif shape.ndims == 4:
      return f_rank4()
    else:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)


@tf_export('image.rot90')
@dispatch.add_dispatch_support
def rot90(image, k=1, name=None):
  """Rotate image(s) counter-clockwise by 90 degrees.

  For example:

  >>> a=tf.constant([[[1],[2]],
  ...                [[3],[4]]])
  >>> # rotating `a` counter clockwise by 90 degrees
  >>> a_rot=tf.image.rot90(a)
  >>> print(a_rot[...,0].numpy())
  [[2 4]
   [1 3]]
  >>> # rotating `a` counter clockwise by 270 degrees
  >>> a_rot=tf.image.rot90(a, k=3)
  >>> print(a_rot[...,0].numpy())
  [[3 1]
   [4 2]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by 90 degrees.
    name: A name for this operation (optional).

  Returns:
    A rotated tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  with ops.name_scope(name, 'rot90', [image, k]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k')
    k.get_shape().assert_has_rank(0)
    k = math_ops.mod(k, 4)

    shape = image.get_shape()
    if shape.ndims is None:
      rank = array_ops.rank(image)

      def f_rank3():
        return _rot90_3D(image, k, scope)

      def f_rank4():
        return _rot90_4D(image, k, scope)

      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return _rot90_3D(image, k, scope)
    elif shape.ndims == 4:
      return _rot90_4D(image, k, scope)
    else:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)


def _rot90_3D(image, k, name_scope):
  """Rotate image counter-clockwise by 90 degrees `k` times.

  Args:
    image: 3-D Tensor of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by 90 degrees.
    name_scope: A valid TensorFlow name scope.

  Returns:
    A 3-D tensor of the same type and shape as `image`.
  """

  def _rot90():
    return array_ops.transpose(array_ops.reverse_v2(image, [1]), [1, 0, 2])

  def _rot180():
    return array_ops.reverse_v2(image, [0, 1])

  def _rot270():
    return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), [1])

  cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180),
           (math_ops.equal(k, 3), _rot270)]

  result = control_flow_ops.case(
      cases, default=lambda: image, exclusive=True, name=name_scope)
  result.set_shape([None, None, image.get_shape()[2]])
  return result


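# Illustrative sketch (not part of the original module): a 90-degree
# counter-clockwise rotation is "reverse the columns, then transpose the
# spatial axes", which is exactly what `_rot90` above composes. A
# hypothetical NumPy cross-check of that identity:
def _demo_rot90_identity(image_np):
  rotated = np.transpose(image_np[:, ::-1, :], [1, 0, 2])
  # np.rot90 also rotates counter-clockwise by default, so both paths agree.
  assert np.array_equal(rotated, np.rot90(image_np, k=1, axes=(0, 1)))
  return rotated

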
def _rot90_4D(images, k, name_scope):
  """Rotate batch of images counter-clockwise by 90 degrees `k` times.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]`.
    k: A scalar integer. The number of times the images are rotated by 90
      degrees.
    name_scope: A valid TensorFlow name scope.

  Returns:
    A 4-D `Tensor` of the same type and shape as `images`.
  """

  def _rot90():
    return array_ops.transpose(array_ops.reverse_v2(images, [2]), [0, 2, 1, 3])

  def _rot180():
    return array_ops.reverse_v2(images, [1, 2])

  def _rot270():
    return array_ops.reverse_v2(array_ops.transpose(images, [0, 2, 1, 3]), [2])

  cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180),
           (math_ops.equal(k, 3), _rot270)]

  result = control_flow_ops.case(
      cases, default=lambda: images, exclusive=True, name=name_scope)
  shape = result.get_shape()
  result.set_shape([shape[0], None, None, shape[3]])
  return result


@tf_export('image.transpose', v1=['image.transpose', 'image.transpose_image'])
@dispatch.add_dispatch_support
def transpose(image, name=None):
  """Transpose image(s) by swapping the height and width dimension.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...     [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.transpose(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.,  2.,  3.],
          [ 7.,  8.,  9.]],
         [[ 4.,  5.,  6.],
          [10., 11., 12.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    name: A name for this operation (optional).

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, width, height, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[width, height, channels]`.

  Raises:
    ValueError: if the shape of `image` is not supported.

  Usage Example:

  >>> image = [[[1, 2], [3, 4]],
  ...         [[5, 6], [7, 8]],
  ...         [[9, 10], [11, 12]]]
  >>> image = tf.constant(image)
  >>> tf.image.transpose(image)
  <tf.Tensor: shape=(2, 3, 2), dtype=int32, numpy=
  array([[[ 1,  2],
          [ 5,  6],
          [ 9, 10]],
         [[ 3,  4],
          [ 7,  8],
          [11, 12]]], dtype=int32)>
  """
  with ops.name_scope(name, 'transpose', [image]):
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()
    if shape.ndims is None:
      rank = array_ops.rank(image)

      def f_rank3():
        return array_ops.transpose(image, [1, 0, 2], name=name)

      def f_rank4():
        return array_ops.transpose(image, [0, 2, 1, 3], name=name)

      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return array_ops.transpose(image, [1, 0, 2], name=name)
    elif shape.ndims == 4:
      return array_ops.transpose(image, [0, 2, 1, 3], name=name)
    else:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)


@tf_export('image.central_crop')
@dispatch.add_dispatch_support
def central_crop(image, central_fraction):
  """Crop the central region of the image(s).

  Remove the outer parts of an image but retain the central region of the image
  along each dimension. If we specify `central_fraction = 0.5`, this function
  returns the region marked with "X" in the below diagram.

       --------
      |        |
      |  XXXX  |
      |  XXXX  |
      |        |   where "X" is the central 50% of the image.
       --------

  This function works on either a single image (`image` is a 3-D Tensor), or a
  batch of images (`image` is a 4-D Tensor).

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0],
  ...       [7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]],
  ...     [[13.0, 14.0, 15.0],
  ...       [16.0, 17.0, 18.0],
  ...       [19.0, 20.0, 21.0],
  ...       [22.0, 23.0, 24.0]],
  ...     [[25.0, 26.0, 27.0],
  ...       [28.0, 29.0, 30.0],
  ...       [31.0, 32.0, 33.0],
  ...       [34.0, 35.0, 36.0]],
  ...     [[37.0, 38.0, 39.0],
  ...       [40.0, 41.0, 42.0],
  ...       [43.0, 44.0, 45.0],
  ...       [46.0, 47.0, 48.0]]]
  >>> tf.image.central_crop(x, 0.5)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[16., 17., 18.],
          [19., 20., 21.]],
         [[28., 29., 30.],
          [31., 32., 33.]]], dtype=float32)>

  Args:
    image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D
      Tensor of shape [batch_size, height, width, depth].
    central_fraction: float (0, 1], fraction of size to crop.

  Raises:
    ValueError: if central_fraction is not within (0, 1].

  Returns:
    3-D / 4-D float Tensor, as per the input.
  """
  with ops.name_scope(None, 'central_crop', [image]):
    image = ops.convert_to_tensor(image, name='image')
    central_fraction_static = tensor_util.constant_value(central_fraction)
    if central_fraction_static is not None:
      if central_fraction_static <= 0.0 or central_fraction_static > 1.0:
        raise ValueError('central_fraction must be within (0, 1]')
      if central_fraction_static == 1.0:
        return image
    else:
      assert_ops = _assert(
          math_ops.logical_and(central_fraction > 0.0,
                               central_fraction <= 1.0), ValueError,
          'central_fraction must be within (0, 1]')
      image = control_flow_ops.with_dependencies(assert_ops, image)

    _AssertAtLeast3DImage(image)
    rank = image.get_shape().ndims
    if rank != 3 and rank != 4:
      raise ValueError('`image` should either be a Tensor with rank = 3 or '
                       'rank = 4. Had rank = {}.'.format(rank))

    # Helper method to return the `idx`-th dimension of `tensor`, along with
    # a boolean signifying if the dimension is dynamic.
    def _get_dim(tensor, idx):
      static_shape = tensor.get_shape().dims[idx].value
      if static_shape is not None:
        return static_shape, False
      return array_ops.shape(tensor)[idx], True

    # Get the height, width, depth (and batch size, if the image is a 4-D
    # tensor).
    if rank == 3:
      img_h, dynamic_h = _get_dim(image, 0)
      img_w, dynamic_w = _get_dim(image, 1)
      img_d = image.get_shape()[2]
    else:
      img_bs = image.get_shape()[0]
      img_h, dynamic_h = _get_dim(image, 1)
      img_w, dynamic_w = _get_dim(image, 2)
      img_d = image.get_shape()[3]

    dynamic_h = dynamic_h or (central_fraction_static is None)
    dynamic_w = dynamic_w or (central_fraction_static is None)

    # Compute the bounding boxes for the crop. The type and value of the
    # bounding boxes depend on the `image` tensor's rank and whether or not
    # the dimensions are statically defined.
    if dynamic_h:
      img_hd = math_ops.cast(img_h, dtypes.float64)
      bbox_h_start = math_ops.cast(
          (img_hd - img_hd * math_ops.cast(central_fraction, dtypes.float64)) /
          2, dtypes.int32)
    else:
      img_hd = float(img_h)
      bbox_h_start = int((img_hd - img_hd * central_fraction_static) / 2)

    if dynamic_w:
      img_wd = math_ops.cast(img_w, dtypes.float64)
      bbox_w_start = math_ops.cast(
          (img_wd - img_wd * math_ops.cast(central_fraction, dtypes.float64)) /
          2, dtypes.int32)
    else:
      img_wd = float(img_w)
      bbox_w_start = int((img_wd - img_wd * central_fraction_static) / 2)

    bbox_h_size = img_h - bbox_h_start * 2
    bbox_w_size = img_w - bbox_w_start * 2

    if rank == 3:
      bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0])
      bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1])
    else:
      bbox_begin = array_ops.stack([0, bbox_h_start, bbox_w_start, 0])
      bbox_size = array_ops.stack([-1, bbox_h_size, bbox_w_size, -1])

    image = array_ops.slice(image, bbox_begin, bbox_size)

    # Reshape the `image` tensor to the desired size.
    if rank == 3:
      image.set_shape([
          None if dynamic_h else bbox_h_size,
          None if dynamic_w else bbox_w_size, img_d
      ])
    else:
      image.set_shape([
          img_bs, None if dynamic_h else bbox_h_size,
          None if dynamic_w else bbox_w_size, img_d
      ])
    return image


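# Illustrative sketch (not part of the original module): the crop-window
# arithmetic above, restated in plain Python. `_demo_central_crop_window` is
# a hypothetical helper for a single statically known dimension.
def _demo_central_crop_window(dim, fraction):
  start = int((dim - dim * fraction) / 2)  # pixels trimmed from each side
  size = dim - start * 2  # length of the retained central slice
  # e.g. dim=4, fraction=0.5 -> start=1, size=2, matching the 4x4 docstring
  # example above.
  return start, size

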
@tf_export('image.pad_to_bounding_box')
@dispatch.add_dispatch_support
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width):
  """Pad `image` with zeros to the specified `height` and `width`.

  Adds `offset_height` rows of zeros on top, `offset_width` columns of
  zeros on the left, and then pads the image on the bottom and right
  with zeros until it has dimensions `target_height`, `target_width`.

  This op does nothing if `offset_*` is zero and the image already has size
  `target_height` by `target_width`.

  Usage Example:

  >>> x = [[[1., 2., 3.],
  ...       [4., 5., 6.]],
  ...      [[7., 8., 9.],
  ...       [10., 11., 12.]]]
  >>> padded_image = tf.image.pad_to_bounding_box(x, 1, 1, 4, 4)
  >>> padded_image
  <tf.Tensor: shape=(4, 4, 3), dtype=float32, numpy=
  array([[[ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.]],
         [[ 0.,  0.,  0.],
          [ 1.,  2.,  3.],
          [ 4.,  5.,  6.],
          [ 0.,  0.,  0.]],
         [[ 0.,  0.,  0.],
          [ 7.,  8.,  9.],
          [10., 11., 12.],
          [ 0.,  0.,  0.]],
         [[ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    offset_height: Number of rows of zeros to add on top.
    offset_width: Number of columns of zeros to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, target_height, target_width, channels]`
    If `image` was 3-D, a 3-D float Tensor of shape
    `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative.
  """
  with ops.name_scope(None, 'pad_to_bounding_box', [image]):
    image = ops.convert_to_tensor(image, name='image')

    is_batch = True
    image_shape = image.get_shape()
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
          image_shape)

    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
    batch, height, width, depth = _ImageDimensions(image, rank=4)

    after_padding_width = target_width - offset_width - width
    after_padding_height = target_height - offset_height - height

    assert_ops += _assert(offset_height >= 0, ValueError,
                          'offset_height must be >= 0')
    assert_ops += _assert(offset_width >= 0, ValueError,
                          'offset_width must be >= 0')
    assert_ops += _assert(after_padding_width >= 0, ValueError,
                          'width must be <= target - offset')
    assert_ops += _assert(after_padding_height >= 0, ValueError,
                          'height must be <= target - offset')
    image = control_flow_ops.with_dependencies(assert_ops, image)

    # Do not pad on the depth dimension.
    paddings = array_ops.reshape(
        array_ops.stack([
            0, 0, offset_height, after_padding_height, offset_width,
            after_padding_width, 0, 0
        ]), [4, 2])
    padded = array_ops.pad(image, paddings)

    padded_shape = [
        None if _is_tensor(i) else i
        for i in [batch, target_height, target_width, depth]
    ]
    padded.set_shape(padded_shape)

    if not is_batch:
      padded = array_ops.squeeze(padded, axis=[0])

    return padded


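# Illustrative sketch (not part of the original module): the `paddings`
# matrix built above, written out for one concrete case. Padding a 2x2 image
# into a 4x4 canvas at offset (1, 1) uses one [before, after] pair per
# dimension. `_demo_pad_paddings` is a hypothetical helper.
def _demo_pad_paddings():
  # [before, after] per dimension of a [batch, height, width, channels]
  # image; batch and channels are never padded. Here: 1 row above,
  # 4 - 1 - 2 = 1 row below, and likewise for columns.
  return constant_op.constant([[0, 0], [1, 1], [1, 1], [0, 0]])

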
@tf_export('image.crop_to_bounding_box')
@dispatch.add_dispatch_support
def crop_to_bounding_box(image, offset_height, offset_width, target_height,
                         target_width):
  """Crops an `image` to a specified bounding box.

  This op cuts a rectangular bounding box out of `image`. The top-left corner
  of the bounding box is at `offset_height, offset_width` in `image`, and the
  lower-right corner is at
  `offset_height + target_height, offset_width + target_width`.

  Example Usage:

  >>> image = tf.constant(np.arange(1, 28, dtype=np.float32), shape=[3, 3, 3])
  >>> image[:,:,0] # print the first channel of the 3-D tensor
  <tf.Tensor: shape=(3, 3), dtype=float32, numpy=
  array([[ 1.,  4.,  7.],
         [10., 13., 16.],
         [19., 22., 25.]], dtype=float32)>
  >>> cropped_image = tf.image.crop_to_bounding_box(image, 0, 0, 2, 2)
  >>> cropped_image[:,:,0] # print the first channel of the cropped 3-D tensor
  <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
  array([[ 1.,  4.],
         [10., 13.]], dtype=float32)>

  Args:
    image: 4-D `Tensor` of shape `[batch, height, width, channels]` or 3-D
      `Tensor` of shape `[height, width, channels]`.
    offset_height: Vertical coordinate of the top-left corner of the bounding
      box in `image`.
    offset_width: Horizontal coordinate of the top-left corner of the bounding
      box in `image`.
    target_height: Height of the bounding box.
    target_width: Width of the bounding box.

  Returns:
    If `image` was 4-D, a 4-D `Tensor` of shape
    `[batch, target_height, target_width, channels]`.
    If `image` was 3-D, a 3-D `Tensor` of shape
    `[target_height, target_width, channels]`.
    It has the same dtype as `image`.

  Raises:
    ValueError: `image` is not a 3-D or 4-D `Tensor`.
    ValueError: `offset_width < 0` or `offset_height < 0`.
    ValueError: `target_width <= 0` or `target_height <= 0`.
    ValueError: `width < offset_width + target_width` or
      `height < offset_height + target_height`.
  """
  with ops.name_scope(None, 'crop_to_bounding_box', [image]):
    image = ops.convert_to_tensor(image, name='image')

    is_batch = True
    image_shape = image.get_shape()
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
          image_shape)

    assert_ops = _CheckAtLeast3DImage(image, require_static=False)

    batch, height, width, depth = _ImageDimensions(image, rank=4)

    assert_ops += _assert(offset_width >= 0, ValueError,
                          'offset_width must be >= 0.')
    assert_ops += _assert(offset_height >= 0, ValueError,
                          'offset_height must be >= 0.')
    assert_ops += _assert(target_width > 0, ValueError,
                          'target_width must be > 0.')
    assert_ops += _assert(target_height > 0, ValueError,
                          'target_height must be > 0.')
    assert_ops += _assert(width >= (target_width + offset_width), ValueError,
                          'width must be >= target + offset.')
    assert_ops += _assert(height >= (target_height + offset_height), ValueError,
                          'height must be >= target + offset.')
    image = control_flow_ops.with_dependencies(assert_ops, image)

    cropped = array_ops.slice(
        image, array_ops.stack([0, offset_height, offset_width, 0]),
        array_ops.stack([array_ops.shape(image)[0], target_height, target_width,
                         array_ops.shape(image)[3]]))

    cropped_shape = [
        None if _is_tensor(i) else i
        for i in [batch, target_height, target_width, depth]
    ]
    cropped.set_shape(cropped_shape)

    if not is_batch:
      cropped = array_ops.squeeze(cropped, axis=[0])

    return cropped


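# Illustrative sketch (not part of the original module): the crop above is a
# single `slice` with stacked `begin`/`size` vectors. `_demo_crop_slice` is a
# hypothetical helper cropping a 2x2 box from the top-left corner.
def _demo_crop_slice(images):
  # begin = [batch, offset_height, offset_width, channel]
  begin = array_ops.stack([0, 0, 0, 0])
  size = array_ops.stack(
      [array_ops.shape(images)[0], 2, 2, array_ops.shape(images)[3]])
  return array_ops.slice(images, begin, size)

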
@tf_export(
    'image.resize_with_crop_or_pad',
    v1=['image.resize_with_crop_or_pad', 'image.resize_image_with_crop_or_pad'])
@dispatch.add_dispatch_support
def resize_image_with_crop_or_pad(image, target_height, target_width):
  """Crops and/or pads an image to a target width and height.

  Resizes an image to a target width and height by either centrally
  cropping the image or padding it evenly with zeros.

  If `width` or `height` is greater than the specified `target_width` or
  `target_height` respectively, this op centrally crops along that dimension.

  For example:

  >>> image = np.arange(75).reshape(5, 5, 3)  # create 3-D image input
  >>> image[:,:,0]  # print first channel just for demo purposes
  array([[ 0,  3,  6,  9, 12],
         [15, 18, 21, 24, 27],
         [30, 33, 36, 39, 42],
         [45, 48, 51, 54, 57],
         [60, 63, 66, 69, 72]])
  >>> image = tf.image.resize_with_crop_or_pad(image, 3, 3)  # crop
  >>> # print first channel for demo purposes; centrally cropped output
  >>> image[:,:,0]
  <tf.Tensor: shape=(3, 3), dtype=int64, numpy=
  array([[18, 21, 24],
         [33, 36, 39],
         [48, 51, 54]])>

  If `width` or `height` is smaller than the specified `target_width` or
  `target_height` respectively, this op centrally pads with 0 along that
  dimension.

  For example:

  >>> image = np.arange(1, 28).reshape(3, 3, 3)  # create 3-D image input
  >>> image[:,:,0]  # print first channel just for demo purposes
  array([[ 1,  4,  7],
         [10, 13, 16],
         [19, 22, 25]])
  >>> image = tf.image.resize_with_crop_or_pad(image, 5, 5)  # pad
  >>> # print first channel for demo purposes; we should see 0 paddings
  >>> image[:,:,0]
  <tf.Tensor: shape=(5, 5), dtype=int64, numpy=
  array([[ 0,  0,  0,  0,  0],
         [ 0,  1,  4,  7,  0],
         [ 0, 10, 13, 16,  0],
         [ 0, 19, 22, 25,  0],
         [ 0,  0,  0,  0,  0]])>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    target_height: Target height.
    target_width: Target width.

  Raises:
    ValueError: if `target_height` or `target_width` are zero or negative.

  Returns:
    Cropped and/or padded image.
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """
  with ops.name_scope(None, 'resize_image_with_crop_or_pad', [image]):
    image = ops.convert_to_tensor(image, name='image')
    image_shape = image.get_shape()
    is_batch = True
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
          image_shape)

    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
    assert_ops += _assert(target_width > 0, ValueError,
                          'target_width must be > 0.')
    assert_ops += _assert(target_height > 0, ValueError,
                          'target_height must be > 0.')

    image = control_flow_ops.with_dependencies(assert_ops, image)
    # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
    # Make sure our checks come first, so that error messages are clearer.
    if _is_tensor(target_height):
      target_height = control_flow_ops.with_dependencies(
          assert_ops, target_height)
    if _is_tensor(target_width):
      target_width = control_flow_ops.with_dependencies(assert_ops,
                                                        target_width)

    def max_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.maximum(x, y)
      else:
        return max(x, y)

    def min_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.minimum(x, y)
      else:
        return min(x, y)

    def equal_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.equal(x, y)
      else:
        return x == y

    _, height, width, _ = _ImageDimensions(image, rank=4)
    width_diff = target_width - width
    offset_crop_width = max_(-width_diff // 2, 0)
    offset_pad_width = max_(width_diff // 2, 0)

    height_diff = target_height - height
    offset_crop_height = max_(-height_diff // 2, 0)
    offset_pad_height = max_(height_diff // 2, 0)

    # Maybe crop if needed.
    cropped = crop_to_bounding_box(image, offset_crop_height, offset_crop_width,
                                   min_(target_height, height),
                                   min_(target_width, width))

    # Maybe pad if needed.
    resized = pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width,
                                  target_height, target_width)

    # In theory all the checks below are redundant.
    if resized.get_shape().ndims is None:
      raise ValueError('resized contains no shape.')

    _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4)

    assert_ops = []
    assert_ops += _assert(
        equal_(resized_height, target_height), ValueError,
        'resized height is not correct.')
    assert_ops += _assert(
        equal_(resized_width, target_width), ValueError,
        'resized width is not correct.')

    resized = control_flow_ops.with_dependencies(assert_ops, resized)

    if not is_batch:
      resized = array_ops.squeeze(resized, axis=[0])

    return resized


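# Illustrative sketch (not part of the original module): the offset
# arithmetic above, restated in plain Python for one dimension. A positive
# difference means padding and a negative one means cropping; each is split
# evenly between the two sides. `_demo_crop_or_pad_offsets` is hypothetical.
def _demo_crop_or_pad_offsets(size, target_size):
  diff = target_size - size
  offset_crop = max(-diff // 2, 0)  # nonzero only when size > target_size
  offset_pad = max(diff // 2, 0)  # nonzero only when size < target_size
  # e.g. size=5, target_size=3 -> crop offset 1; size=3, target_size=5 ->
  # pad offset 1, matching the docstring examples above.
  return offset_crop, offset_pad

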
@tf_export(v1=['image.ResizeMethod'])
class ResizeMethodV1(object):
  """See `v1.image.resize` for details."""
  BILINEAR = 0
  NEAREST_NEIGHBOR = 1
  BICUBIC = 2
  AREA = 3


@tf_export('image.ResizeMethod', v1=[])
class ResizeMethod(object):
  """See `tf.image.resize` for details."""
  BILINEAR = 'bilinear'
  NEAREST_NEIGHBOR = 'nearest'
  BICUBIC = 'bicubic'
  AREA = 'area'
  LANCZOS3 = 'lanczos3'
  LANCZOS5 = 'lanczos5'
  GAUSSIAN = 'gaussian'
  MITCHELLCUBIC = 'mitchellcubic'


def _resize_images_common(images, resizer_fn, size, preserve_aspect_ratio, name,
                          skip_resize_if_same):
  """Core functionality for v1 and v2 resize functions."""
  with ops.name_scope(name, 'resize', [images, size]):
    images = ops.convert_to_tensor(images, name='images')
    if images.get_shape().ndims is None:
      raise ValueError('\'images\' contains no shape.')
    # TODO(shlens): Migrate this functionality to the underlying Ops.
    is_batch = True
    if images.get_shape().ndims == 3:
      is_batch = False
      images = array_ops.expand_dims(images, 0)
    elif images.get_shape().ndims != 4:
      raise ValueError('\'images\' must have either 3 or 4 dimensions.')

    _, height, width, _ = images.get_shape().as_list()

    try:
      size = ops.convert_to_tensor(size, dtypes.int32, name='size')
    except (TypeError, ValueError):
      raise ValueError('\'size\' must be a 1-D int32 Tensor')
    if not size.get_shape().is_compatible_with([2]):
      raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: '
                       'new_height, new_width')

    if preserve_aspect_ratio:
      # Get the current shapes of the image, even if dynamic.
      _, current_height, current_width, _ = _ImageDimensions(images, rank=4)

      # Do the computation to find the right scale and height/width.
      scale_factor_height = (
          math_ops.cast(size[0], dtypes.float32) /
          math_ops.cast(current_height, dtypes.float32))
      scale_factor_width = (
          math_ops.cast(size[1], dtypes.float32) /
          math_ops.cast(current_width, dtypes.float32))
      scale_factor = math_ops.minimum(scale_factor_height, scale_factor_width)
      scaled_height_const = math_ops.cast(
          math_ops.round(scale_factor *
                         math_ops.cast(current_height, dtypes.float32)),
          dtypes.int32)
      scaled_width_const = math_ops.cast(
          math_ops.round(scale_factor *
                         math_ops.cast(current_width, dtypes.float32)),
          dtypes.int32)

      # NOTE: Reset the size and other constants used later.
      size = ops.convert_to_tensor([scaled_height_const, scaled_width_const],
                                   dtypes.int32,
                                   name='size')

    size_const_as_shape = tensor_util.constant_value_as_shape(size)
    new_height_const = tensor_shape.dimension_at_index(size_const_as_shape,
                                                       0).value
    new_width_const = tensor_shape.dimension_at_index(size_const_as_shape,
                                                      1).value

    # If we can determine that the height and width will be unmodified by this
    # transformation, we avoid performing the resize.
    if skip_resize_if_same and all(
        x is not None
        for x in [new_width_const, width, new_height_const, height]) and (
            width == new_width_const and height == new_height_const):
      if not is_batch:
        images = array_ops.squeeze(images, axis=[0])
      return images

    images = resizer_fn(images, size)

    # NOTE(mrry): The shape functions for the resize ops cannot unpack
    # the packed values in `new_size`, so set the shape here.
    images.set_shape([None, new_height_const, new_width_const, None])

    if not is_batch:
      images = array_ops.squeeze(images, axis=[0])
    return images


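# Illustrative sketch (not part of the original module): the
# preserve_aspect_ratio computation above, restated in plain Python. The
# smaller of the two per-axis scale factors is applied to both axes, so the
# result fits inside `size` without distortion. Hypothetical helper:
def _demo_preserve_aspect_ratio(height, width, target_height, target_width):
  scale = min(target_height / height, target_width / width)
  # e.g. a 400x600 image aimed at (200, 200) scales by 1/3 to 133x200.
  return int(round(scale * height)), int(round(scale * width))

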
@tf_export(v1=['image.resize_images', 'image.resize'])
@dispatch.add_dispatch_support
def resize_images(images,
                  size,
                  method=ResizeMethodV1.BILINEAR,
                  align_corners=False,
                  preserve_aspect_ratio=False,
                  name=None):
  """Resize `images` to `size` using the specified `method`.

  Resized images will be distorted if their original aspect ratio is not
  the same as `size`.  To avoid distortions see
  `tf.image.resize_with_pad` or `tf.image.resize_with_crop_or_pad`.

  The `method` can be one of:

  *   <b>`tf.image.ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.](
    https://en.wikipedia.org/wiki/Bilinear_interpolation)
  *   <b>`tf.image.ResizeMethod.NEAREST_NEIGHBOR`</b>: [
    Nearest neighbor interpolation.](
    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
  *   <b>`tf.image.ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.](
    https://en.wikipedia.org/wiki/Bicubic_interpolation)
  *   <b>`tf.image.ResizeMethod.AREA`</b>: Area interpolation.

  The return value has the same type as `images` if `method` is
  `tf.image.ResizeMethod.NEAREST_NEIGHBOR`. It will also have the same type
  as `images` if the size of `images` can be statically determined to be the
  same as `size`, because `images` is returned in this case. Otherwise, the
  return value has type `float32`.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The new
      size for the images.
    method: ResizeMethod.  Defaults to `tf.image.ResizeMethod.BILINEAR`.
    align_corners: bool.  If True, the centers of the 4 corner pixels of the
      input and output tensors are aligned, preserving the values at the corner
      pixels. Defaults to `False`.
    preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set,
      then `images` will be resized to a size that fits in `size` while
      preserving the aspect ratio of the original image. Scales up the image if
      `size` is bigger than the current size of the `image`. Defaults to False.
    name: A name for this operation (optional).

  Raises:
    ValueError: if the shape of `images` is incompatible with the
      shape arguments to this function.
    ValueError: if `size` has invalid shape or type.
    ValueError: if an unsupported resize method is specified.

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """

  def resize_fn(images_t, new_size):
    """Legacy resize core function, passed to _resize_images_common."""
    if method == ResizeMethodV1.BILINEAR or method == ResizeMethod.BILINEAR:
      return gen_image_ops.resize_bilinear(
          images_t, new_size, align_corners=align_corners)
    elif (method == ResizeMethodV1.NEAREST_NEIGHBOR or
          method == ResizeMethod.NEAREST_NEIGHBOR):
      return gen_image_ops.resize_nearest_neighbor(
          images_t, new_size, align_corners=align_corners)
    elif method == ResizeMethodV1.BICUBIC or method == ResizeMethod.BICUBIC:
      return gen_image_ops.resize_bicubic(
          images_t, new_size, align_corners=align_corners)
    elif method == ResizeMethodV1.AREA or method == ResizeMethod.AREA:
      return gen_image_ops.resize_area(
          images_t, new_size, align_corners=align_corners)
    else:
      raise ValueError('Resize method is not implemented: {}'.format(method))

  return _resize_images_common(
      images,
      resize_fn,
      size,
      preserve_aspect_ratio=preserve_aspect_ratio,
      name=name,
      skip_resize_if_same=True)

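# A minimal usage sketch for the v1 endpoint above (illustrative only; `img`
# is a hypothetical tensor, not part of this module):
#
#   img = tf.ones([64, 64, 3])
#   out = tf.compat.v1.image.resize_images(img, [32, 32])
#   # out has shape [32, 32, 3]; dtype is float32 unless the resize is a
#   # statically-detected no-op or method is NEAREST_NEIGHBOR, per the
#   # docstring above.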

@tf_export('image.resize', v1=[])
@dispatch.add_dispatch_support
def resize_images_v2(images,
                     size,
                     method=ResizeMethod.BILINEAR,
                     preserve_aspect_ratio=False,
                     antialias=False,
                     name=None):
  """Resize `images` to `size` using the specified `method`.

  Resized images will be distorted if their original aspect ratio is not
  the same as `size`.  To avoid distortions see
  `tf.image.resize_with_pad`.

  >>> image = tf.constant([
  ...  [1,0,0,0,0],
  ...  [0,1,0,0,0],
  ...  [0,0,1,0,0],
  ...  [0,0,0,1,0],
  ...  [0,0,0,0,1],
  ... ])
  >>> # Add "batch" and "channels" dimensions
  >>> image = image[tf.newaxis, ..., tf.newaxis]
  >>> image.shape.as_list()  # [batch, height, width, channels]
  [1, 5, 5, 1]
  >>> tf.image.resize(image, [3,5])[0,...,0].numpy()
  array([[0.6666667, 0.3333333, 0.       , 0.       , 0.       ],
         [0.       , 0.       , 1.       , 0.       , 0.       ],
         [0.       , 0.       , 0.       , 0.3333335, 0.6666665]],
        dtype=float32)

  It works equally well with a single image instead of a batch of images:

  >>> tf.image.resize(image[0], [3,5]).shape.as_list()
  [3, 5, 1]

  When `antialias` is true, the sampling filter will anti-alias the input image
  as well as interpolate.  When downsampling an image with [anti-aliasing](
  https://en.wikipedia.org/wiki/Spatial_anti-aliasing) the sampling filter
  kernel is scaled in order to properly anti-alias the input image signal.
  `antialias` has no effect when upsampling an image:

  >>> a = tf.image.resize(image, [5,10])
  >>> b = tf.image.resize(image, [5,10], antialias=True)
  >>> tf.reduce_max(abs(a - b)).numpy()
  0.0

  The `method` argument expects an item from the `image.ResizeMethod` enum, or
  the string equivalent. The options are:

  *   <b>`bilinear`</b>: [Bilinear interpolation.](
    https://en.wikipedia.org/wiki/Bilinear_interpolation) If `antialias` is
    true, becomes a hat/tent filter function with radius 1 when downsampling.
  *   <b>`lanczos3`</b>:  [Lanczos kernel](
    https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 3.
    High-quality practical filter but may have some ringing, especially on
    synthetic images.
  *   <b>`lanczos5`</b>: [Lanczos kernel](
    https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 5.
    Very-high-quality filter but may have stronger ringing.
  *   <b>`bicubic`</b>: [Cubic interpolant](
    https://en.wikipedia.org/wiki/Bicubic_interpolation) of Keys. Equivalent to
    Catmull-Rom kernel. Reasonably good quality and faster than Lanczos3Kernel,
    particularly when upsampling.
  *   <b>`gaussian`</b>: [Gaussian kernel](
    https://en.wikipedia.org/wiki/Gaussian_filter) with radius 3,
    sigma = 1.5 / 3.0.
  *   <b>`nearest`</b>: [Nearest neighbor interpolation.](
    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
    `antialias` has no effect when used with nearest neighbor interpolation.
  *   <b>`area`</b>: Anti-aliased resampling with area interpolation.
    `antialias` has no effect when used with area interpolation; it
    always anti-aliases.
  *   <b>`mitchellcubic`</b>: Mitchell-Netravali Cubic non-interpolating filter.
    For synthetic images (especially those lacking proper prefiltering), less
    ringing than Keys cubic kernel but less sharp.

  Note: Near image edges the filtering kernel may be partially outside the
  image boundaries. For these pixels, only input pixels inside the image will be
  included in the filter sum, and the output value will be appropriately
  normalized.

  The return value has type `float32`, unless the `method` is
  `ResizeMethod.NEAREST_NEIGHBOR`, in which case the return dtype is the dtype
  of `images`:

  >>> nn = tf.image.resize(image, [5,7], method='nearest')
  >>> nn[0,...,0].numpy()
  array([[1, 0, 0, 0, 0, 0, 0],
         [0, 1, 1, 0, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0],
         [0, 0, 0, 0, 1, 1, 0],
         [0, 0, 0, 0, 0, 0, 1]], dtype=int32)

  With `preserve_aspect_ratio=True`, the aspect ratio is preserved, so `size`
  is the maximum for each dimension:

  >>> max_10_20 = tf.image.resize(image, [10,20], preserve_aspect_ratio=True)
  >>> max_10_20.shape.as_list()
  [1, 10, 10, 1]

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The new
      size for the images.
    method: An `image.ResizeMethod`, or string equivalent.  Defaults to
      `bilinear`.
    preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set,
      then `images` will be resized to a size that fits in `size` while
      preserving the aspect ratio of the original image. Scales up the image if
      `size` is bigger than the current size of the `image`. Defaults to False.
    antialias: Whether to use an anti-aliasing filter when downsampling an
      image.
    name: A name for this operation (optional).

  Raises:
    ValueError: if the shape of `images` is incompatible with the
      shape arguments to this function.
    ValueError: if `size` has an invalid shape or type.
    ValueError: if an unsupported resize method is specified.

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """

  def resize_fn(images_t, new_size):
    """Resize core function, passed to _resize_images_common."""
    scale_and_translate_methods = [
        ResizeMethod.LANCZOS3, ResizeMethod.LANCZOS5, ResizeMethod.GAUSSIAN,
        ResizeMethod.MITCHELLCUBIC
    ]

    def resize_with_scale_and_translate(method):
      scale = (
          math_ops.cast(new_size, dtype=dtypes.float32) /
          math_ops.cast(array_ops.shape(images_t)[1:3], dtype=dtypes.float32))
      return gen_image_ops.scale_and_translate(
          images_t,
          new_size,
          scale,
          array_ops.zeros([2]),
          kernel_type=method,
          antialias=antialias)

    if method == ResizeMethod.BILINEAR:
      if antialias:
        return resize_with_scale_and_translate('triangle')
      else:
        return gen_image_ops.resize_bilinear(
            images_t, new_size, half_pixel_centers=True)
    elif method == ResizeMethod.NEAREST_NEIGHBOR:
      return gen_image_ops.resize_nearest_neighbor(
          images_t, new_size, half_pixel_centers=True)
    elif method == ResizeMethod.BICUBIC:
      if antialias:
        return resize_with_scale_and_translate('keyscubic')
      else:
        return gen_image_ops.resize_bicubic(
            images_t, new_size, half_pixel_centers=True)
    elif method == ResizeMethod.AREA:
      return gen_image_ops.resize_area(images_t, new_size)
    elif method in scale_and_translate_methods:
      return resize_with_scale_and_translate(method)
    else:
      raise ValueError('Resize method is not implemented: {}'.format(method))

  return _resize_images_common(
      images,
      resize_fn,
      size,
      preserve_aspect_ratio=preserve_aspect_ratio,
      name=name,
      skip_resize_if_same=False)


def _resize_image_with_pad_common(image, target_height, target_width,
                                  resize_fn):
  """Core functionality for v1 and v2 resize_image_with_pad functions."""
  with ops.name_scope(None, 'resize_image_with_pad', [image]):
    image = ops.convert_to_tensor(image, name='image')
    image_shape = image.get_shape()
    is_batch = True
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
          image_shape)

    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
    assert_ops += _assert(target_width > 0, ValueError,
                          'target_width must be > 0.')
    assert_ops += _assert(target_height > 0, ValueError,
                          'target_height must be > 0.')

    image = control_flow_ops.with_dependencies(assert_ops, image)

    def max_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.maximum(x, y)
      else:
        return max(x, y)

    _, height, width, _ = _ImageDimensions(image, rank=4)

    # Convert values to float, to ease divisions.
    f_height = math_ops.cast(height, dtype=dtypes.float32)
    f_width = math_ops.cast(width, dtype=dtypes.float32)
    f_target_height = math_ops.cast(target_height, dtype=dtypes.float32)
    f_target_width = math_ops.cast(target_width, dtype=dtypes.float32)

    # Find the ratio by which the image must be adjusted
    # to fit within the target.
    ratio = max_(f_width / f_target_width, f_height / f_target_height)
    resized_height_float = f_height / ratio
    resized_width_float = f_width / ratio
    resized_height = math_ops.cast(
        math_ops.floor(resized_height_float), dtype=dtypes.int32)
    resized_width = math_ops.cast(
        math_ops.floor(resized_width_float), dtype=dtypes.int32)

    padding_height = (f_target_height - resized_height_float) / 2
    padding_width = (f_target_width - resized_width_float) / 2
    f_padding_height = math_ops.floor(padding_height)
    f_padding_width = math_ops.floor(padding_width)
    p_height = max_(0, math_ops.cast(f_padding_height, dtype=dtypes.int32))
    p_width = max_(0, math_ops.cast(f_padding_width, dtype=dtypes.int32))

    # Resize first, then pad to meet requested dimensions.
    resized = resize_fn(image, [resized_height, resized_width])

    padded = pad_to_bounding_box(resized, p_height, p_width, target_height,
                                 target_width)

    if padded.get_shape().ndims is None:
      raise ValueError('padded contains no shape.')

    _ImageDimensions(padded, rank=4)

    if not is_batch:
      padded = array_ops.squeeze(padded, axis=[0])

    return padded

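# Worked example of the ratio/padding arithmetic above (values chosen for
# illustration only): for a 100x50 (height x width) image and a 50x50 target,
# ratio = max(50 / 50, 100 / 50) = 2, so the image is resized to 50x25 and
# then offset by floor((50 - 25) / 2) = 12 columns before zero-padding out to
# the full 50x50 target.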

@tf_export(v1=['image.resize_image_with_pad'])
@dispatch.add_dispatch_support
def resize_image_with_pad_v1(image,
                             target_height,
                             target_width,
                             method=ResizeMethodV1.BILINEAR,
                             align_corners=False):
  """Resizes and pads an image to a target width and height.

  Resizes an image to a target width and height by keeping
  the aspect ratio the same without distortion. If the target
  dimensions don't match the image dimensions, the image
  is resized and then padded with zeroes to match requested
  dimensions.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    target_height: Target height.
    target_width: Target width.
    method: Method to use for resizing image. See `resize_images()`.
    align_corners: bool.  If True, the centers of the 4 corner pixels of the
      input and output tensors are aligned, preserving the values at the corner
      pixels. Defaults to `False`.

  Raises:
    ValueError: if `target_height` or `target_width` are zero or negative.

  Returns:
    Resized and padded image.
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """

  def _resize_fn(im, new_size):
    return resize_images(im, new_size, method, align_corners=align_corners)

  return _resize_image_with_pad_common(image, target_height, target_width,
                                       _resize_fn)

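# Usage sketch for the v1 endpoint above (illustrative; `img` is a
# hypothetical tensor):
#
#   img = tf.ones([100, 50, 3])
#   out = tf.compat.v1.image.resize_image_with_pad(img, 50, 50)
#   # out has shape [50, 50, 3]: the aspect ratio is kept, so the image is
#   # resized to 50x25 and zero-padded to 50x50.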

@tf_export('image.resize_with_pad', v1=[])
@dispatch.add_dispatch_support
def resize_image_with_pad_v2(image,
                             target_height,
                             target_width,
                             method=ResizeMethod.BILINEAR,
                             antialias=False):
  """Resizes and pads an image to a target width and height.

  Resizes an image to a target width and height by keeping
  the aspect ratio the same without distortion. If the target
  dimensions don't match the image dimensions, the image
  is resized and then padded with zeroes to match requested
  dimensions.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    target_height: Target height.
    target_width: Target width.
    method: Method to use for resizing image. See `image.resize()`.
    antialias: Whether to use anti-aliasing when resizing. See `image.resize()`.

  Raises:
    ValueError: if `target_height` or `target_width` are zero or negative.

  Returns:
    Resized and padded image.
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """

  def _resize_fn(im, new_size):
    return resize_images_v2(im, new_size, method, antialias=antialias)

  return _resize_image_with_pad_common(image, target_height, target_width,
                                       _resize_fn)

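# Usage sketch for the v2 endpoint above (illustrative; mirrors the v1 sketch
# but uses the TF2 `method`/`antialias` arguments):
#
#   img = tf.ones([100, 50, 3])
#   out = tf.image.resize_with_pad(img, 50, 50, method='bilinear',
#                                  antialias=True)
#   # out has shape [50, 50, 3].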

@tf_export('image.per_image_standardization')
@dispatch.add_dispatch_support
def per_image_standardization(image):
  """Linearly scales each image in `image` to have mean 0 and variance 1.

  For each 3-D image `x` in `image`, computes `(x - mean) / adjusted_stddev`,
  where

  - `mean` is the average of all values in `x`
  - `adjusted_stddev = max(stddev, 1.0/sqrt(N))` is capped away from 0 to
    protect against division by 0 when handling uniform images
    - `N` is the number of elements in `x`
    - `stddev` is the standard deviation of all values in `x`

  Example Usage:

  >>> image = tf.constant(np.arange(1, 13, dtype=np.int32), shape=[2, 2, 3])
  >>> image # 3-D tensor
  <tf.Tensor: shape=(2, 2, 3), dtype=int32, numpy=
  array([[[ 1,  2,  3],
          [ 4,  5,  6]],
         [[ 7,  8,  9],
          [10, 11, 12]]], dtype=int32)>
  >>> new_image = tf.image.per_image_standardization(image)
  >>> new_image # 3-D tensor with mean ~= 0 and variance ~= 1
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[-1.593255  , -1.3035723 , -1.0138896 ],
          [-0.7242068 , -0.4345241 , -0.14484136]],
         [[ 0.14484136,  0.4345241 ,  0.7242068 ],
          [ 1.0138896 ,  1.3035723 ,  1.593255  ]]], dtype=float32)>

  Args:
    image: An n-D `Tensor` with at least 3 dimensions, the last 3 of which are
      the dimensions of each image.

  Returns:
    A `Tensor` with the same shape as `image`, with dtype `float32`.

  Raises:
    ValueError: The shape of `image` has fewer than 3 dimensions.
  """
  with ops.name_scope(None, 'per_image_standardization', [image]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)

    image = math_ops.cast(image, dtype=dtypes.float32)
    num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:])
    image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True)

    # Apply a minimum normalization that protects us against uniform images.
    stddev = math_ops.reduce_std(image, axis=[-1, -2, -3], keepdims=True)
    min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
    adjusted_stddev = math_ops.maximum(stddev, min_stddev)

    image -= image_mean
    image = math_ops.divide(image, adjusted_stddev, name=scope)
    return image


@tf_export('image.random_brightness')
@dispatch.add_dispatch_support
def random_brightness(image, max_delta, seed=None):
  """Adjust the brightness of images by a random factor.

  Equivalent to `adjust_brightness()` using a `delta` randomly picked in the
  interval `[-max_delta, max_delta)`.

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_brightness`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function is
  called, and independent of global seed settings (e.g. `tf.random.set_seed`).

  Args:
    image: An image or images to adjust.
    max_delta: float, must be non-negative.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.random_brightness(x, 0.2)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>

  Returns:
    The brightness-adjusted image(s).

  Raises:
    ValueError: if `max_delta` is negative.
  """
  if max_delta < 0:
    raise ValueError('max_delta must be non-negative.')

  delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed)
  return adjust_brightness(image, delta)

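# Usage sketch (the doctest above elides its output because the result is
# random; `img` is a hypothetical tensor):
#
#   img = tf.zeros([2, 2, 3])
#   out = tf.image.random_brightness(img, max_delta=0.2, seed=42)
#   # Every element of out equals one delta drawn from [-0.2, 0.2); seeding
#   # follows tf.compat.v1.set_random_seed semantics, so repeated calls are
#   # not guaranteed identical. Use stateless_random_brightness for that.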

@tf_export('image.stateless_random_brightness', v1=[])
@dispatch.add_dispatch_support
def stateless_random_brightness(image, max_delta, seed):
  """Adjust the brightness of images by a random factor deterministically.

  Equivalent to `adjust_brightness()` using a `delta` randomly picked in the
  interval `[-max_delta, max_delta)`.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> seed = (1, 2)
  >>> tf.image.stateless_random_brightness(x, 0.2, seed)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.1376241,  2.1376243,  3.1376243],
          [ 4.1376243,  5.1376243,  6.1376243]],
         [[ 7.1376243,  8.137624 ,  9.137624 ],
          [10.137624 , 11.137624 , 12.137624 ]]], dtype=float32)>

  Args:
    image: An image or images to adjust.
    max_delta: float, must be non-negative.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    The brightness-adjusted image(s).

  Raises:
    ValueError: if `max_delta` is negative.
  """
  if max_delta < 0:
    raise ValueError('max_delta must be non-negative.')

  delta = stateless_random_ops.stateless_random_uniform(
      shape=[], minval=-max_delta, maxval=max_delta, seed=seed)
  return adjust_brightness(image, delta)


@tf_export('image.random_contrast')
@dispatch.add_dispatch_support
def random_contrast(image, lower, upper, seed=None):
  """Adjust the contrast of an image or images by a random factor.

  Equivalent to `adjust_contrast()` but uses a `contrast_factor` randomly
  picked in the interval `[lower, upper)`.

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_contrast`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function is
  called, and independent of global seed settings (e.g. `tf.random.set_seed`).

  Args:
    image: An image tensor with 3 or more dimensions.
    lower: float.  Lower bound for the random contrast factor.
    upper: float.  Upper bound for the random contrast factor.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.random_contrast(x, 0.2, 0.5)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>

  Returns:
    The contrast-adjusted image(s).

  Raises:
    ValueError: if `upper <= lower` or if `lower < 0`.
  """
  if upper <= lower:
    raise ValueError('upper must be > lower.')

  if lower < 0:
    raise ValueError('lower must be non-negative.')

  contrast_factor = random_ops.random_uniform([], lower, upper, seed=seed)
  return adjust_contrast(image, contrast_factor)

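# Usage sketch (output elided in the doctest above because it is random):
#
#   img = tf.random.uniform([2, 2, 3])
#   out = tf.image.random_contrast(img, lower=0.2, upper=0.5)
#   # Equivalent to adjust_contrast(img, f) for one factor f drawn uniformly
#   # from [0.2, 0.5).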

@tf_export('image.stateless_random_contrast', v1=[])
@dispatch.add_dispatch_support
def stateless_random_contrast(image, lower, upper, seed):
  """Adjust the contrast of images by a random factor deterministically.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Args:
    image: An image tensor with 3 or more dimensions.
    lower: float.  Lower bound for the random contrast factor.
    upper: float.  Upper bound for the random contrast factor.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> seed = (1, 2)
  >>> tf.image.stateless_random_contrast(x, 0.2, 0.5, seed)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[3.4605184, 4.4605184, 5.4605184],
          [4.820173 , 5.820173 , 6.820173 ]],
         [[6.179827 , 7.179827 , 8.179828 ],
          [7.5394816, 8.539482 , 9.539482 ]]], dtype=float32)>

  Returns:
    The contrast-adjusted image(s).

  Raises:
    ValueError: if `upper <= lower` or if `lower < 0`.
  """
  if upper <= lower:
    raise ValueError('upper must be > lower.')

  if lower < 0:
    raise ValueError('lower must be non-negative.')

  contrast_factor = stateless_random_ops.stateless_random_uniform(
      shape=[], minval=lower, maxval=upper, seed=seed)
  return adjust_contrast(image, contrast_factor)


@tf_export('image.adjust_brightness')
@dispatch.add_dispatch_support
def adjust_brightness(image, delta):
  """Adjust the brightness of RGB or Grayscale images.

  This is a convenience method that converts RGB images to float
  representation, adjusts their brightness, and then converts them back to the
  original data type. If several adjustments are chained, it is advisable to
  minimize the number of redundant conversions.

  The value `delta` is added to all components of the tensor `image`. `image` is
  converted to `float` and scaled appropriately if it is in fixed-point
  representation, and `delta` is converted to the same data type. For regular
  images, `delta` should be in the range `(-1,1)`, as it is added to the image
  in floating point representation, where pixel values are in the `[0,1)` range.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.adjust_brightness(x, delta=0.1)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.1,  2.1,  3.1],
          [ 4.1,  5.1,  6.1]],
         [[ 7.1,  8.1,  9.1],
          [10.1, 11.1, 12.1]]], dtype=float32)>

  Args:
    image: RGB image or images to adjust.
    delta: A scalar. Amount to add to the pixel values.

  Returns:
    A brightness-adjusted tensor of the same shape and type as `image`.
  """
  with ops.name_scope(None, 'adjust_brightness', [image, delta]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Remember the original dtype so we can convert back if needed.
    orig_dtype = image.dtype

    if orig_dtype in [dtypes.float16, dtypes.float32]:
      flt_image = image
    else:
      flt_image = convert_image_dtype(image, dtypes.float32)

    adjusted = math_ops.add(
        flt_image, math_ops.cast(delta, flt_image.dtype), name=name)

    return convert_image_dtype(adjusted, orig_dtype, saturate=True)

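# A hedged sketch of the fixed-point behavior described above: for integer
# images, `delta` is interpreted in the [0, 1) float representation, so the
# effective shift scales with the dtype's maximum (values below are
# approximate, up to conversion rounding):
#
#   img = tf.constant([[[100, 100, 100]]], dtype=tf.uint8)
#   out = tf.image.adjust_brightness(img, delta=0.1)
#   # A delta of 0.1 corresponds to roughly 0.1 * 255 ~= 25 gray levels, so
#   # out is about [[[125, 125, 125]]], still uint8, saturating at 0/255.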

@tf_export('image.adjust_contrast')
@dispatch.add_dispatch_support
def adjust_contrast(images, contrast_factor):
  """Adjust contrast of RGB or grayscale images.

  This is a convenience method that converts RGB images to float
  representation, adjusts their contrast, and then converts them back to the
  original data type. If several adjustments are chained, it is advisable to
  minimize the number of redundant conversions.

  `images` is a tensor of at least 3 dimensions.  The last 3 dimensions are
  interpreted as `[height, width, channels]`.  The other dimensions only
  represent a collection of images, such as `[batch, height, width, channels]`.

  Contrast is adjusted independently for each channel of each image.

  For each channel, this Op computes the mean of the image pixels in the
  channel and then adjusts each component `x` of each pixel to
  `(x - mean) * contrast_factor + mean`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.adjust_contrast(x, 2)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[-3.5, -2.5, -1.5],
          [ 2.5,  3.5,  4.5]],
         [[ 8.5,  9.5, 10.5],
          [14.5, 15.5, 16.5]]], dtype=float32)>

  Args:
    images: Images to adjust.  At least 3-D.
    contrast_factor: A float multiplier for adjusting contrast.

  Returns:
    The contrast-adjusted image or images.
  """
  with ops.name_scope(None, 'adjust_contrast',
                      [images, contrast_factor]) as name:
    images = ops.convert_to_tensor(images, name='images')
    # Remember the original dtype so we can convert back if needed.
    orig_dtype = images.dtype

    if orig_dtype in (dtypes.float16, dtypes.float32):
      flt_images = images
    else:
      flt_images = convert_image_dtype(images, dtypes.float32)

    adjusted = gen_image_ops.adjust_contrastv2(
        flt_images, contrast_factor=contrast_factor, name=name)

    return convert_image_dtype(adjusted, orig_dtype, saturate=True)


@tf_export('image.adjust_gamma')
@dispatch.add_dispatch_support
def adjust_gamma(image, gamma=1, gain=1):
  """Performs [Gamma Correction](http://en.wikipedia.org/wiki/Gamma_correction)
  on the input image.

  Also known as Power Law Transform. This function converts the
  input images at first to float representation, then transforms them
  pixelwise according to the equation `Out = gain * In**gamma`,
  and then converts them back to the original data type.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.adjust_gamma(x, 0.2)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[1.       , 1.1486983, 1.2457309],
          [1.319508 , 1.3797297, 1.4309691]],
         [[1.4757731, 1.5157166, 1.5518456],
          [1.5848932, 1.6153942, 1.6437519]]], dtype=float32)>

  Args:
    image : RGB image or images to adjust.
    gamma : A scalar or tensor. Non-negative real number.
    gain  : A scalar or tensor. The constant multiplier.

  Returns:
    A Tensor. A Gamma-adjusted tensor of the same shape and type as `image`.

  Raises:
    ValueError: If gamma is negative.
  Notes:
    For gamma greater than 1, the histogram will shift towards left and
    the output image will be darker than the input image.
    For gamma less than 1, the histogram will shift towards right and
    the output image will be brighter than the input image.
  References:
    [Wikipedia](http://en.wikipedia.org/wiki/Gamma_correction)
  """

  with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Remember the original dtype so we can convert back if needed.
    orig_dtype = image.dtype

    if orig_dtype in [dtypes.float16, dtypes.float32]:
      flt_image = image
    else:
      flt_image = convert_image_dtype(image, dtypes.float32)

    assert_op = _assert(gamma >= 0, ValueError,
                        'Gamma should be a non-negative real number.')
    if assert_op:
      gamma = control_flow_ops.with_dependencies(assert_op, gamma)

    # According to the definition of gamma correction.
    adjusted_img = gain * flt_image**gamma

    return convert_image_dtype(adjusted_img, orig_dtype, saturate=True)

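# A short sketch of the Notes above, using hypothetical in-range values:
# with inputs in [0, 1], gamma > 1 darkens and gamma < 1 brightens.
#
#   x = tf.constant([[[0.25, 0.5, 0.75]]])
#   tf.image.adjust_gamma(x, gamma=2.0)  # ~[0.0625, 0.25, 0.5625] (darker)
#   tf.image.adjust_gamma(x, gamma=0.5)  # ~[0.5, 0.707, 0.866] (brighter)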

@tf_export('image.convert_image_dtype')
@dispatch.add_dispatch_support
def convert_image_dtype(image, dtype, saturate=False, name=None):
  """Convert `image` to `dtype`, scaling its values if needed.

  The operation supports data types (for `image` and `dtype`) of
  `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`,
  `float16`, `float32`, `float64`, `bfloat16`.

  Images that are represented using floating point values are expected to have
  values in the range [0,1). Image data stored in integer data types is
  expected to have values in the range `[0,MAX]`, where `MAX` is the largest
  positive representable number for the data type.

  This op converts between data types, scaling the values appropriately before
  casting.

  Usage Example:

  >>> x = [[[1, 2, 3], [4, 5, 6]],
  ...      [[7, 8, 9], [10, 11, 12]]]
  >>> x_int8 = tf.convert_to_tensor(x, dtype=tf.int8)
  >>> tf.image.convert_image_dtype(x_int8, dtype=tf.float16, saturate=False)
  <tf.Tensor: shape=(2, 2, 3), dtype=float16, numpy=
  array([[[0.00787, 0.01575, 0.02362],
          [0.0315 , 0.03937, 0.04724]],
         [[0.0551 , 0.063  , 0.07086],
          [0.07874, 0.0866 , 0.0945 ]]], dtype=float16)>

  Converting integer types to floating point types returns normalized floating
  point values in the range [0, 1); the values are normalized by the `MAX` value
  of the input dtype. Consider the following two examples:

  >>> a = [[[1], [2]], [[3], [4]]]
  >>> a_int8 = tf.convert_to_tensor(a, dtype=tf.int8)
  >>> tf.image.convert_image_dtype(a_int8, dtype=tf.float32)
  <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
  array([[[0.00787402],
          [0.01574803]],
         [[0.02362205],
          [0.03149606]]], dtype=float32)>

  >>> a_int32 = tf.convert_to_tensor(a, dtype=tf.int32)
  >>> tf.image.convert_image_dtype(a_int32, dtype=tf.float32)
  <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
  array([[[4.6566129e-10],
          [9.3132257e-10]],
         [[1.3969839e-09],
          [1.8626451e-09]]], dtype=float32)>

  Despite having identical values of `a` and output dtype of `float32`, the
  outputs differ due to the different input dtypes (`int8` vs. `int32`). This
  is, again, because the values are normalized by the `MAX` value of the input
  dtype.

  Note that converting floating point values to integer type may lose precision.
  In the example below, an image tensor `b` of dtype `float32` is converted to
  `int8` and back to `float32`. The final output, however, is different from
  the original input `b` due to precision loss.

  >>> b = [[[0.12], [0.34]], [[0.56], [0.78]]]
  >>> b_float32 = tf.convert_to_tensor(b, dtype=tf.float32)
  >>> b_int8 = tf.image.convert_image_dtype(b_float32, dtype=tf.int8)
  >>> tf.image.convert_image_dtype(b_int8, dtype=tf.float32)
  <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
  array([[[0.11811024],
          [0.33858266]],
         [[0.5590551 ],
          [0.77952754]]], dtype=float32)>

  Scaling up from an integer type (input dtype) to another integer type (output
  dtype) will not map input dtype's `MAX` to output dtype's `MAX` but converting
  back and forth should result in no change. For example, as shown below, the
  `MAX` value of int8 (=127) is not mapped to the `MAX` value of int16 (=32,767)
  but, when scaled back, we get the same, original values of `c`.

  >>> c = [[[1], [2]], [[127], [127]]]
  >>> c_int8 = tf.convert_to_tensor(c, dtype=tf.int8)
  >>> c_int16 = tf.image.convert_image_dtype(c_int8, dtype=tf.int16)
  >>> print(c_int16)
  tf.Tensor(
  [[[  256]
    [  512]]
   [[32512]
    [32512]]], shape=(2, 2, 1), dtype=int16)
  >>> c_int8_back = tf.image.convert_image_dtype(c_int16, dtype=tf.int8)
  >>> print(c_int8_back)
  tf.Tensor(
  [[[  1]
    [  2]]
   [[127]
    [127]]], shape=(2, 2, 1), dtype=int8)

  Scaling down from an integer type to another integer type can be a lossy
  conversion. Notice in the example below that converting `int16` to `uint8` and
  back to `int16` has lost precision.

  >>> d = [[[1000], [2000]], [[3000], [4000]]]
  >>> d_int16 = tf.convert_to_tensor(d, dtype=tf.int16)
  >>> d_uint8 = tf.image.convert_image_dtype(d_int16, dtype=tf.uint8)
  >>> d_int16_back = tf.image.convert_image_dtype(d_uint8, dtype=tf.int16)
  >>> print(d_int16_back)
  tf.Tensor(
  [[[ 896]
    [1920]]
   [[2944]
    [3968]]], shape=(2, 2, 1), dtype=int16)

  Note that converting from floating point inputs to integer types may lead to
  over/underflow problems. Set saturate to `True` to avoid such problems in
  problematic conversions. If enabled, saturation will clip the output into the
  allowed range before performing a potentially dangerous cast (and only before
  performing such a cast, i.e., when casting from a floating point to an integer
  type, and when casting from a signed to an unsigned type; `saturate` has no
  effect on casts between floats, or on casts that increase the type's range).

  Args:
    image: An image.
    dtype: A `DType` to convert `image` to.
    saturate: If `True`, clip the input before casting (if necessary).
    name: A name for this operation (optional).

  Returns:
    `image`, converted to `dtype`.

  Raises:
    AttributeError: Raises an attribute error when dtype is neither
      float nor integer.
  """
  image = ops.convert_to_tensor(image, name='image')
  dtype = dtypes.as_dtype(dtype)
  if not dtype.is_floating and not dtype.is_integer:
    raise AttributeError('dtype must be either floating point or integer')
  if dtype == image.dtype:
    return array_ops.identity(image, name=name)

  with ops.name_scope(name, 'convert_image', [image]) as name:
    # Both integer: use integer multiplication in the larger range.
    if image.dtype.is_integer and dtype.is_integer:
      scale_in = image.dtype.max
      scale_out = dtype.max
      if scale_in > scale_out:
        # Scaling down, scale first, then cast. The scaling factor will
        # cause in.max to be mapped to above out.max but below out.max+1,
        # so that the output is safely in the supported range.
        scale = (scale_in + 1) // (scale_out + 1)
        scaled = math_ops.floordiv(image, scale)

        if saturate:
          return math_ops.saturate_cast(scaled, dtype, name=name)
        else:
          return math_ops.cast(scaled, dtype, name=name)
      else:
        # Scaling up, cast first, then scale. The scale will not map in.max to
        # out.max, but converting back and forth should result in no change.
        if saturate:
          cast = math_ops.saturate_cast(image, dtype)
        else:
          cast = math_ops.cast(image, dtype)
        scale = (scale_out + 1) // (scale_in + 1)
        return math_ops.multiply(cast, scale, name=name)
    elif image.dtype.is_floating and dtype.is_floating:
      # Both float: Just cast, no possible overflows in the allowed ranges.
      # Note: We're ignoring float overflows. If your image dynamic range
      # exceeds float range, you're on your own.
      return math_ops.cast(image, dtype, name=name)
    else:
      if image.dtype.is_integer:
        # Converting to float: first cast, then scale. No saturation possible.
        cast = math_ops.cast(image, dtype)
        scale = 1. / image.dtype.max
        return math_ops.multiply(cast, scale, name=name)
      else:
        # Converting from float: first scale, then cast.
        scale = dtype.max + 0.5  # avoid rounding problems in the cast
        scaled = math_ops.multiply(image, scale)
        if saturate:
          return math_ops.saturate_cast(scaled, dtype, name=name)
        else:
          return math_ops.cast(scaled, dtype, name=name)

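# Worked scale factors for the branches above (derived directly from the
# code; dtype maxima are the usual two's-complement bounds):
#   int8  -> int16:  scale = (32767 + 1) // (127 + 1) = 256, so 1 maps to 256.
#   int16 -> uint8:  scale = (32767 + 1) // (255 + 1) = 128, so values are
#                    floor-divided by 128 (lossy, as the docstring shows).
#   int8  -> float:  cast, then multiply by 1 / 127.
#   float -> int8:   multiply by 127 + 0.5 = 127.5, then (saturate-)cast.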

@tf_export('image.rgb_to_grayscale')
@dispatch.add_dispatch_support
def rgb_to_grayscale(images, name=None):
  """Converts one or more images from RGB to Grayscale.

  Outputs a tensor of the same `DType` and rank as `images`.  The size of the
  last dimension of the output is 1, containing the Grayscale value of the
  pixels.

  >>> original = tf.constant([[[1.0, 2.0, 3.0]]])
  >>> converted = tf.image.rgb_to_grayscale(original)
  >>> print(converted.numpy())
  [[[1.81...]]]

  Args:
    images: The RGB tensor to convert. The last dimension must have size 3 and
      should contain RGB values.
    name: A name for the operation (optional).

  Returns:
    The converted grayscale image(s).
  """
  with ops.name_scope(name, 'rgb_to_grayscale', [images]) as name:
    images = ops.convert_to_tensor(images, name='images')
    # Remember the original dtype so we can convert back if needed.
    orig_dtype = images.dtype
    flt_image = convert_image_dtype(images, dtypes.float32)

    # Reference for converting between RGB and grayscale.
    # https://en.wikipedia.org/wiki/Luma_%28video%29
    rgb_weights = [0.2989, 0.5870, 0.1140]
    gray_float = math_ops.tensordot(flt_image, rgb_weights, [-1, -1])
    gray_float = array_ops.expand_dims(gray_float, -1)
    return convert_image_dtype(gray_float, orig_dtype, name=name)

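# The doctest output above follows directly from the weights used in the
# implementation: 0.2989 * 1.0 + 0.5870 * 2.0 + 0.1140 * 3.0 = 1.8149.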

@tf_export('image.grayscale_to_rgb')
@dispatch.add_dispatch_support
def grayscale_to_rgb(images, name=None):
  """Converts one or more images from Grayscale to RGB.

  Outputs a tensor of the same `DType` and rank as `images`.  The size of the
  last dimension of the output is 3, containing the RGB value of the pixels.
  The input images' last dimension must be size 1.

  >>> original = tf.constant([[[1.0], [2.0], [3.0]]])
  >>> converted = tf.image.grayscale_to_rgb(original)
  >>> print(converted.numpy())
  [[[1. 1. 1.]
    [2. 2. 2.]
    [3. 3. 3.]]]

  Args:
    images: The Grayscale tensor to convert. The last dimension must be size 1.
    name: A name for the operation (optional).

  Returns:
    The converted RGB image(s).
  """
  with ops.name_scope(name, 'grayscale_to_rgb', [images]) as name:
    images = _AssertGrayscaleImage(images)

    images = ops.convert_to_tensor(images, name='images')
    rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
    shape_list = ([array_ops.ones(rank_1, dtype=dtypes.int32)] +
                  [array_ops.expand_dims(3, 0)])
    multiples = array_ops.concat(shape_list, 0)
    rgb = array_ops.tile(images, multiples, name=name)
    rgb.set_shape(images.get_shape()[:-1].concatenate([3]))
    return rgb


# pylint: disable=invalid-name
@tf_export('image.random_hue')
@dispatch.add_dispatch_support
def random_hue(image, max_delta, seed=None):
  """Adjust the hue of RGB images by a random factor.

  Equivalent to `adjust_hue()` but uses a `delta` randomly
  picked in the interval `[-max_delta, max_delta)`.

  `max_delta` must be in the interval `[0, 0.5]`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.random_hue(x, 0.2)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_hue`. Unlike using the `seed` param with
  `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the same
  results given the same seed independent of how many times the function is
  called, and independent of global seed settings (e.g. `tf.random.set_seed`).

  Args:
    image: RGB image or images. The size of the last dimension must be 3.
    max_delta: float. The maximum value for the random delta.
    seed: An operation-specific seed. It will be used in conjunction with the
      graph-level seed to determine the real seeds that will be used in this
      operation. Please see the documentation of set_random_seed for its
      interaction with the graph-level random seed.

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `max_delta` is invalid.
  """
  if max_delta > 0.5:
    raise ValueError('max_delta must be <= 0.5.')

  if max_delta < 0:
    raise ValueError('max_delta must be non-negative.')

  delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed)
  return adjust_hue(image, delta)

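# Usage sketch (output elided in the doctest above because it is random):
#
#   img = tf.random.uniform([2, 2, 3])
#   out = tf.image.random_hue(img, max_delta=0.1)
#   # Equivalent to adjust_hue(img, d) for one delta d drawn uniformly
#   # from [-0.1, 0.1).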

@tf_export('image.stateless_random_hue', v1=[])
@dispatch.add_dispatch_support
def stateless_random_hue(image, max_delta, seed):
  """Adjust the hue of RGB images by a random factor deterministically.

  Equivalent to `adjust_hue()` but uses a `delta` randomly picked in the
  interval `[-max_delta, max_delta)`.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  `max_delta` must be in the interval `[0, 0.5]`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> seed = (1, 2)
  >>> tf.image.stateless_random_hue(x, 0.2, seed)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.6514902,  1.       ,  3.       ],
          [ 4.65149  ,  4.       ,  6.       ]],
         [[ 7.65149  ,  7.       ,  9.       ],
          [10.65149  , 10.       , 12.       ]]], dtype=float32)>

  Args:
    image: RGB image or images. The size of the last dimension must be 3.
    max_delta: float. The maximum value for the random delta.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `max_delta` is invalid.
  """
  if max_delta > 0.5:
    raise ValueError('max_delta must be <= 0.5.')

  if max_delta < 0:
    raise ValueError('max_delta must be non-negative.')

  delta = stateless_random_ops.stateless_random_uniform(
      shape=[], minval=-max_delta, maxval=max_delta, seed=seed)
  return adjust_hue(image, delta)


@tf_export('image.adjust_hue')
@dispatch.add_dispatch_support
def adjust_hue(image, delta, name=None):
  """Adjust hue of RGB images.

  This is a convenience method that converts an RGB image to float
  representation, converts it to HSV, adds an offset to the
  hue channel, converts back to RGB and then back to the original
  data type. If several adjustments are chained it is advisable to minimize
  the number of redundant conversions.

  `image` is an RGB image.  The image hue is adjusted by converting the
  image(s) to HSV and rotating the hue channel (H) by
  `delta`.  The image is then converted back to RGB.

  `delta` must be in the interval `[-1, 1]`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.adjust_hue(x, 0.2)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 2.3999996,  1.       ,  3.       ],
          [ 5.3999996,  4.       ,  6.       ]],
        [[ 8.4      ,  7.       ,  9.       ],
          [11.4      , 10.       , 12.       ]]], dtype=float32)>

  Args:
    image: RGB image or images. The size of the last dimension must be 3.
    delta: float.  How much to add to the hue channel.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Usage Example:

  >>> image = [[[1, 2, 3], [4, 5, 6]],
  ...          [[7, 8, 9], [10, 11, 12]],
  ...          [[13, 14, 15], [16, 17, 18]]]
  >>> image = tf.constant(image)
  >>> tf.image.adjust_hue(image, 0.2)
  <tf.Tensor: shape=(3, 2, 3), dtype=int32, numpy=
  array([[[ 2,  1,  3],
        [ 5,  4,  6]],
       [[ 8,  7,  9],
        [11, 10, 12]],
       [[14, 13, 15],
        [17, 16, 18]]], dtype=int32)>
  """
  with ops.name_scope(name, 'adjust_hue', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Remember the original dtype so we can convert back if needed.
    orig_dtype = image.dtype
    if orig_dtype in (dtypes.float16, dtypes.float32):
      flt_image = image
    else:
      flt_image = convert_image_dtype(image, dtypes.float32)

    rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)

    return convert_image_dtype(rgb_altered, orig_dtype)


# pylint: disable=invalid-name
@tf_export('image.random_jpeg_quality')
@dispatch.add_dispatch_support
def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None):
  """Randomly changes jpeg encoding quality for inducing jpeg noise.

  `min_jpeg_quality` must be in the interval `[0, 100]` and less than
  `max_jpeg_quality`.
  `max_jpeg_quality` must be in the interval `[0, 100]`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.random_jpeg_quality(x, 75, 95)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_jpeg_quality`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function is
  called, and independent of global seed settings (e.g. `tf.random.set_seed`).

  Args:
    image: 3D image. Size of the last dimension must be 1 or 3.
    min_jpeg_quality: Minimum jpeg encoding quality to use.
    max_jpeg_quality: Maximum jpeg encoding quality to use.
    seed: An operation-specific seed. It will be used in conjunction with the
      graph-level seed to determine the real seeds that will be used in this
      operation. Please see the documentation of set_random_seed for its
      interaction with the graph-level random seed.

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
  """
  if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or
      max_jpeg_quality > 100):
    raise ValueError('jpeg encoding range must be between 0 and 100.')

  if min_jpeg_quality >= max_jpeg_quality:
    raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')

  jpeg_quality = random_ops.random_uniform([],
                                           min_jpeg_quality,
                                           max_jpeg_quality,
                                           seed=seed,
                                           dtype=dtypes.int32)
  return adjust_jpeg_quality(image, jpeg_quality)

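# Usage sketch (hedged; jpeg round-trips assume image-like uint8 data, and
# the exact pixel values depend on the sampled quality):
#
#   img = tf.cast(
#       tf.random.uniform([8, 8, 3], maxval=256, dtype=tf.int32), tf.uint8)
#   out = tf.image.random_jpeg_quality(img, 75, 95)
#   # out has the same shape and dtype as img, re-encoded at a jpeg quality
#   # drawn from [75, 95).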

@tf_export('image.stateless_random_jpeg_quality', v1=[])
@dispatch.add_dispatch_support
def stateless_random_jpeg_quality(image,
                                  min_jpeg_quality,
                                  max_jpeg_quality,
                                  seed):
  """Deterministically randomize jpeg encoding quality for inducing jpeg noise.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  `min_jpeg_quality` must be in the interval `[0, 100]` and less than
  `max_jpeg_quality`.
  `max_jpeg_quality` must be in the interval `[0, 100]`.

  Usage Example:

  >>> x = [[[1, 2, 3],
  ...       [4, 5, 6]],
  ...      [[7, 8, 9],
  ...       [10, 11, 12]]]
  >>> x_uint8 = tf.cast(x, tf.uint8)
  >>> seed = (1, 2)
  >>> tf.image.stateless_random_jpeg_quality(x_uint8, 75, 95, seed)
  <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=
  array([[[ 0,  4,  5],
          [ 1,  5,  6]],
         [[ 5,  9, 10],
          [ 5,  9, 10]]], dtype=uint8)>

  Args:
    image: 3D image. Size of the last dimension must be 1 or 3.
    min_jpeg_quality: Minimum jpeg encoding quality to use.
    max_jpeg_quality: Maximum jpeg encoding quality to use.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
  """
  if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or
      max_jpeg_quality > 100):
    raise ValueError('jpeg encoding range must be between 0 and 100.')

  if min_jpeg_quality >= max_jpeg_quality:
    raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')

  jpeg_quality = stateless_random_ops.stateless_random_uniform(
      shape=[], minval=min_jpeg_quality, maxval=max_jpeg_quality, seed=seed,
      dtype=dtypes.int32)
  return adjust_jpeg_quality(image, jpeg_quality)


@tf_export('image.adjust_jpeg_quality')
@dispatch.add_dispatch_support
def adjust_jpeg_quality(image, jpeg_quality, name=None):
  """Adjust jpeg encoding quality of an image.

  This is a convenience method that converts an image to uint8 representation,
  encodes it to jpeg with `jpeg_quality`, decodes it, and then converts back
  to the original data type.

  `jpeg_quality` must be in the interval `[0, 100]`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.adjust_jpeg_quality(x, 75)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[1., 1., 1.],
          [1., 1., 1.]],
         [[1., 1., 1.],
          [1., 1., 1.]]], dtype=float32)>

  Args:
    image: 3D image. The size of the last dimension must be None, 1 or 3.
    jpeg_quality: Python int or Tensor of type int32. jpeg encoding quality.
    name: A name for this operation (optional).

  Returns:
    Adjusted image, same shape and DType as `image`.

  Raises:
    InvalidArgumentError: quality must be in [0,100]
    InvalidArgumentError: image must have 1 or 3 channels
  """
  with ops.name_scope(name, 'adjust_jpeg_quality', [image]):
    image = ops.convert_to_tensor(image, name='image')
    channels = image.shape.as_list()[-1]
    # Remember the original dtype so we can convert back if needed.
    orig_dtype = image.dtype
    image = convert_image_dtype(image, dtypes.uint8, saturate=True)
    if not _is_tensor(jpeg_quality):
      # If jpeg_quality is an int (not a tensor).
      jpeg_quality = ops.convert_to_tensor(jpeg_quality, dtype=dtypes.int32)
    image = gen_image_ops.encode_jpeg_variable_quality(image, jpeg_quality)

    image = gen_image_ops.decode_jpeg(image, channels=channels)
    return convert_image_dtype(image, orig_dtype, saturate=True)


@tf_export('image.random_saturation')
@dispatch.add_dispatch_support
def random_saturation(image, lower, upper, seed=None):
  """Adjust the saturation of RGB images by a random factor.

  Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly
  picked in the interval `[lower, upper)`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.random_saturation(x, 5, 10)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 0. ,  1.5,  3. ],
          [ 0. ,  3. ,  6. ]],
         [[ 0. ,  4.5,  9. ],
          [ 0. ,  6. , 12. ]]], dtype=float32)>

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_saturation`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function is
  called, and independent of global seed settings (e.g. `tf.random.set_seed`).

  Args:
    image: RGB image or images. The size of the last dimension must be 3.
    lower: float.  Lower bound for the random saturation factor.
    upper: float.  Upper bound for the random saturation factor.
    seed: An operation-specific seed. It will be used in conjunction with the
      graph-level seed to determine the real seeds that will be used in this
      operation. Please see the documentation of set_random_seed for its
      interaction with the graph-level random seed.

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `upper <= lower` or if `lower < 0`.
  """
  if upper <= lower:
    raise ValueError('upper must be > lower.')

  if lower < 0:
    raise ValueError('lower must be non-negative.')

  saturation_factor = random_ops.random_uniform([], lower, upper, seed=seed)
  return adjust_saturation(image, saturation_factor)


@tf_export('image.stateless_random_saturation', v1=[])
@dispatch.add_dispatch_support
def stateless_random_saturation(image, lower, upper, seed=None):
2939  """Adjust the saturation of RGB images by a random factor deterministically.
2940
2941  Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly
2942  picked in the interval `[lower, upper)`.
2943
2944  Guarantees the same results given the same `seed` independent of how many
2945  times the function is called, and independent of global seed settings (e.g.
2946  `tf.random.set_seed`).
2947
2948  Usage Example:
2949
2950  >>> x = [[[1.0, 2.0, 3.0],
2951  ...       [4.0, 5.0, 6.0]],
2952  ...      [[7.0, 8.0, 9.0],
2953  ...       [10.0, 11.0, 12.0]]]
2954  >>> seed = (1, 2)
2955  >>> tf.image.stateless_random_saturation(x, 0.5, 1.0, seed)
2956  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2957  array([[[ 1.1559395,  2.0779698,  3.       ],
2958          [ 4.1559396,  5.07797  ,  6.       ]],
2959         [[ 7.1559396,  8.07797  ,  9.       ],
2960          [10.155939 , 11.07797  , 12.       ]]], dtype=float32)>
2961
2962  Args:
2963    image: RGB image or images. The size of the last dimension must be 3.
2964    lower: float.  Lower bound for the random saturation factor.
2965    upper: float.  Upper bound for the random saturation factor.
2966    seed: A shape [2] Tensor, the seed to the random number generator. Must have
2967      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2968
2969  Returns:
2970    Adjusted image(s), same shape and DType as `image`.
2971
2972  Raises:
2973    ValueError: if `upper <= lower` or if `lower < 0`.
2974  """
2975  if upper <= lower:
2976    raise ValueError('upper must be > lower.')
2977
2978  if lower < 0:
2979    raise ValueError('lower must be non-negative.')
2980
2981  saturation_factor = stateless_random_ops.stateless_random_uniform(
2982      shape=[], minval=lower, maxval=upper, seed=seed)
2983  return adjust_saturation(image, saturation_factor)
2984
2985
2986@tf_export('image.adjust_saturation')
2987@dispatch.add_dispatch_support
2988def adjust_saturation(image, saturation_factor, name=None):
2989  """Adjust saturation of RGB images.
2990
2991  This is a convenience method that converts RGB images to float
2992  representation, converts them to HSV, adds an offset to the
2993  saturation channel, converts back to RGB and then back to the original
2994  data type. If several adjustments are chained it is advisable to minimize
2995  the number of redundant conversions.
2996
2997  `image` is an RGB image or images.  The image saturation is adjusted by
2998  converting the images to HSV and multiplying the saturation (S) channel by
2999  `saturation_factor` and clipping. The images are then converted back to RGB.
3000
3001  Usage Example:
3002
3003  >>> x = [[[1.0, 2.0, 3.0],
3004  ...       [4.0, 5.0, 6.0]],
3005  ...     [[7.0, 8.0, 9.0],
3006  ...       [10.0, 11.0, 12.0]]]
3007  >>> tf.image.adjust_saturation(x, 0.5)
3008  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
3009  array([[[ 2. ,  2.5,  3. ],
3010          [ 5. ,  5.5,  6. ]],
3011         [[ 8. ,  8.5,  9. ],
3012          [11. , 11.5, 12. ]]], dtype=float32)>
3013
3014  Args:
3015    image: RGB image or images. The size of the last dimension must be 3.
3016    saturation_factor: float. Factor to multiply the saturation by.
3017    name: A name for this operation (optional).
3018
3019  Returns:
3020    Adjusted image(s), same shape and DType as `image`.
3021
3022  Raises:
3023    InvalidArgumentError: input must have 3 channels
3024  """
3025  with ops.name_scope(name, 'adjust_saturation', [image]) as name:
3026    image = ops.convert_to_tensor(image, name='image')
3027    # Remember original dtype to so we can convert back if needed
3028    orig_dtype = image.dtype
3029    if orig_dtype in (dtypes.float16, dtypes.float32):
3030      flt_image = image
3031    else:
3032      flt_image = convert_image_dtype(image, dtypes.float32)
3033
3034    adjusted = gen_image_ops.adjust_saturation(flt_image, saturation_factor)
3035
3036    return convert_image_dtype(adjusted, orig_dtype)
3037
3038
3039@tf_export('io.is_jpeg', 'image.is_jpeg', v1=['io.is_jpeg', 'image.is_jpeg'])
3040def is_jpeg(contents, name=None):
3041  r"""Convenience function to check if the 'contents' encodes a JPEG image.
3042
3043  Args:
3044    contents: 0-D `string`. The encoded image bytes.
3045    name: A name for the operation (optional)
3046
3047  Returns:
3048     A scalar boolean tensor indicating if 'contents' may be a JPEG image.
3049     is_jpeg is susceptible to false positives.
3050  """
3051  # Normal JPEGs start with \xff\xd8\xff\xe0
3052  # JPEG with EXIF starts with \xff\xd8\xff\xe1
3053  # Use \xff\xd8\xff to cover both.
3054  with ops.name_scope(name, 'is_jpeg'):
3055    substr = string_ops.substr(contents, 0, 3)
3056    return math_ops.equal(substr, b'\xff\xd8\xff', name=name)
3057
3058
3059def _is_png(contents, name=None):
3060  r"""Convenience function to check if the 'contents' encodes a PNG image.
3061
3062  Args:
3063    contents: 0-D `string`. The encoded image bytes.
3064    name: A name for the operation (optional)
3065
3066  Returns:
3067     A scalar boolean tensor indicating if 'contents' may be a PNG image.
3068     is_png is susceptible to false positives.
3069  """
3070  with ops.name_scope(name, 'is_png'):
3071    substr = string_ops.substr(contents, 0, 3)
3072    return math_ops.equal(substr, b'\211PN', name=name)
3073
3074
3075tf_export(
3076    'io.decode_and_crop_jpeg',
3077    'image.decode_and_crop_jpeg',
3078    v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])(
3079        dispatch.add_dispatch_support(gen_image_ops.decode_and_crop_jpeg))
3080
3081tf_export(
3082    'io.decode_bmp',
3083    'image.decode_bmp',
3084    v1=['io.decode_bmp', 'image.decode_bmp'])(
3085        dispatch.add_dispatch_support(gen_image_ops.decode_bmp))
3086tf_export(
3087    'io.decode_gif',
3088    'image.decode_gif',
3089    v1=['io.decode_gif', 'image.decode_gif'])(
3090        dispatch.add_dispatch_support(gen_image_ops.decode_gif))
3091tf_export(
3092    'io.decode_jpeg',
3093    'image.decode_jpeg',
3094    v1=['io.decode_jpeg', 'image.decode_jpeg'])(
3095        dispatch.add_dispatch_support(gen_image_ops.decode_jpeg))
3096tf_export(
3097    'io.decode_png',
3098    'image.decode_png',
3099    v1=['io.decode_png', 'image.decode_png'])(
3100        dispatch.add_dispatch_support(gen_image_ops.decode_png))
3101
3102tf_export(
3103    'io.encode_jpeg',
3104    'image.encode_jpeg',
3105    v1=['io.encode_jpeg', 'image.encode_jpeg'])(
3106        dispatch.add_dispatch_support(gen_image_ops.encode_jpeg))
3107tf_export(
3108    'io.extract_jpeg_shape',
3109    'image.extract_jpeg_shape',
3110    v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])(
3111        dispatch.add_dispatch_support(gen_image_ops.extract_jpeg_shape))
3112
3113
3114@tf_export('io.encode_png', 'image.encode_png')
3115@dispatch.add_dispatch_support
3116def encode_png(image, compression=-1, name=None):
3117  r"""PNG-encode an image.
3118
3119  `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
3120  where `channels` is:
3121
3122  *   1: for grayscale.
3123  *   2: for grayscale + alpha.
3124  *   3: for RGB.
3125  *   4: for RGBA.
3126
3127  The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
3128  default or a value from 0 to 9.  9 is the highest compression level,
3129  generating the smallest output, but is slower.
3130
3131  Args:
3132    image: A `Tensor`. Must be one of the following types: `uint8`, `uint16`.
3133      3-D with shape `[height, width, channels]`.
3134    compression: An optional `int`. Defaults to `-1`. Compression level.
3135    name: A name for the operation (optional).
3136
3137  Returns:
3138    A `Tensor` of type `string`.
3139  """
3140  return gen_image_ops.encode_png(
3141      ops.convert_to_tensor(image), compression, name)
3142
3143
3144@tf_export(
3145    'io.decode_image',
3146    'image.decode_image',
3147    v1=['io.decode_image', 'image.decode_image'])
3148@dispatch.add_dispatch_support
3149def decode_image(contents,
3150                 channels=None,
3151                 dtype=dtypes.uint8,
3152                 name=None,
3153                 expand_animations=True):
3154  """Function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`.
3155
3156  Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the
3157  appropriate operation to convert the input bytes `string` into a `Tensor`
3158  of type `dtype`.
3159
3160  Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as
3161  opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D
3162  arrays `[height, width, num_channels]`. Make sure to take this into account
3163  when constructing your graph if you are intermixing GIF files with BMP, JPEG,
3164  and/or PNG files. Alternately, set the `expand_animations` argument of this
3165  function to `False`, in which case the op will return 3-dimensional tensors
3166  and will truncate animated GIF files to the first frame.
3167
3168  NOTE: If the first frame of an animated GIF does not occupy the entire
3169  canvas (maximum frame width x maximum frame height), then it fills the
3170  unoccupied areas (in the first frame) with zeros (black). For frames after the
3171  first frame that does not occupy the entire canvas, it uses the previous
3172  frame to fill the unoccupied areas.
3173
3174  Args:
3175    contents: A `Tensor` of type `string`. 0-D. The encoded image bytes.
3176    channels: An optional `int`. Defaults to `0`. Number of color channels for
3177      the decoded image.
3178    dtype: The desired DType of the returned `Tensor`.
3179    name: A name for the operation (optional)
3180    expand_animations: An optional `bool`. Defaults to `True`. Controls the
3181      shape of the returned op's output. If `True`, the returned op will produce
3182      a 3-D tensor for PNG, JPEG, and BMP files; and a 4-D tensor for all GIFs,
3183      whether animated or not. If, `False`, the returned op will produce a 3-D
3184      tensor for all file types and will truncate animated GIFs to the first
3185      frame.
3186
3187  Returns:
3188    `Tensor` with type `dtype` and a 3- or 4-dimensional shape, depending on
3189    the file type and the value of the `expand_animations` parameter.
3190
3191  Raises:
3192    ValueError: On incorrect number of channels.
3193  """
3194  with ops.name_scope(name, 'decode_image'):
3195    channels = 0 if channels is None else channels
3196    if dtype not in [dtypes.float32, dtypes.uint8, dtypes.uint16]:
3197      dest_dtype = dtype
3198      dtype = dtypes.uint16
3199      return convert_image_dtype(
3200          gen_image_ops.decode_image(
3201              contents=contents,
3202              channels=channels,
3203              expand_animations=expand_animations,
3204              dtype=dtype), dest_dtype)
3205    else:
3206      return gen_image_ops.decode_image(
3207          contents=contents,
3208          channels=channels,
3209          expand_animations=expand_animations,
3210          dtype=dtype)
3211
3212
3213@tf_export('image.total_variation')
3214@dispatch.add_dispatch_support
3215def total_variation(images, name=None):
3216  """Calculate and return the total variation for one or more images.
3217
3218  The total variation is the sum of the absolute differences for neighboring
3219  pixel-values in the input images. This measures how much noise is in the
3220  images.
3221
3222  This can be used as a loss-function during optimization so as to suppress
3223  noise in images. If you have a batch of images, then you should calculate
3224  the scalar loss-value as the sum:
3225  `loss = tf.reduce_sum(tf.image.total_variation(images))`
3226
3227  This implements the anisotropic 2-D version of the formula described here:
3228
3229  https://en.wikipedia.org/wiki/Total_variation_denoising
3230
3231  Args:
3232    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
3233      of shape `[height, width, channels]`.
3234    name: A name for the operation (optional).
3235
3236  Raises:
3237    ValueError: if images.shape is not a 3-D or 4-D vector.
3238
3239  Returns:
3240    The total variation of `images`.
3241
3242    If `images` was 4-D, return a 1-D float Tensor of shape `[batch]` with the
3243    total variation for each image in the batch.
3244    If `images` was 3-D, return a scalar float with the total variation for
3245    that image.
3246  """
3247
3248  with ops.name_scope(name, 'total_variation'):
3249    ndims = images.get_shape().ndims
3250
3251    if ndims == 3:
3252      # The input is a single image with shape [height, width, channels].
3253
3254      # Calculate the difference of neighboring pixel-values.
3255      # The images are shifted one pixel along the height and width by slicing.
3256      pixel_dif1 = images[1:, :, :] - images[:-1, :, :]
3257      pixel_dif2 = images[:, 1:, :] - images[:, :-1, :]
3258
3259      # Sum for all axis. (None is an alias for all axis.)
3260      sum_axis = None
3261    elif ndims == 4:
3262      # The input is a batch of images with shape:
3263      # [batch, height, width, channels].
3264
3265      # Calculate the difference of neighboring pixel-values.
3266      # The images are shifted one pixel along the height and width by slicing.
3267      pixel_dif1 = images[:, 1:, :, :] - images[:, :-1, :, :]
3268      pixel_dif2 = images[:, :, 1:, :] - images[:, :, :-1, :]
3269
3270      # Only sum for the last 3 axis.
3271      # This results in a 1-D tensor with the total variation for each image.
3272      sum_axis = [1, 2, 3]
3273    else:
3274      raise ValueError('\'images\' must be either 3 or 4-dimensional.')
3275
3276    # Calculate the total variation by taking the absolute value of the
3277    # pixel-differences and summing over the appropriate axis.
3278    tot_var = (
3279        math_ops.reduce_sum(math_ops.abs(pixel_dif1), axis=sum_axis) +
3280        math_ops.reduce_sum(math_ops.abs(pixel_dif2), axis=sum_axis))
3281
3282  return tot_var
3283
3284
3285@tf_export('image.sample_distorted_bounding_box', v1=[])
3286@dispatch.add_dispatch_support
3287def sample_distorted_bounding_box_v2(image_size,
3288                                     bounding_boxes,
3289                                     seed=0,
3290                                     min_object_covered=0.1,
3291                                     aspect_ratio_range=None,
3292                                     area_range=None,
3293                                     max_attempts=None,
3294                                     use_image_if_no_bounding_boxes=None,
3295                                     name=None):
3296  """Generate a single randomly distorted bounding box for an image.
3297
3298  Bounding box annotations are often supplied in addition to ground-truth labels
3299  in image recognition or object localization tasks. A common technique for
3300  training such a system is to randomly distort an image while preserving
3301  its content, i.e. *data augmentation*. This Op outputs a randomly distorted
3302  localization of an object, i.e. bounding box, given an `image_size`,
3303  `bounding_boxes` and a series of constraints.
3304
3305  The output of this Op is a single bounding box that may be used to crop the
3306  original image. The output is returned as 3 tensors: `begin`, `size` and
3307  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
3308  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
3309  visualize what the bounding box looks like.
3310
3311  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
3312  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
3313  and the height of the underlying image.
3314
3315  For example,
3316
3317  ```python
3318      # Generate a single distorted bounding box.
3319      begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
3320          tf.shape(image),
3321          bounding_boxes=bounding_boxes,
3322          min_object_covered=0.1)
3323
3324      # Draw the bounding box in an image summary.
3325      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
3326                                                    bbox_for_draw)
3327      tf.compat.v1.summary.image('images_with_box', image_with_box)
3328
3329      # Employ the bounding box to distort the image.
3330      distorted_image = tf.slice(image, begin, size)
3331  ```
3332
3333  Note that if no bounding box information is available, setting
3334  `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
3335  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
3336  false and no bounding boxes are supplied, an error is raised.
3337
3338  For producing deterministic results given a `seed` value, use
3339  `tf.image.stateless_sample_distorted_bounding_box`. Unlike using the `seed`
3340  param with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops
3341  guarantee the same results given the same seed independent of how many times
3342  the function is called, and independent of global seed settings
3343  (e.g. tf.random.set_seed).
3344
3345  Args:
3346    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
3347      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
3348    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
3349      describing the N bounding boxes associated with the image.
3350    seed: An optional `int`. Defaults to `0`. If `seed` is set to non-zero, the
3351      random number generator is seeded by the given `seed`.  Otherwise, it is
3352      seeded by a random seed.
3353    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
3354      cropped area of the image must contain at least this fraction of any
3355      bounding box supplied. The value of this parameter should be non-negative.
3356      In the case of 0, the cropped area does not need to overlap any of the
3357      bounding boxes supplied.
3358    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
3359      1.33]`. The cropped area of the image must have an aspect `ratio = width /
3360      height` within this range.
3361    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
3362      cropped area of the image must contain a fraction of the supplied image
3363      within this range.
3364    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
3365      generating a cropped region of the image of the specified constraints.
3366      After `max_attempts` failures, return the entire image.
3367    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
3368      Controls behavior if no bounding boxes supplied. If true, assume an
3369      implicit bounding box covering the whole input. If false, raise an error.
3370    name: A name for the operation (optional).
3371
3372  Returns:
3373    A tuple of `Tensor` objects (begin, size, bboxes).
3374
3375    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3376    `[offset_height, offset_width, 0]`. Provide as input to
3377      `tf.slice`.
3378    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3379    `[target_height, target_width, -1]`. Provide as input to
3380      `tf.slice`.
3381    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
3382    the distorted bounding box.
3383    Provide as input to `tf.image.draw_bounding_boxes`.
3384  """
3385  seed1, seed2 = random_seed.get_seed(seed) if seed else (0, 0)
3386  with ops.name_scope(name, 'sample_distorted_bounding_box'):
3387    return gen_image_ops.sample_distorted_bounding_box_v2(
3388        image_size,
3389        bounding_boxes,
3390        seed=seed1,
3391        seed2=seed2,
3392        min_object_covered=min_object_covered,
3393        aspect_ratio_range=aspect_ratio_range,
3394        area_range=area_range,
3395        max_attempts=max_attempts,
3396        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
3397        name=name)
3398
3399
3400@tf_export('image.stateless_sample_distorted_bounding_box', v1=[])
3401@dispatch.add_dispatch_support
3402def stateless_sample_distorted_bounding_box(image_size,
3403                                            bounding_boxes,
3404                                            seed,
3405                                            min_object_covered=0.1,
3406                                            aspect_ratio_range=None,
3407                                            area_range=None,
3408                                            max_attempts=None,
3409                                            use_image_if_no_bounding_boxes=None,
3410                                            name=None):
3411  """Generate a randomly distorted bounding box for an image deterministically.
3412
3413  Bounding box annotations are often supplied in addition to ground-truth labels
3414  in image recognition or object localization tasks. A common technique for
3415  training such a system is to randomly distort an image while preserving
3416  its content, i.e. *data augmentation*. This Op, given the same `seed`,
3417  deterministically outputs a randomly distorted localization of an object, i.e.
3418  bounding box, given an `image_size`, `bounding_boxes` and a series of
3419  constraints.
3420
3421  The output of this Op is a single bounding box that may be used to crop the
3422  original image. The output is returned as 3 tensors: `begin`, `size` and
3423  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
3424  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
3425  visualize what the bounding box looks like.
3426
3427  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
3428  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
3429  and the height of the underlying image.
3430
3431  The output of this Op is guaranteed to be the same given the same `seed` and
3432  is independent of how many times the function is called, and independent of
3433  global seed settings (e.g. `tf.random.set_seed`).
3434
3435  Example usage:
3436
3437  >>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]])
3438  >>> bbox = tf.constant(
3439  ...   [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
3440  >>> seed = (1, 2)
3441  >>> # Generate a single distorted bounding box.
3442  >>> bbox_begin, bbox_size, bbox_draw = (
3443  ...   tf.image.stateless_sample_distorted_bounding_box(
3444  ...     tf.shape(image), bounding_boxes=bbox, seed=seed))
3445  >>> # Employ the bounding box to distort the image.
3446  >>> tf.slice(image, bbox_begin, bbox_size)
3447  <tf.Tensor: shape=(2, 2, 1), dtype=int64, numpy=
3448  array([[[1],
3449          [2]],
3450         [[4],
3451          [5]]])>
3452  >>> # Draw the bounding box in an image summary.
3453  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
3454  >>> tf.image.draw_bounding_boxes(
3455  ...   tf.expand_dims(tf.cast(image, tf.float32),0), bbox_draw, colors)
3456  <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=
3457  array([[[[1.],
3458           [1.],
3459           [3.]],
3460          [[1.],
3461           [1.],
3462           [6.]],
3463          [[7.],
3464           [8.],
3465           [9.]]]], dtype=float32)>
3466
3467  Note that if no bounding box information is available, setting
3468  `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
3469  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
3470  false and no bounding boxes are supplied, an error is raised.
3471
3472  Args:
3473    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
3474      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
3475    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
3476      describing the N bounding boxes associated with the image.
3477    seed: A shape [2] Tensor, the seed to the random number generator. Must have
3478      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
3479    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
3480      cropped area of the image must contain at least this fraction of any
3481      bounding box supplied. The value of this parameter should be non-negative.
3482      In the case of 0, the cropped area does not need to overlap any of the
3483      bounding boxes supplied.
3484    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
3485      1.33]`. The cropped area of the image must have an aspect `ratio = width /
3486      height` within this range.
3487    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
3488      cropped area of the image must contain a fraction of the supplied image
3489      within this range.
3490    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
3491      generating a cropped region of the image of the specified constraints.
3492      After `max_attempts` failures, return the entire image.
3493    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
3494      Controls behavior if no bounding boxes supplied. If true, assume an
3495      implicit bounding box covering the whole input. If false, raise an error.
3496    name: A name for the operation (optional).
3497
3498  Returns:
3499    A tuple of `Tensor` objects (begin, size, bboxes).
3500
3501    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3502    `[offset_height, offset_width, 0]`. Provide as input to
3503      `tf.slice`.
3504    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3505    `[target_height, target_width, -1]`. Provide as input to
3506      `tf.slice`.
3507    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
3508    the distorted bounding box.
3509    Provide as input to `tf.image.draw_bounding_boxes`.
3510  """
3511  with ops.name_scope(name, 'stateless_sample_distorted_bounding_box'):
3512    return gen_image_ops.stateless_sample_distorted_bounding_box(
3513        image_size=image_size,
3514        bounding_boxes=bounding_boxes,
3515        seed=seed,
3516        min_object_covered=min_object_covered,
3517        aspect_ratio_range=aspect_ratio_range,
3518        area_range=area_range,
3519        max_attempts=max_attempts,
3520        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
3521        name=name)
3522
3523
3524@tf_export(v1=['image.sample_distorted_bounding_box'])
3525@dispatch.add_dispatch_support
3526@deprecation.deprecated(
3527    date=None,
3528    instructions='`seed2` arg is deprecated.'
3529    'Use sample_distorted_bounding_box_v2 instead.')
3530def sample_distorted_bounding_box(image_size,
3531                                  bounding_boxes,
3532                                  seed=None,
3533                                  seed2=None,
3534                                  min_object_covered=0.1,
3535                                  aspect_ratio_range=None,
3536                                  area_range=None,
3537                                  max_attempts=None,
3538                                  use_image_if_no_bounding_boxes=None,
3539                                  name=None):
3540  """Generate a single randomly distorted bounding box for an image.
3541
3542  Bounding box annotations are often supplied in addition to ground-truth labels
3543  in image recognition or object localization tasks. A common technique for
3544  training such a system is to randomly distort an image while preserving
3545  its content, i.e. *data augmentation*. This Op outputs a randomly distorted
3546  localization of an object, i.e. bounding box, given an `image_size`,
3547  `bounding_boxes` and a series of constraints.
3548
3549  The output of this Op is a single bounding box that may be used to crop the
3550  original image. The output is returned as 3 tensors: `begin`, `size` and
3551  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
3552  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
3553  visualize what the bounding box looks like.
3554
3555  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
3556  The
3557  bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
3558  height of the underlying image.
3559
3560  For example,
3561
3562  ```python
3563      # Generate a single distorted bounding box.
3564      begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
3565          tf.shape(image),
3566          bounding_boxes=bounding_boxes,
3567          min_object_covered=0.1)
3568
3569      # Draw the bounding box in an image summary.
3570      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
3571                                                    bbox_for_draw)
3572      tf.compat.v1.summary.image('images_with_box', image_with_box)
3573
3574      # Employ the bounding box to distort the image.
3575      distorted_image = tf.slice(image, begin, size)
3576  ```
3577
3578  Note that if no bounding box information is available, setting
3579  `use_image_if_no_bounding_boxes = True` will assume there is a single implicit
3580  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
3581  false and no bounding boxes are supplied, an error is raised.
3582
3583  Args:
3584    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
3585      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
3586    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
3587      describing the N bounding boxes associated with the image.
3588    seed: An optional `int`. Defaults to `0`. If either `seed` or `seed2` are
3589      set to non-zero, the random number generator is seeded by the given
3590      `seed`.  Otherwise, it is seeded by a random seed.
3591    seed2: An optional `int`. Defaults to `0`. A second seed to avoid seed
3592      collision.
3593    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
3594      cropped area of the image must contain at least this fraction of any
3595      bounding box supplied. The value of this parameter should be non-negative.
3596      In the case of 0, the cropped area does not need to overlap any of the
3597      bounding boxes supplied.
3598    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
3599      1.33]`. The cropped area of the image must have an aspect ratio = width /
3600      height within this range.
3601    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
3602      cropped area of the image must contain a fraction of the supplied image
3603      within this range.
3604    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
3605      generating a cropped region of the image of the specified constraints.
3606      After `max_attempts` failures, return the entire image.
3607    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
3608      Controls behavior if no bounding boxes supplied. If true, assume an
3609      implicit bounding box covering the whole input. If false, raise an error.
3610    name: A name for the operation (optional).
3611
3612  Returns:
3613    A tuple of `Tensor` objects (begin, size, bboxes).
3614
3615    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3616    `[offset_height, offset_width, 0]`. Provide as input to
3617      `tf.slice`.
3618    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3619    `[target_height, target_width, -1]`. Provide as input to
3620      `tf.slice`.
3621    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
3622    the distorted bounding box.
3623      Provide as input to `tf.image.draw_bounding_boxes`.
3624  """
3625  with ops.name_scope(name, 'sample_distorted_bounding_box'):
3626    return gen_image_ops.sample_distorted_bounding_box_v2(
3627        image_size,
3628        bounding_boxes,
3629        seed=seed,
3630        seed2=seed2,
3631        min_object_covered=min_object_covered,
3632        aspect_ratio_range=aspect_ratio_range,
3633        area_range=area_range,
3634        max_attempts=max_attempts,
3635        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
3636        name=name)
3637
3638
3639@tf_export('image.non_max_suppression')
3640@dispatch.add_dispatch_support
3641def non_max_suppression(boxes,
3642                        scores,
3643                        max_output_size,
3644                        iou_threshold=0.5,
3645                        score_threshold=float('-inf'),
3646                        name=None):
3647  """Greedily selects a subset of bounding boxes in descending order of score.
3648
3649  Prunes away boxes that have high intersection-over-union (IOU) overlap
3650  with previously selected boxes.  Bounding boxes are supplied as
3651  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
3652  diagonal pair of box corners and the coordinates can be provided as normalized
3653  (i.e., lying in the interval `[0, 1]`) or absolute.  Note that this algorithm
3654  is agnostic to where the origin is in the coordinate system.  Note that this
3655  algorithm is invariant to orthogonal transformations and translations
3656  of the coordinate system; thus translating or reflections of the coordinate
3657  system result in the same boxes being selected by the algorithm.
3658  The output of this operation is a set of integers indexing into the input
3659  collection of bounding boxes representing the selected boxes.  The bounding
3660  box coordinates corresponding to the selected indices can then be obtained
3661  using the `tf.gather` operation.  For example:
3662    ```python
3663    selected_indices = tf.image.non_max_suppression(
3664        boxes, scores, max_output_size, iou_threshold)
3665    selected_boxes = tf.gather(boxes, selected_indices)
3666    ```
3667
3668  Args:
3669    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
3670    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
3671      score corresponding to each box (each row of boxes).
3672    max_output_size: A scalar integer `Tensor` representing the maximum number
3673      of boxes to be selected by non-max suppression.
3674    iou_threshold: A 0-D float tensor representing the threshold for deciding
3675      whether boxes overlap too much with respect to IOU.
3676    score_threshold: A 0-D float tensor representing the threshold for deciding
3677      when to remove boxes based on score.
3678    name: A name for the operation (optional).
3679
3680  Returns:
3681    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
3682      selected indices from the boxes tensor, where `M <= max_output_size`.
3683  """
3684  with ops.name_scope(name, 'non_max_suppression'):
3685    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
3686    score_threshold = ops.convert_to_tensor(
3687        score_threshold, name='score_threshold')
3688    return gen_image_ops.non_max_suppression_v3(boxes, scores, max_output_size,
3689                                                iou_threshold, score_threshold)
3690
3691
3692@tf_export('image.non_max_suppression_with_scores')
3693@dispatch.add_dispatch_support
3694def non_max_suppression_with_scores(boxes,
3695                                    scores,
3696                                    max_output_size,
3697                                    iou_threshold=0.5,
3698                                    score_threshold=float('-inf'),
3699                                    soft_nms_sigma=0.0,
3700                                    name=None):
3701  """Greedily selects a subset of bounding boxes in descending order of score.
3702
3703  Prunes away boxes that have high intersection-over-union (IOU) overlap
3704  with previously selected boxes.  Bounding boxes are supplied as
3705  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
3706  diagonal pair of box corners and the coordinates can be provided as normalized
3707  (i.e., lying in the interval `[0, 1]`) or absolute.  Note that this algorithm
3708  is agnostic to where the origin is in the coordinate system.  Note that this
3709  algorithm is invariant to orthogonal transformations and translations
3710  of the coordinate system; thus translating or reflections of the coordinate
3711  system result in the same boxes being selected by the algorithm.
3712  The output of this operation is a set of integers indexing into the input
3713  collection of bounding boxes representing the selected boxes.  The bounding
3714  box coordinates corresponding to the selected indices can then be obtained
3715  using the `tf.gather` operation.  For example:
3716    ```python
3717    selected_indices, selected_scores = tf.image.non_max_suppression_padded(
3718        boxes, scores, max_output_size, iou_threshold=1.0, score_threshold=0.1,
3719        soft_nms_sigma=0.5)
3720    selected_boxes = tf.gather(boxes, selected_indices)
3721    ```
3722
3723  This function generalizes the `tf.image.non_max_suppression` op by also
3724  supporting a Soft-NMS (with Gaussian weighting) mode (c.f.
3725  Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score
3726  of other overlapping boxes instead of directly causing them to be pruned.
3727  Consequently, in contrast to `tf.image.non_max_suppression`,
3728  `tf.image.non_max_suppression_padded` returns the new scores of each input box
3729  in the second output, `selected_scores`.
3730
3731  To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be
3732  larger than 0.  When `soft_nms_sigma` equals 0, the behavior of
3733  `tf.image.non_max_suppression_padded` is identical to that of
3734  `tf.image.non_max_suppression` (except for the extra output) both in function
3735  and in running time.
3736
3737  Args:
3738    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
3739    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
3740      score corresponding to each box (each row of boxes).
3741    max_output_size: A scalar integer `Tensor` representing the maximum number
3742      of boxes to be selected by non-max suppression.
3743    iou_threshold: A 0-D float tensor representing the threshold for deciding
3744      whether boxes overlap too much with respect to IOU.
3745    score_threshold: A 0-D float tensor representing the threshold for deciding
3746      when to remove boxes based on score.
3747    soft_nms_sigma: A 0-D float tensor representing the sigma parameter for Soft
3748      NMS; see Bodla et al (c.f. https://arxiv.org/abs/1704.04503).  When
3749      `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard)
3750      NMS.
3751    name: A name for the operation (optional).
3752
3753  Returns:
3754    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
3755      selected indices from the boxes tensor, where `M <= max_output_size`.
3756    selected_scores: A 1-D float tensor of shape `[M]` representing the
3757      corresponding scores for each selected box, where `M <= max_output_size`.
3758      Scores only differ from corresponding input scores when using Soft NMS
3759      (i.e. when `soft_nms_sigma>0`)
3760  """
3761  with ops.name_scope(name, 'non_max_suppression_with_scores'):
3762    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
3763    score_threshold = ops.convert_to_tensor(
3764        score_threshold, name='score_threshold')
3765    soft_nms_sigma = ops.convert_to_tensor(
3766        soft_nms_sigma, name='soft_nms_sigma')
3767    (selected_indices, selected_scores,
3768     _) = gen_image_ops.non_max_suppression_v5(
3769         boxes,
3770         scores,
3771         max_output_size,
3772         iou_threshold,
3773         score_threshold,
3774         soft_nms_sigma,
3775         pad_to_max_output_size=False)
3776    return selected_indices, selected_scores
3777
3778
3779@tf_export('image.non_max_suppression_overlaps')
3780@dispatch.add_dispatch_support
3781def non_max_suppression_with_overlaps(overlaps,
3782                                      scores,
3783                                      max_output_size,
3784                                      overlap_threshold=0.5,
3785                                      score_threshold=float('-inf'),
3786                                      name=None):
3787  """Greedily selects a subset of bounding boxes in descending order of score.
3788
3789  Prunes away boxes that have high overlap with previously selected boxes.
3790  N-by-n overlap values are supplied as square matrix.
3791  The output of this operation is a set of integers indexing into the input
3792  collection of bounding boxes representing the selected boxes.  The bounding
3793  box coordinates corresponding to the selected indices can then be obtained
3794  using the `tf.gather` operation.  For example:
3795    ```python
3796    selected_indices = tf.image.non_max_suppression_overlaps(
3797        overlaps, scores, max_output_size, iou_threshold)
3798    selected_boxes = tf.gather(boxes, selected_indices)
3799    ```
3800
3801  Args:
3802    overlaps: A 2-D float `Tensor` of shape `[num_boxes, num_boxes]`
3803      representing the n-by-n box overlap values.
3804    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
3805      score corresponding to each box (each row of boxes).
3806    max_output_size: A scalar integer `Tensor` representing the maximum number
3807      of boxes to be selected by non-max suppression.
3808    overlap_threshold: A 0-D float tensor representing the threshold for
3809      deciding whether boxes overlap too much with respect to the provided
3810      overlap values.
3811    score_threshold: A 0-D float tensor representing the threshold for deciding
3812      when to remove boxes based on score.
3813    name: A name for the operation (optional).
3814
3815  Returns:
3816    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
3817      selected indices from the overlaps tensor, where `M <= max_output_size`.
3818  """
3819  with ops.name_scope(name, 'non_max_suppression_overlaps'):
3820    overlap_threshold = ops.convert_to_tensor(
3821        overlap_threshold, name='overlap_threshold')
3822    # pylint: disable=protected-access
3823    return gen_image_ops.non_max_suppression_with_overlaps(
3824        overlaps, scores, max_output_size, overlap_threshold, score_threshold)
3825    # pylint: enable=protected-access
3826
3827
3828_rgb_to_yiq_kernel = [[0.299, 0.59590059, 0.2115],
3829                      [0.587, -0.27455667, -0.52273617],
3830                      [0.114, -0.32134392, 0.31119955]]
3831
3832
3833@tf_export('image.rgb_to_yiq')
3834@dispatch.add_dispatch_support
3835def rgb_to_yiq(images):
3836  """Converts one or more images from RGB to YIQ.
3837
3838  Outputs a tensor of the same shape as the `images` tensor, containing the YIQ
3839  value of the pixels.
3840  The output is only well defined if the value in images are in [0,1].
3841
3842  Usage Example:
3843
3844  >>> x = tf.constant([[[1.0, 2.0, 3.0]]])
3845  >>> tf.image.rgb_to_yiq(x)
3846  <tf.Tensor: shape=(1, 1, 3), dtype=float32,
3847  numpy=array([[[ 1.815     , -0.91724455,  0.09962624]]], dtype=float32)>
3848
3849  Args:
3850    images: 2-D or higher rank. Image data to convert. Last dimension must be
3851      size 3.
3852
3853  Returns:
3854    images: tensor with the same shape as `images`.
3855  """
3856  images = ops.convert_to_tensor(images, name='images')
3857  kernel = ops.convert_to_tensor(
3858      _rgb_to_yiq_kernel, dtype=images.dtype, name='kernel')
3859  ndims = images.get_shape().ndims
3860  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
3861
3862
3863_yiq_to_rgb_kernel = [[1, 1, 1], [0.95598634, -0.27201283, -1.10674021],
3864                      [0.6208248, -0.64720424, 1.70423049]]
3865
3866
3867@tf_export('image.yiq_to_rgb')
3868@dispatch.add_dispatch_support
3869def yiq_to_rgb(images):
3870  """Converts one or more images from YIQ to RGB.
3871
3872  Outputs a tensor of the same shape as the `images` tensor, containing the RGB
3873  value of the pixels.
3874  The output is only well defined if the Y value in images are in [0,1],
3875  I value are in [-0.5957,0.5957] and Q value are in [-0.5226,0.5226].
3876
3877  Args:
3878    images: 2-D or higher rank. Image data to convert. Last dimension must be
3879      size 3.
3880
3881  Returns:
3882    images: tensor with the same shape as `images`.
3883  """
3884  images = ops.convert_to_tensor(images, name='images')
3885  kernel = ops.convert_to_tensor(
3886      _yiq_to_rgb_kernel, dtype=images.dtype, name='kernel')
3887  ndims = images.get_shape().ndims
3888  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
3889
3890
3891_rgb_to_yuv_kernel = [[0.299, -0.14714119, 0.61497538],
3892                      [0.587, -0.28886916, -0.51496512],
3893                      [0.114, 0.43601035, -0.10001026]]
3894
3895
3896@tf_export('image.rgb_to_yuv')
3897@dispatch.add_dispatch_support
3898def rgb_to_yuv(images):
3899  """Converts one or more images from RGB to YUV.
3900
3901  Outputs a tensor of the same shape as the `images` tensor, containing the YUV
3902  value of the pixels.
3903  The output is only well defined if the value in images are in [0, 1].
3904  There are two ways of representing an image: [0, 255] pixel values range or
3905  [0, 1] (as float) pixel values range. Users need to convert the input image
3906  into a float [0, 1] range.
3907
3908  Args:
3909    images: 2-D or higher rank. Image data to convert. Last dimension must be
3910      size 3.
3911
3912  Returns:
3913    images: tensor with the same shape as `images`.
3914  """
3915  images = ops.convert_to_tensor(images, name='images')
3916  kernel = ops.convert_to_tensor(
3917      _rgb_to_yuv_kernel, dtype=images.dtype, name='kernel')
3918  ndims = images.get_shape().ndims
3919  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
3920
3921
3922_yuv_to_rgb_kernel = [[1, 1, 1], [0, -0.394642334, 2.03206185],
3923                      [1.13988303, -0.58062185, 0]]
3924
3925
3926@tf_export('image.yuv_to_rgb')
3927@dispatch.add_dispatch_support
3928def yuv_to_rgb(images):
3929  """Converts one or more images from YUV to RGB.
3930
3931  Outputs a tensor of the same shape as the `images` tensor, containing the RGB
3932  value of the pixels.
3933  The output is only well defined if the Y value in images are in [0,1],
3934  U and V value are in [-0.5,0.5].
3935
3936  As per the above description, you need to scale your YUV images if their
3937  pixel values are not in the required range. Below given example illustrates
3938  preprocessing of each channel of images before feeding them to `yuv_to_rgb`.
3939
3940  ```python
3941  yuv_images = tf.random.uniform(shape=[100, 64, 64, 3], maxval=255)
3942  last_dimension_axis = len(yuv_images.shape) - 1
3943  yuv_tensor_images = tf.truediv(
3944      tf.subtract(
3945          yuv_images,
3946          tf.reduce_min(yuv_images)
3947      ),
3948      tf.subtract(
3949          tf.reduce_max(yuv_images),
3950          tf.reduce_min(yuv_images)
3951       )
3952  )
3953  y, u, v = tf.split(yuv_tensor_images, 3, axis=last_dimension_axis)
3954  target_uv_min, target_uv_max = -0.5, 0.5
3955  u = u * (target_uv_max - target_uv_min) + target_uv_min
3956  v = v * (target_uv_max - target_uv_min) + target_uv_min
3957  preprocessed_yuv_images = tf.concat([y, u, v], axis=last_dimension_axis)
3958  rgb_tensor_images = tf.image.yuv_to_rgb(preprocessed_yuv_images)
3959  ```
3960
3961  Args:
3962    images: 2-D or higher rank. Image data to convert. Last dimension must be
3963      size 3.
3964
3965  Returns:
3966    images: tensor with the same shape as `images`.
3967  """
3968  images = ops.convert_to_tensor(images, name='images')
3969  kernel = ops.convert_to_tensor(
3970      _yuv_to_rgb_kernel, dtype=images.dtype, name='kernel')
3971  ndims = images.get_shape().ndims
3972  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
3973
3974
3975def _verify_compatible_image_shapes(img1, img2):
3976  """Checks if two image tensors are compatible for applying SSIM or PSNR.
3977
3978  This function checks if two sets of images have ranks at least 3, and if the
3979  last three dimensions match.
3980
3981  Args:
3982    img1: Tensor containing the first image batch.
3983    img2: Tensor containing the second image batch.
3984
3985  Returns:
3986    A tuple containing: the first tensor shape, the second tensor shape, and a
3987    list of control_flow_ops.Assert() ops implementing the checks.
3988
3989  Raises:
3990    ValueError: When static shape check fails.
3991  """
3992  shape1 = img1.get_shape().with_rank_at_least(3)
3993  shape2 = img2.get_shape().with_rank_at_least(3)
3994  shape1[-3:].assert_is_compatible_with(shape2[-3:])
3995
3996  if shape1.ndims is not None and shape2.ndims is not None:
3997    for dim1, dim2 in zip(
3998        reversed(shape1.dims[:-3]), reversed(shape2.dims[:-3])):
3999      if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)):
4000        raise ValueError('Two images are not compatible: %s and %s' %
4001                         (shape1, shape2))
4002
4003  # Now assign shape tensors.
4004  shape1, shape2 = array_ops.shape_n([img1, img2])
4005
4006  # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable.
4007  checks = []
4008  checks.append(
4009      control_flow_ops.Assert(
4010          math_ops.greater_equal(array_ops.size(shape1), 3), [shape1, shape2],
4011          summarize=10))
4012  checks.append(
4013      control_flow_ops.Assert(
4014          math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])),
4015          [shape1, shape2],
4016          summarize=10))
4017  return shape1, shape2, checks
4018
4019
4020@tf_export('image.psnr')
4021@dispatch.add_dispatch_support
4022def psnr(a, b, max_val, name=None):
4023  """Returns the Peak Signal-to-Noise Ratio between a and b.
4024
4025  This is intended to be used on signals (or images). Produces a PSNR value for
4026  each image in batch.
4027
4028  The last three dimensions of input are expected to be [height, width, depth].
4029
4030  Example:
4031
4032  ```python
4033      # Read images from file.
4034      im1 = tf.decode_png('path/to/im1.png')
4035      im2 = tf.decode_png('path/to/im2.png')
4036      # Compute PSNR over tf.uint8 Tensors.
4037      psnr1 = tf.image.psnr(im1, im2, max_val=255)
4038
4039      # Compute PSNR over tf.float32 Tensors.
4040      im1 = tf.image.convert_image_dtype(im1, tf.float32)
4041      im2 = tf.image.convert_image_dtype(im2, tf.float32)
4042      psnr2 = tf.image.psnr(im1, im2, max_val=1.0)
4043      # psnr1 and psnr2 both have type tf.float32 and are almost equal.
4044  ```
4045
4046  Args:
4047    a: First set of images.
4048    b: Second set of images.
4049    max_val: The dynamic range of the images (i.e., the difference between the
4050      maximum the and minimum allowed values).
4051    name: Namespace to embed the computation in.
4052
4053  Returns:
4054    The scalar PSNR between a and b. The returned tensor has type `tf.float32`
4055    and shape [batch_size, 1].
4056  """
4057  with ops.name_scope(name, 'PSNR', [a, b]):
4058    # Need to convert the images to float32.  Scale max_val accordingly so that
4059    # PSNR is computed correctly.
4060    max_val = math_ops.cast(max_val, a.dtype)
4061    max_val = convert_image_dtype(max_val, dtypes.float32)
4062    a = convert_image_dtype(a, dtypes.float32)
4063    b = convert_image_dtype(b, dtypes.float32)
4064    mse = math_ops.reduce_mean(math_ops.squared_difference(a, b), [-3, -2, -1])
4065    psnr_val = math_ops.subtract(
4066        20 * math_ops.log(max_val) / math_ops.log(10.0),
4067        np.float32(10 / np.log(10)) * math_ops.log(mse),
4068        name='psnr')
4069
4070    _, _, checks = _verify_compatible_image_shapes(a, b)
4071    with ops.control_dependencies(checks):
4072      return array_ops.identity(psnr_val)
4073
4074
4075def _ssim_helper(x, y, reducer, max_val, compensation=1.0, k1=0.01, k2=0.03):
4076  r"""Helper function for computing SSIM.
4077
4078  SSIM estimates covariances with weighted sums.  The default parameters
4079  use a biased estimate of the covariance:
4080  Suppose `reducer` is a weighted sum, then the mean estimators are
4081    \mu_x = \sum_i w_i x_i,
4082    \mu_y = \sum_i w_i y_i,
4083  where w_i's are the weighted-sum weights, and covariance estimator is
4084    cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
4085  with assumption \sum_i w_i = 1. This covariance estimator is biased, since
4086    E[cov_{xy}] = (1 - \sum_i w_i ^ 2) Cov(X, Y).
4087  For SSIM measure with unbiased covariance estimators, pass as `compensation`
4088  argument (1 - \sum_i w_i ^ 2).
4089
4090  Args:
4091    x: First set of images.
4092    y: Second set of images.
4093    reducer: Function that computes 'local' averages from the set of images. For
4094      non-convolutional version, this is usually tf.reduce_mean(x, [1, 2]), and
4095      for convolutional version, this is usually tf.nn.avg_pool2d or
4096      tf.nn.conv2d with weighted-sum kernel.
4097    max_val: The dynamic range (i.e., the difference between the maximum
4098      possible allowed value and the minimum allowed value).
4099    compensation: Compensation factor. See above.
4100    k1: Default value 0.01
4101    k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so
4102      it would be better if we took the values in the range of 0 < K2 < 0.4).
4103
4104  Returns:
4105    A pair containing the luminance measure, and the contrast-structure measure.
4106  """
4107
4108  c1 = (k1 * max_val)**2
4109  c2 = (k2 * max_val)**2
4110
4111  # SSIM luminance measure is
4112  # (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1).
4113  mean0 = reducer(x)
4114  mean1 = reducer(y)
4115  num0 = mean0 * mean1 * 2.0
4116  den0 = math_ops.square(mean0) + math_ops.square(mean1)
4117  luminance = (num0 + c1) / (den0 + c1)
4118
4119  # SSIM contrast-structure measure is
4120  #   (2 * cov_{xy} + c2) / (cov_{xx} + cov_{yy} + c2).
4121  # Note that `reducer` is a weighted sum with weight w_k, \sum_i w_i = 1, then
4122  #   cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
4123  #          = \sum_i w_i x_i y_i - (\sum_i w_i x_i) (\sum_j w_j y_j).
4124  num1 = reducer(x * y) * 2.0
4125  den1 = reducer(math_ops.square(x) + math_ops.square(y))
4126  c2 *= compensation
4127  cs = (num1 - num0 + c2) / (den1 - den0 + c2)
4128
4129  # SSIM score is the product of the luminance and contrast-structure measures.
4130  return luminance, cs
4131
4132
4133def _fspecial_gauss(size, sigma):
4134  """Function to mimic the 'fspecial' gaussian MATLAB function."""
4135  size = ops.convert_to_tensor(size, dtypes.int32)
4136  sigma = ops.convert_to_tensor(sigma)
4137
4138  coords = math_ops.cast(math_ops.range(size), sigma.dtype)
4139  coords -= math_ops.cast(size - 1, sigma.dtype) / 2.0
4140
4141  g = math_ops.square(coords)
4142  g *= -0.5 / math_ops.square(sigma)
4143
4144  g = array_ops.reshape(g, shape=[1, -1]) + array_ops.reshape(g, shape=[-1, 1])
4145  g = array_ops.reshape(g, shape=[1, -1])  # For tf.nn.softmax().
4146  g = nn_ops.softmax(g)
4147  return array_ops.reshape(g, shape=[size, size, 1, 1])
4148
4149
4150def _ssim_per_channel(img1,
4151                      img2,
4152                      max_val=1.0,
4153                      filter_size=11,
4154                      filter_sigma=1.5,
4155                      k1=0.01,
4156                      k2=0.03):
4157  """Computes SSIM index between img1 and img2 per color channel.
4158
4159  This function matches the standard SSIM implementation from:
4160  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
4161  quality assessment: from error visibility to structural similarity. IEEE
4162  transactions on image processing.
4163
4164  Details:
4165    - 11x11 Gaussian filter of width 1.5 is used.
4166    - k1 = 0.01, k2 = 0.03 as in the original paper.
4167
4168  Args:
4169    img1: First image batch.
4170    img2: Second image batch.
4171    max_val: The dynamic range of the images (i.e., the difference between the
4172      maximum the and minimum allowed values).
4173    filter_size: Default value 11 (size of gaussian filter).
4174    filter_sigma: Default value 1.5 (width of gaussian filter).
4175    k1: Default value 0.01
4176    k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so
4177      it would be better if we took the values in the range of 0 < K2 < 0.4).
4178
4179  Returns:
4180    A pair of tensors containing and channel-wise SSIM and contrast-structure
4181    values. The shape is [..., channels].
4182  """
4183  filter_size = constant_op.constant(filter_size, dtype=dtypes.int32)
4184  filter_sigma = constant_op.constant(filter_sigma, dtype=img1.dtype)
4185
4186  shape1, shape2 = array_ops.shape_n([img1, img2])
4187  checks = [
4188      control_flow_ops.Assert(
4189          math_ops.reduce_all(
4190              math_ops.greater_equal(shape1[-3:-1], filter_size)),
4191          [shape1, filter_size],
4192          summarize=8),
4193      control_flow_ops.Assert(
4194          math_ops.reduce_all(
4195              math_ops.greater_equal(shape2[-3:-1], filter_size)),
4196          [shape2, filter_size],
4197          summarize=8)
4198  ]
4199
4200  # Enforce the check to run before computation.
4201  with ops.control_dependencies(checks):
4202    img1 = array_ops.identity(img1)
4203
4204  # TODO(sjhwang): Try to cache kernels and compensation factor.
4205  kernel = _fspecial_gauss(filter_size, filter_sigma)
4206  kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1])
4207
4208  # The correct compensation factor is `1.0 - tf.reduce_sum(tf.square(kernel))`,
4209  # but to match MATLAB implementation of MS-SSIM, we use 1.0 instead.
4210  compensation = 1.0
4211
4212  # TODO(sjhwang): Try FFT.
4213  # TODO(sjhwang): Gaussian kernel is separable in space. Consider applying
4214  #   1-by-n and n-by-1 Gaussian filters instead of an n-by-n filter.
4215  def reducer(x):
4216    shape = array_ops.shape(x)
4217    x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0))
4218    y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID')
4219    return array_ops.reshape(
4220        y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0))
4221
4222  luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation, k1,
4223                               k2)
4224
4225  # Average over the second and the third from the last: height, width.
4226  axes = constant_op.constant([-3, -2], dtype=dtypes.int32)
4227  ssim_val = math_ops.reduce_mean(luminance * cs, axes)
4228  cs = math_ops.reduce_mean(cs, axes)
4229  return ssim_val, cs
4230
4231
4232@tf_export('image.ssim')
4233@dispatch.add_dispatch_support
4234def ssim(img1,
4235         img2,
4236         max_val,
4237         filter_size=11,
4238         filter_sigma=1.5,
4239         k1=0.01,
4240         k2=0.03):
4241  """Computes SSIM index between img1 and img2.
4242
4243  This function is based on the standard SSIM implementation from:
4244  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
4245  quality assessment: from error visibility to structural similarity. IEEE
4246  transactions on image processing.
4247
4248  Note: The true SSIM is only defined on grayscale.  This function does not
4249  perform any colorspace transform.  (If the input is already YUV, then it will
4250  compute YUV SSIM average.)
4251
4252  Details:
4253    - 11x11 Gaussian filter of width 1.5 is used.
4254    - k1 = 0.01, k2 = 0.03 as in the original paper.
4255
4256  The image sizes must be at least 11x11 because of the filter size.
4257
4258  Example:
4259
4260  ```python
4261      # Read images (of size 255 x 255) from file.
4262      im1 = tf.image.decode_image(tf.io.read_file('path/to/im1.png'))
4263      im2 = tf.image.decode_image(tf.io.read_file('path/to/im2.png'))
4264      tf.shape(im1)  # `img1.png` has 3 channels; shape is `(255, 255, 3)`
4265      tf.shape(im2)  # `img2.png` has 3 channels; shape is `(255, 255, 3)`
4266      # Add an outer batch for each image.
4267      im1 = tf.expand_dims(im1, axis=0)
4268      im2 = tf.expand_dims(im2, axis=0)
4269      # Compute SSIM over tf.uint8 Tensors.
4270      ssim1 = tf.image.ssim(im1, im2, max_val=255, filter_size=11,
4271                            filter_sigma=1.5, k1=0.01, k2=0.03)
4272
4273      # Compute SSIM over tf.float32 Tensors.
4274      im1 = tf.image.convert_image_dtype(im1, tf.float32)
4275      im2 = tf.image.convert_image_dtype(im2, tf.float32)
4276      ssim2 = tf.image.ssim(im1, im2, max_val=1.0, filter_size=11,
4277                            filter_sigma=1.5, k1=0.01, k2=0.03)
4278      # ssim1 and ssim2 both have type tf.float32 and are almost equal.
4279  ```
4280
4281  Args:
4282    img1: First image batch. 4-D Tensor of shape `[batch, height, width,
4283      channels]` with only Positive Pixel Values.
4284    img2: Second image batch. 4-D Tensor of shape `[batch, height, width,
4285      channels]` with only Positive Pixel Values.
4286    max_val: The dynamic range of the images (i.e., the difference between the
4287      maximum the and minimum allowed values).
4288    filter_size: Default value 11 (size of gaussian filter).
4289    filter_sigma: Default value 1.5 (width of gaussian filter).
4290    k1: Default value 0.01
4291    k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so
4292      it would be better if we took the values in the range of 0 < K2 < 0.4).
4293
4294  Returns:
4295    A tensor containing an SSIM value for each image in batch.  Returned SSIM
4296    values are in range (-1, 1], when pixel values are non-negative. Returns
4297    a tensor with shape: broadcast(img1.shape[:-3], img2.shape[:-3]).
4298  """
4299  with ops.name_scope(None, 'SSIM', [img1, img2]):
4300    # Convert to tensor if needed.
4301    img1 = ops.convert_to_tensor(img1, name='img1')
4302    img2 = ops.convert_to_tensor(img2, name='img2')
4303    # Shape checking.
4304    _, _, checks = _verify_compatible_image_shapes(img1, img2)
4305    with ops.control_dependencies(checks):
4306      img1 = array_ops.identity(img1)
4307
4308    # Need to convert the images to float32.  Scale max_val accordingly so that
4309    # SSIM is computed correctly.
4310    max_val = math_ops.cast(max_val, img1.dtype)
4311    max_val = convert_image_dtype(max_val, dtypes.float32)
4312    img1 = convert_image_dtype(img1, dtypes.float32)
4313    img2 = convert_image_dtype(img2, dtypes.float32)
4314    ssim_per_channel, _ = _ssim_per_channel(img1, img2, max_val, filter_size,
4315                                            filter_sigma, k1, k2)
4316    # Compute average over color channels.
4317    return math_ops.reduce_mean(ssim_per_channel, [-1])
4318
4319
4320# Default values obtained by Wang et al.
4321_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)
4322
4323
4324@tf_export('image.ssim_multiscale')
4325@dispatch.add_dispatch_support
4326def ssim_multiscale(img1,
4327                    img2,
4328                    max_val,
4329                    power_factors=_MSSSIM_WEIGHTS,
4330                    filter_size=11,
4331                    filter_sigma=1.5,
4332                    k1=0.01,
4333                    k2=0.03):
4334  """Computes the MS-SSIM between img1 and img2.
4335
4336  This function assumes that `img1` and `img2` are image batches, i.e. the last
4337  three dimensions are [height, width, channels].
4338
4339  Note: The true SSIM is only defined on grayscale.  This function does not
4340  perform any colorspace transform.  (If the input is already YUV, then it will
4341  compute YUV SSIM average.)
4342
4343  Original paper: Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale
4344  structural similarity for image quality assessment." Signals, Systems and
4345  Computers, 2004.
4346
4347  Args:
4348    img1: First image batch with only Positive Pixel Values.
4349    img2: Second image batch with only Positive Pixel Values. Must have the
4350    same rank as img1.
4351    max_val: The dynamic range of the images (i.e., the difference between the
4352      maximum the and minimum allowed values).
4353    power_factors: Iterable of weights for each of the scales. The number of
4354      scales used is the length of the list. Index 0 is the unscaled
4355      resolution's weight and each increasing scale corresponds to the image
4356      being downsampled by 2.  Defaults to (0.0448, 0.2856, 0.3001, 0.2363,
4357      0.1333), which are the values obtained in the original paper.
4358    filter_size: Default value 11 (size of gaussian filter).
4359    filter_sigma: Default value 1.5 (width of gaussian filter).
4360    k1: Default value 0.01
4361    k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so
4362      it would be better if we took the values in the range of 0 < K2 < 0.4).
4363
4364  Returns:
4365    A tensor containing an MS-SSIM value for each image in batch.  The values
4366    are in range [0, 1].  Returns a tensor with shape:
4367    broadcast(img1.shape[:-3], img2.shape[:-3]).
4368  """
4369  with ops.name_scope(None, 'MS-SSIM', [img1, img2]):
4370    # Convert to tensor if needed.
4371    img1 = ops.convert_to_tensor(img1, name='img1')
4372    img2 = ops.convert_to_tensor(img2, name='img2')
4373    # Shape checking.
4374    shape1, shape2, checks = _verify_compatible_image_shapes(img1, img2)
4375    with ops.control_dependencies(checks):
4376      img1 = array_ops.identity(img1)
4377
4378    # Need to convert the images to float32.  Scale max_val accordingly so that
4379    # SSIM is computed correctly.
4380    max_val = math_ops.cast(max_val, img1.dtype)
4381    max_val = convert_image_dtype(max_val, dtypes.float32)
4382    img1 = convert_image_dtype(img1, dtypes.float32)
4383    img2 = convert_image_dtype(img2, dtypes.float32)
4384
4385    imgs = [img1, img2]
4386    shapes = [shape1, shape2]
4387
4388    # img1 and img2 are assumed to be a (multi-dimensional) batch of
4389    # 3-dimensional images (height, width, channels). `heads` contain the batch
4390    # dimensions, and `tails` contain the image dimensions.
4391    heads = [s[:-3] for s in shapes]
4392    tails = [s[-3:] for s in shapes]
4393
4394    divisor = [1, 2, 2, 1]
4395    divisor_tensor = constant_op.constant(divisor[1:], dtype=dtypes.int32)
4396
4397    def do_pad(images, remainder):
4398      padding = array_ops.expand_dims(remainder, -1)
4399      padding = array_ops.pad(padding, [[1, 0], [1, 0]])
4400      return [array_ops.pad(x, padding, mode='SYMMETRIC') for x in images]
4401
4402    mcs = []
4403    for k in range(len(power_factors)):
4404      with ops.name_scope(None, 'Scale%d' % k, imgs):
4405        if k > 0:
4406          # Avg pool takes rank 4 tensors. Flatten leading dimensions.
4407          flat_imgs = [
4408              array_ops.reshape(x, array_ops.concat([[-1], t], 0))
4409              for x, t in zip(imgs, tails)
4410          ]
4411
4412          remainder = tails[0] % divisor_tensor
4413          need_padding = math_ops.reduce_any(math_ops.not_equal(remainder, 0))
4414          # pylint: disable=cell-var-from-loop
4415          padded = control_flow_ops.cond(need_padding,
4416                                         lambda: do_pad(flat_imgs, remainder),
4417                                         lambda: flat_imgs)
4418          # pylint: enable=cell-var-from-loop
4419
4420          downscaled = [
4421              nn_ops.avg_pool(
4422                  x, ksize=divisor, strides=divisor, padding='VALID')
4423              for x in padded
4424          ]
4425          tails = [x[1:] for x in array_ops.shape_n(downscaled)]
4426          imgs = [
4427              array_ops.reshape(x, array_ops.concat([h, t], 0))
4428              for x, h, t in zip(downscaled, heads, tails)
4429          ]
4430
4431        # Overwrite previous ssim value since we only need the last one.
4432        ssim_per_channel, cs = _ssim_per_channel(
4433            *imgs,
4434            max_val=max_val,
4435            filter_size=filter_size,
4436            filter_sigma=filter_sigma,
4437            k1=k1,
4438            k2=k2)
4439        mcs.append(nn_ops.relu(cs))
4440
4441    # Remove the cs score for the last scale. In the MS-SSIM calculation,
4442    # we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p).
4443    mcs.pop()  # Remove the cs score for the last scale.
4444    mcs_and_ssim = array_ops.stack(
4445        mcs + [nn_ops.relu(ssim_per_channel)], axis=-1)
4446    # Take weighted geometric mean across the scale axis.
4447    ms_ssim = math_ops.reduce_prod(
4448        math_ops.pow(mcs_and_ssim, power_factors), [-1])
4449
4450    return math_ops.reduce_mean(ms_ssim, [-1])  # Avg over color channels.
4451
4452
4453@tf_export('image.image_gradients')
4454@dispatch.add_dispatch_support
4455def image_gradients(image):
4456  """Returns image gradients (dy, dx) for each color channel.
4457
4458  Both output tensors have the same shape as the input: [batch_size, h, w,
4459  d]. The gradient values are organized so that [I(x+1, y) - I(x, y)] is in
4460  location (x, y). That means that dy will always have zeros in the last row,
4461  and dx will always have zeros in the last column.
4462
4463  Usage Example:
4464    ```python
4465    BATCH_SIZE = 1
4466    IMAGE_HEIGHT = 5
4467    IMAGE_WIDTH = 5
4468    CHANNELS = 1
4469    image = tf.reshape(tf.range(IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS,
4470      delta=1, dtype=tf.float32),
4471      shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
4472    dy, dx = tf.image.image_gradients(image)
4473    print(image[0, :,:,0])
4474    tf.Tensor(
4475      [[ 0.  1.  2.  3.  4.]
4476      [ 5.  6.  7.  8.  9.]
4477      [10. 11. 12. 13. 14.]
4478      [15. 16. 17. 18. 19.]
4479      [20. 21. 22. 23. 24.]], shape=(5, 5), dtype=float32)
4480    print(dy[0, :,:,0])
4481    tf.Tensor(
4482      [[5. 5. 5. 5. 5.]
4483      [5. 5. 5. 5. 5.]
4484      [5. 5. 5. 5. 5.]
4485      [5. 5. 5. 5. 5.]
4486      [0. 0. 0. 0. 0.]], shape=(5, 5), dtype=float32)
4487    print(dx[0, :,:,0])
4488    tf.Tensor(
4489      [[1. 1. 1. 1. 0.]
4490      [1. 1. 1. 1. 0.]
4491      [1. 1. 1. 1. 0.]
4492      [1. 1. 1. 1. 0.]
4493      [1. 1. 1. 1. 0.]], shape=(5, 5), dtype=float32)
4494    ```
4495
4496  Args:
4497    image: Tensor with shape [batch_size, h, w, d].
4498
4499  Returns:
4500    Pair of tensors (dy, dx) holding the vertical and horizontal image
4501    gradients (1-step finite difference).
4502
4503  Raises:
4504    ValueError: If `image` is not a 4D tensor.
4505  """
4506  if image.get_shape().ndims != 4:
4507    raise ValueError('image_gradients expects a 4D tensor '
4508                     '[batch_size, h, w, d], not {}.'.format(image.get_shape()))
4509  image_shape = array_ops.shape(image)
4510  batch_size, height, width, depth = array_ops.unstack(image_shape)
4511  dy = image[:, 1:, :, :] - image[:, :-1, :, :]
4512  dx = image[:, :, 1:, :] - image[:, :, :-1, :]
4513
4514  # Return tensors with same size as original image by concatenating
4515  # zeros. Place the gradient [I(x+1,y) - I(x,y)] on the base pixel (x, y).
4516  shape = array_ops.stack([batch_size, 1, width, depth])
4517  dy = array_ops.concat([dy, array_ops.zeros(shape, image.dtype)], 1)
4518  dy = array_ops.reshape(dy, image_shape)
4519
4520  shape = array_ops.stack([batch_size, height, 1, depth])
4521  dx = array_ops.concat([dx, array_ops.zeros(shape, image.dtype)], 2)
4522  dx = array_ops.reshape(dx, image_shape)
4523
4524  return dy, dx
4525
4526
4527@tf_export('image.sobel_edges')
4528@dispatch.add_dispatch_support
4529def sobel_edges(image):
4530  """Returns a tensor holding Sobel edge maps.
4531
4532  Example usage:
4533
4534  For general usage, `image` would be loaded from a file as below:
4535
4536  ```python
4537  image_bytes = tf.io.read_file(path_to_image_file)
4538  image = tf.image.decode_image(image_bytes)
4539  image = tf.cast(image, tf.float32)
4540  image = tf.expand_dims(image, 0)
4541  ```
4542  But for demo purposes, we are using randomly generated values for `image`:
4543
4544  >>> image = tf.random.uniform(
4545  ...   maxval=255, shape=[1, 28, 28, 3], dtype=tf.float32)
4546  >>> sobel = tf.image.sobel_edges(image)
4547  >>> sobel_y = np.asarray(sobel[0, :, :, :, 0]) # sobel in y-direction
4548  >>> sobel_x = np.asarray(sobel[0, :, :, :, 1]) # sobel in x-direction
4549
4550  For displaying the sobel results, PIL's [Image Module](
4551  https://pillow.readthedocs.io/en/stable/reference/Image.html) can be used:
4552
4553  ```python
4554  # Display edge maps for the first channel (at index 0)
4555  Image.fromarray(sobel_y[..., 0] / 4 + 0.5).show()
4556  Image.fromarray(sobel_x[..., 0] / 4 + 0.5).show()
4557  ```
4558
4559  Args:
4560    image: Image tensor with shape [batch_size, h, w, d] and type float32 or
4561      float64.  The image(s) must be 2x2 or larger.
4562
4563  Returns:
4564    Tensor holding edge maps for each channel. Returns a tensor with shape
4565    [batch_size, h, w, d, 2] where the last two dimensions hold [[dy[0], dx[0]],
4566    [dy[1], dx[1]], ..., [dy[d-1], dx[d-1]]] calculated using the Sobel filter.
4567  """
4568  # Define vertical and horizontal Sobel filters.
4569  static_image_shape = image.get_shape()
4570  image_shape = array_ops.shape(image)
4571  kernels = [[[-1, -2, -1], [0, 0, 0], [1, 2, 1]],
4572             [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]]
4573  num_kernels = len(kernels)
4574  kernels = np.transpose(np.asarray(kernels), (1, 2, 0))
4575  kernels = np.expand_dims(kernels, -2)
4576  kernels_tf = constant_op.constant(kernels, dtype=image.dtype)
4577
4578  kernels_tf = array_ops.tile(
4579      kernels_tf, [1, 1, image_shape[-1], 1], name='sobel_filters')
4580
4581  # Use depth-wise convolution to calculate edge maps per channel.
4582  pad_sizes = [[0, 0], [1, 1], [1, 1], [0, 0]]
4583  padded = array_ops.pad(image, pad_sizes, mode='REFLECT')
4584
4585  # Output tensor has shape [batch_size, h, w, d * num_kernels].
4586  strides = [1, 1, 1, 1]
4587  output = nn.depthwise_conv2d(padded, kernels_tf, strides, 'VALID')
4588
4589  # Reshape to [batch_size, h, w, d, num_kernels].
4590  shape = array_ops.concat([image_shape, [num_kernels]], 0)
4591  output = array_ops.reshape(output, shape=shape)
4592  output.set_shape(static_image_shape.concatenate([num_kernels]))
4593  return output
4594
4595
4596def resize_bicubic(images,
4597                   size,
4598                   align_corners=False,
4599                   name=None,
4600                   half_pixel_centers=False):
4601  return gen_image_ops.resize_bicubic(
4602      images=images,
4603      size=size,
4604      align_corners=align_corners,
4605      half_pixel_centers=half_pixel_centers,
4606      name=name)
4607
4608
4609def resize_bilinear(images,
4610                    size,
4611                    align_corners=False,
4612                    name=None,
4613                    half_pixel_centers=False):
4614  return gen_image_ops.resize_bilinear(
4615      images=images,
4616      size=size,
4617      align_corners=align_corners,
4618      half_pixel_centers=half_pixel_centers,
4619      name=name)
4620
4621
4622def resize_nearest_neighbor(images,
4623                            size,
4624                            align_corners=False,
4625                            name=None,
4626                            half_pixel_centers=False):
4627  return gen_image_ops.resize_nearest_neighbor(
4628      images=images,
4629      size=size,
4630      align_corners=align_corners,
4631      half_pixel_centers=half_pixel_centers,
4632      name=name)
4633
4634
4635resize_area_deprecation = deprecation.deprecated(
4636    date=None,
4637    instructions=(
4638        'Use `tf.image.resize(...method=ResizeMethod.AREA...)` instead.'))
4639tf_export(v1=['image.resize_area'])(
4640    resize_area_deprecation(
4641        dispatch.add_dispatch_support(gen_image_ops.resize_area)))
4642
4643resize_bicubic_deprecation = deprecation.deprecated(
4644    date=None,
4645    instructions=(
4646        'Use `tf.image.resize(...method=ResizeMethod.BICUBIC...)` instead.'))
4647tf_export(v1=['image.resize_bicubic'])(
4648    dispatch.add_dispatch_support(resize_bicubic_deprecation(resize_bicubic)))
4649
4650resize_bilinear_deprecation = deprecation.deprecated(
4651    date=None,
4652    instructions=(
4653        'Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.'))
4654tf_export(v1=['image.resize_bilinear'])(
4655    dispatch.add_dispatch_support(resize_bilinear_deprecation(resize_bilinear)))
4656
4657resize_nearest_neighbor_deprecation = deprecation.deprecated(
4658    date=None,
4659    instructions=(
4660        'Use `tf.image.resize(...method=ResizeMethod.NEAREST_NEIGHBOR...)` '
4661        'instead.'))
4662tf_export(v1=['image.resize_nearest_neighbor'])(
4663    dispatch.add_dispatch_support(
4664        resize_nearest_neighbor_deprecation(resize_nearest_neighbor)))
4665
4666
4667@tf_export('image.crop_and_resize', v1=[])
4668@dispatch.add_dispatch_support
4669def crop_and_resize_v2(image,
4670                       boxes,
4671                       box_indices,
4672                       crop_size,
4673                       method='bilinear',
4674                       extrapolation_value=0,
4675                       name=None):
4676  """Extracts crops from the input image tensor and resizes them.
4677
4678  Extracts crops from the input image tensor and resizes them using bilinear
4679  sampling or nearest neighbor sampling (possibly with aspect ratio change) to a
4680  common output size specified by `crop_size`. This is more general than the
4681  `crop_to_bounding_box` op which extracts a fixed size slice from the input
4682  image and does not allow resizing or aspect ratio change.
4683
4684  Returns a tensor with `crops` from the input `image` at positions defined at
4685  the bounding box locations in `boxes`. The cropped boxes are all resized (with
4686  bilinear or nearest neighbor interpolation) to a fixed
4687  `size = [crop_height, crop_width]`. The result is a 4-D tensor
4688  `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned.
4689  In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical
4690  results to using `tf.compat.v1.image.resize_bilinear()` or
4691  `tf.compat.v1.image.resize_nearest_neighbor()`(depends on the `method`
4692  argument) with
4693  `align_corners=True`.
4694
4695  Args:
4696    image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
4697      Both `image_height` and `image_width` need to be positive.
4698    boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
4699      specifies the coordinates of a box in the `box_ind[i]` image and is
4700      specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
4701      coordinate value of `y` is mapped to the image coordinate at `y *
4702      (image_height - 1)`, so as the `[0, 1]` interval of normalized image
4703      height is mapped to `[0, image_height - 1]` in image height coordinates.
4704      We do allow `y1` > `y2`, in which case the sampled crop is an up-down
4705      flipped version of the original image. The width dimension is treated
4706      similarly. Normalized coordinates outside the `[0, 1]` range are allowed,
4707      in which case we use `extrapolation_value` to extrapolate the input image
4708      values.
4709    box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0,
4710      batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box
4711      refers to.
4712    crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`.
4713      All cropped image patches are resized to this size. The aspect ratio of
4714      the image content is not preserved. Both `crop_height` and `crop_width`
4715      need to be positive.
4716    method: An optional string specifying the sampling method for resizing. It
4717      can be either `"bilinear"` or `"nearest"` and default to `"bilinear"`.
4718      Currently two sampling methods are supported: Bilinear and Nearest
4719        Neighbor.
4720    extrapolation_value: An optional `float`. Defaults to `0`. Value used for
4721      extrapolation, when applicable.
4722    name: A name for the operation (optional).
4723
4724  Returns:
4725    A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
4726
4727  Example:
4728
4729  ```python
4730  import tensorflow as tf
4731  BATCH_SIZE = 1
4732  NUM_BOXES = 5
4733  IMAGE_HEIGHT = 256
4734  IMAGE_WIDTH = 256
4735  CHANNELS = 3
4736  CROP_SIZE = (24, 24)
4737
4738  image = tf.random.normal(shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH,
4739  CHANNELS) )
4740  boxes = tf.random.uniform(shape=(NUM_BOXES, 4))
4741  box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0,
4742  maxval=BATCH_SIZE, dtype=tf.int32)
4743  output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE)
4744  output.shape  #=> (5, 24, 24, 3)
4745  ```
4746  """
4747  return gen_image_ops.crop_and_resize(image, boxes, box_indices, crop_size,
4748                                       method, extrapolation_value, name)
4749
4750
4751@tf_export(v1=['image.crop_and_resize'])
4752@dispatch.add_dispatch_support
4753@deprecation.deprecated_args(None,
4754                             'box_ind is deprecated, use box_indices instead',
4755                             'box_ind')
4756def crop_and_resize_v1(  # pylint: disable=missing-docstring
4757    image,
4758    boxes,
4759    box_ind=None,
4760    crop_size=None,
4761    method='bilinear',
4762    extrapolation_value=0,
4763    name=None,
4764    box_indices=None):
4765  box_ind = deprecation.deprecated_argument_lookup('box_indices', box_indices,
4766                                                   'box_ind', box_ind)
4767  return gen_image_ops.crop_and_resize(image, boxes, box_ind, crop_size, method,
4768                                       extrapolation_value, name)
4769
4770
4771crop_and_resize_v1.__doc__ = gen_image_ops.crop_and_resize.__doc__
4772
4773
4774@tf_export(v1=['image.extract_glimpse'])
4775@dispatch.add_dispatch_support
4776def extract_glimpse(
4777    input,  # pylint: disable=redefined-builtin
4778    size,
4779    offsets,
4780    centered=True,
4781    normalized=True,
4782    uniform_noise=True,
4783    name=None):
4784  """Extracts a glimpse from the input tensor.
4785
4786  Returns a set of windows called glimpses extracted at location
4787  `offsets` from the input tensor. If the windows only partially
4788  overlaps the inputs, the non-overlapping areas will be filled with
4789  random noise.
4790
4791  The result is a 4-D tensor of shape `[batch_size, glimpse_height,
4792  glimpse_width, channels]`. The channels and batch dimensions are the
4793  same as that of the input tensor. The height and width of the output
4794  windows are specified in the `size` parameter.
4795
4796  The argument `normalized` and `centered` controls how the windows are built:
4797
4798  * If the coordinates are normalized but not centered, 0.0 and 1.0
4799    correspond to the minimum and maximum of each height and width
4800    dimension.
4801  * If the coordinates are both normalized and centered, they range from
4802    -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
4803    left corner, the lower right corner is located at (1.0, 1.0) and the
4804    center is at (0, 0).
4805  * If the coordinates are not normalized they are interpreted as
4806    numbers of pixels.
4807
4808  Usage Example:
4809
4810  >>> x = [[[[0.0],
4811  ...           [1.0],
4812  ...           [2.0]],
4813  ...          [[3.0],
4814  ...           [4.0],
4815  ...           [5.0]],
4816  ...          [[6.0],
4817  ...           [7.0],
4818  ...           [8.0]]]]
4819  >>> tf.compat.v1.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
4820  ...                                    centered=False, normalized=False)
4821  <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
4822  array([[[[0.],
4823           [1.]],
4824          [[3.],
4825           [4.]]]], dtype=float32)>
4826
4827  Args:
4828    input: A `Tensor` of type `float32`. A 4-D float tensor of shape
4829      `[batch_size, height, width, channels]`.
4830    size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
4831      size of the glimpses to extract.  The glimpse height must be specified
4832      first, following by the glimpse width.
4833    offsets: A `Tensor` of type `float32`. A 2-D integer tensor of shape
4834      `[batch_size, 2]` containing the y, x locations of the center of each
4835      window.
4836    centered: An optional `bool`. Defaults to `True`. indicates if the offset
4837      coordinates are centered relative to the image, in which case the (0, 0)
4838      offset is relative to the center of the input images. If false, the (0,0)
4839      offset corresponds to the upper left corner of the input images.
4840    normalized: An optional `bool`. Defaults to `True`. indicates if the offset
4841      coordinates are normalized.
4842    uniform_noise: An optional `bool`. Defaults to `True`. indicates if the
4843      noise should be generated using a uniform distribution or a Gaussian
4844      distribution.
4845    name: A name for the operation (optional).
4846
4847  Returns:
4848    A `Tensor` of type `float32`.
4849  """
4850  return gen_image_ops.extract_glimpse(
4851      input=input,
4852      size=size,
4853      offsets=offsets,
4854      centered=centered,
4855      normalized=normalized,
4856      uniform_noise=uniform_noise,
4857      name=name)
4858
4859
4860@tf_export('image.extract_glimpse', v1=[])
4861@dispatch.add_dispatch_support
4862def extract_glimpse_v2(
4863    input,  # pylint: disable=redefined-builtin
4864    size,
4865    offsets,
4866    centered=True,
4867    normalized=True,
4868    noise='uniform',
4869    name=None):
4870  """Extracts a glimpse from the input tensor.
4871
4872  Returns a set of windows called glimpses extracted at location
4873  `offsets` from the input tensor. If the windows only partially
4874  overlaps the inputs, the non-overlapping areas will be filled with
4875  random noise.
4876
4877  The result is a 4-D tensor of shape `[batch_size, glimpse_height,
4878  glimpse_width, channels]`. The channels and batch dimensions are the
4879  same as that of the input tensor. The height and width of the output
4880  windows are specified in the `size` parameter.
4881
4882  The argument `normalized` and `centered` controls how the windows are built:
4883
4884  * If the coordinates are normalized but not centered, 0.0 and 1.0
4885    correspond to the minimum and maximum of each height and width
4886    dimension.
4887  * If the coordinates are both normalized and centered, they range from
4888    -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
4889    left corner, the lower right corner is located at (1.0, 1.0) and the
4890    center is at (0, 0).
4891  * If the coordinates are not normalized they are interpreted as
4892    numbers of pixels.
4893
4894  Usage Example:
4895
4896  >>> x = [[[[0.0],
4897  ...           [1.0],
4898  ...           [2.0]],
4899  ...          [[3.0],
4900  ...           [4.0],
4901  ...           [5.0]],
4902  ...          [[6.0],
4903  ...           [7.0],
4904  ...           [8.0]]]]
4905  >>> tf.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
4906  ...                         centered=False, normalized=False)
4907  <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
4908  array([[[[4.],
4909           [5.]],
4910          [[7.],
4911           [8.]]]], dtype=float32)>
4912
4913  Args:
4914    input: A `Tensor` of type `float32`. A 4-D float tensor of shape
4915      `[batch_size, height, width, channels]`.
4916    size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
4917      size of the glimpses to extract.  The glimpse height must be specified
4918      first, following by the glimpse width.
4919    offsets: A `Tensor` of type `float32`. A 2-D integer tensor of shape
4920      `[batch_size, 2]` containing the y, x locations of the center of each
4921      window.
4922    centered: An optional `bool`. Defaults to `True`. indicates if the offset
4923      coordinates are centered relative to the image, in which case the (0, 0)
4924      offset is relative to the center of the input images. If false, the (0,0)
4925      offset corresponds to the upper left corner of the input images.
4926    normalized: An optional `bool`. Defaults to `True`. indicates if the offset
4927      coordinates are normalized.
4928    noise: An optional `string`. Defaults to `uniform`. indicates if the noise
4929      should be `uniform` (uniform distribution), `gaussian` (gaussian
4930      distribution), or `zero` (zero padding).
4931    name: A name for the operation (optional).
4932
4933  Returns:
4934    A `Tensor` of type `float32`.
4935  """
4936  return gen_image_ops.extract_glimpse_v2(
4937      input=input,
4938      size=size,
4939      offsets=offsets,
4940      centered=centered,
4941      normalized=normalized,
4942      noise=noise,
4943      uniform_noise=False,
4944      name=name)
4945
4946
4947@tf_export('image.combined_non_max_suppression')
4948@dispatch.add_dispatch_support
4949def combined_non_max_suppression(boxes,
4950                                 scores,
4951                                 max_output_size_per_class,
4952                                 max_total_size,
4953                                 iou_threshold=0.5,
4954                                 score_threshold=float('-inf'),
4955                                 pad_per_class=False,
4956                                 clip_boxes=True,
4957                                 name=None):
4958  """Greedily selects a subset of bounding boxes in descending order of score.
4959
4960  This operation performs non_max_suppression on the inputs per batch, across
4961  all classes.
4962  Prunes away boxes that have high intersection-over-union (IOU) overlap
4963  with previously selected boxes.  Bounding boxes are supplied as
4964  [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
4965  diagonal pair of box corners and the coordinates can be provided as normalized
4966  (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
4967  is agnostic to where the origin is in the coordinate system. Also note that
4968  this algorithm is invariant to orthogonal transformations and translations
4969  of the coordinate system; thus translating or reflections of the coordinate
4970  system result in the same boxes being selected by the algorithm.
4971  The output of this operation is the final boxes, scores and classes tensor
4972  returned after performing non_max_suppression.
4973
4974  Args:
4975    boxes: A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q`
4976      is 1 then same boxes are used for all classes otherwise, if `q` is equal
4977      to number of classes, class-specific boxes are used.
4978    scores: A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]`
4979      representing a single score corresponding to each box (each row of boxes).
4980    max_output_size_per_class: A scalar integer `Tensor` representing the
4981      maximum number of boxes to be selected by non-max suppression per class
4982    max_total_size: A int32 scalar representing maximum number of boxes retained
4983      over all classes. Note that setting this value to a large number may
4984      result in OOM error depending on the system workload.
4985    iou_threshold: A float representing the threshold for deciding whether boxes
4986      overlap too much with respect to IOU.
4987    score_threshold: A float representing the threshold for deciding when to
4988      remove boxes based on score.
4989    pad_per_class: If false, the output nmsed boxes, scores and classes are
4990      padded/clipped to `max_total_size`. If true, the output nmsed boxes,
4991      scores and classes are padded to be of length
4992      `max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in
4993      which case it is clipped to `max_total_size`. Defaults to false.
4994    clip_boxes: If true, the coordinates of output nmsed boxes will be clipped
4995      to [0, 1]. If false, output the box coordinates as it is. Defaults to
4996      true.
4997    name: A name for the operation (optional).
4998
4999  Returns:
5000    'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
5001      containing the non-max suppressed boxes.
5002    'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
5003      the scores for the boxes.
5004    'nmsed_classes': A [batch_size, max_detections] float32 tensor
5005      containing the class for boxes.
5006    'valid_detections': A [batch_size] int32 tensor indicating the number of
5007      valid detections per batch item. Only the top valid_detections[i] entries
5008      in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
5009      entries are zero paddings.
5010  """
5011  with ops.name_scope(name, 'combined_non_max_suppression'):
5012    iou_threshold = ops.convert_to_tensor(
5013        iou_threshold, dtype=dtypes.float32, name='iou_threshold')
5014    score_threshold = ops.convert_to_tensor(
5015        score_threshold, dtype=dtypes.float32, name='score_threshold')
5016
5017    # Convert `max_total_size` to tensor *without* setting the `dtype` param.
5018    # This allows us to catch `int32` overflow case with `max_total_size`
5019    # whose expected dtype is `int32` by the op registration. Any number within
5020    # `int32` will get converted to `int32` tensor. Anything larger will get
5021    # converted to `int64`. Passing in `int64` for `max_total_size` to the op
5022    # will throw dtype mismatch exception.
5023    # TODO(b/173251596): Once there is a more general solution to warn against
5024    # int overflow conversions, revisit this check.
5025    max_total_size = ops.convert_to_tensor(max_total_size)
5026
5027    return gen_image_ops.combined_non_max_suppression(
5028        boxes, scores, max_output_size_per_class, max_total_size, iou_threshold,
5029        score_threshold, pad_per_class, clip_boxes)
5030
5031
5032def _bbox_overlap(boxes_a, boxes_b):
5033  """Calculates the overlap (iou - intersection over union) between boxes_a and boxes_b.
5034
5035  Args:
5036    boxes_a: a tensor with a shape of [batch_size, N, 4]. N is the number of
5037      boxes per image. The last dimension is the pixel coordinates in
5038      [ymin, xmin, ymax, xmax] form.
5039    boxes_b: a tensor with a shape of [batch_size, M, 4]. M is the number of
5040      boxes. The last dimension is the pixel coordinates in
5041      [ymin, xmin, ymax, xmax] form.
5042  Returns:
5043    intersection_over_union: a tensor with as a shape of [batch_size, N, M],
5044    representing the ratio of intersection area over union area (IoU) between
5045    two boxes
5046  """
5047  with ops.name_scope('bbox_overlap'):
5048    a_y_min, a_x_min, a_y_max, a_x_max = array_ops.split(
5049        value=boxes_a, num_or_size_splits=4, axis=2)
5050    b_y_min, b_x_min, b_y_max, b_x_max = array_ops.split(
5051        value=boxes_b, num_or_size_splits=4, axis=2)
5052
5053    # Calculates the intersection area.
5054    i_xmin = math_ops.maximum(
5055        a_x_min, array_ops.transpose(b_x_min, [0, 2, 1]))
5056    i_xmax = math_ops.minimum(
5057        a_x_max, array_ops.transpose(b_x_max, [0, 2, 1]))
5058    i_ymin = math_ops.maximum(
5059        a_y_min, array_ops.transpose(b_y_min, [0, 2, 1]))
5060    i_ymax = math_ops.minimum(
5061        a_y_max, array_ops.transpose(b_y_max, [0, 2, 1]))
5062    i_area = math_ops.maximum(
5063        (i_xmax - i_xmin), 0) * math_ops.maximum((i_ymax - i_ymin), 0)
5064
5065    # Calculates the union area.
5066    a_area = (a_y_max - a_y_min) * (a_x_max - a_x_min)
5067    b_area = (b_y_max - b_y_min) * (b_x_max - b_x_min)
5068    EPSILON = 1e-8
5069    # Adds a small epsilon to avoid divide-by-zero.
5070    u_area = a_area + array_ops.transpose(b_area, [0, 2, 1]) - i_area + EPSILON
5071
5072    # Calculates IoU.
5073    intersection_over_union = i_area / u_area
5074
5075    return intersection_over_union
5076
5077
5078def _self_suppression(iou, _, iou_sum, iou_threshold):
5079  """Suppress boxes in the same tile.
5080
5081     Compute boxes that cannot be suppressed by others (i.e.,
5082     can_suppress_others), and then use them to suppress boxes in the same tile.
5083
5084  Args:
5085    iou: a tensor of shape [batch_size, num_boxes_with_padding] representing
5086    intersection over union.
5087    iou_sum: a scalar tensor.
5088    iou_threshold: a scalar tensor.
5089
5090  Returns:
5091    iou_suppressed: a tensor of shape [batch_size, num_boxes_with_padding].
5092    iou_diff: a scalar tensor representing whether any box is supressed in
5093      this step.
5094    iou_sum_new: a scalar tensor of shape [batch_size] that represents
5095      the iou sum after suppression.
5096    iou_threshold: a scalar tensor.
5097  """
5098  batch_size = array_ops.shape(iou)[0]
5099  can_suppress_others = math_ops.cast(
5100      array_ops.reshape(
5101          math_ops.reduce_max(iou, 1) < iou_threshold, [batch_size, -1, 1]),
5102      iou.dtype)
5103  iou_after_suppression = array_ops.reshape(
5104      math_ops.cast(
5105          math_ops.reduce_max(can_suppress_others * iou, 1) < iou_threshold,
5106          iou.dtype),
5107      [batch_size, -1, 1]) * iou
5108  iou_sum_new = math_ops.reduce_sum(iou_after_suppression, [1, 2])
5109  return [
5110      iou_after_suppression,
5111      math_ops.reduce_any(iou_sum - iou_sum_new > iou_threshold), iou_sum_new,
5112      iou_threshold
5113  ]
5114
5115
5116def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, tile_size):
5117  """Suppress boxes between different tiles.
5118
5119  Args:
5120    boxes: a tensor of shape [batch_size, num_boxes_with_padding, 4]
5121    box_slice: a tensor of shape [batch_size, tile_size, 4]
5122    iou_threshold: a scalar tensor
5123    inner_idx: a scalar tensor representing the tile index of the tile
5124      that is used to supress box_slice
5125    tile_size: an integer representing the number of boxes in a tile
5126
5127  Returns:
5128    boxes: unchanged boxes as input
5129    box_slice_after_suppression: box_slice after suppression
5130    iou_threshold: unchanged
5131  """
5132  batch_size = array_ops.shape(boxes)[0]
5133  new_slice = array_ops.slice(
5134      boxes, [0, inner_idx * tile_size, 0],
5135      [batch_size, tile_size, 4])
5136  iou = _bbox_overlap(new_slice, box_slice)
5137  box_slice_after_suppression = array_ops.expand_dims(
5138      math_ops.cast(math_ops.reduce_all(iou < iou_threshold, [1]),
5139                    box_slice.dtype),
5140      2) * box_slice
5141  return boxes, box_slice_after_suppression, iou_threshold, inner_idx + 1
5142
5143
5144def _suppression_loop_body(boxes, iou_threshold, output_size, idx, tile_size):
5145  """Process boxes in the range [idx*tile_size, (idx+1)*tile_size).
5146
5147  Args:
5148    boxes: a tensor with a shape of [batch_size, anchors, 4].
5149    iou_threshold: a float representing the threshold for deciding whether boxes
5150      overlap too much with respect to IOU.
5151    output_size: an int32 tensor of size [batch_size]. Representing the number
5152      of selected boxes for each batch.
5153    idx: an integer scalar representing induction variable.
5154    tile_size: an integer representing the number of boxes in a tile
5155
5156  Returns:
5157    boxes: updated boxes.
5158    iou_threshold: pass down iou_threshold to the next iteration.
5159    output_size: the updated output_size.
5160    idx: the updated induction variable.
5161  """
5162  with ops.name_scope('suppression_loop_body'):
5163    num_tiles = array_ops.shape(boxes)[1] // tile_size
5164    batch_size = array_ops.shape(boxes)[0]
5165
5166    def cross_suppression_func(boxes, box_slice, iou_threshold, inner_idx):
5167      return _cross_suppression(boxes, box_slice, iou_threshold, inner_idx,
5168                                tile_size)
5169
5170    # Iterates over tiles that can possibly suppress the current tile.
5171    box_slice = array_ops.slice(boxes, [0, idx * tile_size, 0],
5172                                [batch_size, tile_size, 4])
5173    _, box_slice, _, _ = control_flow_ops.while_loop(
5174        lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
5175        cross_suppression_func,
5176        [boxes, box_slice, iou_threshold, constant_op.constant(0)])
5177
5178    # Iterates over the current tile to compute self-suppression.
5179    iou = _bbox_overlap(box_slice, box_slice)
5180    mask = array_ops.expand_dims(
5181        array_ops.reshape(
5182            math_ops.range(tile_size), [1, -1]) > array_ops.reshape(
5183                math_ops.range(tile_size), [-1, 1]), 0)
5184    iou *= math_ops.cast(
5185        math_ops.logical_and(mask, iou >= iou_threshold), iou.dtype)
5186    suppressed_iou, _, _, _ = control_flow_ops.while_loop(
5187        lambda _iou, loop_condition, _iou_sum, _: loop_condition,
5188        _self_suppression,
5189        [iou, constant_op.constant(True), math_ops.reduce_sum(iou, [1, 2]),
5190         iou_threshold])
5191    suppressed_box = math_ops.reduce_sum(suppressed_iou, 1) > 0
5192    box_slice *= array_ops.expand_dims(
5193        1.0 - math_ops.cast(suppressed_box, box_slice.dtype), 2)
5194
5195    # Uses box_slice to update the input boxes.
5196    mask = array_ops.reshape(
5197        math_ops.cast(
5198            math_ops.equal(math_ops.range(num_tiles), idx), boxes.dtype),
5199        [1, -1, 1, 1])
5200    boxes = array_ops.tile(array_ops.expand_dims(
5201        box_slice, [1]), [1, num_tiles, 1, 1]) * mask + array_ops.reshape(
5202            boxes, [batch_size, num_tiles, tile_size, 4]) * (1 - mask)
5203    boxes = array_ops.reshape(boxes, [batch_size, -1, 4])
5204
5205    # Updates output_size.
5206    output_size += math_ops.reduce_sum(
5207        math_ops.cast(
5208            math_ops.reduce_any(box_slice > 0, [2]), dtypes.int32), [1])
5209  return boxes, iou_threshold, output_size, idx + 1
5210
5211
5212@tf_export('image.non_max_suppression_padded')
5213@dispatch.add_dispatch_support
5214def non_max_suppression_padded(boxes,
5215                               scores,
5216                               max_output_size,
5217                               iou_threshold=0.5,
5218                               score_threshold=float('-inf'),
5219                               pad_to_max_output_size=False,
5220                               name=None,
5221                               sorted_input=False,
5222                               canonicalized_coordinates=False,
5223                               tile_size=512):
5224  """Greedily selects a subset of bounding boxes in descending order of score.
5225
5226  Performs algorithmically equivalent operation to tf.image.non_max_suppression,
5227  with the addition of an optional parameter which zero-pads the output to
5228  be of size `max_output_size`.
5229  The output of this operation is a tuple containing the set of integers
5230  indexing into the input collection of bounding boxes representing the selected
5231  boxes and the number of valid indices in the index set.  The bounding box
5232  coordinates corresponding to the selected indices can then be obtained using
5233  the `tf.slice` and `tf.gather` operations.  For example:
5234    ```python
5235    selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
5236        boxes, scores, max_output_size, iou_threshold,
5237        score_threshold, pad_to_max_output_size=True)
5238    selected_indices = tf.slice(
5239        selected_indices_padded, tf.constant([0]), num_valid)
5240    selected_boxes = tf.gather(boxes, selected_indices)
5241    ```
5242
5243  Args:
5244    boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4].
5245      Dimensions except the last two are batch dimensions.
5246    scores: a tensor of rank 1 or higher with a shape of [..., num_boxes].
5247    max_output_size: a scalar integer `Tensor` representing the maximum number
5248      of boxes to be selected by non max suppression. Note that setting this
5249      value to a large number may result in OOM error depending on the system
5250      workload.
5251    iou_threshold: a float representing the threshold for deciding whether boxes
5252      overlap too much with respect to IoU (intersection over union).
5253    score_threshold: a float representing the threshold for box scores. Boxes
5254      with a score that is not larger than this threshold will be suppressed.
5255    pad_to_max_output_size: whether to pad the output idx to max_output_size.
5256      Must be set to True when the input is a batch of images.
5257    name: name of operation.
5258    sorted_input: a boolean indicating whether the input boxes and scores
5259      are sorted in descending order by the score.
5260    canonicalized_coordinates: if box coordinates are given as
5261    `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant
5262     computation to canonicalize box coordinates.
5263    tile_size: an integer representing the number of boxes in a tile, i.e.,
5264      the maximum number of boxes per image that can be used to suppress other
5265      boxes in parallel; larger tile_size means larger parallelism and
5266      potentially more redundant work.
5267  Returns:
5268    idx: a tensor with a shape of [..., num_boxes] representing the
5269      indices selected by non-max suppression. The leading dimensions
5270      are the batch dimensions of the input boxes. All numbers are within
5271      [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i]
5272      indices (i.e., idx[i][:num_valid[i]]) are valid.
5273    num_valid: a tensor of rank 0 or higher with a shape of [...]
5274      representing the number of valid indices in idx. Its dimensions are the
5275      batch dimensions of the input boxes.
5276   Raises:
5277    ValueError: When set pad_to_max_output_size to False for batched input.
5278  """
5279  with ops.name_scope(name, 'non_max_suppression_padded'):
5280    if not pad_to_max_output_size:
5281      # pad_to_max_output_size may be set to False only when the shape of
5282      # boxes is [num_boxes, 4], i.e., a single image. We make best effort to
5283      # detect violations at compile time. If `boxes` does not have a static
5284      # rank, the check allows computation to proceed.
5285      if boxes.get_shape().rank is not None and boxes.get_shape().rank > 2:
5286        raise ValueError("'pad_to_max_output_size' (value {}) must be True for "
5287                         'batched input'.format(pad_to_max_output_size))
5288    if name is None:
5289      name = ''
5290    idx, num_valid = non_max_suppression_padded_v2(
5291        boxes, scores, max_output_size, iou_threshold, score_threshold,
5292        sorted_input, canonicalized_coordinates, tile_size)
5293    # def_function.function seems to lose shape information, so set it here.
5294    if not pad_to_max_output_size:
5295      idx = idx[0, :num_valid]
5296    else:
5297      batch_dims = array_ops.concat([
5298          array_ops.shape(boxes)[:-2],
5299          array_ops.expand_dims(max_output_size, 0)
5300      ], 0)
5301      idx = array_ops.reshape(idx, batch_dims)
5302    return idx, num_valid
5303
5304
5305# TODO(b/158709815): Improve performance regression due to
5306# def_function.function.
5307@def_function.function(
5308    experimental_implements='non_max_suppression_padded_v2')
5309def non_max_suppression_padded_v2(boxes,
5310                                  scores,
5311                                  max_output_size,
5312                                  iou_threshold=0.5,
5313                                  score_threshold=float('-inf'),
5314                                  sorted_input=False,
5315                                  canonicalized_coordinates=False,
5316                                  tile_size=512):
5317  """Non-maximum suppression.
5318
5319  Prunes away boxes that have high intersection-over-union (IOU) overlap
5320  with previously selected boxes. Bounding boxes are supplied as
5321  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
5322  diagonal pair of box corners and the coordinates can be provided as normalized
5323  (i.e., lying in the interval `[0, 1]`) or absolute. The bounding box
5324  coordinates are cannonicalized to `[y_min, x_min, y_max, x_max]`,
5325  where `(y_min, x_min)` and `(y_max, x_mas)` are the coordinates of the lower
5326  left and upper right corner. User may indiciate the input box coordinates are
5327  already canonicalized to eliminate redundant work by setting
5328  canonicalized_coordinates to `True`. Note that this algorithm is agnostic to
5329  where the origin is in the coordinate system. Note that this algorithm is
5330  invariant to orthogonal transformations and translations of the coordinate
5331  system; thus translating or reflections of the coordinate system result in the
5332  same boxes being selected by the algorithm.
5333
5334  Similar to tf.image.non_max_suppression, non_max_suppression_padded
5335  implements hard NMS but can operate on a batch of images and improves
5336  performance by titling the bounding boxes. Non_max_suppression_padded should
5337  be preferred over tf.image_non_max_suppression when running on devices with
5338  abundant parallelsim for higher computation speed. For soft NMS, refer to
5339  tf.image.non_max_suppression_with_scores.
5340
5341  While a serial NMS algorithm iteratively uses the highest-scored unprocessed
5342  box to suppress boxes, this algorithm uses many boxes to suppress other boxes
5343  in parallel. The key idea is to partition boxes into tiles based on their
5344  score and suppresses boxes tile by tile, thus achieving parallelism within a
5345  tile. The tile size determines the degree of parallelism.
5346
5347  In cross suppression (using boxes of tile A to suppress boxes of tile B),
5348  all boxes in A can independently suppress boxes in B.
5349
5350  Self suppression (suppressing boxes of the same tile) needs to be iteratively
5351  applied until there's no more suppression. In each iteration, boxes that
5352  cannot be suppressed are used to suppress boxes in the same tile.
5353
5354  boxes = boxes.pad_to_multiply_of(tile_size)
5355  num_tiles = len(boxes) // tile_size
5356  output_boxes = []
5357  for i in range(num_tiles):
5358    box_tile = boxes[i*tile_size : (i+1)*tile_size]
5359    for j in range(i - 1):
5360      # in parallel suppress boxes in box_tile using boxes from suppressing_tile
5361      suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
5362      iou = _bbox_overlap(box_tile, suppressing_tile)
5363      # if the box is suppressed in iou, clear it to a dot
5364      box_tile *= _update_boxes(iou)
5365    # Iteratively handle the diagnal tile.
5366    iou = _box_overlap(box_tile, box_tile)
5367    iou_changed = True
5368    while iou_changed:
5369      # boxes that are not suppressed by anything else
5370      suppressing_boxes = _get_suppressing_boxes(iou)
5371      # boxes that are suppressed by suppressing_boxes
5372      suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
5373      # clear iou to 0 for boxes that are suppressed, as they cannot be used
5374      # to suppress other boxes any more
5375      new_iou = _clear_iou(iou, suppressed_boxes)
5376      iou_changed = (new_iou != iou)
5377      iou = new_iou
5378    # remaining boxes that can still suppress others, are selected boxes.
5379    output_boxes.append(_get_suppressing_boxes(iou))
5380    if len(output_boxes) >= max_output_size:
5381      break
5382
5383  Args:
5384    boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4].
5385      Dimensions except the last two are batch dimensions. The last dimension
5386      represents box coordinates, given as [y_1, x_1, y_2, x_2]. The coordinates
5387      on each dimension can be given in any order
5388      (see also `canonicalized_coordinates`) but must describe a box with
5389      a positive area.
5390    scores: a tensor of rank 1 or higher with a shape of [..., num_boxes].
5391    max_output_size: a scalar integer `Tensor` representing the maximum number
5392      of boxes to be selected by non max suppression.
5393    iou_threshold: a float representing the threshold for deciding whether boxes
5394      overlap too much with respect to IoU (intersection over union).
5395    score_threshold: a float representing the threshold for box scores. Boxes
5396      with a score that is not larger than this threshold will be suppressed.
5397    sorted_input: a boolean indicating whether the input boxes and scores
5398      are sorted in descending order by the score.
5399    canonicalized_coordinates: if box coordinates are given as
5400    `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant
5401     computation to canonicalize box coordinates.
5402    tile_size: an integer representing the number of boxes in a tile, i.e.,
5403      the maximum number of boxes per image that can be used to suppress other
5404      boxes in parallel; larger tile_size means larger parallelism and
5405      potentially more redundant work.
5406  Returns:
5407    idx: a tensor with a shape of [..., num_boxes] representing the
5408      indices selected by non-max suppression. The leading dimensions
5409      are the batch dimensions of the input boxes. All numbers are within
5410      [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i]
5411      indices (i.e., idx[i][:num_valid[i]]) are valid.
5412    num_valid: a tensor of rank 0 or higher with a shape of [...]
5413      representing the number of valid indices in idx. Its dimensions are the
5414      batch dimensions of the input boxes.
5415   Raises:
5416    ValueError: When set pad_to_max_output_size to False for batched input.
5417  """
5418  def _sort_scores_and_boxes(scores, boxes):
5419    """Sort boxes based their score from highest to lowest.
5420
5421    Args:
5422      scores: a tensor with a shape of [batch_size, num_boxes] representing
5423        the scores of boxes.
5424      boxes: a tensor with a shape of [batch_size, num_boxes, 4] representing
5425        the boxes.
5426    Returns:
5427      sorted_scores: a tensor with a shape of [batch_size, num_boxes]
5428        representing the sorted scores.
5429      sorted_boxes: a tensor representing the sorted boxes.
5430      sorted_scores_indices: a tensor with a shape of [batch_size, num_boxes]
5431        representing the index of the scores in a sorted descending order.
5432    """
5433    with ops.name_scope('sort_scores_and_boxes'):
5434      batch_size = array_ops.shape(boxes)[0]
5435      num_boxes = array_ops.shape(boxes)[1]
5436      sorted_scores_indices = sort_ops.argsort(
5437          scores, axis=1, direction='DESCENDING')
5438      index_offsets = math_ops.range(batch_size) * num_boxes
5439      indices = array_ops.reshape(
5440          sorted_scores_indices + array_ops.expand_dims(index_offsets, 1), [-1])
5441      sorted_scores = array_ops.reshape(
5442          array_ops.gather(array_ops.reshape(scores, [-1]), indices),
5443          [batch_size, -1])
5444      sorted_boxes = array_ops.reshape(
5445          array_ops.gather(array_ops.reshape(boxes, [-1, 4]), indices),
5446          [batch_size, -1, 4])
5447    return sorted_scores, sorted_boxes, sorted_scores_indices
5448
5449  batch_dims = array_ops.shape(boxes)[:-2]
5450  num_boxes = array_ops.shape(boxes)[-2]
5451  boxes = array_ops.reshape(boxes, [-1, num_boxes, 4])
5452  scores = array_ops.reshape(scores, [-1, num_boxes])
5453  batch_size = array_ops.shape(boxes)[0]
5454  if score_threshold != float('-inf'):
5455    with ops.name_scope('filter_by_score'):
5456      score_mask = math_ops.cast(scores > score_threshold, scores.dtype)
5457      scores *= score_mask
5458      box_mask = array_ops.expand_dims(
5459          math_ops.cast(score_mask, boxes.dtype), 2)
5460      boxes *= box_mask
5461
5462  if not canonicalized_coordinates:
5463    with ops.name_scope('canonicalize_coordinates'):
5464      y_1, x_1, y_2, x_2 = array_ops.split(
5465          value=boxes, num_or_size_splits=4, axis=2)
5466      y_1_is_min = math_ops.reduce_all(
5467          math_ops.less_equal(y_1[0, 0, 0], y_2[0, 0, 0]))
5468      y_min, y_max = control_flow_ops.cond(
5469          y_1_is_min, lambda: (y_1, y_2), lambda: (y_2, y_1))
5470      x_1_is_min = math_ops.reduce_all(
5471          math_ops.less_equal(x_1[0, 0, 0], x_2[0, 0, 0]))
5472      x_min, x_max = control_flow_ops.cond(
5473          x_1_is_min, lambda: (x_1, x_2), lambda: (x_2, x_1))
5474      boxes = array_ops.concat([y_min, x_min, y_max, x_max], axis=2)
5475
5476  if not sorted_input:
5477    scores, boxes, sorted_indices = _sort_scores_and_boxes(scores, boxes)
5478  else:
5479    # Default value required for Autograph.
5480    sorted_indices = array_ops.zeros_like(scores, dtype=dtypes.int32)
5481
5482  pad = math_ops.cast(
5483      math_ops.ceil(
5484          math_ops.cast(
5485              math_ops.maximum(num_boxes, max_output_size), dtypes.float32) /
5486          math_ops.cast(tile_size, dtypes.float32)),
5487      dtypes.int32) * tile_size - num_boxes
5488  boxes = array_ops.pad(
5489      math_ops.cast(boxes, dtypes.float32), [[0, 0], [0, pad], [0, 0]])
5490  scores = array_ops.pad(
5491      math_ops.cast(scores, dtypes.float32), [[0, 0], [0, pad]])
5492  num_boxes_after_padding = num_boxes + pad
5493  num_iterations = num_boxes_after_padding // tile_size
5494  def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
5495    return math_ops.logical_and(
5496        math_ops.reduce_min(output_size) < max_output_size,
5497        idx < num_iterations)
5498
5499  def suppression_loop_body(boxes, iou_threshold, output_size, idx):
5500    return _suppression_loop_body(
5501        boxes, iou_threshold, output_size, idx, tile_size)
5502
5503  selected_boxes, _, output_size, _ = control_flow_ops.while_loop(
5504      _loop_cond,
5505      suppression_loop_body,
5506      [
5507          boxes, iou_threshold,
5508          array_ops.zeros([batch_size], dtypes.int32),
5509          constant_op.constant(0)
5510      ],
5511      shape_invariants=[
5512          tensor_shape.TensorShape([None, None, 4]),
5513          tensor_shape.TensorShape([]),
5514          tensor_shape.TensorShape([None]),
5515          tensor_shape.TensorShape([]),
5516      ],
5517  )
5518  num_valid = math_ops.minimum(output_size, max_output_size)
5519  idx = num_boxes_after_padding - math_ops.cast(
5520      nn_ops.top_k(
5521          math_ops.cast(math_ops.reduce_any(
5522              selected_boxes > 0, [2]), dtypes.int32) *
5523          array_ops.expand_dims(
5524              math_ops.range(num_boxes_after_padding, 0, -1), 0),
5525          max_output_size)[0], dtypes.int32)
5526  idx = math_ops.minimum(idx, num_boxes - 1)
5527
5528  if not sorted_input:
5529    index_offsets = math_ops.range(batch_size) * num_boxes
5530    gather_idx = array_ops.reshape(
5531        idx + array_ops.expand_dims(index_offsets, 1), [-1])
5532    idx = array_ops.reshape(
5533        array_ops.gather(array_ops.reshape(sorted_indices, [-1]),
5534                         gather_idx),
5535        [batch_size, -1])
5536  invalid_index = array_ops.zeros([batch_size, max_output_size],
5537                                  dtype=dtypes.int32)
5538  idx_index = array_ops.expand_dims(math_ops.range(max_output_size), 0)
5539  num_valid_expanded = array_ops.expand_dims(num_valid, 1)
5540  idx = array_ops.where(idx_index < num_valid_expanded,
5541                        idx, invalid_index)
5542
5543  num_valid = array_ops.reshape(num_valid, batch_dims)
5544  return idx, num_valid
5545
5546
5547def non_max_suppression_padded_v1(boxes,
5548                                  scores,
5549                                  max_output_size,
5550                                  iou_threshold=0.5,
5551                                  score_threshold=float('-inf'),
5552                                  pad_to_max_output_size=False,
5553                                  name=None):
5554  """Greedily selects a subset of bounding boxes in descending order of score.
5555
5556  Performs algorithmically equivalent operation to tf.image.non_max_suppression,
5557  with the addition of an optional parameter which zero-pads the output to
5558  be of size `max_output_size`.
5559  The output of this operation is a tuple containing the set of integers
5560  indexing into the input collection of bounding boxes representing the selected
5561  boxes and the number of valid indices in the index set.  The bounding box
5562  coordinates corresponding to the selected indices can then be obtained using
5563  the `tf.slice` and `tf.gather` operations.  For example:
5564    ```python
5565    selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
5566        boxes, scores, max_output_size, iou_threshold,
5567        score_threshold, pad_to_max_output_size=True)
5568    selected_indices = tf.slice(
5569        selected_indices_padded, tf.constant([0]), num_valid)
5570    selected_boxes = tf.gather(boxes, selected_indices)
5571    ```
5572
5573  Args:
5574    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
5575    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
5576      score corresponding to each box (each row of boxes).
5577    max_output_size: A scalar integer `Tensor` representing the maximum number
5578      of boxes to be selected by non-max suppression.
5579    iou_threshold: A float representing the threshold for deciding whether boxes
5580      overlap too much with respect to IOU.
5581    score_threshold: A float representing the threshold for deciding when to
5582      remove boxes based on score.
5583    pad_to_max_output_size: bool.  If True, size of `selected_indices` output is
5584      padded to `max_output_size`.
5585    name: A name for the operation (optional).
5586
5587  Returns:
5588    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
5589      selected indices from the boxes tensor, where `M <= max_output_size`.
5590    valid_outputs: A scalar integer `Tensor` denoting how many elements in
5591    `selected_indices` are valid.  Valid elements occur first, then padding.
5592  """
5593  with ops.name_scope(name, 'non_max_suppression_padded'):
5594    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
5595    score_threshold = ops.convert_to_tensor(
5596        score_threshold, name='score_threshold')
5597    return gen_image_ops.non_max_suppression_v4(boxes, scores, max_output_size,
5598                                                iou_threshold, score_threshold,
5599                                                pad_to_max_output_size)
5600
5601
5602@tf_export('image.draw_bounding_boxes', v1=[])
5603@dispatch.add_dispatch_support
5604def draw_bounding_boxes_v2(images, boxes, colors, name=None):
5605  """Draw bounding boxes on a batch of images.
5606
5607  Outputs a copy of `images` but draws on top of the pixels zero or more
5608  bounding boxes specified by the locations in `boxes`. The coordinates of the
5609  each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`.
5610  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
5611  and the height of the underlying image.
5612
5613  For example, if an image is 100 x 200 pixels (height x width) and the bounding
5614  box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
5615  the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
5616
5617  Parts of the bounding box may fall outside the image.
5618
5619  Args:
5620    images: A `Tensor`. Must be one of the following types: `float32`, `half`.
5621      4-D with shape `[batch, height, width, depth]`. A batch of images.
5622    boxes: A `Tensor` of type `float32`. 3-D with shape `[batch,
5623      num_bounding_boxes, 4]` containing bounding boxes.
5624    colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle
5625      through for the boxes.
5626    name: A name for the operation (optional).
5627
5628  Returns:
5629    A `Tensor`. Has the same type as `images`.
5630
5631  Usage Example:
5632
5633  >>> # create an empty image
5634  >>> img = tf.zeros([1, 3, 3, 3])
5635  >>> # draw a box around the image
5636  >>> box = np.array([0, 0, 1, 1])
5637  >>> boxes = box.reshape([1, 1, 4])
5638  >>> # alternate between red and blue
5639  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
5640  >>> tf.image.draw_bounding_boxes(img, boxes, colors)
5641  <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy=
5642  array([[[[1., 0., 0.],
5643          [1., 0., 0.],
5644          [1., 0., 0.]],
5645          [[1., 0., 0.],
5646          [0., 0., 0.],
5647          [1., 0., 0.]],
5648          [[1., 0., 0.],
5649          [1., 0., 0.],
5650          [1., 0., 0.]]]], dtype=float32)>
5651  """
5652  if colors is None:
5653    return gen_image_ops.draw_bounding_boxes(images, boxes, name)
5654  return gen_image_ops.draw_bounding_boxes_v2(images, boxes, colors, name)
5655
5656
5657@tf_export(v1=['image.draw_bounding_boxes'])
5658@dispatch.add_dispatch_support
5659def draw_bounding_boxes(images, boxes, name=None, colors=None):
5660  """Draw bounding boxes on a batch of images.
5661
5662  Outputs a copy of `images` but draws on top of the pixels zero or more
5663  bounding boxes specified by the locations in `boxes`. The coordinates of the
5664  each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`.
5665  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
5666  and the height of the underlying image.
5667
5668  For example, if an image is 100 x 200 pixels (height x width) and the bounding
5669  box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
5670  the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
5671
5672  Parts of the bounding box may fall outside the image.
5673
5674  Args:
5675    images: A `Tensor`. Must be one of the following types: `float32`, `half`.
5676      4-D with shape `[batch, height, width, depth]`. A batch of images.
5677    boxes: A `Tensor` of type `float32`. 3-D with shape `[batch,
5678      num_bounding_boxes, 4]` containing bounding boxes.
5679    name: A name for the operation (optional).
5680    colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle
5681      through for the boxes.
5682
5683  Returns:
5684    A `Tensor`. Has the same type as `images`.
5685
5686  Usage Example:
5687
5688  >>> # create an empty image
5689  >>> img = tf.zeros([1, 3, 3, 3])
5690  >>> # draw a box around the image
5691  >>> box = np.array([0, 0, 1, 1])
5692  >>> boxes = box.reshape([1, 1, 4])
5693  >>> # alternate between red and blue
5694  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
5695  >>> tf.image.draw_bounding_boxes(img, boxes, colors)
5696  <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy=
5697  array([[[[1., 0., 0.],
5698          [1., 0., 0.],
5699          [1., 0., 0.]],
5700          [[1., 0., 0.],
5701          [0., 0., 0.],
5702          [1., 0., 0.]],
5703          [[1., 0., 0.],
5704          [1., 0., 0.],
5705          [1., 0., 0.]]]], dtype=float32)>
5706  """
5707  return draw_bounding_boxes_v2(images, boxes, colors, name)
5708
5709
5710@tf_export('image.generate_bounding_box_proposals')
5711@dispatch.add_dispatch_support
5712def generate_bounding_box_proposals(scores,
5713                                    bbox_deltas,
5714                                    image_info,
5715                                    anchors,
5716                                    nms_threshold=0.7,
5717                                    pre_nms_topn=6000,
5718                                    min_size=16,
5719                                    post_nms_topn=300,
5720                                    name=None):
5721  """Generate bounding box proposals from encoded bounding boxes.
5722
5723  Args:
5724    scores: A 4-D float `Tensor` of shape
5725     `[num_images, height, width, num_achors]` containing scores of
5726      the boxes for given anchors, can be unsorted.
5727    bbox_deltas: A 4-D float `Tensor` of shape
5728     `[num_images, height, width, 4 x num_anchors]` encoding boxes
5729      with respect to each anchor. Coordinates are given
5730      in the form `[dy, dx, dh, dw]`.
5731    image_info: A 2-D float `Tensor` of shape `[num_images, 5]`
5732      containing image information Height, Width, Scale.
5733    anchors: A 2-D float `Tensor` of shape `[num_anchors, 4]`
5734      describing the anchor boxes.
5735      Boxes are formatted in the form `[y1, x1, y2, x2]`.
5736    nms_threshold: A scalar float `Tensor` for non-maximal-suppression
5737      threshold. Defaults to 0.7.
5738    pre_nms_topn: A scalar int `Tensor` for the number of
5739      top scoring boxes to be used as input. Defaults to 6000.
5740    min_size: A scalar float `Tensor`. Any box that has a smaller size
5741      than min_size will be discarded. Defaults to 16.
5742    post_nms_topn: An integer. Maximum number of rois in the output.
5743    name: A name for this operation (optional).
5744
5745  Returns:
5746    rois: Region of interest boxes sorted by their scores.
5747    roi_probabilities: scores of the ROI boxes in the ROIs' `Tensor`.
5748  """
5749  return gen_image_ops.generate_bounding_box_proposals(
5750      scores=scores,
5751      bbox_deltas=bbox_deltas,
5752      image_info=image_info,
5753      anchors=anchors,
5754      nms_threshold=nms_threshold,
5755      pre_nms_topn=pre_nms_topn,
5756      min_size=min_size,
5757      post_nms_topn=post_nms_topn,
5758      name=name)
5759