• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
7#     http://www.apache.org/licenses/LICENSE-2.0
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Implementation of image ops."""
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
21import numpy as np
23from tensorflow.python.compat import compat
24from tensorflow.python.framework import constant_op
25from tensorflow.python.framework import dtypes
26from tensorflow.python.framework import ops
27from tensorflow.python.framework import random_seed
28from tensorflow.python.framework import tensor_shape
29from tensorflow.python.framework import tensor_util
30from tensorflow.python.ops import array_ops
31from tensorflow.python.ops import check_ops
32from tensorflow.python.ops import control_flow_ops
33from tensorflow.python.ops import gen_image_ops
34from tensorflow.python.ops import gen_nn_ops
35from tensorflow.python.ops import math_ops
36from tensorflow.python.ops import nn
37from tensorflow.python.ops import nn_ops
38from tensorflow.python.ops import random_ops
39from tensorflow.python.ops import string_ops
40from tensorflow.python.ops import variables
41from tensorflow.python.util import deprecation
42from tensorflow.python.util.tf_export import tf_export
45# TODO(b/31222613): This op may be differentiable, and there may be
46# latent bugs here.
48# TODO(b/31222613): This op may be differentiable, and there may be
49# latent bugs here.
54# TODO(bsteiner): Implement the gradient function for extract_glimpse
55# TODO(b/31222613): This op may be differentiable, and there may be
56# latent bugs here.
63# pylint: disable=invalid-name
def _assert(cond, ex_type, msg):
  """Checks `cond` right away for Python values, or lazily for tensors.

  A tensor condition is wrapped in a TensorFlow assert op, which is handed
  back to the caller in a list. Any other condition is tested immediately,
  like an ordinary assert statement.

  Args:
    cond: Something that evaluates to a boolean value. May be a tensor.
    ex_type: The exception class raised when a non-tensor `cond` is false. It
      is not used when `cond` is a tensor.
    msg: The error message.

  Returns:
    A list with a single assert op if `cond` is a tensor, otherwise an empty
    list.

  Raises:
    ex_type: if `cond` is not a tensor and evaluates to false.
  """
  if _is_tensor(cond):
    return [control_flow_ops.Assert(cond, [msg])]
  if cond:
    return []
  raise ex_type(msg)
def _is_tensor(x):
  """Tells whether `x` is a symbolic tensor-like object.

  Args:
    x: A python object to check.

  Returns:
    `True` when `x` is an instance of `ops.Tensor` or `variables.Variable`,
    `False` for anything else.
  """
  tensor_like_types = (ops.Tensor, variables.Variable)
  return isinstance(x, tensor_like_types)
def _ImageDimensions(image, rank):
  """Lists the dimensions of an image tensor, preferring static values.

  Args:
    image: A rank-D Tensor. For 3-D, of shape: `[height, width, channels]`.
    rank: The expected rank of the image. It is only consulted when the static
      shape of `image` is not fully defined.

  Returns:
    A list with one entry per dimension of the input image. Dimensions that
    are statically known are python integers, the others are integer scalar
    tensors taken from the dynamic shape.
  """
  shape = image.get_shape()
  if shape.is_fully_defined():
    return shape.as_list()

  static_dims = shape.with_rank(rank).as_list()
  dynamic_dims = array_ops.unstack(array_ops.shape(image), rank)
  dims = []
  for static_dim, dynamic_dim in zip(static_dims, dynamic_dims):
    # Fall back to the runtime value only where the static one is unknown.
    dims.append(dynamic_dim if static_dim is None else static_dim)
  return dims
def _Check3DImage(image, require_static=True):
  """Validates that `image` is a 3-D tensor without zero-sized dimensions.

  Args:
    image: 3-D Tensor of shape [height, width, channels]
    require_static: If `True`, requires that all dimensions of `image` are
      statically known.

  Raises:
    ValueError: if `image.shape` is not a 3-vector, if `require_static` is set
      and the shape is not fully defined, or if any statically known
      dimension is zero.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op that checks the dynamic shape is positive.
  """
  try:
    image_shape = image.get_shape().with_rank(3)
  except ValueError:
    raise ValueError(
        "'image' (shape %s) must be three-dimensional." % image.shape)

  fully_defined = image_shape.is_fully_defined()
  if require_static and not fully_defined:
    raise ValueError("'image' (shape %s) must be fully defined." % image_shape)
  for dim in image_shape:
    if dim == 0:
      raise ValueError(
          "all dims of 'image.shape' must be > 0: %s" % image_shape)

  if fully_defined:
    return []
  # Unknown dimensions can only be verified once the graph runs.
  positive_dims = check_ops.assert_positive(
      array_ops.shape(image), ["all dims of 'image.shape' must be > 0."])
  return [positive_dims]
def _Assert3DImage(image):
  """Returns `image` guarded by the checks from `_Check3DImage`.

  The static part of the check runs immediately. Any assert op that
  `_Check3DImage` produces for the dynamic shape is attached to the returned
  tensor as a control dependency.

  Args:
    image: 3-D Tensor of shape [height, width, channels]

  Raises:
    ValueError: if `image.shape` is not a 3-vector.

  Returns:
    The result of `control_flow_ops.with_dependencies` applied to `image` and
    the (possibly empty) list of assert ops.
  """
  shape_checks = _Check3DImage(image, require_static=False)
  return control_flow_ops.with_dependencies(shape_checks, image)
def _AssertAtLeast3DImage(image):
  """Returns `image` guarded by the checks from `_CheckAtLeast3DImage`.

  The static part of the check runs immediately. Any assert op that
  `_CheckAtLeast3DImage` produces for the dynamic shape is attached to the
  returned tensor as a control dependency.

  Args:
    image: >= 3-D Tensor of size [*, height, width, depth]

  Raises:
    ValueError: if image.shape is not a [>= 3] vector.

  Returns:
    The result of `control_flow_ops.with_dependencies` applied to `image` and
    the (possibly empty) list of assert ops.
  """
  shape_checks = _CheckAtLeast3DImage(image, require_static=False)
  return control_flow_ops.with_dependencies(shape_checks, image)
def _CheckAtLeast3DImage(image, require_static=True):
  """Validates that `image` has rank >= 3 and no zero-sized dimensions.

  Args:
    image: >= 3-D Tensor of size [*, height, width, depth]
    require_static: If `True`, requires that all dimensions of `image` are
      statically known.

  Raises:
    ValueError: if image.shape is not a [>= 3] vector, if `require_static` is
      set and the shape is not fully defined, or if any statically known
      dimension is zero.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op that checks the dynamic shape is positive.
  """
  try:
    static_shape = image.get_shape()
    if static_shape.ndims is None:
      # With no rank information at all, the shape is treated as 3-D.
      image_shape = static_shape.with_rank(3)
    else:
      image_shape = static_shape.with_rank_at_least(3)
  except ValueError:
    raise ValueError("'image' must be at least three-dimensional.")

  fully_defined = image_shape.is_fully_defined()
  if require_static and not fully_defined:
    raise ValueError("'image' must be fully defined.")
  for dim in image_shape:
    if dim == 0:
      raise ValueError(
          "all dims of 'image.shape' must be > 0: %s" % image_shape)

  if fully_defined:
    return []
  # Unknown dimensions can only be verified once the graph runs.
  positive_dims = check_ops.assert_positive(
      array_ops.shape(image), ["all dims of 'image.shape' must be > 0."])
  return [positive_dims]
def fix_image_flip_shape(image, result):
  """Copies the static shape of `image` onto `result`.

  If nothing is known about the shape of `image`, `result` is marked as a 3-D
  tensor with unknown dimensions instead.

  Args:
    image: The original image, whose static shape is read.
    result: The flipped or transformed image, whose static shape is set.

  Returns:
    `result`, with a shape that is at least `[None, None, None]`.
  """
  source_shape = image.get_shape()
  shape_is_unknown = source_shape == tensor_shape.unknown_shape()
  result.set_shape([None, None, None] if shape_is_unknown else source_shape)
  return result
def random_flip_up_down(image, seed=None):
  """Randomly flips an image vertically (upside down).

  With a 1 in 2 chance, outputs the contents of `image` flipped along the
  `height` dimension. Otherwise outputs the image as-is.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or
           3-D Tensor of shape `[height, width, channels]`.
    seed: A Python integer. Used to create a random seed. See
      `tf.set_random_seed`
      for behavior.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  return _random_flip(
      image, flip_index=0, seed=seed, scope_name='random_flip_up_down')
def random_flip_left_right(image, seed=None):
  """Randomly flips an image horizontally (left to right).

  With a 1 in 2 chance, outputs the contents of `image` flipped along the
  `width` dimension. Otherwise outputs the image as-is.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or
           3-D Tensor of shape `[height, width, channels]`.
    seed: A Python integer. Used to create a random seed. See
      `tf.set_random_seed`
      for behavior.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  return _random_flip(
      image, flip_index=1, seed=seed, scope_name='random_flip_left_right')
def _random_flip(image, flip_index, seed, scope_name):
  """Flips image(s) along axis `flip_index` with a 50% chance.

  A single image (rank 3, or unknown rank) is flipped as a whole through a
  conditional. For a batch (rank 4) one random value is drawn per image, and
  each image is flipped or kept according to its own value.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or
           3-D Tensor of shape `[height, width, channels]`.
    flip_index: Dimension along which to flip image. Vertical: 0, Horizontal: 1
    seed: A Python integer. Used to create a random seed. See
      `tf.set_random_seed`
      for behavior.
    scope_name: Name of the scope in which the ops are added.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  with ops.name_scope(None, scope_name, [image]) as scope:
    image = _AssertAtLeast3DImage(ops.convert_to_tensor(image, name='image'))
    rank = image.get_shape().ndims

    if rank is None or rank == 3:
      coin = random_ops.random_uniform([], 0, 1.0, seed=seed)
      should_flip = math_ops.less(coin, .5)

      def _flipped():
        return array_ops.reverse(image, [flip_index])

      def _unchanged():
        return image

      result = control_flow_ops.cond(
          should_flip, _flipped, _unchanged, name=scope)
      return fix_image_flip_shape(image, result)

    if rank == 4:
      batch_size = array_ops.shape(image)[0]
      per_image_coin = random_ops.random_uniform(
          [batch_size], 0, 1.0, seed=seed)
      # Rounding turns each uniform sample into a 0/1 mask entry that
      # broadcasts over height, width and channels.
      flip_mask = math_ops.cast(
          math_ops.round(
              array_ops.reshape(per_image_coin, [batch_size, 1, 1, 1])),
          image.dtype)
      # The batch dimension shifts the image axes by one.
      flipped = array_ops.reverse(image, [flip_index + 1])
      return flip_mask * flipped + (1 - flip_mask) * image

    raise ValueError("'image' must have either 3 or 4 dimensions.")
def flip_left_right(image):
  """Flips an image horizontally (left to right).

  Outputs the contents of `image` reversed along the width dimension.

  See also `reverse()`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or
           3-D Tensor of shape `[height, width, channels]`.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  return _flip(image, flip_index=1, scope_name='flip_left_right')
def flip_up_down(image):
  """Flips an image vertically (upside down).

  Outputs the contents of `image` reversed along the height dimension.

  See also `reverse()`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or
           3-D Tensor of shape `[height, width, channels]`.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  return _flip(image, flip_index=0, scope_name='flip_up_down')
def _flip(image, flip_index, scope_name):
  """Flips an image either horizontally or vertically.

  Outputs the contents of `image` reversed along the dimension `flip_index`,
  counted from the height dimension.

  See also `reverse()`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or
           3-D Tensor of shape `[height, width, channels]`.
    flip_index: 0 For vertical, 1 for horizontal.
    scope_name: Name of the scope in which the ops are added.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  with ops.name_scope(None, scope_name, [image]):
    image = _AssertAtLeast3DImage(ops.convert_to_tensor(image, name='image'))
    rank = image.get_shape().ndims

    if rank is None or rank == 3:
      reversed_image = array_ops.reverse(image, [flip_index])
      return fix_image_flip_shape(image, reversed_image)
    if rank == 4:
      # The batch dimension shifts the image axes by one.
      return array_ops.reverse(image, [flip_index + 1])
    raise ValueError("'image' must have either 3 or 4 dimensions.")
def rot90(image, k=1, name=None):
  """Rotates image(s) counter-clockwise by 90 degrees, `k` times.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or
           3-D Tensor of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by 90
      degrees. It is reduced modulo 4 before use.
    name: A name for this operation (optional).

  Returns:
    A rotated tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  with ops.name_scope(name, 'rot90', [image, k]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k')
    k.get_shape().assert_has_rank(0)
    k = math_ops.mod(k, 4)

    rank = image.get_shape().ndims
    if rank is None or rank == 3:
      rotate = _rot90_3D
    elif rank == 4:
      rotate = _rot90_4D
    else:
      raise ValueError("'image' must have either 3 or 4 dimensions.")
    return rotate(image, k, scope)
def _rot90_3D(image, k, name_scope):
  """Rotates a single image counter-clockwise by 90 degrees `k` times.

  Args:
    image: 3-D Tensor of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by 90
      degrees. Values other than 1, 2 and 3 leave the image untouched.
    name_scope: A valid TensorFlow name scope.

  Returns:
    A 3-D tensor of the same type as `image`. Its static height and width are
    unknown; the channel dimension is carried over from `image`.
  """

  def _no_turn():
    return image

  def _quarter_turn():
    mirrored = array_ops.reverse_v2(image, [1])
    return array_ops.transpose(mirrored, [1, 0, 2])

  def _half_turn():
    return array_ops.reverse_v2(image, [0, 1])

  def _three_quarter_turn():
    swapped = array_ops.transpose(image, [1, 0, 2])
    return array_ops.reverse_v2(swapped, [1])

  turns = ((1, _quarter_turn), (2, _half_turn), (3, _three_quarter_turn))
  cases = [(math_ops.equal(k, count), branch) for count, branch in turns]

  result = control_flow_ops.case(
      cases, default=_no_turn, exclusive=True, name=name_scope)
  # Height and width depend on the runtime value of `k`.
  result.set_shape([None, None, image.get_shape()[2]])
  return result
def _rot90_4D(images, k, name_scope):
  """Rotates a batch of images counter-clockwise by 90 degrees `k` times.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]`.
    k: A scalar integer. The number of times the images are rotated by 90
      degrees. Values other than 1, 2 and 3 leave the images untouched.
    name_scope: A valid TensorFlow name scope.

  Returns:
    A 4-D tensor of the same type as `images`. Its static height and width
    are unknown; the batch and channel dimensions are kept as inferred.
  """

  def _no_turn():
    return images

  def _quarter_turn():
    mirrored = array_ops.reverse_v2(images, [2])
    return array_ops.transpose(mirrored, [0, 2, 1, 3])

  def _half_turn():
    return array_ops.reverse_v2(images, [1, 2])

  def _three_quarter_turn():
    swapped = array_ops.transpose(images, [0, 2, 1, 3])
    return array_ops.reverse_v2(swapped, [2])

  turns = ((1, _quarter_turn), (2, _half_turn), (3, _three_quarter_turn))
  cases = [(math_ops.equal(k, count), branch) for count, branch in turns]

  result = control_flow_ops.case(
      cases, default=_no_turn, exclusive=True, name=name_scope)
  inferred = result.get_shape()
  # Height and width depend on the runtime value of `k`.
  result.set_shape([inferred[0], None, None, inferred[3]])
  return result
@tf_export(v1=['image.transpose', 'image.transpose_image'])
def transpose_image(image):
  """Transposes image(s) by swapping the height and width dimension.

  Forwards to `transpose` without an operation name.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or
           3-D Tensor of shape `[height, width, channels]`.

  Returns:
    Whatever `transpose` returns for `image`.
  """
  return transpose(image, name=None)
@tf_export('image.transpose', v1=[])
def transpose(image, name=None):
  """Transposes image(s) by swapping the height and width dimension.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or
           3-D Tensor of shape `[height, width, channels]`.
    name: A name for this operation (optional).

  Returns:
    If `image` was 4-D, a 4-D Tensor of shape
   `[batch, width, height, channels]`
    If `image` was 3-D, a 3-D Tensor of shape
   `[width, height, channels]`

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  with ops.name_scope(name, 'transpose', [image]):
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)

    rank = image.get_shape().ndims
    if rank is None or rank == 3:
      perm = [1, 0, 2]
    elif rank == 4:
      # Leave the batch and channel axes where they are.
      perm = [0, 2, 1, 3]
    else:
      raise ValueError("'image' must have either 3 or 4 dimensions.")
    return array_ops.transpose(image, perm, name=name)
def central_crop(image, central_fraction):
  """Crop the central region of the image(s).

  Remove the outer parts of an image but retain the central region of the image
  along each dimension. If we specify central_fraction = 0.5, this function
  returns the region marked with "X" in the below diagram.

       --------
      |        |
      |  XXXX  |
      |  XXXX  |
      |        |   where "X" is the central 50% of the image.
       --------

  This function works on either a single image (`image` is a 3-D Tensor), or a
  batch of images (`image` is a 4-D Tensor).

  Args:
    image: Either a 3-D Tensor of shape [height, width, depth], or a 4-D
      Tensor of shape [batch_size, height, width, depth].
    central_fraction: float (0, 1], fraction of size to crop

  Raises:
    ValueError: if `central_fraction` is not within (0, 1], or if the rank of
      `image` is not statically known to be 3 or 4.

  Returns:
    3-D / 4-D Tensor, as per the input. When `central_fraction` is 1.0 the
    converted input is returned as-is, before any shape check.
  """
  with ops.name_scope(None, 'central_crop', [image]):
    image = ops.convert_to_tensor(image, name='image')
    if central_fraction <= 0.0 or central_fraction > 1.0:
      raise ValueError('central_fraction must be within (0, 1]')
    if central_fraction == 1.0:
      return image

    # Keep the returned tensor: it carries the control dependency on the
    # dynamic shape assert. Discarding it would silently drop that check for
    # images whose shape is not fully defined.
    image = _AssertAtLeast3DImage(image)
    rank = image.get_shape().ndims
    if rank != 3 and rank != 4:
      raise ValueError('`image` should either be a Tensor with rank = 3 or '
                       'rank = 4. Had rank = {}.'.format(rank))

    # Helper method to return the `idx`-th dimension of `tensor`, along with
    # a boolean signifying if the dimension is dynamic.
    def _get_dim(tensor, idx):
      static_shape = tensor.get_shape().dims[idx].value
      if static_shape is not None:
        return static_shape, False
      return array_ops.shape(tensor)[idx], True

    # Get the height, width, depth (and batch size, if the image is a 4-D
    # tensor).
    if rank == 3:
      img_h, dynamic_h = _get_dim(image, 0)
      img_w, dynamic_w = _get_dim(image, 1)
      img_d = image.get_shape()[2]
    else:
      img_bs = image.get_shape()[0]
      img_h, dynamic_h = _get_dim(image, 1)
      img_w, dynamic_w = _get_dim(image, 2)
      img_d = image.get_shape()[3]

    # Compute the bounding boxes for the crop. The type and value of the
    # bounding boxes depend on the `image` tensor's rank and whether / not the
    # dimensions are statically defined.
    if dynamic_h:
      img_hd = math_ops.cast(img_h, dtypes.float64)
      bbox_h_start = math_ops.cast(
          (img_hd - img_hd * central_fraction) / 2, dtypes.int32)
    else:
      img_hd = float(img_h)
      bbox_h_start = int((img_hd - img_hd * central_fraction) / 2)

    if dynamic_w:
      img_wd = math_ops.cast(img_w, dtypes.float64)
      bbox_w_start = math_ops.cast(
          (img_wd - img_wd * central_fraction) / 2, dtypes.int32)
    else:
      img_wd = float(img_w)
      bbox_w_start = int((img_wd - img_wd * central_fraction) / 2)

    # The size is derived from the start offset, so the same margin is
    # removed on both sides of each dimension.
    bbox_h_size = img_h - bbox_h_start * 2
    bbox_w_size = img_w - bbox_w_start * 2

    # A size of -1 keeps the whole extent of that dimension.
    if rank == 3:
      bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0])
      bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1])
    else:
      bbox_begin = array_ops.stack([0, bbox_h_start, bbox_w_start, 0])
      bbox_size = array_ops.stack([-1, bbox_h_size, bbox_w_size, -1])

    image = array_ops.slice(image, bbox_begin, bbox_size)

    # Restore the static shape information that is known for the result.
    if rank == 3:
      image.set_shape([
          None if dynamic_h else bbox_h_size,
          None if dynamic_w else bbox_w_size,
          img_d
      ])
    else:
      image.set_shape([
          img_bs,
          None if dynamic_h else bbox_h_size,
          None if dynamic_w else bbox_w_size,
          img_d
      ])
    return image
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width):
  """Pads `image` with zeros to the specified `height` and `width`.

  Adds `offset_height` rows of zeros on top, `offset_width` columns of
  zeros on the left, and then pads the image on the bottom and right
  with zeros until it has dimensions `target_height`, `target_width`.

  This op does nothing if `offset_*` is zero and the image already has size
  `target_height` by `target_width`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or
           3-D Tensor of shape `[height, width, channels]`.
    offset_height: Number of rows of zeros to add on top.
    offset_width: Number of columns of zeros to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.

  Returns:
    If `image` was 4-D, a 4-D Tensor of shape
    `[batch, target_height, target_width, channels]`
    If `image` was 3-D, a 3-D Tensor of shape
    `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative.
  """
  with ops.name_scope(None, 'pad_to_bounding_box', [image]):
    image = ops.convert_to_tensor(image, name='image')

    rank = image.get_shape().ndims
    if rank is not None and rank not in (3, 4):
      raise ValueError("'image' must have either 3 or 4 dimensions.")
    is_batch = rank == 4
    if not is_batch:
      # A single image (or one of unknown rank) is handled as a batch of one.
      image = array_ops.expand_dims(image, 0)
      if rank is None:
        image.set_shape([None] * 4)

    checks = _CheckAtLeast3DImage(image, require_static=False)
    batch, height, width, depth = _ImageDimensions(image, rank=4)

    pad_right = target_width - offset_width - width
    pad_bottom = target_height - offset_height - height

    checks.extend(
        _assert(offset_height >= 0, ValueError, 'offset_height must be >= 0'))
    checks.extend(
        _assert(offset_width >= 0, ValueError, 'offset_width must be >= 0'))
    checks.extend(
        _assert(pad_right >= 0, ValueError,
                'width must be <= target - offset'))
    checks.extend(
        _assert(pad_bottom >= 0, ValueError,
                'height must be <= target - offset'))
    image = control_flow_ops.with_dependencies(checks, image)

    # One (before, after) pair per dimension; batch and depth are not padded.
    flat_paddings = array_ops.stack([
        0, 0, offset_height, pad_bottom, offset_width, pad_right, 0, 0
    ])
    padded = array_ops.pad(image, array_ops.reshape(flat_paddings, [4, 2]))

    # Only sizes given as Python values can be recorded in the static shape.
    static_shape = []
    for dim in (batch, target_height, target_width, depth):
      static_shape.append(None if _is_tensor(dim) else dim)
    padded.set_shape(static_shape)

    if is_batch:
      return padded
    return array_ops.squeeze(padded, axis=[0])
def crop_to_bounding_box(image, offset_height, offset_width, target_height,
                         target_width):
  """Crops an image to a specified bounding box.

  This op cuts a rectangular part out of `image`. The top-left corner of the
  returned image is at `offset_height, offset_width` in `image`, and its
  lower-right corner is at
  `offset_height + target_height, offset_width + target_width`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or
           3-D Tensor of shape `[height, width, channels]`.
    offset_height: Vertical coordinate of the top-left corner of the result in
                   the input.
    offset_width: Horizontal coordinate of the top-left corner of the result in
                  the input.
    target_height: Height of the result.
    target_width: Width of the result.

  Returns:
    If `image` was 4-D, a 4-D Tensor of shape
    `[batch, target_height, target_width, channels]`
    If `image` was 3-D, a 3-D Tensor of shape
    `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative, or either `target_height` or `target_width` is not positive.
  """
  with ops.name_scope(None, 'crop_to_bounding_box', [image]):
    image = ops.convert_to_tensor(image, name='image')

    rank = image.get_shape().ndims
    if rank is not None and rank not in (3, 4):
      raise ValueError("'image' must have either 3 or 4 dimensions.")
    is_batch = rank == 4
    if not is_batch:
      # A single image (or one of unknown rank) is handled as a batch of one.
      image = array_ops.expand_dims(image, 0)
      if rank is None:
        image.set_shape([None] * 4)

    checks = _CheckAtLeast3DImage(image, require_static=False)

    batch, height, width, depth = _ImageDimensions(image, rank=4)

    checks.extend(
        _assert(offset_width >= 0, ValueError, 'offset_width must be >= 0.'))
    checks.extend(
        _assert(offset_height >= 0, ValueError, 'offset_height must be >= 0.'))
    checks.extend(
        _assert(target_width > 0, ValueError, 'target_width must be > 0.'))
    checks.extend(
        _assert(target_height > 0, ValueError, 'target_height must be > 0.'))
    checks.extend(
        _assert(width >= (target_width + offset_width), ValueError,
                'width must be >= target + offset.'))
    checks.extend(
        _assert(height >= (target_height + offset_height), ValueError,
                'height must be >= target + offset.'))
    image = control_flow_ops.with_dependencies(checks, image)

    # A size of -1 keeps the whole batch / channel extent.
    crop_begin = array_ops.stack([0, offset_height, offset_width, 0])
    crop_size = array_ops.stack([-1, target_height, target_width, -1])
    cropped = array_ops.slice(image, crop_begin, crop_size)

    # Only sizes given as Python values can be recorded in the static shape.
    static_shape = []
    for dim in (batch, target_height, target_width, depth):
      static_shape.append(None if _is_tensor(dim) else dim)
    cropped.set_shape(static_shape)

    if is_batch:
      return cropped
    return array_ops.squeeze(cropped, axis=[0])
def resize_image_with_crop_or_pad(image, target_height, target_width):
  """Crops and/or pads an image to a target width and height.

  Resizes an image to a target width and height by either centrally
  cropping the image or padding it evenly with zeros.

  If `width` or `height` is greater than the specified `target_width` or
  `target_height` respectively, this op centrally crops along that dimension.
  If `width` or `height` is smaller than the specified `target_width` or
  `target_height` respectively, this op centrally pads with 0 along that
  dimension.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or
           3-D Tensor of shape `[height, width, channels]`.
    target_height: Target height.
    target_width: Target width.

  Raises:
    ValueError: if `target_height` or `target_width` are zero or negative.

  Returns:
    Cropped and/or padded image.
    If `images` was 4-D, a 4-D Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D Tensor of shape
    `[new_height, new_width, channels]`.
  """
  with ops.name_scope(None, 'resize_image_with_crop_or_pad', [image]):
    image = ops.convert_to_tensor(image, name='image')

    rank = image.get_shape().ndims
    if rank is not None and rank not in (3, 4):
      raise ValueError("'image' must have either 3 or 4 dimensions.")
    is_batch = rank == 4
    if not is_batch:
      # A single image (or one of unknown rank) is handled as a batch of one.
      image = array_ops.expand_dims(image, 0)
      if rank is None:
        image.set_shape([None] * 4)

    checks = _CheckAtLeast3DImage(image, require_static=False)
    checks.extend(
        _assert(target_width > 0, ValueError, 'target_width must be > 0.'))
    checks.extend(
        _assert(target_height > 0, ValueError, 'target_height must be > 0.'))

    image = control_flow_ops.with_dependencies(checks, image)
    # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
    # Make sure our checks come first, so that error messages are clearer.
    if _is_tensor(target_height):
      target_height = control_flow_ops.with_dependencies(
          checks, target_height)
    if _is_tensor(target_width):
      target_width = control_flow_ops.with_dependencies(checks, target_width)

    # The helpers below use a TensorFlow op as soon as one operand is a
    # tensor, and plain Python arithmetic otherwise.
    def _any_tensor(x, y):
      return _is_tensor(x) or _is_tensor(y)

    def _larger(x, y):
      return math_ops.maximum(x, y) if _any_tensor(x, y) else max(x, y)

    def _smaller(x, y):
      return math_ops.minimum(x, y) if _any_tensor(x, y) else min(x, y)

    def _same(x, y):
      return math_ops.equal(x, y) if _any_tensor(x, y) else x == y

    _, height, width, _ = _ImageDimensions(image, rank=4)
    width_diff = target_width - width
    crop_left = _larger(-width_diff // 2, 0)
    pad_left = _larger(width_diff // 2, 0)

    height_diff = target_height - height
    crop_top = _larger(-height_diff // 2, 0)
    pad_top = _larger(height_diff // 2, 0)

    # Maybe crop if needed.
    cropped = crop_to_bounding_box(image, crop_top, crop_left,
                                   _smaller(target_height, height),
                                   _smaller(target_width, width))

    # Maybe pad if needed.
    resized = pad_to_bounding_box(cropped, pad_top, pad_left, target_height,
                                  target_width)

    # In theory all the checks below are redundant.
    if resized.get_shape().ndims is None:
      raise ValueError('resized contains no shape.')

    _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4)

    size_checks = []
    size_checks.extend(
        _assert(
            _same(resized_height, target_height), ValueError,
            'resized height is not correct.'))
    size_checks.extend(
        _assert(
            _same(resized_width, target_width), ValueError,
            'resized width is not correct.'))

    resized = control_flow_ops.with_dependencies(size_checks, resized)

    if is_batch:
      return resized
    return array_ops.squeeze(resized, axis=[0])
class ResizeMethodV1(object):
  """Integer codes naming the resize algorithms of the legacy (v1) API.

  `resize_images` compares its `method` argument against these values.
  """
  BILINEAR = 0
  # `resize_images` reads `ResizeMethodV1.NEAREST_NEIGHBOR`; without this
  # member every non-bilinear `method` failed with AttributeError before the
  # intended comparison could run.
  NEAREST_NEIGHBOR = 1
  BICUBIC = 2
  AREA = 3
@tf_export('image.ResizeMethod', v1=[])
class ResizeMethod(object):
  """String names of the resize algorithms accepted by `resize_images_v2`."""
  # These four have a dedicated kernel in `resize_images_v2`; with
  # `antialias=True`, BILINEAR and BICUBIC are routed to `scale_and_translate`
  # instead.
  BILINEAR = 'bilinear'
  NEAREST_NEIGHBOR = 'nearest'
  BICUBIC = 'bicubic'
  AREA = 'area'
  # These are always passed to `scale_and_translate` as its `kernel_type`.
  LANCZOS3 = 'lanczos3'
  LANCZOS5 = 'lanczos5'
  GAUSSIAN = 'gaussian'
  MITCHELLCUBIC = 'mitchellcubic'
def _resize_images_common(images, resizer_fn, size, preserve_aspect_ratio, name,
                          skip_resize_if_same):
  """Core functionality for v1 and v2 resize functions.

  Promotes `images` to a 4-D batch, validates `size`, optionally reduces the
  requested size so the input aspect ratio is kept, delegates the resampling
  to `resizer_fn`, and finally restores the rank of the input.

  Args:
    images: Tensor-like with a statically known rank of 3
      (`[height, width, channels]`) or 4
      (`[batch, height, width, channels]`).
    resizer_fn: Callable invoked as `resizer_fn(images, size)` with the 4-D
      batch and the int32 `size` tensor; its result is used as the resized
      4-D batch.
    size: Value convertible to a 1-D int32 Tensor of 2 elements:
      `new_height, new_width`.
    preserve_aspect_ratio: If true, height and width are both scaled by the
      smaller of `new_height / height` and `new_width / width`.
    name: A name for the enclosing name scope (optional).
    skip_resize_if_same: If true and the static height and width of `images`
      already equal the static requested size, `images` is returned without
      calling `resizer_fn`.

  Returns:
    The resized images, with the same rank as the input.

  Raises:
    ValueError: if `images` has no static rank, or a rank other than 3 or 4.
    ValueError: if `size` cannot be converted to an int32 tensor or its shape
      is not compatible with `[2]`.
  """
  with ops.name_scope(name, 'resize', [images, size]):
    images = ops.convert_to_tensor(images, name='images')
    if images.get_shape().ndims is None:
      raise ValueError('\'images\' contains no shape.')
    # TODO(shlens): Migrate this functionality to the underlying Op's.
    is_batch = True
    if images.get_shape().ndims == 3:
      # Single image: add a leading batch dimension, removed again on return.
      is_batch = False
      images = array_ops.expand_dims(images, 0)
    elif images.get_shape().ndims != 4:
      raise ValueError('\'images\' must have either 3 or 4 dimensions.')

    # Static height/width; either may be None when unknown at graph build.
    _, height, width, _ = images.get_shape().as_list()

    try:
      size = ops.convert_to_tensor(size, dtypes.int32, name='size')
    except (TypeError, ValueError):
      raise ValueError('\'size\' must be a 1-D int32 Tensor')
    if not size.get_shape().is_compatible_with([2]):
      raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: '
                       'new_height, new_width')
    size_const_as_shape = tensor_util.constant_value_as_shape(size)
    new_height_const = size_const_as_shape.dims[0].value
    new_width_const = size_const_as_shape.dims[1].value

    if preserve_aspect_ratio:
      # Get the current shapes of the image, even if dynamic.
      _, current_height, current_width, _ = _ImageDimensions(images, rank=4)

      # Read the requested size from the `size` tensor itself rather than from
      # `new_height_const` / `new_width_const`: those are None whenever `size`
      # is not statically known, and None cannot be cast to a tensor.
      scale_factor_height = (
          math_ops.cast(size[0], dtypes.float32) /
          math_ops.cast(current_height, dtypes.float32))
      scale_factor_width = (
          math_ops.cast(size[1], dtypes.float32) /
          math_ops.cast(current_width, dtypes.float32))
      # The smaller factor makes the scaled image fit inside `size`.
      scale_factor = math_ops.minimum(scale_factor_height, scale_factor_width)
      scaled_height_const = math_ops.cast(
          math_ops.round(
              scale_factor * math_ops.cast(current_height, dtypes.float32)),
          dtypes.int32)
      scaled_width_const = math_ops.cast(
          math_ops.round(
              scale_factor * math_ops.cast(current_width, dtypes.float32)),
          dtypes.int32)

      # NOTE: Reset the size and other constants used later.
      size = ops.convert_to_tensor([scaled_height_const, scaled_width_const],
                                   dtypes.int32, name='size')
      size_const_as_shape = tensor_util.constant_value_as_shape(size)
      new_height_const = size_const_as_shape.dims[0].value
      new_width_const = size_const_as_shape.dims[1].value

    # If we can determine that the height and width will be unmodified by this
    # transformation, we avoid performing the resize.
    if skip_resize_if_same and all(
        x is not None
        for x in [new_width_const, width, new_height_const, height]) and (
            width == new_width_const and height == new_height_const):
      if not is_batch:
        images = array_ops.squeeze(images, axis=[0])
      return images

    images = resizer_fn(images, size)

    # NOTE(mrry): The shape functions for the resize ops cannot unpack
    # the packed values in `new_size`, so set the shape here.
    images.set_shape([None, new_height_const, new_width_const, None])

    if not is_batch:
      images = array_ops.squeeze(images, axis=[0])
    return images
@tf_export(v1=['image.resize_images', 'image.resize'])
def resize_images(images,
                  size,
                  method=ResizeMethodV1.BILINEAR,
                  align_corners=False,
                  preserve_aspect_ratio=False,
                  name=None):
  """Resize `images` to `size` using the specified `method`.

  If the aspect ratio of `size` differs from that of the input, the resized
  images are distorted.  See `tf.image.resize_image_with_pad` for a way to
  avoid distortions.

  `method` can be one of:

  *   <b>`ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.](
    https://en.wikipedia.org/wiki/Bilinear_interpolation)
  *   <b>`ResizeMethod.NEAREST_NEIGHBOR`</b>: [Nearest neighbor interpolation.](
    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
  *   <b>`ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.](
    https://en.wikipedia.org/wiki/Bicubic_interpolation)
  *   <b>`ResizeMethod.AREA`</b>: Area interpolation.

  The result keeps the type of `images` when `method` is
  `ResizeMethod.NEAREST_NEIGHBOR`, and also when the size of `images` can be
  statically determined to equal `size`, because `images` itself is returned
  in that case.  In every other case the result has type `float32`.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The new
      size for the images.
    method: ResizeMethod.  Defaults to `ResizeMethod.BILINEAR`.
    align_corners: bool.  If True, the centers of the 4 corner pixels of the
      input and output tensors are aligned, preserving the values at the corner
      pixels. Defaults to `False`.
    preserve_aspect_ratio: Whether to preserve the aspect ratio. If set,
      `images` is resized to a size that fits in `size` while keeping the
      aspect ratio of the original image; the image is scaled up if `size` is
      bigger than its current size. Defaults to False.
    name: A name for this operation (optional).

  Raises:
    ValueError: if the shape of `images` is incompatible with the
      shape arguments to this function
    ValueError: if `size` has invalid shape or type.
    ValueError: if an unsupported resize method is specified.

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """

  def _legacy_resize(batch, target_size):
    """Legacy resize core: picks the kernel for `method` and applies it."""
    # Both the integer (v1) and the string (v2) spelling of a method are
    # accepted.  All four kernels share the same call signature.
    if method == ResizeMethodV1.BILINEAR or method == ResizeMethod.BILINEAR:
      kernel = gen_image_ops.resize_bilinear
    elif (method == ResizeMethodV1.NEAREST_NEIGHBOR or
          method == ResizeMethod.NEAREST_NEIGHBOR):
      kernel = gen_image_ops.resize_nearest_neighbor
    elif method == ResizeMethodV1.BICUBIC or method == ResizeMethod.BICUBIC:
      kernel = gen_image_ops.resize_bicubic
    elif method == ResizeMethodV1.AREA or method == ResizeMethod.AREA:
      kernel = gen_image_ops.resize_area
    else:
      raise ValueError('Resize method is not implemented.')
    return kernel(batch, target_size, align_corners=align_corners)

  return _resize_images_common(
      images,
      _legacy_resize,
      size,
      preserve_aspect_ratio=preserve_aspect_ratio,
      name=name,
      skip_resize_if_same=True)
@tf_export('image.resize', v1=[])
def resize_images_v2(images,
                     size,
                     method=ResizeMethod.BILINEAR,
                     preserve_aspect_ratio=False,
                     antialias=False,
                     name=None):
  """Resize `images` to `size` using the specified `method`.

  If the aspect ratio of `size` differs from that of the input, the resized
  images are distorted.  See `tf.image.resize_with_pad` for a way to avoid
  distortions.

  When 'antialias' is true, the sampling filter anti-aliases the input image
  as well as interpolating.  When downsampling an image with [anti-aliasing](
  https://en.wikipedia.org/wiki/Spatial_anti-aliasing) the sampling filter
  kernel is scaled in order to properly anti-alias the input image signal.
  'antialias' has no effect when upsampling an image.

  `method` can be one of:

  *   <b>`bilinear`</b>: [Bilinear interpolation.](
    https://en.wikipedia.org/wiki/Bilinear_interpolation) If 'antialias' is
    true, becomes a hat/tent filter function with radius 1 when downsampling.
  *   <b>`lanczos3`</b>:  [Lanczos kernel](
    https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 3.
    High-quality practical filter but may have some ringing especially on
    synthetic images.
  *   <b>`lanczos5`</b>: [Lanczos kernel](
    https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 5.
    Very-high-quality filter but may have stronger ringing.
  *   <b>`bicubic`</b>: [Cubic interpolant](
    https://en.wikipedia.org/wiki/Bicubic_interpolation) of Keys. Equivalent to
    Catmull-Rom kernel. Reasonably good quality and faster than Lanczos3Kernel,
    particularly when upsampling.
  *   <b>`gaussian`</b>: [Gaussian kernel](
    https://en.wikipedia.org/wiki/Gaussian_filter) with radius 3,
    sigma = 1.5 / 3.
  *   <b>`nearest`</b>: [Nearest neighbor interpolation.](
    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
    'antialias' has no effect when used with nearest neighbor interpolation.
  *   <b>`area`</b>: Anti-aliased resampling with area interpolation.
    'antialias' has no effect when used with area interpolation; it
    always anti-aliases.
  *   <b>`mitchellcubic`</b>: Mitchell-Netravali Cubic non-interpolating filter.
    For synthetic images (especially those lacking proper prefiltering), less
    ringing than Keys cubic kernel but less sharp.

  Note that near image edges the filtering kernel may be partially outside the
  image boundaries. For these pixels, only input pixels inside the image will be
  included in the filter sum, and the output value will be appropriately
  normalized.

  The return value has the same type as `images` if `method` is
  `ResizeMethod.NEAREST_NEIGHBOR`. Otherwise, the return value has type
  `float32`.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The new
      size for the images.
    method: ResizeMethod.  Defaults to `bilinear`.
    preserve_aspect_ratio: Whether to preserve the aspect ratio. If set,
      `images` is resized to a size that fits in `size` while keeping the
      aspect ratio of the original image; the image is scaled up if `size` is
      bigger than its current size. Defaults to False.
    antialias: Whether to use an anti-aliasing filter when downsampling an
      image.
    name: A name for this operation (optional).

  Raises:
    ValueError: if the shape of `images` is incompatible with the
      shape arguments to this function
    ValueError: if `size` has invalid shape or type.
    ValueError: if an unsupported resize method is specified.

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """

  def _resize_core(batch, target_size):
    """Resize core function, passed to _resize_images_common."""
    # Methods without a dedicated kernel; their name doubles as `kernel_type`.
    kernel_type_methods = (
        ResizeMethod.LANCZOS3, ResizeMethod.LANCZOS5, ResizeMethod.GAUSSIAN,
        ResizeMethod.MITCHELLCUBIC)

    def _scale_and_translate(kernel_type):
      """Resamples `batch` with the named kernel and a zero translation."""
      target_hw = math_ops.cast(target_size, dtype=dtypes.float32)
      input_hw = math_ops.cast(
          array_ops.shape(batch)[1:3], dtype=dtypes.float32)
      return gen_image_ops.scale_and_translate(
          batch,
          target_size,
          target_hw / input_hw,
          array_ops.zeros([2]),
          kernel_type=kernel_type,
          antialias=antialias)

    if method == ResizeMethod.BILINEAR:
      if antialias:
        return _scale_and_translate('triangle')
      return gen_image_ops.resize_bilinear(
          batch, target_size, half_pixel_centers=True)
    if method == ResizeMethod.NEAREST_NEIGHBOR:
      return gen_image_ops.resize_nearest_neighbor(
          batch, target_size, half_pixel_centers=True)
    if method == ResizeMethod.BICUBIC:
      if antialias:
        return _scale_and_translate('keyscubic')
      return gen_image_ops.resize_bicubic(
          batch, target_size, half_pixel_centers=True)
    if method == ResizeMethod.AREA:
      return gen_image_ops.resize_area(batch, target_size)
    if method in kernel_type_methods:
      return _scale_and_translate(method)
    raise ValueError('Resize method is not implemented.')

  return _resize_images_common(
      images,
      _resize_core,
      size,
      preserve_aspect_ratio=preserve_aspect_ratio,
      name=name,
      skip_resize_if_same=False)
def _resize_image_with_pad_common(image, target_height, target_width,
                                  resize_fn):
  """Core functionality for v1 and v2 resize_image_with_pad functions.

  Scales `image` by a single factor so that it fits inside
  `target_height` x `target_width`, then pads the result up to exactly that
  size with `pad_to_bounding_box`.

  Args:
    image: Tensor-like of rank 3 (`[height, width, channels]`), rank 4
      (`[batch, height, width, channels]`) or of unknown rank; an unknown rank
      is handled like a single image.
    target_height: Target height.
    target_width: Target width.
    resize_fn: Callable invoked as `resize_fn(image, [height, width])` with the
      4-D batch and the scaled size.

  Returns:
    The resized and padded image(s); the leading batch dimension added for
    non-batch input is squeezed away again.

  Raises:
    ValueError: if `image` has a static rank other than 3 or 4, or if the
      padded result carries no shape.  Positivity of the targets is checked
      through `_assert` with `ValueError`.
  """
  with ops.name_scope(None, 'resize_image_with_pad', [image]):
    image = ops.convert_to_tensor(image, name='image')
    rank = image.get_shape().ndims
    if rank not in (None, 3, 4):
      raise ValueError('\'image\' must have either 3 or 4 dimensions.')

    # Anything that is not already a 4-D batch gets a leading batch axis.
    is_batch = rank == 4
    if not is_batch:
      image = array_ops.expand_dims(image, 0)
      if rank is None:
        image.set_shape([None] * 4)

    checks = _CheckAtLeast3DImage(image, require_static=False)
    checks += _assert(target_width > 0, ValueError,
                      'target_width must be > 0.')
    checks += _assert(target_height > 0, ValueError,
                      'target_height must be > 0.')

    image = control_flow_ops.with_dependencies(checks, image)

    def _larger(a, b):
      """Maximum of two values, as a TF op when either one is a tensor."""
      if _is_tensor(a) or _is_tensor(b):
        return math_ops.maximum(a, b)
      return max(a, b)

    _, height, width, _ = _ImageDimensions(image, rank=4)

    # Do the arithmetic in float64 so the divisions below are real divisions.
    height_f = math_ops.cast(height, dtype=dtypes.float64)
    width_f = math_ops.cast(width, dtype=dtypes.float64)
    target_height_f = math_ops.cast(target_height, dtype=dtypes.float64)
    target_width_f = math_ops.cast(target_width, dtype=dtypes.float64)

    # The larger of the two ratios is the one factor that makes both
    # dimensions fit within the target.
    ratio = _larger(width_f / target_width_f, height_f / target_height_f)
    scaled_height_f = height_f / ratio
    scaled_width_f = width_f / ratio
    scaled_height = math_ops.cast(
        math_ops.floor(scaled_height_f), dtype=dtypes.int32)
    scaled_width = math_ops.cast(
        math_ops.floor(scaled_width_f), dtype=dtypes.int32)

    # Half of the leftover space goes before the image; never negative.
    half_gap_height = (target_height_f - scaled_height_f) / 2
    half_gap_width = (target_width_f - scaled_width_f) / 2
    offset_height_f = math_ops.floor(half_gap_height)
    offset_width_f = math_ops.floor(half_gap_width)
    offset_height = _larger(
        0, math_ops.cast(offset_height_f, dtype=dtypes.int32))
    offset_width = _larger(
        0, math_ops.cast(offset_width_f, dtype=dtypes.int32))

    # Resize first, then pad to meet requested dimensions
    resized = resize_fn(image, [scaled_height, scaled_width])

    padded = pad_to_bounding_box(resized, offset_height, offset_width,
                                 target_height, target_width)

    if padded.get_shape().ndims is None:
      raise ValueError('padded contains no shape.')

    # Result intentionally unused; presumably kept for the rank check the
    # helper performs -- TODO confirm.
    _ImageDimensions(padded, rank=4)

    if is_batch:
      return padded
    return array_ops.squeeze(padded, axis=[0])
def resize_image_with_pad_v1(image,
                             target_height,
                             target_width,
                             method=ResizeMethodV1.BILINEAR,
                             align_corners=False):
  """Resizes and pads an image to a target width and height.

  The image is scaled to the target size without changing its aspect ratio,
  so no distortion is introduced.  Where the scaled image does not cover the
  requested dimensions, it is padded with zeroes to match them.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    target_height: Target height.
    target_width: Target width.
    method: Method to use for resizing image. See `resize_images()`
    align_corners: bool.  If True, the centers of the 4 corner pixels of the
      input and output tensors are aligned, preserving the values at the corner
      pixels. Defaults to `False`.

  Raises:
    ValueError: if `target_height` or `target_width` are zero or negative.

  Returns:
    Resized and padded image.
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """
  # The shared implementation only knows `(image, size)`; bind the v1-specific
  # options here.
  legacy_resize = lambda im, new_size: resize_images(  # pylint: disable=g-long-lambda
      im, new_size, method, align_corners=align_corners)

  return _resize_image_with_pad_common(image, target_height, target_width,
                                       legacy_resize)
@tf_export('image.resize_with_pad', v1=[])
def resize_image_with_pad_v2(image,
                             target_height,
                             target_width,
                             method=ResizeMethod.BILINEAR,
                             antialias=False):
  """Resizes and pads an image to a target width and height.

  The image is scaled to the target size without changing its aspect ratio,
  so no distortion is introduced.  Where the scaled image does not cover the
  requested dimensions, it is padded with zeroes to match them.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    target_height: Target height.
    target_width: Target width.
    method: Method to use for resizing image. See `image.resize()`
    antialias: Whether to use anti-aliasing when resizing. See 'image.resize()'.

  Raises:
    ValueError: if `target_height` or `target_width` are zero or negative.

  Returns:
    Resized and padded image.
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """
  # The shared implementation only knows `(image, size)`; bind the v2-specific
  # options here.
  v2_resize = lambda im, new_size: resize_images_v2(  # pylint: disable=g-long-lambda
      im, new_size, method, antialias=antialias)

  return _resize_image_with_pad_common(image, target_height, target_width,
                                       v2_resize)
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit variance.

  The result is `(x - mean) / adjusted_stddev`.  `mean` and `stddev` are taken
  over the last three dimensions of `image`, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  The lower bound on `stddev` keeps the division well defined for uniform
  images, whose standard deviation is zero.

  Args:
    image: An n-D Tensor where the last 3 dimensions are `[height, width,
      channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  with ops.name_scope(None, 'per_image_standardization', [image]) as scope:
    pixels = _AssertAtLeast3DImage(ops.convert_to_tensor(image, name='image'))
    # Element count of one image: product of its last three dimensions.
    num_elements = math_ops.reduce_prod(array_ops.shape(pixels)[-3:])

    pixels = math_ops.cast(pixels, dtype=dtypes.float32)
    mean = math_ops.reduce_mean(pixels, axis=[-1, -2, -3], keepdims=True)

    # Var[x] = E[x^2] - E[x]^2; the relu clamps small negative results to 0
    # before the square root is taken.
    mean_of_squares = math_ops.reduce_mean(
        math_ops.square(pixels), axis=[-1, -2, -3], keepdims=True)
    variance = gen_nn_ops.relu(mean_of_squares - math_ops.square(mean))
    stddev = math_ops.sqrt(variance)

    # Apply a minimum normalization that protects us against uniform images.
    stddev_floor = math_ops.rsqrt(math_ops.cast(num_elements, dtypes.float32))
    divisor = math_ops.maximum(stddev, stddev_floor)

    centered = math_ops.subtract(pixels, mean)
    return math_ops.div(centered, divisor, name=scope)
def random_brightness(image, max_delta, seed=None):
  """Adjust the brightness of images by a random factor.

  Same as calling `adjust_brightness()` with a `delta` drawn at random from
  the interval `[-max_delta, max_delta)`.

  Args:
    image: An image or images to adjust.
    max_delta: float, must be non-negative.
    seed: A Python integer. Used to create a random seed. See
      `tf.set_random_seed`
      for behavior.

  Returns:
    The brightness-adjusted image(s).

  Raises:
    ValueError: if `max_delta` is negative.
  """
  if max_delta < 0:
    raise ValueError('max_delta must be non-negative.')

  # A scalar (shape `[]`) sample: one delta shared by the whole input.
  return adjust_brightness(
      image, random_ops.random_uniform([], -max_delta, max_delta, seed=seed))
def random_contrast(image, lower, upper, seed=None):
  """Adjust the contrast of an image or images by a random factor.

  Same as calling `adjust_contrast()` with a `contrast_factor` drawn at random
  between `lower` and `upper`.

  Args:
    image: An image tensor with 3 or more dimensions.
    lower: float.  Lower bound for the random contrast factor.
    upper: float.  Upper bound for the random contrast factor.
    seed: A Python integer. Used to create a random seed. See
      `tf.set_random_seed` for behavior.

  Returns:
    The contrast-adjusted image(s).

  Raises:
    ValueError: if `upper <= lower` or if `lower < 0`.
  """
  # The ordering of the bounds is validated before their sign.
  if upper <= lower:
    raise ValueError('upper must be > lower.')
  if lower < 0:
    raise ValueError('lower must be non-negative.')

  # A scalar (shape `[]`) sample: one factor shared by the whole input.
  return adjust_contrast(
      image, random_ops.random_uniform([], lower, upper, seed=seed))
def adjust_brightness(image, delta):
  """Adjust the brightness of RGB or Grayscale images.

  Convenience wrapper: the image is brought into a float representation,
  `delta` is added, and the result is converted back to the original data
  type.  When chaining several adjustments, try to keep the number of
  redundant conversions small.

  `delta` is added to every component of `image`.  Fixed-point images are
  first converted to `float` and scaled accordingly, and `delta` is cast to
  that same float type.  For regular images, `delta` should be in the range
  `[0,1)`, as it is added to the image in floating point representation, where
  pixel values are in the `[0,1)` range.

  Args:
    image: RGB image or images to adjust.
    delta: A scalar. Amount to add to the pixel values.

  Returns:
    A brightness-adjusted tensor of the same shape and type as `image`.
  """
  with ops.name_scope(None, 'adjust_brightness', [image, delta]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Kept so the result can be converted back at the end.
    input_dtype = image.dtype

    # float16/float32 input is used as is; everything else goes via float32.
    needs_conversion = input_dtype not in (dtypes.float16, dtypes.float32)
    working = (
        convert_image_dtype(image, dtypes.float32)
        if needs_conversion else image)

    shifted = math_ops.add(
        working, math_ops.cast(delta, working.dtype), name=name)

    return convert_image_dtype(shifted, input_dtype, saturate=True)
def adjust_contrast(images, contrast_factor):
  """Adjust contrast of RGB or grayscale images.

  Convenience wrapper: the images are brought into a float representation,
  their contrast is adjusted, and the result is converted back to the original
  data type.  When chaining several adjustments, try to keep the number of
  redundant conversions small.

  `images` is a tensor of at least 3 dimensions.  The last 3 dimensions are
  interpreted as `[height, width, channels]`.  The other dimensions only
  represent a collection of images, such as `[batch, height, width, channels].`

  Contrast is adjusted independently for each channel of each image.

  For each channel, this Op computes the mean of the image pixels in the
  channel and then adjusts each component `x` of each pixel to
  `(x - mean) * contrast_factor + mean`.

  Args:
    images: Images to adjust.  At least 3-D.
    contrast_factor: A float multiplier for adjusting contrast.

  Returns:
    The contrast-adjusted image or images.
  """
  with ops.name_scope(None, 'adjust_contrast',
                      [images, contrast_factor]) as name:
    images = ops.convert_to_tensor(images, name='images')
    # Kept so the result can be converted back at the end.
    input_dtype = images.dtype

    # float16/float32 input is used as is; everything else goes via float32.
    needs_conversion = input_dtype not in (dtypes.float16, dtypes.float32)
    working = (
        convert_image_dtype(images, dtypes.float32)
        if needs_conversion else images)

    contrasted = gen_image_ops.adjust_contrastv2(
        working, contrast_factor=contrast_factor, name=name)

    return convert_image_dtype(contrasted, input_dtype, saturate=True)
def adjust_gamma(image, gamma=1, gain=1):
  """Performs Gamma Correction on the input image.

  Also known as Power Law Transform. Each pixel is scaled to the range 0 to 1
  by the value range of the image dtype, raised to the power `gamma`, scaled
  back and multiplied by `gain`: `Out = gain * In**gamma`.

  Args:
    image : A Tensor.
    gamma : A scalar or tensor. Non negative real number.
    gain  : A scalar or tensor. The constant multiplier.

  Returns:
    A Tensor. Gamma corrected output image.

  Raises:
    ValueError: If gamma is negative.

  Notes:
    For gamma greater than 1, the histogram will shift towards left and
    the output image will be darker than the input image.
    For gamma less than 1, the histogram will shift towards right and
    the output image will be brighter than the input image.

  References:
    [1] http://en.wikipedia.org/wiki/Gamma_correction
  """

  with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]):
    # float32 copy of the pixels, used for the actual computation.
    float_pixels = ops.convert_to_tensor(
        image, name='img', dtype=dtypes.float32)
    # Conversion without a forced dtype, used only to look up the limits of
    # the input dtype.
    image = ops.convert_to_tensor(image, name='image')

    gamma_checks = _assert(gamma >= 0, ValueError,
                           'Gamma should be a non-negative real number.')
    if gamma_checks:
      # Tie the check to `gamma` so it runs before `gamma` is consumed.
      gamma = control_flow_ops.with_dependencies(gamma_checks, gamma)

    # Value range of the input dtype: max(dtype) - min(dtype).
    dtype_limits = image.dtype.limits
    value_range = constant_op.constant(
        dtype_limits[1] - dtype_limits[0], dtype=dtypes.float32)

    # According to the definition of gamma correction.
    return (float_pixels / value_range)**gamma * value_range * gain
def convert_image_dtype(image, dtype, saturate=False, name=None):
  """Convert `image` to `dtype`, scaling its values if needed.

  Images that are represented using floating point values are expected to have
  values in the range [0,1). Image data stored in integer data types are
  expected to have values in the range `[0,MAX]`, where `MAX` is the largest
  positive representable number for the data type.

  This op converts between data types, scaling the values appropriately before
  casting.

  Note that converting from floating point inputs to integer types may lead to
  over/underflow problems. Set saturate to `True` to avoid such problem in
  problematic conversions. If enabled, saturation will clip the output into the
  allowed range before performing a potentially dangerous cast (and only before
  performing such a cast, i.e., when casting from a floating point to an integer
  type, and when casting from a signed to an unsigned type; `saturate` has no
  effect on casts between floats, or on casts that increase the type's range).

  Args:
    image: An image.
    dtype: A `DType` to convert `image` to, or any value that
      `dtypes.as_dtype` can turn into one (for example a type name string or
      a NumPy dtype).
    saturate: If `True`, clip the input before casting (if necessary).
    name: A name for this operation (optional).

  Returns:
    `image`, converted to `dtype`. When `image` already has that dtype an
    identity of `image` is returned.

  Raises:
    TypeError: if `dtype` cannot be converted to a `DType`.
  """
  image = ops.convert_to_tensor(image, name='image')
  # Normalize the target once. The equality test below already tolerates
  # string / NumPy dtype specs, but the `.is_integer`, `.is_floating` and
  # `.max` lookups further down need a real `DType`.
  dtype = dtypes.as_dtype(dtype)
  if dtype == image.dtype:
    return array_ops.identity(image, name=name)

  with ops.name_scope(name, 'convert_image', [image]) as name:
    # Both integer: use integer multiplication in the larger range
    if image.dtype.is_integer and dtype.is_integer:
      scale_in = image.dtype.max
      scale_out = dtype.max
      if scale_in > scale_out:
        # Scaling down, scale first, then cast. The scaling factor will
        # cause in.max to be mapped to above out.max but below out.max+1,
        # so that the output is safely in the supported range.
        scale = (scale_in + 1) // (scale_out + 1)
        scaled = math_ops.div(image, scale)

        if saturate:
          return math_ops.saturate_cast(scaled, dtype, name=name)
        else:
          return math_ops.cast(scaled, dtype, name=name)
      else:
        # Scaling up, cast first, then scale. The scale will not map in.max to
        # out.max, but converting back and forth should result in no change.
        if saturate:
          cast = math_ops.saturate_cast(image, dtype)
        else:
          cast = math_ops.cast(image, dtype)
        scale = (scale_out + 1) // (scale_in + 1)
        return math_ops.multiply(cast, scale, name=name)
    elif image.dtype.is_floating and dtype.is_floating:
      # Both float: Just cast, no possible overflows in the allowed ranges.
      # Note: We're ignoring float overflows. If your image dynamic range
      # exceeds float range you're on your own.
      return math_ops.cast(image, dtype, name=name)
    else:
      if image.dtype.is_integer:
        # Converting to float: first cast, then scale. No saturation possible.
        cast = math_ops.cast(image, dtype)
        scale = 1. / image.dtype.max
        return math_ops.multiply(cast, scale, name=name)
      else:
        # Converting from float: first scale, then cast
        scale = dtype.max + 0.5  # avoid rounding problems in the cast
        scaled = math_ops.multiply(image, scale)
        if saturate:
          return math_ops.saturate_cast(scaled, dtype, name=name)
        else:
          return math_ops.cast(scaled, dtype, name=name)
def rgb_to_grayscale(images, name=None):
  """Collapse the RGB channels of one or more images into a single gray one.

  The result keeps the `DType` and rank of `images`; only the last dimension
  changes, shrinking to size 1 and holding the grayscale value of each pixel.

  Args:
    images: RGB tensor to convert. Its last dimension must have size 3 and
      should hold the red, green and blue values.
    name: Optional name for the operation.

  Returns:
    The grayscale image(s).
  """
  with ops.name_scope(name, 'rgb_to_grayscale', [images]) as name:
    images = ops.convert_to_tensor(images, name='images')
    # The weighted sum is done in float32; keep the incoming dtype so the
    # result can be converted back at the end.
    source_dtype = images.dtype
    float_rgb = convert_image_dtype(images, dtypes.float32)

    # Luma coefficients, see
    # https://en.wikipedia.org/wiki/Luma_%28video%29
    luma_coefficients = [0.2989, 0.5870, 0.1140]
    # Contract the channel axis against the coefficients, then restore a
    # trailing axis of size 1 so the rank matches the input.
    luma = math_ops.tensordot(float_rgb, luma_coefficients, [-1, -1])
    luma = array_ops.expand_dims(luma, -1)
    return convert_image_dtype(luma, source_dtype, name=name)
def grayscale_to_rgb(images, name=None):
  """Expand one or more grayscale images to three identical RGB channels.

  The result keeps the `DType` and rank of `images`; the last dimension grows
  from 1 to 3 by repeating the gray value for every channel.

  Args:
    images: Grayscale tensor to convert. Its last dimension must be size 1.
    name: Optional name for the operation.

  Returns:
    The RGB image(s).
  """
  with ops.name_scope(name, 'grayscale_to_rgb', [images]) as name:
    images = ops.convert_to_tensor(images, name='images')
    # Build the tile multiples dynamically: 1 for every leading axis and 3 for
    # the trailing channel axis, i.e. [1, ..., 1, 3].
    leading_axes = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
    keep_leading = array_ops.ones(leading_axes, dtype=dtypes.int32)
    triple_channels = array_ops.expand_dims(3, 0)
    tile_multiples = array_ops.concat([keep_leading, triple_channels], 0)
    rgb = array_ops.tile(images, tile_multiples, name=name)
    # Re-attach the static shape: same leading dims, 3 channels.
    static_shape = images.get_shape()[:-1].concatenate([3])
    rgb.set_shape(static_shape)
    return rgb
1778# pylint: disable=invalid-name
def random_hue(image, max_delta, seed=None):
  """Shift the hue of RGB images by a randomly drawn amount.

  Behaves like `adjust_hue()`, with `delta` drawn uniformly between
  `-max_delta` and `max_delta`.

  `max_delta` must lie in the interval `[0, 0.5]`.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    max_delta: float.  Largest magnitude of the random hue shift.
    seed: An operation-specific seed. It is combined with the graph-level
      seed to determine the real seeds used by this operation. See the
      documentation of set_random_seed for how the two interact.

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `max_delta` is invalid.
  """
  # Validate the bound eagerly, before any op is created.
  if max_delta > 0.5:
    raise ValueError('max_delta must be <= 0.5.')
  elif max_delta < 0:
    raise ValueError('max_delta must be non-negative.')

  # A scalar (shape []) random shift shared by every pixel.
  hue_shift = random_ops.random_uniform([], -max_delta, max_delta, seed=seed)
  return adjust_hue(image, hue_shift)
def adjust_hue(image, delta, name=None):
  """Rotate the hue of RGB images by `delta`.

  Convenience wrapper: the image is brought to a float representation, handed
  to the hue-adjustment op (which works in HSV space, offsetting the hue
  channel by `delta` and returning RGB), and finally converted back to the
  original data type. When chaining several adjustments it is advisable to
  minimize the number of redundant conversions.

  `delta` must be in the interval `[-1, 1]`.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    delta: float.  Amount added to the hue channel.
    name: Optional name for this operation.

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.name_scope(name, 'adjust_hue', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Kept so the result can be converted back to the caller's dtype.
    source_dtype = image.dtype
    # float16 / float32 inputs go to the op unchanged; every other dtype is
    # converted to float32 first.
    float_image = (
        image if source_dtype in (dtypes.float16, dtypes.float32)
        else convert_image_dtype(image, dtypes.float32))

    hue_shifted = gen_image_ops.adjust_hue(float_image, delta)

    return convert_image_dtype(hue_shifted, source_dtype)
1849# pylint: disable=invalid-name
def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None):
  """Randomly changes jpeg encoding quality for inducing jpeg noise.

  `min_jpeg_quality` must be in the interval `[0, 100]` and less than
  `max_jpeg_quality`.
  `max_jpeg_quality` must be in the interval `[0, 100]`.

  The quality is an integer drawn with NumPy from the half-open interval
  `[min_jpeg_quality, max_jpeg_quality)` when this function is called (it is
  a plain Python-side value, not a random op), and is then passed to
  `adjust_jpeg_quality`.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    min_jpeg_quality: Minimum jpeg encoding quality to use.
    max_jpeg_quality: Maximum jpeg encoding quality to use (exclusive).
    seed: Seed for the NumPy generator used to pick the quality. `None`
      lets NumPy seed the generator itself.

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
  """
  if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or
      min_jpeg_quality > 100 or max_jpeg_quality > 100):
    raise ValueError('jpeg encoding range must be between 0 and 100.')

  if min_jpeg_quality >= max_jpeg_quality:
    raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')

  # Draw from a private generator instead of reseeding the process-wide NumPy
  # RNG: `np.random.seed(seed)` would silently reset the random stream of
  # every other user of `np.random`. A `RandomState` seeded with the same
  # value yields the same draw, so seeded results are unchanged.
  rng = np.random.RandomState(seed)
  jpeg_quality = rng.randint(min_jpeg_quality, max_jpeg_quality)
  return adjust_jpeg_quality(image, jpeg_quality)
def adjust_jpeg_quality(image, jpeg_quality, name=None):
  """Adjust jpeg encoding quality of an RGB image.

  This is a convenience method that round-trips the image through JPEG: it is
  converted to `uint8`, encoded with the requested quality, decoded again and
  converted back to the original data type.

  `jpeg_quality` must be in the interval `[0, 100]`.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    jpeg_quality: int.  jpeg encoding quality.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.name_scope(name, 'adjust_jpeg_quality', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Remember original dtype so we can convert back if needed
    orig_dtype = image.dtype
    # Convert to uint8. Saturate so that out-of-range inputs (e.g. float
    # values outside [0, 1] or negative signed integers) are clipped to the
    # uint8 range instead of overflowing in the cast; in-range inputs are
    # unaffected.
    image = convert_image_dtype(image, dtypes.uint8, saturate=True)
    # Encode image to jpeg with given jpeg quality
    image = gen_image_ops.encode_jpeg(image, quality=jpeg_quality)
    # Decode jpeg image
    image = gen_image_ops.decode_jpeg(image)
    # Convert back to original dtype and return
    return convert_image_dtype(image, orig_dtype)
def random_saturation(image, lower, upper, seed=None):
  """Scale the saturation of RGB images by a randomly drawn factor.

  Behaves like `adjust_saturation()`, with `saturation_factor` drawn
  uniformly between `lower` and `upper`.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    lower: float.  Smallest saturation factor that may be drawn.
    upper: float.  Upper bound for the random saturation factor.
    seed: An operation-specific seed. It is combined with the graph-level
      seed to determine the real seeds used by this operation. See the
      documentation of set_random_seed for how the two interact.

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `upper <= lower` or if `lower < 0`.
  """
  # Validate the bounds eagerly; the ordering check comes first.
  if upper <= lower:
    raise ValueError('upper must be > lower.')
  elif lower < 0:
    raise ValueError('lower must be non-negative.')

  # A scalar (shape []) random factor shared by every pixel.
  factor = random_ops.random_uniform([], lower, upper, seed=seed)
  return adjust_saturation(image, factor)
def adjust_saturation(image, saturation_factor, name=None):
  """Scale the saturation of RGB images.

  Convenience wrapper: the images are brought to a float representation,
  handed to the saturation-adjustment op (which works in HSV space,
  multiplying the saturation (S) channel by `saturation_factor`, clipping,
  and returning RGB), and finally converted back to the original data type.
  When chaining several adjustments it is advisable to minimize the number of
  redundant conversions.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    saturation_factor: float. Factor to multiply the saturation by.
    name: Optional name for this operation.

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.name_scope(name, 'adjust_saturation', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Kept so the result can be converted back to the caller's dtype.
    source_dtype = image.dtype
    # float16 / float32 inputs go to the op unchanged; every other dtype is
    # converted to float32 first.
    float_image = (
        image if source_dtype in (dtypes.float16, dtypes.float32)
        else convert_image_dtype(image, dtypes.float32))

    resaturated = gen_image_ops.adjust_saturation(float_image,
                                                  saturation_factor)

    return convert_image_dtype(resaturated, source_dtype)
@tf_export('io.is_jpeg', 'image.is_jpeg', v1=['io.is_jpeg', 'image.is_jpeg'])
def is_jpeg(contents, name=None):
  r"""Checks whether `contents` looks like an encoded JPEG image.

  Only the first three bytes are compared against the JPEG prefix, so the
  check is susceptible to false positives.

  Args:
    contents: 0-D `string`. The encoded image bytes.
    name: A name for the operation (optional)

  Returns:
     A scalar boolean tensor indicating if 'contents' may be a JPEG image.
  """
  # Normal JPEGs start with \xff\xd8\xff\xe0 and JPEGs with EXIF data start
  # with \xff\xd8\xff\xe1; the shared three-byte prefix covers both.
  jpeg_prefix = b'\xff\xd8\xff'
  with ops.name_scope(name, 'is_jpeg'):
    leading_bytes = string_ops.substr(contents, 0, 3)
    return math_ops.equal(leading_bytes, jpeg_prefix, name=name)
def _is_png(contents, name=None):
  r"""Checks whether `contents` looks like an encoded PNG image.

  Only the first three bytes are compared against `\211PN`, so the check is
  susceptible to false positives.

  Args:
    contents: 0-D `string`. The encoded image bytes.
    name: A name for the operation (optional)

  Returns:
     A scalar boolean tensor indicating if 'contents' may be a PNG image.
  """
  png_prefix = b'\211PN'
  with ops.name_scope(name, 'is_png'):
    leading_bytes = string_ops.substr(contents, 0, 3)
    return math_ops.equal(leading_bytes, png_prefix, name=name)
# Register the generated image codec ops with `tf_export` under both their
# `io.*` and `image.*` API names; the same pair of names is listed for v1.
tf_export('io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg',
          v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])(
              gen_image_ops.decode_and_crop_jpeg)

# Decoders.
tf_export('io.decode_bmp', 'image.decode_bmp',
          v1=['io.decode_bmp', 'image.decode_bmp'])(gen_image_ops.decode_bmp)
tf_export('io.decode_gif', 'image.decode_gif',
          v1=['io.decode_gif', 'image.decode_gif'])(gen_image_ops.decode_gif)
tf_export('io.decode_jpeg', 'image.decode_jpeg',
          v1=['io.decode_jpeg', 'image.decode_jpeg'])(gen_image_ops.decode_jpeg)
tf_export('io.decode_png', 'image.decode_png',
          v1=['io.decode_png', 'image.decode_png'])(gen_image_ops.decode_png)

# JPEG encoder and JPEG shape extraction.
tf_export('io.encode_jpeg', 'image.encode_jpeg',
          v1=['io.encode_jpeg', 'image.encode_jpeg'])(gen_image_ops.encode_jpeg)
tf_export('io.extract_jpeg_shape', 'image.extract_jpeg_shape',
          v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])(
              gen_image_ops.extract_jpeg_shape)
@tf_export('io.decode_image', 'image.decode_image',
           v1=['io.decode_image', 'image.decode_image'])
def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None):
  """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`,
  and `decode_png`.

  Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the
  appropriate operation to convert the input bytes `string` into a `Tensor`
  of type `dtype`.

  The format is detected from the leading bytes of `contents` with a chain of
  conditionals: JPEG first, then PNG, then GIF, and finally BMP. If the BMP
  signature does not match either, an assertion fails with
  'Unable to decode bytes as JPEG, PNG, GIF, or BMP'.

  Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as
  opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D
  arrays `[height, width, num_channels]`. Make sure to take this into account
  when constructing your graph if you are intermixing GIF files with BMP, JPEG,
  and/or PNG files.

  Args:
    contents: 0-D `string`. The encoded image bytes.
    channels: An optional `int`. Defaults to `None`, which is replaced by `0`
      before the BMP, GIF and JPEG channel checks. Number of color channels
      for the decoded image. Must be one of `None`, 0, 1, 3 or 4; the
      per-format assertions additionally reject 1 for BMP, 1 and 4 for GIF,
      and 4 for JPEG.
    dtype: The desired DType of the returned `Tensor`.
    name: A name for the operation (optional)

  Returns:
    `Tensor` with type `dtype` and shape `[height, width, num_channels]` for
      BMP, JPEG, and PNG images and shape `[num_frames, height, width, 3]` for
      GIF images.

  Raises:
    ValueError: On incorrect number of channels.
  """
  with ops.name_scope(name, 'decode_image'):
    # Static check, performed when the function is called.
    if channels not in (None, 0, 1, 3, 4):
      raise ValueError('channels must be in (None, 0, 1, 3, 4)')
    # First three bytes of the input; used by the GIF signature check below.
    substr = string_ops.substr(contents, 0, 3)

    def _bmp():
      """Decodes a BMP image."""
      signature = string_ops.substr(contents, 0, 2)
      # Create assert op to check that bytes are BMP decodable
      # BMP is the last format tried, so a mismatch here means that no
      # supported format matched.
      is_bmp = math_ops.equal(signature, 'BM', name='is_bmp')
      decode_msg = 'Unable to decode bytes as JPEG, PNG, GIF, or BMP'
      assert_decode = control_flow_ops.Assert(is_bmp, [decode_msg])
      bmp_channels = 0 if channels is None else channels
      # NOTE(review): only channels == 1 is rejected here, although the
      # message lists (None, 0, 3); channels == 4 passes this check.
      good_channels = math_ops.not_equal(bmp_channels, 1, name='check_channels')
      channels_msg = 'Channels must be in (None, 0, 3) when decoding BMP images'
      assert_channels = control_flow_ops.Assert(good_channels, [channels_msg])
      with ops.control_dependencies([assert_decode, assert_channels]):
        # NOTE(review): `channels` is not forwarded to decode_bmp.
        return convert_image_dtype(gen_image_ops.decode_bmp(contents), dtype)

    def _gif():
      """Decodes a GIF image."""
      # Create assert to make sure that channels is not set to 1 or 4
      # Already checked above that channels is in (None, 0, 1, 3, 4)

      gif_channels = 0 if channels is None else channels
      good_channels = math_ops.logical_and(
          math_ops.not_equal(gif_channels, 1, name='check_gif_channels'),
          math_ops.not_equal(gif_channels, 4, name='check_gif_channels'))
      channels_msg = 'Channels must be in (None, 0, 3) when decoding GIF images'
      assert_channels = control_flow_ops.Assert(good_channels, [channels_msg])
      with ops.control_dependencies([assert_channels]):
        return convert_image_dtype(gen_image_ops.decode_gif(contents), dtype)

    def check_gif():
      """Decodes as GIF if the signature matches, otherwise tries BMP."""
      # Create assert op to check that bytes are GIF decodable
      # b'\x47\x49\x46' is the ASCII string 'GIF'.
      is_gif = math_ops.equal(substr, b'\x47\x49\x46', name='is_gif')
      return control_flow_ops.cond(is_gif, _gif, _bmp, name='cond_gif')

    def _png():
      """Decodes a PNG image."""
      # Decode as uint8 when that is the requested dtype, otherwise as uint16,
      # and then convert the result to the requested dtype.
      return convert_image_dtype(
          gen_image_ops.decode_png(contents, channels,
                                   dtype=dtypes.uint8
                                   if dtype == dtypes.uint8
                                   else dtypes.uint16), dtype)

    def check_png():
      """Checks if an image is PNG."""
      # Falls through to the GIF/BMP checks when the PNG prefix is absent.
      return control_flow_ops.cond(
          _is_png(contents), _png, check_gif, name='cond_png')

    def _jpeg():
      """Decodes a jpeg image."""
      jpeg_channels = 0 if channels is None else channels
      # Only channels == 4 is rejected for JPEG.
      good_channels = math_ops.not_equal(
          jpeg_channels, 4, name='check_jpeg_channels')
      channels_msg = ('Channels must be in (None, 0, 1, 3) when decoding JPEG '
                      'images')
      assert_channels = control_flow_ops.Assert(good_channels, [channels_msg])
      with ops.control_dependencies([assert_channels]):
        return convert_image_dtype(
            gen_image_ops.decode_jpeg(contents, channels), dtype)

    # Decode normal JPEG images (start with \xff\xd8\xff\xe0)
    # as well as JPEG images with EXIF data (start with \xff\xd8\xff\xe1).
    # Anything else is handed to the PNG -> GIF -> BMP chain.
    return control_flow_ops.cond(
        is_jpeg(contents), _jpeg, check_png, name='cond_jpeg')
def total_variation(images, name=None):
  """Compute the total variation of one or more images.

  Total variation is the sum of the absolute differences between
  neighboring pixel-values of the input images, which measures how much noise
  the images contain.

  It can serve as a loss-function during optimization to suppress noise in
  images. For a batch of images, the scalar loss-value should be computed as
  the sum:
  `loss = tf.reduce_sum(tf.image.total_variation(images))`

  This implements the anisotropic 2-D version of the formula described here:

  https://en.wikipedia.org/wiki/Total_variation_denoising

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or
            3-D Tensor of shape `[height, width, channels]`.

    name: A name for the operation (optional).

  Raises:
    ValueError: if `images` is neither 3-D nor 4-D.

  Returns:
    The total variation of `images`.

    For 4-D `images`, a 1-D float Tensor of shape `[batch]` holding the total
    variation of each image in the batch.
    For 3-D `images`, a scalar float holding the total variation of that
    image.
  """

  with ops.name_scope(name, 'total_variation'):
    rank = images.get_shape().ndims

    # Anything other than a single image or a batch (including an unknown
    # rank) is rejected before any op is created.
    if rank not in (3, 4):
      raise ValueError('\'images\' must be either 3 or 4-dimensional.')

    # Neighbor differences are obtained by subtracting the images from copies
    # of themselves shifted by one pixel along the height and the width.
    if rank == 3:
      # Single image of shape [height, width, channels].
      diff_height = images[1:, :, :] - images[:-1, :, :]
      diff_width = images[:, 1:, :] - images[:, :-1, :]

      # None reduces over every axis, giving a scalar.
      reduce_axes = None
    else:
      # Batch of images of shape [batch, height, width, channels].
      diff_height = images[:, 1:, :, :] - images[:, :-1, :, :]
      diff_width = images[:, :, 1:, :] - images[:, :, :-1, :]

      # Reduce over the last 3 axes only, leaving one value per image.
      reduce_axes = [1, 2, 3]

    # Sum the absolute differences of each direction, then add them up.
    height_variation = math_ops.reduce_sum(
        math_ops.abs(diff_height), axis=reduce_axes)
    width_variation = math_ops.reduce_sum(
        math_ops.abs(diff_width), axis=reduce_axes)
    return height_variation + width_variation
@tf_export('image.sample_distorted_bounding_box', v1=[])
def sample_distorted_bounding_box_v2(image_size,
                                     bounding_boxes,
                                     seed=0,
                                     min_object_covered=0.1,
                                     aspect_ratio_range=None,
                                     area_range=None,
                                     max_attempts=None,
                                     use_image_if_no_bounding_boxes=None,
                                     name=None):
  """Generate a single randomly distorted bounding box for an image.

  Image recognition and object localization datasets often come with
  bounding box annotations next to the ground-truth labels. A common way to
  train on such data is *data augmentation*: randomly distorting an image
  while preserving its content. Given an `image_size`, `bounding_boxes` and a
  series of constraints, this Op outputs a randomly distorted localization of
  an object, i.e. a bounding box.

  The result is one bounding box that may be used to crop the original image,
  returned as 3 tensors: `begin`, `size` and `bboxes`. `begin` and `size` can
  be fed directly into `tf.slice` to crop the image; `bboxes` may be supplied
  to `tf.image.draw_bounding_boxes` to visualize the box.

  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`,
  with float coordinates in `[0.0, 1.0]` relative to the width and height of
  the underlying image.

  For example,

  ```python
      # Generate a single distorted bounding box.
      begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
          tf.shape(image),
          bounding_boxes=bounding_boxes,
          min_object_covered=0.1)

      # Draw the bounding box in an image summary.
      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                    bbox_for_draw)
      tf.summary.image('images_with_box', image_with_box)

      # Employ the bounding box to distort the image.
      distorted_image = tf.slice(image, begin, size)
  ```

  If no bounding box information is available, setting
  `use_image_if_no_bounding_boxes = true` assumes a single implicit bounding
  box covering the whole image. If `use_image_if_no_bounding_boxes` is false
  and no bounding boxes are supplied, an error is raised.

  Args:
    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`.
      1-D, containing `[height, width, channels]`.
    bounding_boxes: A `Tensor` of type `float32`.
      3-D with shape `[batch, N, 4]` describing the N bounding boxes
      associated with the image.
    seed: An optional `int`. Defaults to `0`.
      A non-zero `seed` seeds the random number generator; otherwise it is
      seeded by a random seed.
    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`.
      The cropped area of the image must contain at least this fraction of
      any bounding box supplied. The value should be non-negative. With 0,
      the cropped area does not need to overlap any of the supplied boxes.
    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
      1.33]`.
      The cropped area of the image must have an aspect `ratio =
      width / height` within this range.
    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`.
      The cropped area of the image must contain a fraction of the
      supplied image within this range.
    max_attempts: An optional `int`. Defaults to `100`.
      Number of attempts at generating a cropped region of the image
      satisfying the constraints. After `max_attempts` failures, the entire
      image is returned.
    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
      Controls behavior if no bounding boxes are supplied.
      If true, assume an implicit bounding box covering the whole input. If
      false, raise an error.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (begin, size, bboxes).

    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
    `[offset_height, offset_width, 0]`. Provide as input to
      `tf.slice`.
    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
    `[target_height, target_width, -1]`. Provide as input to
      `tf.slice`.
    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
    the distorted bounding box.
    Provide as input to `tf.image.draw_bounding_boxes`.
  """
  # A falsy seed (0 or None) is forwarded as the (0, 0) pair; any other value
  # is expanded into an op-level seed pair by `random_seed.get_seed`.
  if seed:
    seed1, seed2 = random_seed.get_seed(seed)
  else:
    seed1, seed2 = 0, 0
  return sample_distorted_bounding_box(
      image_size,
      bounding_boxes,
      seed1,
      seed2,
      min_object_covered,
      aspect_ratio_range,
      area_range,
      max_attempts,
      use_image_if_no_bounding_boxes,
      name)
2318@deprecation.deprecated(date=None, instructions='`seed2` arg is deprecated.'
2319                        'Use sample_distorted_bounding_box_v2 instead.')
2320def sample_distorted_bounding_box(image_size,
2321                                  bounding_boxes,
2322                                  seed=None,
2323                                  seed2=None,
2324                                  min_object_covered=0.1,
2325                                  aspect_ratio_range=None,
2326                                  area_range=None,
2327                                  max_attempts=None,
2328                                  use_image_if_no_bounding_boxes=None,
2329                                  name=None):
2330  """Generate a single randomly distorted bounding box for an image.
2332  Bounding box annotations are often supplied in addition to ground-truth labels
2333  in image recognition or object localization tasks. A common technique for
2334  training such a system is to randomly distort an image while preserving
2335  its content, i.e. *data augmentation*. This Op outputs a randomly distorted
2336  localization of an object, i.e. bounding box, given an `image_size`,
2337  `bounding_boxes` and a series of constraints.
2339  The output of this Op is a single bounding box that may be used to crop the
2340  original image. The output is returned as 3 tensors: `begin`, `size` and
2341  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
2342  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
2343  visualize
2344  what the bounding box looks like.
2346  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
2347  The
2348  bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
2349  height of the underlying image.
2351  For example,
2353  ```python
2354      # Generate a single distorted bounding box.
2355      begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
2356          tf.shape(image),
2357          bounding_boxes=bounding_boxes,
2358          min_object_covered=0.1)
2360      # Draw the bounding box in an image summary.
2361      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
2362                                                    bbox_for_draw)
2363      tf.summary.image('images_with_box', image_with_box)
2365      # Employ the bounding box to distort the image.
2366      distorted_image = tf.slice(image, begin, size)
2367  ```
2369  Note that if no bounding box information is available, setting
2370  `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
2371  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
2372  false and no bounding boxes are supplied, an error is raised.
2374  Args:
2375    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
2376      `int16`, `int32`, `int64`.
2377      1-D, containing `[height, width, channels]`.
2378    bounding_boxes: A `Tensor` of type `float32`.
2379      3-D with shape `[batch, N, 4]` describing the N bounding boxes
2380      associated with the image.
2381    seed: An optional `int`. Defaults to `0`.
2382      If either `seed` or `seed2` are set to non-zero, the random number
2383      generator is seeded by the given `seed`.  Otherwise, it is seeded by a
2384        random
2385      seed.
2386    seed2: An optional `int`. Defaults to `0`.
2387      A second seed to avoid seed collision.
2388    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`.
2389      The cropped area of the image must contain at least this
2390      fraction of any bounding box supplied. The value of this parameter should
2391        be
2392      non-negative. In the case of 0, the cropped area does not need to overlap
2393      any of the bounding boxes supplied.
2394    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
2395      1.33]`.
2396      The cropped area of the image must have an aspect ratio =
2397      width / height within this range.
2398    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`.
2399      The cropped area of the image must contain a fraction of the
2400      supplied image within this range.
2401    max_attempts: An optional `int`. Defaults to `100`.
2402      Number of attempts at generating a cropped region of the image
2403      of the specified constraints. After `max_attempts` failures, return the
2404        entire
2405      image.
2406    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
2407      Controls behavior if no bounding boxes supplied.
2408      If true, assume an implicit bounding box covering the whole input. If
2409        false,
2410      raise an error.
2411    name: A name for the operation (optional).
2413  Returns:
2414    A tuple of `Tensor` objects (begin, size, bboxes).
2416    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
2417    `[offset_height, offset_width, 0]`. Provide as input to
2418      `tf.slice`.
2419    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
2420    `[target_height, target_width, -1]`. Provide as input to
2421      `tf.slice`.
2422    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
2423    the distorted bounding box.
2424      Provide as input to `tf.image.draw_bounding_boxes`.
2425  """
2426  with ops.name_scope(name, 'sample_distorted_bounding_box'):
2427    return gen_image_ops.sample_distorted_bounding_box_v2(
2428        image_size,
2429        bounding_boxes,
2430        seed=seed,
2431        seed2=seed2,
2432        min_object_covered=min_object_covered,
2433        aspect_ratio_range=aspect_ratio_range,
2434        area_range=area_range,
2435        max_attempts=max_attempts,
2436        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
2437        name=name)
def non_max_suppression(boxes,
                        scores,
                        max_output_size,
                        iou_threshold=0.5,
                        score_threshold=float('-inf'),
                        name=None):
  """Greedily picks boxes by descending score, dropping heavy overlaps.

  A box is discarded when its intersection-over-union (IOU) with a box that
  was already selected is too high.  Each box is given as `[y1, x1, y2, x2]`,
  where `(y1, x1)` and `(y2, x2)` are any diagonal pair of corners; the
  coordinates may be normalized (in `[0, 1]`) or absolute.  The selection does
  not depend on where the origin of the coordinate system is: translating or
  reflecting the coordinate system selects the same boxes.

  The result is a set of integer indices into `boxes`; use `tf.gather` to
  fetch the corresponding coordinates:

    ```python
    selected_indices = tf.image.non_max_suppression(
        boxes, scores, max_output_size, iou_threshold)
    selected_boxes = tf.gather(boxes, selected_indices)
    ```

  Args:
    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` holding one score per
      box (per row of `boxes`).
    max_output_size: A scalar integer `Tensor`; the maximum number of boxes
      that may be selected.
    iou_threshold: A float; the IOU threshold used to decide whether two boxes
      overlap too much.
    score_threshold: A float; the score threshold used to decide when a box
      is removed.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` with the selected
      indices into `boxes`, where `M <= max_output_size`.
  """
  with ops.name_scope(name, 'non_max_suppression'):
    # The V3 op takes both thresholds as tensor inputs.
    iou_tensor = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
    score_tensor = ops.convert_to_tensor(
        score_threshold, name='score_threshold')
    selected_indices = gen_image_ops.non_max_suppression_v3(
        boxes, scores, max_output_size, iou_tensor, score_tensor)
  return selected_indices
def non_max_suppression_padded(boxes,
                               scores,
                               max_output_size,
                               iou_threshold=0.5,
                               score_threshold=float('-inf'),
                               pad_to_max_output_size=False,
                               name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Performs algorithmically equivalent operation to tf.image.non_max_suppression,
  with the addition of an optional parameter which zero-pads the output to
  be of size `max_output_size`.
  The output of this operation is a tuple containing the set of integers
  indexing into the input collection of bounding boxes representing the selected
  boxes and the number of valid indices in the index set.  The bounding box
  coordinates corresponding to the selected indices can then be obtained using
  the `tf.slice` and `tf.gather` operations.  For example:

    ```python
    selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
        boxes, scores, max_output_size, iou_threshold,
        score_threshold, pad_to_max_output_size=True)
    selected_indices = tf.slice(
        selected_indices_padded, tf.constant([0]), num_valid)
    selected_boxes = tf.gather(boxes, selected_indices)
    ```

  Args:
    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
      score corresponding to each box (each row of boxes).
    max_output_size: A scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non max suppression.
    iou_threshold: A float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    score_threshold: A float representing the threshold for deciding when to
      remove boxes based on score.
    pad_to_max_output_size: bool.  If True, size of `selected_indices` output
      is padded to `max_output_size`.
    name: A name for the operation (optional).

  Returns:
    A tuple `(selected_indices, valid_outputs)`, on every code path:

    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
      selected indices from the boxes tensor, where `M <= max_output_size`.
    valid_outputs: A scalar integer `Tensor` denoting how many elements in
      `selected_indices` are valid.  Valid elements occur first, then padding.
  """
  with ops.name_scope(name, 'non_max_suppression_padded'):
    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
    score_threshold = ops.convert_to_tensor(
        score_threshold, name='score_threshold')
    if compat.forward_compatible(2018, 8, 7) or pad_to_max_output_size:
      return gen_image_ops.non_max_suppression_v4(
          boxes, scores, max_output_size, iou_threshold, score_threshold,
          pad_to_max_output_size)

    # Fallback for binaries that predate the V4 kernel.  V3 produces only the
    # (unpadded) index vector, so every entry is valid; report its length as
    # `valid_outputs` so callers can always unpack a pair, as documented.
    selected_indices = gen_image_ops.non_max_suppression_v3(
        boxes, scores, max_output_size, iou_threshold, score_threshold)
    return selected_indices, array_ops.size(selected_indices)
def non_max_suppression_with_overlaps(overlaps,
                                      scores,
                                      max_output_size,
                                      overlap_threshold=0.5,
                                      score_threshold=float('-inf'),
                                      name=None):
  """Greedily picks boxes by descending score using precomputed overlaps.

  A box is discarded when its overlap with a box that was already selected is
  too high.  The pairwise overlap values are supplied as a square n-by-n
  matrix.

  The result is a set of integer indices into the input collection of boxes;
  use `tf.gather` to fetch the corresponding coordinates:

    ```python
    selected_indices = tf.image.non_max_suppression_overlaps(
        overlaps, scores, max_output_size, overlap_threshold)
    selected_boxes = tf.gather(boxes, selected_indices)
    ```

  Args:
    overlaps: A 2-D float `Tensor` of shape `[num_boxes, num_boxes]`.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` holding one score per
      box.
    max_output_size: A scalar integer `Tensor`; the maximum number of boxes
      that may be selected.
    overlap_threshold: A float; the threshold on the provided overlap values
      used to decide whether two boxes overlap too much.
    score_threshold: A float; the score threshold used to decide when a box
      is removed.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` with the selected
      indices into `overlaps`, where `M <= max_output_size`.
  """
  with ops.name_scope(name, 'non_max_suppression_overlaps'):
    # Only the overlap threshold is converted explicitly here;
    # `score_threshold` is handed to the op as given.
    threshold_tensor = ops.convert_to_tensor(
        overlap_threshold, name='overlap_threshold')
    selected_indices = gen_image_ops.non_max_suppression_with_overlaps(
        overlaps, scores, max_output_size, threshold_tensor, score_threshold)
  return selected_indices
# RGB -> YIQ weights, indexed as [input channel][output channel]:
# `rgb_to_yiq` contracts the last image axis with the first kernel axis, so
# each row holds what one of R, G, B contributes to (Y, I, Q).
_rgb_to_yiq_kernel = [
    [0.299, 0.59590059, 0.2115],
    [0.587, -0.27455667, -0.52273617],
    [0.114, -0.32134392, 0.31119955],
]
def rgb_to_yiq(images):
  """Converts one or more images from RGB to YIQ.

  The result has the same shape as `images` and holds the YIQ value of every
  pixel.  The output is only well defined if the values in `images` are in
  [0,1].

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  rgb = ops.convert_to_tensor(images, name='images')
  # Build the conversion matrix in the image dtype.
  rgb_to_yiq_matrix = ops.convert_to_tensor(
      _rgb_to_yiq_kernel, dtype=rgb.dtype, name='kernel')
  # Contract the channel (last) axis of the image with the matrix rows.
  channel_axis = rgb.get_shape().ndims - 1
  return math_ops.tensordot(
      rgb, rgb_to_yiq_matrix, axes=[[channel_axis], [0]])
# YIQ -> RGB weights, indexed as [input channel][output channel]:
# `yiq_to_rgb` contracts the last image axis with the first kernel axis, so
# each row holds what one of Y, I, Q contributes to (R, G, B).
_yiq_to_rgb_kernel = [
    [1, 1, 1],
    [0.95598634, -0.27201283, -1.10674021],
    [0.6208248, -0.64720424, 1.70423049],
]
def yiq_to_rgb(images):
  """Converts one or more images from YIQ to RGB.

  The result has the same shape as `images` and holds the RGB value of every
  pixel.  The output is only well defined if the Y values are in [0,1], the
  I values are in [-0.5957,0.5957] and the Q values are in [-0.5226,0.5226].

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  yiq = ops.convert_to_tensor(images, name='images')
  # Build the conversion matrix in the image dtype.
  yiq_to_rgb_matrix = ops.convert_to_tensor(
      _yiq_to_rgb_kernel, dtype=yiq.dtype, name='kernel')
  # Contract the channel (last) axis of the image with the matrix rows.
  channel_axis = yiq.get_shape().ndims - 1
  return math_ops.tensordot(
      yiq, yiq_to_rgb_matrix, axes=[[channel_axis], [0]])
# RGB -> YUV weights, indexed as [input channel][output channel]:
# `rgb_to_yuv` contracts the last image axis with the first kernel axis, so
# each row holds what one of R, G, B contributes to (Y, U, V).
_rgb_to_yuv_kernel = [
    [0.299, -0.14714119, 0.61497538],
    [0.587, -0.28886916, -0.51496512],
    [0.114, 0.43601035, -0.10001026],
]
def rgb_to_yuv(images):
  """Converts one or more images from RGB to YUV.

  The result has the same shape as `images` and holds the YUV value of every
  pixel.  The output is only well defined if the values in `images` are in
  [0,1].

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  rgb = ops.convert_to_tensor(images, name='images')
  # Build the conversion matrix in the image dtype.
  rgb_to_yuv_matrix = ops.convert_to_tensor(
      _rgb_to_yuv_kernel, dtype=rgb.dtype, name='kernel')
  # Contract the channel (last) axis of the image with the matrix rows.
  channel_axis = rgb.get_shape().ndims - 1
  return math_ops.tensordot(
      rgb, rgb_to_yuv_matrix, axes=[[channel_axis], [0]])
# YUV -> RGB weights, indexed as [input channel][output channel]:
# `yuv_to_rgb` contracts the last image axis with the first kernel axis, so
# each row holds what one of Y, U, V contributes to (R, G, B).
_yuv_to_rgb_kernel = [
    [1, 1, 1],
    [0, -0.394642334, 2.03206185],
    [1.13988303, -0.58062185, 0],
]
def yuv_to_rgb(images):
  """Converts one or more images from YUV to RGB.

  The result has the same shape as `images` and holds the RGB value of every
  pixel.  The output is only well defined if the Y values are in [0,1] and
  the U and V values are in [-0.5,0.5].

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  yuv = ops.convert_to_tensor(images, name='images')
  # Build the conversion matrix in the image dtype.
  yuv_to_rgb_matrix = ops.convert_to_tensor(
      _yuv_to_rgb_kernel, dtype=yuv.dtype, name='kernel')
  # Contract the channel (last) axis of the image with the matrix rows.
  channel_axis = yuv.get_shape().ndims - 1
  return math_ops.tensordot(
      yuv, yuv_to_rgb_matrix, axes=[[channel_axis], [0]])
def _verify_compatible_image_shapes(img1, img2):
  """Checks that two image tensors can be compared with SSIM or PSNR.

  Statically, both inputs must have rank at least 3 with compatible last three
  dimensions, and (when both ranks are known) each pair of leading dimensions,
  matched from the right, must be compatible or contain a 1.  In addition,
  runtime assertions are built on the dynamic shapes.

  NOTE: the runtime rank assertion is built for `img1` only; the second
  assertion compares the last three entries of both dynamic shapes.

  Args:
    img1: Tensor containing the first image batch.
    img2: Tensor containing the second image batch.

  Returns:
    A tuple containing: the first tensor shape, the second tensor shape, and a
    list of control_flow_ops.Assert() ops implementing the checks.

  Raises:
    ValueError: When static shape check fails.
  """
  static1 = img1.get_shape().with_rank_at_least(3)
  static2 = img2.get_shape().with_rank_at_least(3)
  static1[-3:].assert_is_compatible_with(static2[-3:])

  ranks_known = static1.ndims is not None and static2.ndims is not None
  if ranks_known:
    # Walk the leading (batch) dimensions from the innermost outwards.
    batch_dims1 = reversed(static1.dims[:-3])
    batch_dims2 = reversed(static2.dims[:-3])
    for dim1, dim2 in zip(batch_dims1, batch_dims2):
      broadcastable = (
          dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2))
      if not broadcastable:
        raise ValueError(
            'Two images are not compatible: %s and %s' % (static1, static2))

  # From here on work with the dynamic shape tensors.
  dynamic1, dynamic2 = array_ops.shape_n([img1, img2])

  # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable.
  rank_ok = math_ops.greater_equal(array_ops.size(dynamic1), 3)
  rank_check = control_flow_ops.Assert(
      rank_ok, [dynamic1, dynamic2], summarize=10)
  same_image_dims = math_ops.reduce_all(
      math_ops.equal(dynamic1[-3:], dynamic2[-3:]))
  image_dims_check = control_flow_ops.Assert(
      same_image_dims, [dynamic1, dynamic2], summarize=10)
  return dynamic1, dynamic2, [rank_check, image_dims_check]
def psnr(a, b, max_val, name=None):
  """Returns the Peak Signal-to-Noise Ratio between a and b.

  Intended for signals (or images); one PSNR value is produced per image.

  The last three dimensions of input are expected to be [height, width, depth].

  Example:

  ```python
      # Read images from file.
      im1 = tf.decode_png('path/to/im1.png')
      im2 = tf.decode_png('path/to/im2.png')
      # Compute PSNR over tf.uint8 Tensors.
      psnr1 = tf.image.psnr(im1, im2, max_val=255)

      # Compute PSNR over tf.float32 Tensors.
      im1 = tf.image.convert_image_dtype(im1, tf.float32)
      im2 = tf.image.convert_image_dtype(im2, tf.float32)
      psnr2 = tf.image.psnr(im1, im2, max_val=1.0)
      # psnr1 and psnr2 both have type tf.float32 and are almost equal.
  ```

  Arguments:
    a: First set of images.
    b: Second set of images.
    max_val: The dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).
    name: Namespace to embed the computation in.

  Returns:
    The PSNR between a and b, as a `tf.float32` tensor.  The mean squared
    error is reduced over the last three dimensions, so one value remains per
    image.
  """
  with ops.name_scope(name, 'PSNR', [a, b]):
    # Everything is computed in float32.  `max_val` is first cast to the
    # input dtype and then converted like the images, so that it is scaled
    # consistently with them.
    peak = math_ops.cast(max_val, a.dtype)
    peak = convert_image_dtype(peak, dtypes.float32)
    float_a = convert_image_dtype(a, dtypes.float32)
    float_b = convert_image_dtype(b, dtypes.float32)

    squared_error = math_ops.squared_difference(float_a, float_b)
    mse = math_ops.reduce_mean(squared_error, [-3, -2, -1])

    # PSNR = 20 * log10(peak) - 10 * log10(mse), written with natural logs.
    peak_term = 20 * math_ops.log(peak) / math_ops.log(10.0)
    error_term = np.float32(10 / np.log(10)) * math_ops.log(mse)
    psnr_val = math_ops.subtract(peak_term, error_term, name='psnr')

    # Make the returned value depend on the runtime shape assertions.
    checks = _verify_compatible_image_shapes(float_a, float_b)[2]
    with ops.control_dependencies(checks):
      return array_ops.identity(psnr_val)
# Stabilizing constants of the SSIM formula.  `_ssim_helper` turns them into
# c1 = (K1 * max_val)^2 and c2 = (K2 * max_val)^2.
_SSIM_K1, _SSIM_K2 = 0.01, 0.03
def _ssim_helper(x, y, reducer, max_val, compensation=1.0):
  r"""Computes the two factors of SSIM: luminance and contrast-structure.

  SSIM estimates covariances with weighted sums.  With the default arguments
  the covariance estimate is biased:
  if `reducer` is a weighted sum, the mean estimators are
    \mu_x = \sum_i w_i x_i,
    \mu_y = \sum_i w_i y_i,
  where the w_i are the weights of the sum, and the covariance estimator is
    cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
  under the assumption \sum_i w_i = 1.  This estimator is biased, because
    E[cov_{xy}] = (1 - \sum_i w_i ^ 2) Cov(X, Y).
  To obtain SSIM with unbiased covariance estimators, pass
  (1 - \sum_i w_i ^ 2) as the `compensation` argument.

  Arguments:
    x: First set of images.
    y: Second set of images.
    reducer: Function that computes 'local' averages from set of images.
      For the non-convolutional version this is usually
      tf.reduce_mean(x, [1, 2]); for the convolutional version it is usually
      tf.nn.avg_pool or tf.nn.conv2d with a weighted-sum kernel.
    max_val: The dynamic range (i.e., the difference between the maximum
      possible allowed value and the minimum allowed value).
    compensation: Compensation factor applied to c2. See above.

  Returns:
    A pair containing the luminance measure, and the contrast-structure measure.
  """
  luminance_const = (_SSIM_K1 * max_val) ** 2
  contrast_const = (_SSIM_K2 * max_val) ** 2

  # Luminance:
  #   (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1).
  mu_x = reducer(x)
  mu_y = reducer(y)
  twice_mu_xy = mu_x * mu_y * 2.0
  mu_sq_sum = math_ops.square(mu_x) + math_ops.square(mu_y)
  luminance = (twice_mu_xy + luminance_const) / (mu_sq_sum + luminance_const)

  # Contrast-structure:
  #   (2 * cov_{xy} + c2) / (cov_{xx} + cov_{yy} + c2).
  # With weights that sum to one,
  #   cov_{xy} = \sum_i w_i x_i y_i - (\sum_i w_i x_i) (\sum_j w_j y_j),
  # so each covariance is a reduced product minus a product of means.
  twice_mean_xy = reducer(x * y) * 2.0
  mean_sq_sum = reducer(math_ops.square(x) + math_ops.square(y))
  contrast_const = contrast_const * compensation
  cs_numerator = twice_mean_xy - twice_mu_xy + contrast_const
  cs_denominator = mean_sq_sum - mu_sq_sum + contrast_const

  # The SSIM score itself is the product of the two returned measures.
  return luminance, cs_numerator / cs_denominator
def _fspecial_gauss(size, sigma):
  """Builds a 2-D Gaussian kernel, mimicking MATLAB's 'fspecial' gaussian.

  Arguments:
    size: Side length of the square kernel; converted to an int32 tensor.
    sigma: Width of the Gaussian; its dtype determines the kernel dtype.

  Returns:
    A tensor of shape [size, size, 1, 1] whose entries sum to one (they are
    produced by a softmax over the whole grid).
  """
  size = ops.convert_to_tensor(size, dtypes.int32)
  sigma = ops.convert_to_tensor(sigma)

  # Offsets of every tap from the kernel center.
  offsets = math_ops.cast(math_ops.range(size), sigma.dtype)
  center = math_ops.cast(size - 1, sigma.dtype) / 2.0
  offsets = offsets - center

  # 1-D exponent -d^2 / (2 * sigma^2).
  exponent = math_ops.square(offsets) * (-0.5 / math_ops.square(sigma))

  # The 2-D exponent is the outer sum of the 1-D exponents.
  as_row = array_ops.reshape(exponent, shape=[1, -1])
  as_column = array_ops.reshape(exponent, shape=[-1, 1])
  grid = as_row + as_column

  # Softmax over the flattened grid exponentiates and normalizes in one step.
  flat_grid = array_ops.reshape(grid, shape=[1, -1])
  weights = nn_ops.softmax(flat_grid)
  return array_ops.reshape(weights, shape=[size, size, 1, 1])
def _ssim_per_channel(img1, img2, max_val=1.0):
  """Computes SSIM index between img1 and img2 per color channel.

  This function matches the standard SSIM implementation from:
  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
  quality assessment: from error visibility to structural similarity. IEEE
  transactions on image processing.

  Details:
    - 11x11 Gaussian filter of width 1.5 is used.
    - k1 = 0.01, k2 = 0.03 as in the original paper.

  Args:
    img1: First image batch.
    img2: Second image batch.
    max_val: The dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).

  Returns:
    A pair of tensors containing the channel-wise SSIM and contrast-structure
    values. The shape is [..., channels].
  """
  filter_size = constant_op.constant(11, dtype=dtypes.int32)
  filter_sigma = constant_op.constant(1.5, dtype=img1.dtype)

  def spatial_size_check(shape):
    """Asserts that height and width are at least the filter size."""
    large_enough = math_ops.reduce_all(
        math_ops.greater_equal(shape[-3:-1], filter_size))
    return control_flow_ops.Assert(
        large_enough, [shape, filter_size], summarize=8)

  shape1, shape2 = array_ops.shape_n([img1, img2])
  checks = [spatial_size_check(shape1), spatial_size_check(shape2)]

  # Route img1 through the assertions so they run before the computation.
  with ops.control_dependencies(checks):
    img1 = array_ops.identity(img1)

  # TODO(sjhwang): Try to cache kernels and compensation factor.
  # One copy of the Gaussian kernel per input channel (depthwise filter).
  gaussian = _fspecial_gauss(filter_size, filter_sigma)
  gaussian = array_ops.tile(gaussian, multiples=[1, 1, shape1[-1], 1])

  # The correct compensation factor is `1.0 - tf.reduce_sum(tf.square(kernel))`,
  # but to match MATLAB implementation of MS-SSIM, we use 1.0 instead.
  compensation = 1.0

  # TODO(sjhwang): Try FFT.
  # TODO(sjhwang): Gaussian kernel is separable in space. Consider applying
  #   1-by-n and n-by-1 Gaussian filters instead of an n-by-n filter.
  def gaussian_average(images):
    """Applies the Gaussian filter, preserving the leading dimensions."""
    full_shape = array_ops.shape(images)
    # depthwise_conv2d is fed rank-4 input: collapse the leading dimensions.
    flat = array_ops.reshape(
        images, shape=array_ops.concat([[-1], full_shape[-3:]], 0))
    filtered = nn.depthwise_conv2d(
        flat, gaussian, strides=[1, 1, 1, 1], padding='VALID')
    restored_shape = array_ops.concat(
        [full_shape[:-3], array_ops.shape(filtered)[1:]], 0)
    return array_ops.reshape(filtered, restored_shape)

  luminance, cs = _ssim_helper(
      img1, img2, gaussian_average, max_val, compensation)

  # Average over the second and the third from the last: height, width.
  spatial_axes = constant_op.constant([-3, -2], dtype=dtypes.int32)
  ssim_val = math_ops.reduce_mean(luminance * cs, spatial_axes)
  mean_cs = math_ops.reduce_mean(cs, spatial_axes)
  return ssim_val, mean_cs
def ssim(img1, img2, max_val):
  """Computes SSIM index between img1 and img2.

  This function is based on the standard SSIM implementation from:
  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
  quality assessment: from error visibility to structural similarity. IEEE
  transactions on image processing.

  Note: The true SSIM is only defined on grayscale.  This function does not
  perform any colorspace transform.  (If input is already YUV, then it will
  compute YUV SSIM average.)

  Details:
    - 11x11 Gaussian filter of width 1.5 is used.
    - k1 = 0.01, k2 = 0.03 as in the original paper.

  The image sizes must be at least 11x11 because of the filter size.

  Example:

  ```python
      # Read images from file.
      im1 = tf.decode_png('path/to/im1.png')
      im2 = tf.decode_png('path/to/im2.png')
      # Compute SSIM over tf.uint8 Tensors.
      ssim1 = tf.image.ssim(im1, im2, max_val=255)

      # Compute SSIM over tf.float32 Tensors.
      im1 = tf.image.convert_image_dtype(im1, tf.float32)
      im2 = tf.image.convert_image_dtype(im2, tf.float32)
      ssim2 = tf.image.ssim(im1, im2, max_val=1.0)
      # ssim1 and ssim2 both have type tf.float32 and are almost equal.
  ```

  Args:
    img1: First image batch.
    img2: Second image batch.
    max_val: The dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).

  Returns:
    A tensor containing an SSIM value for each image in batch.  Returned SSIM
    values are in range (-1, 1], when pixel values are non-negative. Returns
    a tensor with shape: broadcast(img1.shape[:-3], img2.shape[:-3]).
  """
  # Route img1 through the shape assertions so they gate the computation.
  shape_checks = _verify_compatible_image_shapes(img1, img2)[2]
  with ops.control_dependencies(shape_checks):
    checked_img1 = array_ops.identity(img1)

  # Everything is computed in float32.  `max_val` is cast to the input dtype
  # and then converted like the images, so it is scaled consistently.
  float_max_val = convert_image_dtype(
      math_ops.cast(max_val, checked_img1.dtype), dtypes.float32)
  float_img1 = convert_image_dtype(checked_img1, dtypes.float32)
  float_img2 = convert_image_dtype(img2, dtypes.float32)

  per_channel = _ssim_per_channel(float_img1, float_img2, float_max_val)[0]
  # Average the per-channel scores over the color channels.
  return math_ops.reduce_mean(per_channel, [-1])
# Default values obtained by Wang et al.
_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)


def ssim_multiscale(img1, img2, max_val, power_factors=_MSSSIM_WEIGHTS):
  """Computes the MS-SSIM between img1 and img2.

  This function assumes that `img1` and `img2` are image batches, i.e. the last
  three dimensions are [height, width, channels].

  Note: The true SSIM is only defined on grayscale.  This function does not
  perform any colorspace transform.  (If input is already YUV, then it will
  compute YUV SSIM average.)

  Original paper: Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale
  structural similarity for image quality assessment." Signals, Systems and
  Computers, 2004.

  Arguments:
    img1: First image batch.
    img2: Second image batch. Must have the same rank as img1.
    max_val: The dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).
    power_factors: Sequence of weights for each of the scales; must not be
      empty. The number of scales used is the length of the sequence. Index 0
      is the unscaled resolution's weight and each increasing scale
      corresponds to the image being downsampled by 2.  Defaults to (0.0448,
      0.2856, 0.3001, 0.2363, 0.1333), which are the values obtained in the
      original paper.

  Returns:
    A tensor containing an MS-SSIM value for each image in batch.  The values
    are in range [0, 1].  Returns a tensor with shape:
    broadcast(img1.shape[:-3], img2.shape[:-3]).

  Raises:
    ValueError: If `power_factors` is empty, or if the static shape check
      fails.
  """
  # Reject an empty weight list up front, before any graph ops are created.
  # Otherwise the failure would only surface as an IndexError from
  # `mcs.pop()` at the end of the function.
  num_scales = len(power_factors)
  if num_scales == 0:
    raise ValueError('power_factors must contain at least one weight.')

  # Shape checking.
  shape1 = img1.get_shape().with_rank_at_least(3)
  shape2 = img2.get_shape().with_rank_at_least(3)
  shape1[-3:].merge_with(shape2[-3:])

  with ops.name_scope(None, 'MS-SSIM', [img1, img2]):
    shape1, shape2, checks = _verify_compatible_image_shapes(img1, img2)
    with ops.control_dependencies(checks):
      img1 = array_ops.identity(img1)

    # Need to convert the images to float32.  Scale max_val accordingly so that
    # SSIM is computed correctly.
    max_val = math_ops.cast(max_val, img1.dtype)
    max_val = convert_image_dtype(max_val, dtypes.float32)
    img1 = convert_image_dtype(img1, dtypes.float32)
    img2 = convert_image_dtype(img2, dtypes.float32)

    imgs = [img1, img2]
    shapes = [shape1, shape2]

    # img1 and img2 are assumed to be a (multi-dimensional) batch of
    # 3-dimensional images (height, width, channels). `heads` contain the batch
    # dimensions, and `tails` contain the image dimensions.
    heads = [s[:-3] for s in shapes]
    tails = [s[-3:] for s in shapes]

    # Pooling window/stride for rank-4 input: halve height and width only.
    divisor = [1, 2, 2, 1]
    divisor_tensor = constant_op.constant(divisor[1:], dtype=dtypes.int32)

    def do_pad(images, remainder):
      # Turn the per-dimension remainder into a paddings matrix that pads
      # only at the end of the image dimensions (nothing for the batch axis).
      padding = array_ops.expand_dims(remainder, -1)
      padding = array_ops.pad(padding, [[1, 0], [1, 0]])
      return [array_ops.pad(x, padding, mode='SYMMETRIC') for x in images]

    mcs = []
    for k in range(num_scales):
      with ops.name_scope(None, 'Scale%d' % k, imgs):
        if k > 0:
          # Avg pool takes rank 4 tensors. Flatten leading dimensions.
          flat_imgs = [
              array_ops.reshape(x, array_ops.concat([[-1], t], 0))
              for x, t in zip(imgs, tails)
          ]

          # The remainder is taken from the first image's dimensions and
          # applied to both images.
          remainder = tails[0] % divisor_tensor
          need_padding = math_ops.reduce_any(math_ops.not_equal(remainder, 0))
          # pylint: disable=cell-var-from-loop
          padded = control_flow_ops.cond(need_padding,
                                         lambda: do_pad(flat_imgs, remainder),
                                         lambda: flat_imgs)
          # pylint: enable=cell-var-from-loop

          downscaled = [nn_ops.avg_pool(x, ksize=divisor, strides=divisor,
                                        padding='VALID')
                        for x in padded]
          tails = [x[1:] for x in array_ops.shape_n(downscaled)]
          imgs = [
              array_ops.reshape(x, array_ops.concat([h, t], 0))
              for x, h, t in zip(downscaled, heads, tails)
          ]

        # Overwrite previous ssim value since we only need the last one.
        ssim_per_channel, cs = _ssim_per_channel(*imgs, max_val=max_val)
        mcs.append(nn_ops.relu(cs))

    # Remove the cs score for the last scale. In the MS-SSIM calculation,
    # we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p).
    mcs.pop()
    mcs_and_ssim = array_ops.stack(mcs + [nn_ops.relu(ssim_per_channel)],
                                   axis=-1)
    # Take weighted geometric mean across the scale axis.
    ms_ssim = math_ops.reduce_prod(math_ops.pow(mcs_and_ssim, power_factors),
                                   [-1])

    return math_ops.reduce_mean(ms_ssim, [-1])  # Avg over color channels.
def image_gradients(image):
  """Returns image gradients (dy, dx) for each color channel.

  Both output tensors have the same shape as the input: [batch_size, h, w,
  d]. Each gradient is a forward difference stored at the base pixel: `dy`
  holds `image[:, i + 1] - image[:, i]` at row `i`, and `dx` holds
  `image[:, :, j + 1] - image[:, :, j]` at column `j`. That means that dy
  will always have zeros in the last row, and dx will always have zeros in
  the last column.

  Arguments:
    image: Tensor with shape [batch_size, h, w, d].

  Returns:
    Pair of tensors (dy, dx) holding the vertical and horizontal image
    gradients (1-step finite difference).

  Raises:
    ValueError: If `image` is not a 4D tensor.
  """
  if image.get_shape().ndims != 4:
    # Interpolate the shape into the message.  The 1-tuple keeps `%` from
    # ever treating the shape object itself as the argument sequence.
    raise ValueError('image_gradients expects a 4D tensor '
                     '[batch_size, h, w, d], not %s.' % (image.get_shape(),))
  image_shape = array_ops.shape(image)
  batch_size, height, width, depth = array_ops.unstack(image_shape)
  dy = image[:, 1:, :, :] - image[:, :-1, :, :]
  dx = image[:, :, 1:, :] - image[:, :, :-1, :]

  # Return tensors with same size as original image by concatenating
  # zeros. Place the gradient [I(x+1,y) - I(x,y)] on the base pixel (x, y).
  shape = array_ops.stack([batch_size, 1, width, depth])
  dy = array_ops.concat([dy, array_ops.zeros(shape, image.dtype)], 1)
  dy = array_ops.reshape(dy, image_shape)

  shape = array_ops.stack([batch_size, height, 1, depth])
  dx = array_ops.concat([dx, array_ops.zeros(shape, image.dtype)], 2)
  dx = array_ops.reshape(dx, image_shape)

  return dy, dx
def sobel_edges(image):
  """Computes per-channel Sobel edge maps for a batch of images.

  Arguments:
    image: Image tensor with shape [batch_size, h, w, d] and type float32 or
      float64.  The image(s) must be 2x2 or larger.

  Returns:
    Tensor with shape [batch_size, h, w, d, 2] holding the edge maps for each
    channel. The last two dimensions hold [[dy[0], dx[0]], [dy[1], dx[1]], ...,
    [dy[d-1], dx[d-1]]] calculated using the Sobel filter.
  """
  static_shape = image.get_shape()
  dynamic_shape = array_ops.shape(image)

  # Vertical (dy) and horizontal (dx) Sobel filters.
  vertical = [[-1, -2, -1], [0, 0, 0], [1, 2, 1]]
  horizontal = [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]
  filters = [vertical, horizontal]
  num_filters = len(filters)

  # Lay the filters out as [3, 3, 1, num_filters], then repeat them along the
  # input-channel axis so that every image channel gets its own copy.
  stacked = np.stack(filters, axis=-1)[:, :, np.newaxis, :]
  sobel = constant_op.constant(stacked, dtype=image.dtype)
  sobel = array_ops.tile(
      sobel, [1, 1, dynamic_shape[-1], 1], name='sobel_filters')

  # Reflect-pad height and width by one pixel so that the 'VALID' depth-wise
  # convolution yields an output with the same h and w as the input.
  padded = array_ops.pad(
      image, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='REFLECT')

  # The convolution output has shape [batch_size, h, w, d * num_filters].
  edges = nn.depthwise_conv2d(padded, sobel, [1, 1, 1, 1], 'VALID')

  # Split the last axis into [d, num_filters] and restore the static shape.
  out_shape = array_ops.concat([dynamic_shape, [num_filters]], 0)
  edges = array_ops.reshape(edges, shape=out_shape)
  edges.set_shape(static_shape.concatenate([num_filters]))
  return edges
def resize_bicubic(images,
                   size,
                   align_corners=False,
                   name=None,
                   half_pixel_centers=False):
  # Pure pass-through: every argument is forwarded unchanged to the generated
  # op wrapper.
  return gen_image_ops.resize_bicubic(
      images,
      size,
      align_corners=align_corners,
      half_pixel_centers=half_pixel_centers,
      name=name)
def resize_bilinear(images,
                    size,
                    align_corners=False,
                    name=None,
                    half_pixel_centers=False):
  # Pure pass-through: every argument is forwarded unchanged to the generated
  # op wrapper.
  return gen_image_ops.resize_bilinear(
      images,
      size,
      align_corners=align_corners,
      half_pixel_centers=half_pixel_centers,
      name=name)
def resize_nearest_neighbor(images,
                            size,
                            align_corners=False,
                            name=None,
                            half_pixel_centers=False):
  # Pure pass-through: every argument is forwarded unchanged to the generated
  # op wrapper.
  return gen_image_ops.resize_nearest_neighbor(
      images,
      size,
      align_corners=align_corners,
      half_pixel_centers=half_pixel_centers,
      name=name)
# Deprecation decorator applied to `gen_image_ops.resize_area`; the notice
# points users at `tf.image.resize` with `ResizeMethod.AREA`.
resize_area_deprecation = deprecation.deprecated(
    None, 'Use `tf.image.resize(...method=ResizeMethod.AREA...)` instead.')
3237    resize_area_deprecation(gen_image_ops.resize_area))
# Deprecation decorator applied to `resize_bicubic`; the notice points users
# at `tf.image.resize` with `ResizeMethod.BICUBIC`.
resize_bicubic_deprecation = deprecation.deprecated(
    None, 'Use `tf.image.resize(...method=ResizeMethod.BICUBIC...)` instead.')
3244    resize_bicubic_deprecation(resize_bicubic))
# Deprecation decorator applied to `resize_bilinear`; the notice points users
# at `tf.image.resize` with `ResizeMethod.BILINEAR`.
resize_bilinear_deprecation = deprecation.deprecated(
    None, 'Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.')
3251    resize_bilinear_deprecation(resize_bilinear))
# Deprecation decorator applied to `resize_nearest_neighbor`; the notice
# points users at `tf.image.resize` with `ResizeMethod.NEAREST_NEIGHBOR`.
resize_nearest_neighbor_deprecation = deprecation.deprecated(
    None,
    'Use `tf.image.resize(...method=ResizeMethod.NEAREST_NEIGHBOR...)` '
    'instead.')
3259    resize_nearest_neighbor_deprecation(resize_nearest_neighbor))
@tf_export('image.crop_and_resize', v1=[])
def crop_and_resize_v2(
    image,
    boxes,
    box_indices,
    crop_size,
    method='bilinear',
    extrapolation_value=0,
    name=None):
  """Extracts crops from the input image tensor and resizes them.

  Extracts crops from the input image tensor and resizes them using bilinear
  sampling or nearest neighbor sampling (possibly with aspect ratio change) to a
  common output size specified by `crop_size`. This is more general than the
  `crop_to_bounding_box` op which extracts a fixed size slice from the input
  image and does not allow resizing or aspect ratio change.

  Returns a tensor with `crops` from the input `image` at positions defined at
  the bounding box locations in `boxes`. The cropped boxes are all resized (with
  bilinear or nearest neighbor interpolation) to a fixed
  `size = [crop_height, crop_width]`. The result is a 4-D tensor
  `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned.
  In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical
  results to using `tf.image.resize_bilinear()` or
  `tf.image.resize_nearest_neighbor()` (depending on the `method` argument)
  with `align_corners=True`.

  Args:
    image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
      specifies the coordinates of a box in the `box_indices[i]` image and is
      specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
      coordinate value of `y` is mapped to the image coordinate at `y *
      (image_height - 1)`, so that the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1]` in image height coordinates.
      We do allow `y1` > `y2`, in which case the sampled crop is an up-down
      flipped version of the original image. The width dimension is treated
      similarly. Normalized coordinates outside the `[0, 1]` range are allowed,
      in which case we use `extrapolation_value` to extrapolate the input image
      values.
    box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0,
      batch)`. The value of `box_indices[i]` specifies the image that the
      `i`-th box refers to.
    crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`.
      All cropped image patches are resized to this size. The aspect ratio of
      the image content is not preserved. Both `crop_height` and `crop_width`
      need to be positive.
    method: An optional string specifying the sampling method for resizing. It
      can be either `"bilinear"` or `"nearest"` and defaults to `"bilinear"`.
      Currently two sampling methods are supported: Bilinear and Nearest
      Neighbor.
    extrapolation_value: An optional `float`. Defaults to `0`. Value used for
      extrapolation, when applicable.
    name: A name for the operation (optional).

  Returns:
    A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
  """
  # The generated op takes the box indices as its third positional argument.
  return gen_image_ops.crop_and_resize(
      image, boxes, box_indices, crop_size, method, extrapolation_value, name)
3327    None, 'box_ind is deprecated, use box_indices instead', 'box_ind')
def crop_and_resize_v1(  # pylint: disable=missing-docstring
    image,
    boxes,
    box_ind=None,
    crop_size=None,
    method='bilinear',
    extrapolation_value=0,
    name=None,
    box_indices=None):
  # `box_indices` is the replacement spelling for the deprecated `box_ind`;
  # collapse the two keyword arguments into the single value given to the op.
  resolved_box_ind = deprecation.deprecated_argument_lookup(
      'box_indices', box_indices, 'box_ind', box_ind)
  return gen_image_ops.crop_and_resize(
      image, boxes, resolved_box_ind, crop_size, method, extrapolation_value,
      name)


# The function carries no docstring of its own; it reuses the documentation of
# the generated op.
crop_and_resize_v1.__doc__ = gen_image_ops.crop_and_resize.__doc__
def extract_glimpse(
    input,  # pylint: disable=redefined-builtin
    size,
    offsets,
    centered=True,
    normalized=True,
    uniform_noise=True,
    name=None):
  """Extracts glimpses (windows) from the input tensor.

  One window, called a glimpse, is extracted from the input tensor at each
  location given in `offsets`. Where a window only partially overlaps the
  input, the non-overlapping area is filled with random noise.

  The result is a 4-D tensor of shape `[batch_size, glimpse_height,
  glimpse_width, channels]`. Its batch and channel dimensions are the same as
  those of the input tensor, while the height and width of the output windows
  are given by the `size` parameter.

  The arguments `normalized` and `centered` control how the windows are built:

  * Normalized but not centered: 0.0 and 1.0 correspond to the minimum and
    maximum of each height and width dimension.
  * Normalized and centered: coordinates range from -1.0 to 1.0. The
    coordinates (-1.0, -1.0) correspond to the upper left corner, (1.0, 1.0)
    to the lower right corner, and (0, 0) to the center.
  * Not normalized: coordinates are interpreted as numbers of pixels.

  Args:
    input: A `Tensor` of type `float32`. A 4-D float tensor of shape
      `[batch_size, height, width, channels]`.
    size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
      size of the glimpses to extract.  The glimpse height must be specified
      first, followed by the glimpse width.
    offsets: A `Tensor` of type `float32`. A 2-D tensor of shape
      `[batch_size, 2]` containing the y, x locations of the center of each
      window.
    centered: An optional `bool`. Defaults to `True`. Indicates if the offset
      coordinates are centered relative to the image, in which case the (0, 0)
      offset is relative to the center of the input images. If false, the
      (0, 0) offset corresponds to the upper left corner of the input images.
    normalized: An optional `bool`. Defaults to `True`. Indicates if the offset
      coordinates are normalized.
    uniform_noise: An optional `bool`. Defaults to `True`. Indicates if the
      noise should be generated using a uniform distribution or a Gaussian
      distribution.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `float32`.
  """
  return gen_image_ops.extract_glimpse(
      input,
      size,
      offsets,
      centered=centered,
      normalized=normalized,
      uniform_noise=uniform_noise,
      name=name)
@tf_export('image.extract_glimpse', v1=[])
def extract_glimpse_v2(
    input,  # pylint: disable=redefined-builtin
    size,
    offsets,
    centered=True,
    normalized=True,
    noise='uniform',
    name=None):
  """Extracts glimpses (windows) from the input tensor.

  One window, called a glimpse, is extracted from the input tensor at each
  location given in `offsets`. Where a window only partially overlaps the
  input, the non-overlapping area is filled with random noise.

  The result is a 4-D tensor of shape `[batch_size, glimpse_height,
  glimpse_width, channels]`. Its batch and channel dimensions are the same as
  those of the input tensor, while the height and width of the output windows
  are given by the `size` parameter.

  The arguments `normalized` and `centered` control how the windows are built:

  * Normalized but not centered: 0.0 and 1.0 correspond to the minimum and
    maximum of each height and width dimension.
  * Normalized and centered: coordinates range from -1.0 to 1.0. The
    coordinates (-1.0, -1.0) correspond to the upper left corner, (1.0, 1.0)
    to the lower right corner, and (0, 0) to the center.
  * Not normalized: coordinates are interpreted as numbers of pixels.

  Args:
    input: A `Tensor` of type `float32`. A 4-D float tensor of shape
      `[batch_size, height, width, channels]`.
    size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
      size of the glimpses to extract.  The glimpse height must be specified
      first, followed by the glimpse width.
    offsets: A `Tensor` of type `float32`. A 2-D tensor of shape
      `[batch_size, 2]` containing the y, x locations of the center of each
      window.
    centered: An optional `bool`. Defaults to `True`. Indicates if the offset
      coordinates are centered relative to the image, in which case the (0, 0)
      offset is relative to the center of the input images. If false, the
      (0, 0) offset corresponds to the upper left corner of the input images.
    normalized: An optional `bool`. Defaults to `True`. Indicates if the offset
      coordinates are normalized.
    noise: An optional `string`. Defaults to `uniform`. Indicates if the noise
      should be `uniform` (uniform distribution), `gaussian` (gaussian
      distribution), or `zero` (zero padding).
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `float32`.
  """
  # The noise mode is selected through `noise`; the boolean `uniform_noise`
  # attribute of the op is always passed as False here.
  return gen_image_ops.extract_glimpse(
      input,
      size,
      offsets,
      centered=centered,
      normalized=normalized,
      uniform_noise=False,
      noise=noise,
      name=name)
def combined_non_max_suppression(boxes,
                                 scores,
                                 max_output_size_per_class,
                                 max_total_size,
                                 iou_threshold=0.5,
                                 score_threshold=float('-inf'),
                                 pad_per_class=False,
                                 name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  This operation performs non_max_suppression on the inputs per batch, across
  all classes.
  Prunes away boxes that have high intersection-over-union (IOU) overlap
  with previously selected boxes.  Bounding boxes are supplied as
  [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
  diagonal pair of box corners and the coordinates can be provided as normalized
  (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
  is agnostic to where the origin is in the coordinate system. Also note that
  this algorithm is invariant to orthogonal transformations and translations
  of the coordinate system; thus translations or reflections of the coordinate
  system result in the same boxes being selected by the algorithm.
  The output of this operation is the final boxes, scores and classes tensor
  returned after performing non_max_suppression.

  Args:
    boxes: A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q`
      is 1 then same boxes are used for all classes otherwise, if `q` is equal
      to number of classes, class-specific boxes are used.
    scores: A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]`
      representing a single score corresponding to each box (each row of boxes).
    max_output_size_per_class: A scalar integer `Tensor` representing the
      maximum number of boxes to be selected by non max suppression per class.
    max_total_size: A scalar representing maximum number of boxes retained over
      all classes.
    iou_threshold: A float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    score_threshold: A float representing the threshold for deciding when to
      remove boxes based on score.
    pad_per_class: If false, the output nmsed boxes, scores and classes are
      padded/clipped to `max_total_size`. If true, the output nmsed boxes,
      scores and classes are padded to be of length
      `max_output_size_per_class`*`num_classes`, unless it exceeds
      `max_total_size` in which case it is clipped to `max_total_size`.
      Defaults to false.
    name: A name for the operation (optional).

  Returns:
    'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
      containing the non-max suppressed boxes.
    'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
      the scores for the boxes.
    'nmsed_classes': A [batch_size, max_detections] float32 tensor
      containing the class for boxes.
    'valid_detections': A [batch_size] int32 tensor indicating the number of
      valid detections per batch item. Only the top valid_detections[i] entries
      in nmsed_boxes[i], nmsed_scores[i] and nmsed_classes[i] are valid. The
      rest of the entries are zero paddings.
  """
  with ops.name_scope(name, 'combined_non_max_suppression'):
    # Both thresholds are handed to the op as float32 tensors.
    iou_threshold = ops.convert_to_tensor(
        iou_threshold, dtype=dtypes.float32, name='iou_threshold')
    score_threshold = ops.convert_to_tensor(
        score_threshold, dtype=dtypes.float32, name='score_threshold')
    return gen_image_ops.combined_non_max_suppression(
        boxes, scores, max_output_size_per_class, max_total_size, iou_threshold,
        score_threshold, pad_per_class)