• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Utilities for ImageNet data preprocessing & prediction decoding."""
16
17import json
18import warnings
19
20import numpy as np
21
22from tensorflow.python.keras import activations
23from tensorflow.python.keras import backend
24from tensorflow.python.keras.utils import data_utils
25from tensorflow.python.util.tf_export import keras_export
26
27
# Module-level cache for the parsed ImageNet class index. It stays `None`
# until `decode_predictions` loads the JSON file, after which the parsed
# mapping is reused by later calls.
CLASS_INDEX = None
# URL from which the class-index JSON file is downloaded.
CLASS_INDEX_PATH = ('https://storage.googleapis.com/download.tensorflow.org/'
                    'data/imagenet_class_index.json')
31
32
# Docstring template for `preprocess_input`-style functions. It is rendered
# with `str.format`, which fills three placeholders:
#   {mode}  - documentation of the `mode` argument (may be empty),
#   {ret}   - extra text appended to the "Returns" section,
#   {error} - contents of the "Raises" section.
PREPROCESS_INPUT_DOC = """
  Preprocesses a tensor or Numpy array encoding a batch of images.

  Usage example with `applications.MobileNet`:

  ```python
  i = tf.keras.layers.Input([None, None, 3], dtype = tf.uint8)
  x = tf.cast(i, tf.float32)
  x = tf.keras.applications.mobilenet.preprocess_input(x)
  core = tf.keras.applications.MobileNet()
  x = core(x)
  model = tf.keras.Model(inputs=[i], outputs=[x])

  image = tf.image.decode_png(tf.io.read_file('file.png'))
  result = model(image)
  ```

  Args:
    x: A floating point `numpy.array` or a `tf.Tensor`, 3D or 4D with 3 color
      channels, with values in the range [0, 255].
      The preprocessed data are written over the input data
      if the data types are compatible. To avoid this
      behaviour, `numpy.copy(x)` can be used.
    data_format: Optional data format of the image tensor/array. Defaults to
      None, in which case the global setting
      `tf.keras.backend.image_data_format()` is used (unless you changed it,
      it defaults to "channels_last").{mode}

  Returns:
      Preprocessed `numpy.array` or a `tf.Tensor` with type `float32`.
      {ret}

  Raises:
      {error}
  """
68
# Fragment substituted for `{mode}` in PREPROCESS_INPUT_DOC when the function
# being documented exposes a `mode` argument.
PREPROCESS_INPUT_MODE_DOC = """
    mode: One of "caffe", "tf" or "torch". Defaults to "caffe".
      - caffe: will convert the images from RGB to BGR,
          then will zero-center each color channel with
          respect to the ImageNet dataset,
          without scaling.
      - tf: will scale pixels between -1 and 1,
          sample-wise.
      - torch: will scale pixels between 0 and 1 and then
          will normalize each channel with respect to the
          ImageNet dataset.
  """

# Fragment for `{error}` when both `mode` and `data_format` are validated
# (this is the one used for `preprocess_input` in this module).
PREPROCESS_INPUT_DEFAULT_ERROR_DOC = """
    ValueError: In case of unknown `mode` or `data_format` argument."""

# Fragment for `{error}` when only `data_format` is validated.
# NOTE(review): not referenced in this module; presumably consumed by the
# model-specific `preprocess_input` wrappers -- confirm against callers.
PREPROCESS_INPUT_ERROR_DOC = """
    ValueError: In case of unknown `data_format` argument."""

# Fragments for `{ret}`, one per preprocessing mode.
# NOTE(review): not referenced in this module; presumably consumed by the
# model-specific `preprocess_input` wrappers -- confirm against callers.
PREPROCESS_INPUT_RET_DOC_TF = """
      The inputs pixel values are scaled between -1 and 1, sample-wise."""

PREPROCESS_INPUT_RET_DOC_TORCH = """
      The input pixels values are scaled between 0 and 1 and each channel is
      normalized with respect to the ImageNet dataset."""

PREPROCESS_INPUT_RET_DOC_CAFFE = """
      The images are converted from RGB to BGR, then each color channel is
      zero-centered with respect to the ImageNet dataset, without scaling."""
98
99
@keras_export('keras.applications.imagenet_utils.preprocess_input')
def preprocess_input(x, data_format=None, mode='caffe'):
  """Preprocesses a tensor or Numpy array encoding a batch of images."""
  # The one-line docstring above is only a placeholder: the module replaces
  # `preprocess_input.__doc__` with the rendered PREPROCESS_INPUT_DOC template
  # right after this definition.
  supported_modes = {'caffe', 'tf', 'torch'}
  if mode not in supported_modes:
    raise ValueError('Unknown mode ' + str(mode))

  # Fall back to the backend-wide setting when no layout was requested;
  # otherwise the requested layout must be one of the two known names.
  if data_format is None:
    data_format = backend.image_data_format()
  else:
    supported_formats = {'channels_first', 'channels_last'}
    if data_format not in supported_formats:
      raise ValueError('Unknown data_format ' + str(data_format))

  # Numpy arrays take the Numpy path; everything else is treated as a
  # symbolic tensor.
  if isinstance(x, np.ndarray):
    preprocess = _preprocess_numpy_input
  else:
    preprocess = _preprocess_symbolic_input
  return preprocess(x, data_format=data_format, mode=mode)
117
118
# Replace the placeholder docstring of `preprocess_input` with the rendered
# template: the `mode` argument is documented, no extra "Returns" text is
# added, and the "Raises" section covers both `mode` and `data_format`.
preprocess_input.__doc__ = PREPROCESS_INPUT_DOC.format(
    mode=PREPROCESS_INPUT_MODE_DOC,
    ret='',
    error=PREPROCESS_INPUT_DEFAULT_ERROR_DOC)
123
124
@keras_export('keras.applications.imagenet_utils.decode_predictions')
def decode_predictions(preds, top=5):
  """Decodes the prediction of an ImageNet model.

  Args:
    preds: Numpy array encoding a batch of predictions, with shape
      `(samples, 1000)`.
    top: Integer, how many top-guesses to return. Defaults to 5.
      `top=0` yields an empty list for every sample.

  Returns:
    A list of lists of top class prediction tuples
    `(class_name, class_description, score)`, ordered by decreasing score.
    One list of tuples per sample in batch input.

  Raises:
    ValueError: In case of invalid shape of the `preds` array
      (must be 2D with 1000 columns).
  """
  global CLASS_INDEX

  if len(preds.shape) != 2 or preds.shape[1] != 1000:
    raise ValueError('`decode_predictions` expects '
                     'a batch of predictions '
                     '(i.e. a 2D array of shape (samples, 1000)). '
                     'Found array with shape: ' + str(preds.shape))
  # Download and parse the class index only once; later calls reuse the
  # module-level cache.
  if CLASS_INDEX is None:
    fpath = data_utils.get_file(
        'imagenet_class_index.json',
        CLASS_INDEX_PATH,
        cache_subdir='models',
        file_hash='c2c37ea517e94d9795004a39431a14cb')
    # Explicit encoding so parsing does not depend on the platform locale.
    with open(fpath, encoding='utf-8') as f:
      CLASS_INDEX = json.load(f)
  results = []
  for pred in preds:
    # Reverse first, then slice: `argsort()[-top:]` would select *every*
    # class for `top=0` because `[-0:]` is the whole array. For any other
    # `top` both forms pick the same indices in the same order.
    top_indices = pred.argsort()[::-1][:top]
    result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices]
    # Stable sort kept so the ordering of tied scores is unchanged.
    result.sort(key=lambda x: x[2], reverse=True)
    results.append(result)
  return results
164
165
def _preprocess_numpy_input(x, data_format, mode):
  """Preprocesses a Numpy array encoding a batch of images.

  Floating point inputs are modified in place; inputs of any other dtype are
  first converted to `backend.floatx()`.

  Args:
    x: Input array, 3D or 4D.
    data_format: Data format of the image array.
    mode: One of "caffe", "tf" or "torch".
      - caffe: will convert the images from RGB to BGR,
          then will zero-center each color channel with
          respect to the ImageNet dataset,
          without scaling.
      - tf: will scale pixels between -1 and 1,
          sample-wise.
      - torch: will scale pixels between 0 and 1 and then
          will normalize each channel with respect to the
          ImageNet dataset.

  Returns:
      Preprocessed Numpy array.
  """
  if not issubclass(x.dtype.type, np.floating):
    x = x.astype(backend.floatx(), copy=False)

  # "tf" mode needs no per-channel statistics, so it finishes early.
  if mode == 'tf':
    x /= 127.5
    x -= 1.
    return x

  channels_first = data_format == 'channels_first'

  if mode == 'torch':
    x /= 255.
    channel_means = [0.485, 0.456, 0.406]
    channel_stds = [0.229, 0.224, 0.225]
  else:
    # 'RGB'->'BGR': flip the channel axis (this yields a reversed view).
    if not channels_first:
      x = x[..., ::-1]
    elif x.ndim == 3:
      x = x[::-1, ...]
    else:
      x = x[:, ::-1, ...]
    channel_means = [103.939, 116.779, 123.68]
    channel_stds = None

  # One index tuple per color channel, matching the array layout.
  everything = slice(None)
  if not channels_first:
    selectors = [(Ellipsis, c) for c in range(3)]
  elif x.ndim == 3:
    selectors = [(c, everything, everything) for c in range(3)]
  else:
    selectors = [(everything, c, everything, everything) for c in range(3)]

  # Zero-center by mean pixel, then optionally scale by the channel std.
  for selector, channel_mean in zip(selectors, channel_means):
    x[selector] -= channel_mean
  if channel_stds is not None:
    for selector, channel_std in zip(selectors, channel_stds):
      x[selector] /= channel_std
  return x
237
238
def _preprocess_symbolic_input(x, data_format, mode):
  """Preprocesses a tensor encoding a batch of images.

  Args:
    x: Input tensor, 3D or 4D.
    data_format: Data format of the image tensor.
    mode: One of "caffe", "tf" or "torch".
      - caffe: will convert the images from RGB to BGR,
          then will zero-center each color channel with
          respect to the ImageNet dataset,
          without scaling.
      - tf: will scale pixels between -1 and 1,
          sample-wise.
      - torch: will scale pixels between 0 and 1 and then
          will normalize each channel with respect to the
          ImageNet dataset.

  Returns:
      Preprocessed tensor.
  """
  if mode == 'tf':
    x /= 127.5
    x -= 1.
    return x
  elif mode == 'torch':
    x /= 255.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
  else:
    if data_format == 'channels_first':
      # 'RGB'->'BGR'
      if backend.ndim(x) == 3:
        x = x[::-1, ...]
      else:
        x = x[:, ::-1, ...]
    else:
      # 'RGB'->'BGR'
      x = x[..., ::-1]
    mean = [103.939, 116.779, 123.68]
    std = None

  # The mean is negated so that zero-centering can be expressed as a bias add.
  mean_tensor = backend.constant(-np.array(mean))

  # Zero-center by mean pixel, casting the constant to the dtype of `x` when
  # the two differ.
  if backend.dtype(x) != backend.dtype(mean_tensor):
    x = backend.bias_add(
        x, backend.cast(mean_tensor, backend.dtype(x)), data_format=data_format)
  else:
    x = backend.bias_add(x, mean_tensor, data_format)
  if std is not None:
    # Build the std constant directly in the dtype of `x`, mirroring the cast
    # applied to the mean above, so the division never mixes dtypes.
    std_tensor = backend.constant(np.array(std), dtype=backend.dtype(x))
    if data_format == 'channels_first':
      # Shape (3, 1, 1) broadcasts over the trailing spatial axes.
      std_tensor = backend.reshape(std_tensor, (-1, 1, 1))
    x /= std_tensor
  return x
294
295
def obtain_input_shape(input_shape,
                       default_size,
                       min_size,
                       data_format,
                       require_flatten,
                       weights=None):
  """Internal utility to compute/validate a model's input shape.

  Args:
    input_shape: Either None (will return the default network input shape),
      or a user-provided shape (tuple or list) to be validated.
    default_size: Default input width/height for the model.
    min_size: Minimum input width/height accepted by the model.
    data_format: Image data format to use. Anything other than
      'channels_first' is handled as 'channels_last'.
    require_flatten: Whether the model is expected to
      be linked to a classifier via a Flatten layer.
    weights: One of `None` (random initialization)
      or 'imagenet' (pre-training on ImageNet).
      If weights='imagenet' input channels must be equal to 3.

  Returns:
    An integer shape tuple (may include None entries). A user-provided
    `input_shape` that passes validation is returned as given.

  Raises:
    ValueError: In case of invalid argument values.
  """
  channels_first = data_format == 'channels_first'

  # Default shape: keep the caller's channel count when the weights are not
  # the ImageNet ones and a 3D shape was given; otherwise assume 3 channels.
  if weights != 'imagenet' and input_shape and len(input_shape) == 3:
    channels = input_shape[0] if channels_first else input_shape[-1]
    if channels not in {1, 3}:
      warnings.warn('This model usually expects 1 or 3 input channels. '
                    'However, it was passed an input_shape with ' +
                    str(channels) + ' input channels.')
  else:
    channels = 3
  if channels_first:
    default_shape = (channels, default_size, default_size)
  else:
    default_shape = (default_size, default_size, channels)

  # With ImageNet weights and the classifier on top, only the default shape
  # is acceptable.
  if weights == 'imagenet' and require_flatten:
    if input_shape is not None:
      # A list never compares equal to a tuple, so normalize lists before
      # comparing; otherwise `[224, 224, 3]` would be rejected.
      requested = (
          tuple(input_shape) if isinstance(input_shape, list) else input_shape)
      if requested != default_shape:
        raise ValueError('When setting `include_top=True` '
                         'and loading `imagenet` weights, '
                         '`input_shape` should be ' + str(default_shape) + '.')
    return default_shape

  if input_shape:
    if len(input_shape) != 3:
      raise ValueError('`input_shape` must be a tuple of three integers.')
    if channels_first:
      channels, height, width = input_shape[0], input_shape[1], input_shape[2]
    else:
      height, width, channels = input_shape[0], input_shape[1], input_shape[-1]
    if channels != 3 and weights == 'imagenet':
      raise ValueError('The input must have 3 channels; got '
                       '`input_shape=' + str(input_shape) + '`')
    # Unknown (None) spatial dimensions are exempt from the size check.
    if ((height is not None and height < min_size) or
        (width is not None and width < min_size)):
      raise ValueError('Input size must be at least ' + str(min_size) +
                       'x' + str(min_size) + '; got `input_shape=' +
                       str(input_shape) + '`')
  elif require_flatten:
    input_shape = default_shape
  elif channels_first:
    input_shape = (3, None, None)
  else:
    input_shape = (None, None, 3)

  # A Flatten layer needs every dimension to be known statically.
  if require_flatten and None in input_shape:
    raise ValueError('If `include_top` is True, '
                     'you should specify a static `input_shape`. '
                     'Got `input_shape=' + str(input_shape) + '`')
  return input_shape
386
387
def correct_pad(inputs, kernel_size):
  """Returns a tuple for zero-padding for 2D convolution with downsampling.

  Args:
    inputs: Input tensor.
    kernel_size: An integer or tuple/list of 2 integers.

  Returns:
    A tuple `((pad_before_0, pad_after_0), (pad_before_1, pad_after_1))`,
    one pair per spatial dimension.
  """
  # Index of the first spatial axis in the static shape of `inputs`.
  first_spatial_axis = 1
  if backend.image_data_format() == 'channels_first':
    first_spatial_axis = 2
  static_shape = backend.int_shape(inputs)
  spatial = static_shape[first_spatial_axis:first_spatial_axis + 2]

  if isinstance(kernel_size, int):
    kernel_size = (kernel_size, kernel_size)

  # When the first spatial size is unknown, 1 is trimmed from both leading
  # pads; otherwise 1 is trimmed only for even sizes.
  if spatial[0] is None:
    trim_0, trim_1 = 1, 1
  else:
    trim_0 = 1 - spatial[0] % 2
    trim_1 = 1 - spatial[1] % 2

  half_0 = kernel_size[0] // 2
  half_1 = kernel_size[1] // 2
  return ((half_0 - trim_0, half_0),
          (half_1 - trim_1, half_1))
409
410
def validate_activation(classifier_activation, weights):
  """Validates that `classifier_activation` is compatible with the weights.

  Args:
    classifier_activation: str or callable activation function
    weights: The pretrained weights to load. `None` disables the check.

  Raises:
    ValueError: if pretrained weights are used and the activation resolves to
      anything other than `None` or `softmax`.
  """
  # Nothing to enforce unless pretrained weights are being loaded.
  if weights is not None:
    resolved = activations.get(classifier_activation)
    permitted = {
        activations.get('softmax'),
        activations.get(None),
    }
    if resolved not in permitted:
      raise ValueError('Only `None` and `softmax` activations are allowed '
                       'for the `classifier_activation` argument when using '
                       'pretrained weights, with `include_top=True`')
433