# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for ImageNet data preprocessing & prediction decoding."""

import json
import warnings

import numpy as np

from tensorflow.python.keras import activations
from tensorflow.python.keras import backend
from tensorflow.python.keras.utils import data_utils
from tensorflow.python.util.tf_export import keras_export


# Lazily populated by `decode_predictions` on first use.
CLASS_INDEX = None
CLASS_INDEX_PATH = ('https://storage.googleapis.com/download.tensorflow.org/'
                    'data/imagenet_class_index.json')


# Docstring template for `preprocess_input`; the `{mode}`, `{ret}` and
# `{error}` placeholders are filled in with `str.format`.
PREPROCESS_INPUT_DOC = """
  Preprocesses a tensor or Numpy array encoding a batch of images.

  Usage example with `applications.MobileNet`:

  ```python
  i = tf.keras.layers.Input([None, None, 3], dtype = tf.uint8)
  x = tf.cast(i, tf.float32)
  x = tf.keras.applications.mobilenet.preprocess_input(x)
  core = tf.keras.applications.MobileNet()
  x = core(x)
  model = tf.keras.Model(inputs=[i], outputs=[x])

  image = tf.image.decode_png(tf.io.read_file('file.png'))
  result = model(image)
  ```

  Args:
    x: A floating point `numpy.array` or a `tf.Tensor`, 3D or 4D with 3 color
      channels, with values in the range [0, 255].
      The preprocessed data are written over the input data
      if the data types are compatible. To avoid this
      behaviour, `numpy.copy(x)` can be used.
    data_format: Optional data format of the image tensor/array. Defaults to
      None, in which case the global setting
      `tf.keras.backend.image_data_format()` is used (unless you changed it,
      it defaults to "channels_last").{mode}

  Returns:
      Preprocessed `numpy.array` or a `tf.Tensor` with type `float32`.
      {ret}

  Raises:
      {error}
  """

PREPROCESS_INPUT_MODE_DOC = """
    mode: One of "caffe", "tf" or "torch". Defaults to "caffe".
      - caffe: will convert the images from RGB to BGR,
          then will zero-center each color channel with
          respect to the ImageNet dataset,
          without scaling.
      - tf: will scale pixels between -1 and 1,
          sample-wise.
      - torch: will scale pixels between 0 and 1 and then
          will normalize each channel with respect to the
          ImageNet dataset.
  """

PREPROCESS_INPUT_DEFAULT_ERROR_DOC = """
    ValueError: In case of unknown `mode` or `data_format` argument."""

PREPROCESS_INPUT_ERROR_DOC = """
    ValueError: In case of unknown `data_format` argument."""

PREPROCESS_INPUT_RET_DOC_TF = """
      The inputs pixel values are scaled between -1 and 1, sample-wise."""

PREPROCESS_INPUT_RET_DOC_TORCH = """
      The input pixels values are scaled between 0 and 1 and each channel is
      normalized with respect to the ImageNet dataset."""

PREPROCESS_INPUT_RET_DOC_CAFFE = """
      The images are converted from RGB to BGR, then each color channel is
      zero-centered with respect to the ImageNet dataset, without scaling."""


@keras_export('keras.applications.imagenet_utils.preprocess_input')
def preprocess_input(x, data_format=None, mode='caffe'):
  """Preprocesses a tensor or Numpy array encoding a batch of images."""
  if mode not in {'caffe', 'tf', 'torch'}:
    raise ValueError('Unknown mode ' + str(mode))

  if data_format is None:
    data_format = backend.image_data_format()
  elif data_format not in {'channels_first', 'channels_last'}:
    raise ValueError('Unknown data_format ' + str(data_format))

  # Numpy arrays are processed with numpy ops; anything else is handed to
  # the backend (symbolic) implementation.
  if isinstance(x, np.ndarray):
    return _preprocess_numpy_input(x, data_format=data_format, mode=mode)
  return _preprocess_symbolic_input(x, data_format=data_format, mode=mode)


# The full user-facing docstring is generated from the shared template.
preprocess_input.__doc__ = PREPROCESS_INPUT_DOC.format(
    mode=PREPROCESS_INPUT_MODE_DOC,
    ret='',
    error=PREPROCESS_INPUT_DEFAULT_ERROR_DOC)


@keras_export('keras.applications.imagenet_utils.decode_predictions')
def decode_predictions(preds, top=5):
  """Decodes the prediction of an ImageNet model.

  On first use the class index JSON is fetched through
  `data_utils.get_file` from `CLASS_INDEX_PATH` and kept in the module-level
  `CLASS_INDEX` for subsequent calls.

  Args:
    preds: Numpy array encoding a batch of predictions.
    top: Integer, how many top-guesses to return. Defaults to 5.
      `top=0` yields an empty list for every sample.

  Returns:
    A list of lists of top class prediction tuples
    `(class_name, class_description, score)`.
    One list of tuples per sample in batch input.

  Raises:
    ValueError: In case of invalid shape of the `preds` array
      (must be 2D with shape `(samples, 1000)`).
  """
  global CLASS_INDEX

  if len(preds.shape) != 2 or preds.shape[1] != 1000:
    raise ValueError('`decode_predictions` expects '
                     'a batch of predictions '
                     '(i.e. a 2D array of shape (samples, 1000)). '
                     'Found array with shape: ' + str(preds.shape))
  if CLASS_INDEX is None:
    fpath = data_utils.get_file(
        'imagenet_class_index.json',
        CLASS_INDEX_PATH,
        cache_subdir='models',
        file_hash='c2c37ea517e94d9795004a39431a14cb')
    with open(fpath, encoding='utf-8') as f:
      CLASS_INDEX = json.load(f)
  results = []
  for pred in preds:
    # Reverse first, then truncate. Slicing with `[-top:]` before reversing
    # would return *every* class for `top=0`, because `-0 == 0`.
    top_indices = pred.argsort()[::-1][:top]
    result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices]
    result.sort(key=lambda entry: entry[2], reverse=True)
    results.append(result)
  return results


def _preprocess_numpy_input(x, data_format, mode):
  """Preprocesses a Numpy array encoding a batch of images.

  Floating point inputs are modified in place; other dtypes are first
  converted to `backend.floatx()`.

  Args:
    x: Input array, 3D or 4D.
    data_format: Data format of the image array.
    mode: One of "caffe", "tf" or "torch".
      - caffe: will convert the images from RGB to BGR,
          then will zero-center each color channel with
          respect to the ImageNet dataset,
          without scaling.
      - tf: will scale pixels between -1 and 1,
          sample-wise.
      - torch: will scale pixels between 0 and 1 and then
          will normalize each channel with respect to the
          ImageNet dataset.

  Returns:
    Preprocessed Numpy array.
  """
  if not issubclass(x.dtype.type, np.floating):
    x = x.astype(backend.floatx(), copy=False)

  if mode == 'tf':
    x /= 127.5
    x -= 1.
    return x

  if mode == 'torch':
    x /= 255.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
  else:
    # 'RGB'->'BGR': reverse the channel axis.
    if data_format == 'channels_first':
      if x.ndim == 3:
        x = x[::-1, ...]
      else:
        x = x[:, ::-1, ...]
    else:
      x = x[..., ::-1]
    mean = [103.939, 116.779, 123.68]
    std = None

  # Zero-center by mean pixel (and scale by `std` when one is defined),
  # one channel at a time.
  if data_format == 'channels_first':
    if x.ndim == 3:
      for channel in range(3):
        x[channel, :, :] -= mean[channel]
        if std is not None:
          x[channel, :, :] /= std[channel]
    else:
      for channel in range(3):
        x[:, channel, :, :] -= mean[channel]
        if std is not None:
          x[:, channel, :, :] /= std[channel]
  else:
    for channel in range(3):
      x[..., channel] -= mean[channel]
      if std is not None:
        x[..., channel] /= std[channel]
  return x


def _preprocess_symbolic_input(x, data_format, mode):
  """Preprocesses a tensor encoding a batch of images.

  Args:
    x: Input tensor, 3D or 4D.
    data_format: Data format of the image tensor.
    mode: One of "caffe", "tf" or "torch".
      - caffe: will convert the images from RGB to BGR,
          then will zero-center each color channel with
          respect to the ImageNet dataset,
          without scaling.
      - tf: will scale pixels between -1 and 1,
          sample-wise.
      - torch: will scale pixels between 0 and 1 and then
          will normalize each channel with respect to the
          ImageNet dataset.

  Returns:
    Preprocessed tensor.
  """
  if mode == 'tf':
    x /= 127.5
    x -= 1.
    return x

  if mode == 'torch':
    x /= 255.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
  else:
    # 'RGB'->'BGR': reverse the channel axis.
    if data_format == 'channels_first':
      if backend.ndim(x) == 3:
        x = x[::-1, ...]
      else:
        x = x[:, ::-1, ...]
    else:
      x = x[..., ::-1]
    mean = [103.939, 116.779, 123.68]
    std = None

  # The mean is negated so that it can be applied with `bias_add`.
  mean_tensor = backend.constant(-np.array(mean))

  # Zero-center by mean pixel
  if backend.dtype(x) != backend.dtype(mean_tensor):
    x = backend.bias_add(
        x, backend.cast(mean_tensor, backend.dtype(x)), data_format=data_format)
  else:
    x = backend.bias_add(x, mean_tensor, data_format)
  if std is not None:
    # Build the constant in the dtype of `x`; a default-dtype constant makes
    # the division fail for inputs whose dtype differs from it.
    std_tensor = backend.constant(np.array(std), dtype=backend.dtype(x))
    if data_format == 'channels_first':
      std_tensor = backend.reshape(std_tensor, (-1, 1, 1))
    x /= std_tensor
  return x


def obtain_input_shape(input_shape,
                       default_size,
                       min_size,
                       data_format,
                       require_flatten,
                       weights=None):
  """Internal utility to compute/validate a model's input shape.

  Args:
    input_shape: Either None (will return the default network input shape),
      or a user-provided shape to be validated.
    default_size: Default input width/height for the model.
    min_size: Minimum input width/height accepted by the model.
    data_format: Image data format to use.
    require_flatten: Whether the model is expected to
      be linked to a classifier via a Flatten layer.
    weights: One of `None` (random initialization)
      or 'imagenet' (pre-training on ImageNet).
      If weights='imagenet' input channels must be equal to 3.

  Returns:
    An integer shape tuple (may include None entries).

  Raises:
    ValueError: In case of invalid argument values.
  """
  channels_first = data_format == 'channels_first'

  # Work out the default shape. Without ImageNet weights the channel count
  # of a user-provided 3D shape is kept; otherwise 3 channels are assumed.
  if weights != 'imagenet' and input_shape and len(input_shape) == 3:
    channels = input_shape[0] if channels_first else input_shape[-1]
    if channels not in {1, 3}:
      warnings.warn('This model usually expects 1 or 3 input channels. '
                    'However, it was passed an input_shape with ' +
                    str(channels) + ' input channels.')
    if channels_first:
      default_shape = (channels, default_size, default_size)
    else:
      default_shape = (default_size, default_size, channels)
  else:
    if channels_first:
      default_shape = (3, default_size, default_size)
    else:
      default_shape = (default_size, default_size, 3)

  # With ImageNet weights and the classifier top, only the default shape is
  # accepted.
  if weights == 'imagenet' and require_flatten:
    if input_shape is not None and input_shape != default_shape:
      raise ValueError('When setting `include_top=True` '
                       'and loading `imagenet` weights, '
                       '`input_shape` should be ' + str(default_shape) + '.')
    return default_shape

  if input_shape:
    if len(input_shape) != 3:
      raise ValueError('`input_shape` must be a tuple of three integers.')
    if channels_first:
      channels, spatial_dims = input_shape[0], input_shape[1:]
    else:
      channels, spatial_dims = input_shape[-1], input_shape[:2]
    if channels != 3 and weights == 'imagenet':
      raise ValueError('The input must have 3 channels; got '
                       '`input_shape=' + str(input_shape) + '`')
    # `None` marks a dynamic dimension and is exempt from the size check.
    if any(dim is not None and dim < min_size for dim in spatial_dims):
      raise ValueError('Input size must be at least ' + str(min_size) +
                       'x' + str(min_size) + '; got `input_shape=' +
                       str(input_shape) + '`')
  else:
    if require_flatten:
      input_shape = default_shape
    elif channels_first:
      input_shape = (3, None, None)
    else:
      input_shape = (None, None, 3)

  if require_flatten and None in input_shape:
    raise ValueError('If `include_top` is True, '
                     'you should specify a static `input_shape`. '
                     'Got `input_shape=' + str(input_shape) + '`')
  return input_shape


def correct_pad(inputs, kernel_size):
  """Returns a tuple for zero-padding for 2D convolution with downsampling.

  Args:
    inputs: Input tensor.
    kernel_size: An integer or tuple/list of 2 integers.

  Returns:
    A tuple of two `(before, after)` padding pairs, one per spatial dimension.
  """
  img_dim = 2 if backend.image_data_format() == 'channels_first' else 1
  input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)]
  if isinstance(kernel_size, int):
    kernel_size = (kernel_size, kernel_size)
  # Parity cannot be computed for an unknown (None) dimension, so fall back
  # to the (1, 1) adjustment whenever any spatial dimension is dynamic.
  if any(dim is None for dim in input_size):
    adjust = (1, 1)
  else:
    adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
  correct = (kernel_size[0] // 2, kernel_size[1] // 2)
  return ((correct[0] - adjust[0], correct[0]),
          (correct[1] - adjust[1], correct[1]))


def validate_activation(classifier_activation, weights):
  """Validates that the classifier_activation is compatible with the weights.

  Args:
    classifier_activation: str or callable activation function
    weights: The pretrained weights to load.

  Raises:
    ValueError: if an activation other than `None` or `softmax` is used with
      pretrained weights.
  """
  if weights is None:
    return

  # Resolve to the activation callable so that names and callables compare
  # against the same objects.
  classifier_activation = activations.get(classifier_activation)
  if classifier_activation not in {
      activations.get('softmax'),
      activations.get(None)
  }:
    raise ValueError('Only `None` and `softmax` activations are allowed '
                     'for the `classifier_activation` argument when using '
                     'pretrained weights, with `include_top=True`')