# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=invalid-name
16"""MobileNet v2 models for Keras.
17
18MobileNetV2 is a general architecture and can be used for multiple use cases.
19Depending on the use case, it can use different input layer size and
20different width factors. This allows different width models to reduce
21the number of multiply-adds and thereby
22reduce inference cost on mobile devices.

MobileNetV2 is very similar to the original MobileNet,
except that it uses inverted residual blocks with
bottlenecking features. It has a drastically lower
parameter count than the original MobileNet.
MobileNets support any input size greater
than 32 x 32, with larger image sizes
offering better performance.

The number of parameters and number of multiply-adds
can be modified by using the `alpha` parameter,
which increases/decreases the number of filters in each layer.
By altering the image size and `alpha` parameter,
all 22 models from the paper can be built, with ImageNet weights provided.

The paper demonstrates the performance of MobileNets using `alpha` values of
0.35, 0.5, 0.75, 1.0 (also called 100% MobileNet), 1.3, and 1.4.
For alphas 0.35 through 1.0, weights for 5 different input image sizes
are provided (224, 192, 160, 128, and 96); for 1.3 and 1.4, weights are
provided for 224 only.
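
For example, a width-0.5 model at 160x160 input can be built as follows
(a minimal sketch; `weights='imagenet'` downloads the matching checkpoint
on first use):

  import tensorflow as tf

  model = tf.keras.applications.MobileNetV2(
      input_shape=(160, 160, 3), alpha=0.5, weights='imagenet')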

The following table describes the performance of
MobileNet on various input sizes (MACs stands for Multiply-Adds):

| Classification Checkpoint | MACs (M) | Parameters (M) | Top 1 Accuracy | Top 5 Accuracy |
|---------------------------|----------|----------------|----------------|----------------|
| [mobilenet_v2_1.4_224]    | 582      | 6.06           | 75.0           | 92.5           |
| [mobilenet_v2_1.3_224]    | 509      | 5.34           | 74.4           | 92.1           |
| [mobilenet_v2_1.0_224]    | 300      | 3.47           | 71.8           | 91.0           |
| [mobilenet_v2_1.0_192]    | 221      | 3.47           | 70.7           | 90.1           |
| [mobilenet_v2_1.0_160]    | 154      | 3.47           | 68.8           | 89.0           |
| [mobilenet_v2_1.0_128]    | 99       | 3.47           | 65.3           | 86.9           |
| [mobilenet_v2_1.0_96]     | 56       | 3.47           | 60.3           | 83.2           |
| [mobilenet_v2_0.75_224]   | 209      | 2.61           | 69.8           | 89.6           |
| [mobilenet_v2_0.75_192]   | 153      | 2.61           | 68.7           | 88.9           |
| [mobilenet_v2_0.75_160]   | 107      | 2.61           | 66.4           | 87.3           |
| [mobilenet_v2_0.75_128]   | 69       | 2.61           | 63.2           | 85.3           |
| [mobilenet_v2_0.75_96]    | 39       | 2.61           | 58.8           | 81.6           |
| [mobilenet_v2_0.5_224]    | 97       | 1.95           | 65.4           | 86.4           |
| [mobilenet_v2_0.5_192]    | 71       | 1.95           | 63.9           | 85.4           |
| [mobilenet_v2_0.5_160]    | 50       | 1.95           | 61.0           | 83.2           |
| [mobilenet_v2_0.5_128]    | 32       | 1.95           | 57.7           | 80.8           |
| [mobilenet_v2_0.5_96]     | 18       | 1.95           | 51.2           | 75.8           |
| [mobilenet_v2_0.35_224]   | 59       | 1.66           | 60.3           | 82.9           |
| [mobilenet_v2_0.35_192]   | 43       | 1.66           | 58.2           | 81.2           |
| [mobilenet_v2_0.35_160]   | 30       | 1.66           | 55.7           | 79.1           |
| [mobilenet_v2_0.35_128]   | 20       | 1.66           | 50.8           | 75.0           |
| [mobilenet_v2_0.35_96]    | 11       | 1.66           | 45.5           | 70.4           |

  Reference:
  - [MobileNetV2: Inverted Residuals and Linear Bottlenecks](
      https://arxiv.org/abs/1801.04381) (CVPR 2018)
"""

from tensorflow.python.keras import backend
from tensorflow.python.keras.applications import imagenet_utils
from tensorflow.python.keras.engine import training
from tensorflow.python.keras.layers import VersionAwareLayers
from tensorflow.python.keras.utils import data_utils
from tensorflow.python.keras.utils import layer_utils
from tensorflow.python.lib.io import file_io
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import keras_export

BASE_WEIGHT_PATH = ('https://storage.googleapis.com/tensorflow/'
                    'keras-applications/mobilenet_v2/')
layers = None
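# `layers` is rebound inside MobileNetV2() to a VersionAwareLayers instance
# (or to a caller-supplied module passed via the `layers` keyword argument).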


@keras_export('keras.applications.mobilenet_v2.MobileNetV2',
              'keras.applications.MobileNetV2')
def MobileNetV2(input_shape=None,
                alpha=1.0,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                pooling=None,
                classes=1000,
                classifier_activation='softmax',
                **kwargs):
103  """Instantiates the MobileNetV2 architecture.
104
105  MobileNetV2 is very similar to the original MobileNet,
106  except that it uses inverted residual blocks with
107  bottlenecking features. It has a drastically lower
108  parameter count than the original MobileNet.
109  MobileNets support any input size greater
110  than 32 x 32, with larger image sizes
111  offering better performance.
112
113  Reference:
114  - [MobileNetV2: Inverted Residuals and Linear Bottlenecks](
115      https://arxiv.org/abs/1801.04381) (CVPR 2018)
116
117  This function returns a Keras image classification model,
118  optionally loaded with weights pre-trained on ImageNet.
119
120  For image classification use cases, see
121  [this page for detailed examples](
122    https://keras.io/api/applications/#usage-examples-for-image-classification-models).
123
124  For transfer learning use cases, make sure to read the
125  [guide to transfer learning & fine-tuning](
126    https://keras.io/guides/transfer_learning/).
127
128  Note: each Keras Application expects a specific kind of input preprocessing.
129  For MobileNetV2, call `tf.keras.applications.mobilenet_v2.preprocess_input`
130  on your inputs before passing them to the model.
131  `mobilenet_v2.preprocess_input` will scale input pixels between -1 and 1.
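
  Usage example (a minimal sketch; classifies one random image, assuming
  the ImageNet weights download succeeds on first use):

    import numpy as np
    import tensorflow as tf

    model = tf.keras.applications.MobileNetV2(weights='imagenet')
    # One dummy 224x224 RGB image with pixel values in [0, 255].
    images = np.random.uniform(0, 255, size=(1, 224, 224, 3))
    # preprocess_input scales pixels to [-1, 1], as the network expects.
    inputs = tf.keras.applications.mobilenet_v2.preprocess_input(images)
    preds = model.predict(inputs)
    # Decode into (class_id, description, probability) tuples.
    print(tf.keras.applications.mobilenet_v2.decode_predictions(preds, top=3))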

  Args:
    input_shape: Optional shape tuple, to be specified if you would
      like to use a model with an input image resolution that is not
      (224, 224, 3).
      It should have exactly 3 input channels.
      You can also omit this option if you would like
      to infer input_shape from an input_tensor.
      If you choose to include both input_tensor and input_shape, then
      input_shape will be used if they match; if the shapes
      do not match, an error is raised.
      E.g. `(160, 160, 3)` would be one valid value.
    alpha: Float, controls the width of the network. This is known as the
      width multiplier in the MobileNetV2 paper, but the name is kept for
      consistency with the `applications.MobileNet` model in Keras.
      - If `alpha` < 1.0, proportionally decreases the number
          of filters in each layer.
      - If `alpha` > 1.0, proportionally increases the number
          of filters in each layer.
      - If `alpha` = 1.0, default number of filters from the paper
          are used at each layer.
    include_top: Boolean, whether to include the fully-connected
      layer at the top of the network. Defaults to `True`.
    weights: String, one of `None` (random initialization),
      'imagenet' (pre-training on ImageNet),
      or the path to the weights file to be loaded.
    input_tensor: Optional Keras tensor (i.e. output of
      `layers.Input()`)
      to use as image input for the model.
    pooling: String, optional pooling mode for feature extraction
      when `include_top` is `False` (see the feature-extraction
      example after the Returns section below).
      - `None` means that the output of the model
          will be the 4D tensor output of the
          last convolutional block.
      - `avg` means that global average pooling
          will be applied to the output of the
          last convolutional block, and thus
          the output of the model will be a
          2D tensor.
      - `max` means that global max pooling will
          be applied.
    classes: Integer, optional number of classes to classify images
      into, only to be specified if `include_top` is True, and
      if no `weights` argument is specified.
    classifier_activation: A `str` or callable. The activation function to use
      on the "top" layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the "top" layer.
      When loading pretrained weights, `classifier_activation` can only
      be `None` or `"softmax"`.
    **kwargs: For backwards compatibility only.

  Returns:
    A `keras.Model` instance.
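
  Feature-extraction example (a minimal sketch; `include_top=False` with
  `pooling='avg'` yields one feature vector per image, and `weights=None`
  avoids any download):

    import numpy as np
    import tensorflow as tf

    base = tf.keras.applications.MobileNetV2(
        input_shape=(160, 160, 3), include_top=False,
        pooling='avg', weights=None)
    images = np.random.uniform(-1, 1, size=(8, 160, 160, 3))
    features = base.predict(images)
    print(features.shape)  # (8, 1280): one 1280-d vector per image.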
186  """
  global layers
  if 'layers' in kwargs:
    layers = kwargs.pop('layers')
  else:
    layers = VersionAwareLayers()
  if kwargs:
    raise ValueError('Unknown argument(s): %s' % (kwargs,))
  if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)):
    raise ValueError('The `weights` argument should be either '
                     '`None` (random initialization), `imagenet` '
                     '(pre-training on ImageNet), '
                     'or the path to the weights file to be loaded.')

  if weights == 'imagenet' and include_top and classes != 1000:
    raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                     'as true, `classes` should be 1000')

  # Determine proper input shape and default size.
  # If both input_shape and input_tensor are used, they should match.
  if input_shape is not None and input_tensor is not None:
    try:
      is_input_t_tensor = backend.is_keras_tensor(input_tensor)
    except ValueError:
      try:
        is_input_t_tensor = backend.is_keras_tensor(
            layer_utils.get_source_inputs(input_tensor))
      except ValueError:
        raise ValueError('input_tensor: ', input_tensor,
                         'is not a valid input_tensor type')
    if is_input_t_tensor:
      if backend.image_data_format() == 'channels_first':
        if backend.int_shape(input_tensor)[1] != input_shape[1]:
          raise ValueError('input_shape: ', input_shape, 'and input_tensor: ',
                           input_tensor,
                           'do not meet the same shape requirements')
      else:
        if backend.int_shape(input_tensor)[2] != input_shape[1]:
          raise ValueError('input_shape: ', input_shape, 'and input_tensor: ',
                           input_tensor,
                           'do not meet the same shape requirements')
    else:
      raise ValueError('input_tensor specified: ', input_tensor,
                       'is not a keras tensor')

  # If input_shape is None, infer shape from input_tensor.
  if input_shape is None and input_tensor is not None:

    try:
      backend.is_keras_tensor(input_tensor)
    except ValueError:
      raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                       type(input_tensor), 'which is not a valid type')

    if input_shape is None and not backend.is_keras_tensor(input_tensor):
      default_size = 224
    elif input_shape is None and backend.is_keras_tensor(input_tensor):
      if backend.image_data_format() == 'channels_first':
        rows = backend.int_shape(input_tensor)[2]
        cols = backend.int_shape(input_tensor)[3]
      else:
        rows = backend.int_shape(input_tensor)[1]
        cols = backend.int_shape(input_tensor)[2]

      if rows == cols and rows in [96, 128, 160, 192, 224]:
        default_size = rows
      else:
        default_size = 224

  # If input_shape is None and there is no input_tensor.
  elif input_shape is None:
    default_size = 224

  # If input_shape is not None, derive the default size from it.
  else:
    if backend.image_data_format() == 'channels_first':
      rows = input_shape[1]
      cols = input_shape[2]
    else:
      rows = input_shape[0]
      cols = input_shape[1]

    if rows == cols and rows in [96, 128, 160, 192, 224]:
      default_size = rows
    else:
      default_size = 224
  input_shape = imagenet_utils.obtain_input_shape(
      input_shape,
      default_size=default_size,
      min_size=32,
      data_format=backend.image_data_format(),
      require_flatten=include_top,
      weights=weights)

  if backend.image_data_format() == 'channels_last':
    row_axis, col_axis = (0, 1)
  else:
    row_axis, col_axis = (1, 2)
  rows = input_shape[row_axis]
  cols = input_shape[col_axis]

  if weights == 'imagenet':
    if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
      raise ValueError('If imagenet weights are being loaded, '
                       'alpha can be one of `0.35`, `0.50`, `0.75`, '
                       '`1.0`, `1.3` or `1.4` only.')

    if rows != cols or rows not in [96, 128, 160, 192, 224]:
      rows = 224
      logging.warning('`input_shape` is undefined or non-square, '
                      'or `rows` is not in [96, 128, 160, 192, 224].'
                      ' Weights for input shape (224, 224) will be'
                      ' loaded as the default.')

  if input_tensor is None:
    img_input = layers.Input(shape=input_shape)
  else:
    if not backend.is_keras_tensor(input_tensor):
      img_input = layers.Input(tensor=input_tensor, shape=input_shape)
    else:
      img_input = input_tensor

  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

  first_block_filters = _make_divisible(32 * alpha, 8)
  x = layers.Conv2D(
      first_block_filters,
      kernel_size=3,
      strides=(2, 2),
      padding='same',
      use_bias=False,
      name='Conv1')(img_input)
  x = layers.BatchNormalization(
      axis=channel_axis, epsilon=1e-3, momentum=0.999, name='bn_Conv1')(x)
  x = layers.ReLU(6., name='Conv1_relu')(x)

  x = _inverted_res_block(
      x, filters=16, alpha=alpha, stride=1, expansion=1, block_id=0)

  x = _inverted_res_block(
      x, filters=24, alpha=alpha, stride=2, expansion=6, block_id=1)
  x = _inverted_res_block(
      x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2)

  x = _inverted_res_block(
      x, filters=32, alpha=alpha, stride=2, expansion=6, block_id=3)
  x = _inverted_res_block(
      x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=4)
  x = _inverted_res_block(
      x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5)

  x = _inverted_res_block(
      x, filters=64, alpha=alpha, stride=2, expansion=6, block_id=6)
  x = _inverted_res_block(
      x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7)
  x = _inverted_res_block(
      x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8)
  x = _inverted_res_block(
      x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9)

  x = _inverted_res_block(
      x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10)
  x = _inverted_res_block(
      x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11)
  x = _inverted_res_block(
      x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12)

  x = _inverted_res_block(
      x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13)
  x = _inverted_res_block(
      x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14)
  x = _inverted_res_block(
      x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15)

  x = _inverted_res_block(
      x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16)

  # No alpha applied to the last conv as stated in the paper:
  # if the width multiplier is greater than 1, we increase
  # the number of output channels.
  if alpha > 1.0:
    last_block_filters = _make_divisible(1280 * alpha, 8)
  else:
    last_block_filters = 1280

  x = layers.Conv2D(
      last_block_filters, kernel_size=1, use_bias=False, name='Conv_1')(x)
  x = layers.BatchNormalization(
      axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv_1_bn')(x)
  x = layers.ReLU(6., name='out_relu')(x)

  if include_top:
    x = layers.GlobalAveragePooling2D()(x)
    imagenet_utils.validate_activation(classifier_activation, weights)
    x = layers.Dense(classes, activation=classifier_activation,
                     name='predictions')(x)
  else:
    if pooling == 'avg':
      x = layers.GlobalAveragePooling2D()(x)
    elif pooling == 'max':
      x = layers.GlobalMaxPooling2D()(x)

  # Ensure that the model takes into account
  # any potential predecessors of `input_tensor`.
  if input_tensor is not None:
    inputs = layer_utils.get_source_inputs(input_tensor)
  else:
    inputs = img_input

  # Create model.
  model = training.Model(inputs, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows))

  # Load weights.
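  # The checkpoint filename encodes alpha and input size; e.g. alpha=1.0 and
  # rows=224 resolve to
  # 'mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224.h5'.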
  if weights == 'imagenet':
    if include_top:
      model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                    str(float(alpha)) + '_' + str(rows) + '.h5')
      weight_path = BASE_WEIGHT_PATH + model_name
      weights_path = data_utils.get_file(
          model_name, weight_path, cache_subdir='models')
    else:
      model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                    str(float(alpha)) + '_' + str(rows) + '_no_top' + '.h5')
      weight_path = BASE_WEIGHT_PATH + model_name
      weights_path = data_utils.get_file(
          model_name, weight_path, cache_subdir='models')
    model.load_weights(weights_path)
  elif weights is not None:
    model.load_weights(weights)

  return model


def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
  """Inverted residual block: 1x1 expand -> 3x3 depthwise -> 1x1 project."""
  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

  in_channels = backend.int_shape(inputs)[channel_axis]
  pointwise_conv_filters = int(filters * alpha)
  pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
  x = inputs
  prefix = 'block_{}_'.format(block_id)

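  # Illustrative arithmetic (hypothetical values): with in_channels=16,
  # expansion=6, filters=24, alpha=1.0 and stride=2, the block expands to
  # 6 * 16 = 96 channels, applies a stride-2 depthwise conv, then projects
  # down to _make_divisible(24, 8) = 24 channels; no residual shortcut is
  # added because stride != 1.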
  if block_id:
    # Expand
    x = layers.Conv2D(
        expansion * in_channels,
        kernel_size=1,
        padding='same',
        use_bias=False,
        activation=None,
        name=prefix + 'expand')(x)
    x = layers.BatchNormalization(
        axis=channel_axis,
        epsilon=1e-3,
        momentum=0.999,
        name=prefix + 'expand_BN')(x)
    x = layers.ReLU(6., name=prefix + 'expand_relu')(x)
  else:
    prefix = 'expanded_conv_'

  # Depthwise
  if stride == 2:
    x = layers.ZeroPadding2D(
        padding=imagenet_utils.correct_pad(x, 3),
        name=prefix + 'pad')(x)
  x = layers.DepthwiseConv2D(
      kernel_size=3,
      strides=stride,
      activation=None,
      use_bias=False,
      padding='same' if stride == 1 else 'valid',
      name=prefix + 'depthwise')(x)
  x = layers.BatchNormalization(
      axis=channel_axis,
      epsilon=1e-3,
      momentum=0.999,
      name=prefix + 'depthwise_BN')(x)

  x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x)

  # Project
  x = layers.Conv2D(
      pointwise_filters,
      kernel_size=1,
      padding='same',
      use_bias=False,
      activation=None,
      name=prefix + 'project')(x)
  x = layers.BatchNormalization(
      axis=channel_axis,
      epsilon=1e-3,
      momentum=0.999,
      name=prefix + 'project_BN')(x)

  if in_channels == pointwise_filters and stride == 1:
    return layers.Add(name=prefix + 'add')([inputs, x])
  return x


def _make_divisible(v, divisor, min_value=None):
  """Rounds `v` to the nearest multiple of `divisor`, at least `min_value`."""
  if min_value is None:
    min_value = divisor
  new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
  # Make sure that rounding down does not reduce `v` by more than 10%.
  if new_v < 0.9 * v:
    new_v += divisor
  return new_v
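
# Worked example for _make_divisible (illustrative values): with alpha=0.35
# the first conv requests 32 * 0.35 = 11.2 filters; _make_divisible(11.2, 8)
# first rounds to 8, then bumps the result to 16 because 8 < 0.9 * 11.2.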


@keras_export('keras.applications.mobilenet_v2.preprocess_input')
def preprocess_input(x, data_format=None):
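  # 'tf' mode scales pixels from [0, 255] to [-1, 1]; e.g. an input of
  # [0., 127.5, 255.] becomes [-1., 0., 1.].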
  return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf')


@keras_export('keras.applications.mobilenet_v2.decode_predictions')
def decode_predictions(preds, top=5):
  return imagenet_utils.decode_predictions(preds, top=top)


preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
    mode='',
    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF,
    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC)
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__