• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15# pylint: disable=invalid-name
16"""MobileNet v2 models for Keras.
17
18MobileNetV2 is a general architecture and can be used for multiple use cases.
19Depending on the use case, it can use different input layer size and
20different width factors. This allows different width models to reduce
21the number of multiply-adds and thereby
22reduce inference cost on mobile devices.
23
24MobileNetV2 is very similar to the original MobileNet,
25except that it uses inverted residual blocks with
26bottlenecking features. It has a drastically lower
27parameter count than the original MobileNet.
28MobileNets support any input size greater
29than 32 x 32, with larger image sizes
30offering better performance.
31
32The number of parameters and number of multiply-adds
33can be modified by using the `alpha` parameter,
34which increases/decreases the number of filters in each layer.
35By altering the image size and `alpha` parameter,
36all 22 models from the paper can be built, with ImageNet weights provided.
37
38The paper demonstrates the performance of MobileNets using `alpha` values of
391.0 (also called 100 % MobileNet), 0.35, 0.5, 0.75, 1.3, and 1.4.
40For each of these `alpha` values, weights for 5 different input image sizes
41are provided (224, 192, 160, 128, and 96).
42
43The following table describes the performance of
44MobileNet on various input sizes:
45------------------------------------------------------------------------
46MACs stands for Multiply Adds
47| Classification Checkpoint | MACs (M) | Parameters (M) | Top 1 Accuracy | Top 5 Accuracy |
48|---------------------------|----------|----------------|----------------|----------------|
49| [mobilenet_v2_1.4_224]  | 582 | 6.06 |          75.0 | 92.5 |
50| [mobilenet_v2_1.3_224]  | 509 | 5.34 |          74.4 | 92.1 |
51| [mobilenet_v2_1.0_224]  | 300 | 3.47 |          71.8 | 91.0 |
52| [mobilenet_v2_1.0_192]  | 221 | 3.47 |          70.7 | 90.1 |
53| [mobilenet_v2_1.0_160]  | 154 | 3.47 |          68.8 | 89.0 |
54| [mobilenet_v2_1.0_128]  | 99  | 3.47 |          65.3 | 86.9 |
55| [mobilenet_v2_1.0_96]   | 56  | 3.47 |          60.3 | 83.2 |
56| [mobilenet_v2_0.75_224] | 209 | 2.61 |          69.8 | 89.6 |
57| [mobilenet_v2_0.75_192] | 153 | 2.61 |          68.7 | 88.9 |
58| [mobilenet_v2_0.75_160] | 107 | 2.61 |          66.4 | 87.3 |
59| [mobilenet_v2_0.75_128] | 69  | 2.61 |          63.2 | 85.3 |
60| [mobilenet_v2_0.75_96]  | 39  | 2.61 |          58.8 | 81.6 |
61| [mobilenet_v2_0.5_224]  | 97  | 1.95 |          65.4 | 86.4 |
62| [mobilenet_v2_0.5_192]  | 71  | 1.95 |          63.9 | 85.4 |
63| [mobilenet_v2_0.5_160]  | 50  | 1.95 |          61.0 | 83.2 |
64| [mobilenet_v2_0.5_128]  | 32  | 1.95 |          57.7 | 80.8 |
65| [mobilenet_v2_0.5_96]   | 18  | 1.95 |          51.2 | 75.8 |
66| [mobilenet_v2_0.35_224] | 59  | 1.66 |          60.3 | 82.9 |
67| [mobilenet_v2_0.35_192] | 43  | 1.66 |          58.2 | 81.2 |
68| [mobilenet_v2_0.35_160] | 30  | 1.66 |          55.7 | 79.1 |
69| [mobilenet_v2_0.35_128] | 20  | 1.66 |          50.8 | 75.0 |
70| [mobilenet_v2_0.35_96]  | 11  | 1.66 |          45.5 | 70.4 |
71
72"""
73from __future__ import absolute_import
74from __future__ import division
75from __future__ import print_function
76
77import os
78
79from tensorflow.python.keras import backend
80from tensorflow.python.keras import layers
81from tensorflow.python.keras.applications import imagenet_utils
82from tensorflow.python.keras.engine import training
83from tensorflow.python.keras.utils import data_utils
84from tensorflow.python.keras.utils import layer_utils
85from tensorflow.python.platform import tf_logging as logging
86from tensorflow.python.util.tf_export import keras_export
87
88
# Base URL of the pre-trained MobileNetV2 weight files. `MobileNetV2` appends
# the per-model file name to it when called with `weights='imagenet'`.
BASE_WEIGHT_PATH = ('https://storage.googleapis.com/tensorflow/'
                    'keras-applications/mobilenet_v2/')
91
92
@keras_export('keras.applications.mobilenet_v2.MobileNetV2',
              'keras.applications.MobileNetV2')
def MobileNetV2(input_shape=None,
                alpha=1.0,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                pooling=None,
                classes=1000,
                **kwargs):
  """Instantiates the MobileNetV2 architecture.

  Reference paper:
  - [MobileNetV2: Inverted Residuals and Linear Bottlenecks]
  (https://arxiv.org/abs/1801.04381) (CVPR 2018)

  Optionally loads weights pre-trained on ImageNet.

  Arguments:
    input_shape: Optional shape tuple, to be specified if you would
      like to use a model with an input image resolution that is not
      (224, 224, 3). The tuple does not include the batch dimension.
      You can also omit this option if you would like
      to infer input_shape from an input_tensor.
      If you choose to include both input_tensor and input_shape then
      input_shape will be used if they match, if the shapes
      do not match then we will throw an error.
      E.g. `(160, 160, 3)` would be one valid value.
    alpha: Float, controls the width of the network.
      This is known as the width multiplier in the MobileNetV2 paper,
      but the name is kept for consistency with `applications.MobileNetV1`
      model in Keras.
      - If `alpha` < 1.0, proportionally decreases the number
          of filters in each layer.
      - If `alpha` > 1.0, proportionally increases the number
          of filters in each layer.
      - If `alpha` = 1, default number of filters from the paper
          are used at each layer.
      When `weights='imagenet'`, `alpha` must be one of
      0.35, 0.50, 0.75, 1.0, 1.3 or 1.4.
    include_top: Boolean, whether to include the fully-connected
      layer at the top of the network. Defaults to `True`.
    weights: String, one of `None` (random initialization),
      'imagenet' (pre-training on ImageNet),
      or the path to the weights file to be loaded.
    input_tensor: Optional Keras tensor (i.e. output of
      `layers.Input()`)
      to use as image input for the model.
    pooling: String, optional pooling mode for feature extraction
      when `include_top` is `False`.
      - `None` means that the output of the model
          will be the 4D tensor output of the
          last convolutional block.
      - `avg` means that global average pooling
          will be applied to the output of the
          last convolutional block, and thus
          the output of the model will be a
          2D tensor.
      - `max` means that global max pooling will
          be applied.
    classes: Integer, optional number of classes to classify images
      into, only to be specified if `include_top` is True, and
      if no `weights` argument is specified.
    **kwargs: For backwards compatibility only. The only accepted key is
      `layers`, which replaces the module-level `layers` object used to
      build the network.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of an unknown keyword argument, an invalid argument
      for `weights`, `classes != 1000` together with `weights='imagenet'`
      and `include_top=True`, an `input_tensor` that is not a Keras tensor
      or does not match `input_shape`, or an unsupported `alpha` when
      `weights='imagenet'`.
  """
  if 'layers' in kwargs:
    global layers
    layers = kwargs.pop('layers')
  if kwargs:
    raise ValueError('Unknown argument(s): %s' % (kwargs,))
  if not (weights in {'imagenet', None} or os.path.exists(weights)):
    raise ValueError('The `weights` argument should be either '
                     '`None` (random initialization), `imagenet` '
                     '(pre-training on ImageNet), '
                     'or the path to the weights file to be loaded.')

  if weights == 'imagenet' and include_top and classes != 1000:
    raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                     'as true, `classes` should be 1000')

  channels_first = backend.image_data_format() == 'channels_first'
  pretrained_sizes = [96, 128, 160, 192, 224]

  # If both input_shape and input_tensor are used, they should match.
  if input_shape is not None and input_tensor is not None:
    try:
      is_input_t_tensor = backend.is_keras_tensor(input_tensor)
    except ValueError:
      try:
        is_input_t_tensor = backend.is_keras_tensor(
            layer_utils.get_source_inputs(input_tensor))
      except ValueError:
        raise ValueError('input_tensor: ', input_tensor,
                         'is not type input_tensor')
    if not is_input_t_tensor:
      raise ValueError('input_tensor specified: ', input_tensor,
                       'is not a keras tensor')
    # The tensor shape carries a leading batch dimension that `input_shape`
    # lacks, so index 2 of the former and index 1 of the latter name the same
    # spatial axis in either data format (rows for channels_first, cols for
    # channels_last). A single comparison therefore covers both layouts.
    if backend.int_shape(input_tensor)[2] != input_shape[1]:
      raise ValueError('input_shape: ', input_shape, 'and input_tensor: ',
                       input_tensor,
                       'do not meet the same shape requirements')

  # Determine the default size: use the spatial size of `input_shape` (or,
  # failing that, of a Keras `input_tensor`) when it is square and one of the
  # pre-trained resolutions; otherwise fall back to 224.
  rows = cols = None
  if input_shape is not None:
    if channels_first:
      rows, cols = input_shape[1], input_shape[2]
    else:
      rows, cols = input_shape[0], input_shape[1]
  elif input_tensor is not None:
    try:
      tensor_is_keras = backend.is_keras_tensor(input_tensor)
    except ValueError:
      raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                       type(input_tensor), 'which is not a valid type')
    if tensor_is_keras:
      tensor_shape = backend.int_shape(input_tensor)
      if channels_first:
        rows, cols = tensor_shape[2], tensor_shape[3]
      else:
        rows, cols = tensor_shape[1], tensor_shape[2]

  if rows == cols and rows in pretrained_sizes:
    default_size = rows
  else:
    default_size = 224

  input_shape = imagenet_utils.obtain_input_shape(
      input_shape,
      default_size=default_size,
      min_size=32,
      data_format=backend.image_data_format(),
      require_flatten=include_top,
      weights=weights)

  if backend.image_data_format() == 'channels_last':
    row_axis, col_axis = (0, 1)
  else:
    row_axis, col_axis = (1, 2)
  rows = input_shape[row_axis]
  cols = input_shape[col_axis]

  if weights == 'imagenet':
    if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
      raise ValueError('If imagenet weights are being loaded, '
                       'alpha can be one of `0.35`, `0.50`, `0.75`, '
                       '`1.0`, `1.3` or `1.4` only.')

    if rows != cols or rows not in pretrained_sizes:
      # `rows` selects the weight file below, so fall back to the 224 one.
      rows = 224
      logging.warning('`input_shape` is undefined or non-square, '
                      'or `rows` is not in [96, 128, 160, 192, 224].'
                      ' Weights for input shape (224, 224) will be'
                      ' loaded as the default.')

  if input_tensor is None:
    img_input = layers.Input(shape=input_shape)
  elif not backend.is_keras_tensor(input_tensor):
    img_input = layers.Input(tensor=input_tensor, shape=input_shape)
  else:
    img_input = input_tensor

  channel_axis = 1 if channels_first else -1

  # Stem: strided 3x3 convolution.
  first_block_filters = _make_divisible(32 * alpha, 8)
  x = layers.ZeroPadding2D(
      padding=imagenet_utils.correct_pad(img_input, 3),
      name='Conv1_pad')(img_input)
  x = layers.Conv2D(
      first_block_filters,
      kernel_size=3,
      strides=(2, 2),
      padding='valid',
      use_bias=False,
      name='Conv1')(
          x)
  x = layers.BatchNormalization(
      axis=channel_axis, epsilon=1e-3, momentum=0.999, name='bn_Conv1')(
          x)
  x = layers.ReLU(6., name='Conv1_relu')(x)

  # (filters, stride, expansion) of each inverted residual block, in network
  # order; the position in the list is the block_id.
  block_specs = [
      (16, 1, 1),
      (24, 2, 6), (24, 1, 6),
      (32, 2, 6), (32, 1, 6), (32, 1, 6),
      (64, 2, 6), (64, 1, 6), (64, 1, 6), (64, 1, 6),
      (96, 1, 6), (96, 1, 6), (96, 1, 6),
      (160, 2, 6), (160, 1, 6), (160, 1, 6),
      (320, 1, 6),
  ]
  for block_id, (filters, stride, expansion) in enumerate(block_specs):
    x = _inverted_res_block(
        x,
        filters=filters,
        alpha=alpha,
        stride=stride,
        expansion=expansion,
        block_id=block_id)

  # The last convolution keeps 1280 output channels for alpha <= 1.0; the
  # width multiplier is only applied to it when it is greater than 1.
  if alpha > 1.0:
    last_block_filters = _make_divisible(1280 * alpha, 8)
  else:
    last_block_filters = 1280

  x = layers.Conv2D(
      last_block_filters, kernel_size=1, use_bias=False, name='Conv_1')(
          x)
  x = layers.BatchNormalization(
      axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv_1_bn')(
          x)
  x = layers.ReLU(6., name='out_relu')(x)

  if include_top:
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(
        classes, activation='softmax', use_bias=True, name='Logits')(
            x)
  elif pooling == 'avg':
    x = layers.GlobalAveragePooling2D()(x)
  elif pooling == 'max':
    x = layers.GlobalMaxPooling2D()(x)

  # Ensure that the model takes into account
  # any potential predecessors of `input_tensor`.
  if input_tensor is not None:
    inputs = layer_utils.get_source_inputs(input_tensor)
  else:
    inputs = img_input

  # Create model.
  model = training.Model(inputs, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows))

  # Load weights.
  if weights == 'imagenet':
    # `float(alpha)` makes an integer alpha (e.g. `1`, which passes the
    # membership check above because `1 == 1.0`) produce the same file name
    # as its float form (`'1.0'`) instead of `'1'`.
    model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                  str(float(alpha)) + '_' + str(rows) +
                  ('' if include_top else '_no_top') + '.h5')
    weights_path = data_utils.get_file(
        model_name, BASE_WEIGHT_PATH + model_name, cache_subdir='models')
    model.load_weights(weights_path)
  elif weights is not None:
    model.load_weights(weights)

  return model
401
402
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
  """Builds one inverted residual block: expand, depthwise, then project.

  Arguments:
    inputs: Input tensor of the block.
    expansion: Multiplier applied to the number of input channels to get the
      width of the 1x1 expansion convolution.
    stride: Stride of the 3x3 depthwise convolution. A stride of 2 adds
      explicit zero padding and uses 'valid' padding for the convolution.
    alpha: Width multiplier applied to `filters`.
    filters: Nominal number of output channels before `alpha` is applied and
      the result is rounded to a multiple of 8.
    block_id: Index of the block, used in the layer names. A falsy value
      (block 0) skips the expansion stage and uses the 'expanded_conv_'
      name prefix.

  Returns:
    The output tensor of the block. The block input is added to it when the
    stride is 1 and the input and output channel counts are equal.
  """
  bn_axis = 1 if backend.image_data_format() == 'channels_first' else -1
  input_channels = backend.int_shape(inputs)[bn_axis]
  output_channels = _make_divisible(int(filters * alpha), 8)

  if block_id:
    name_prefix = 'block_{}_'.format(block_id)
  else:
    name_prefix = 'expanded_conv_'

  def batch_norm(suffix):
    # All batch-norm layers of the block share the same hyper-parameters.
    return layers.BatchNormalization(
        axis=bn_axis, epsilon=1e-3, momentum=0.999, name=name_prefix + suffix)

  net = inputs

  # Expansion stage: 1x1 convolution widening the tensor (skipped for block 0).
  if block_id:
    expand_conv = layers.Conv2D(
        expansion * input_channels,
        kernel_size=1,
        padding='same',
        use_bias=False,
        activation=None,
        name=name_prefix + 'expand')
    net = expand_conv(net)
    net = batch_norm('expand_BN')(net)
    net = layers.ReLU(6., name=name_prefix + 'expand_relu')(net)

  # Depthwise stage: 3x3 depthwise convolution, padded by hand when strided.
  if stride == 2:
    pad_layer = layers.ZeroPadding2D(
        padding=imagenet_utils.correct_pad(net, 3),
        name=name_prefix + 'pad')
    net = pad_layer(net)
  depthwise_padding = 'valid' if stride != 1 else 'same'
  depthwise_conv = layers.DepthwiseConv2D(
      kernel_size=3,
      strides=stride,
      activation=None,
      use_bias=False,
      padding=depthwise_padding,
      name=name_prefix + 'depthwise')
  net = depthwise_conv(net)
  net = batch_norm('depthwise_BN')(net)
  net = layers.ReLU(6., name=name_prefix + 'depthwise_relu')(net)

  # Projection stage: 1x1 convolution with no activation after batch norm.
  project_conv = layers.Conv2D(
      output_channels,
      kernel_size=1,
      padding='same',
      use_bias=False,
      activation=None,
      name=name_prefix + 'project')
  net = project_conv(net)
  net = batch_norm('project_BN')(net)

  # The residual connection needs matching shapes on both sides.
  if stride != 1 or input_channels != output_channels:
    return net
  return layers.Add(name=name_prefix + 'add')([inputs, net])
474
475
476def _make_divisible(v, divisor, min_value=None):
477  if min_value is None:
478    min_value = divisor
479  new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
480  # Make sure that round down does not go down by more than 10%.
481  if new_v < 0.9 * v:
482    new_v += divisor
483  return new_v
484
485
@keras_export('keras.applications.mobilenet_v2.preprocess_input')
def preprocess_input(x, data_format=None):
  """Preprocesses a batch of images for use with MobileNetV2.

  Thin wrapper that forwards to `imagenet_utils.preprocess_input` with the
  preprocessing mode fixed to `'tf'`.

  Arguments:
    x: The input to preprocess (encoding a batch of images).
    data_format: Optional data format, passed through unchanged
      (defaults to `None`).

  Returns:
    Whatever `imagenet_utils.preprocess_input` returns for these arguments.
  """
  preprocess = imagenet_utils.preprocess_input
  return preprocess(x, mode='tf', data_format=data_format)
490
491
@keras_export('keras.applications.mobilenet_v2.decode_predictions')
def decode_predictions(preds, top=5):
  """Decodes the predictions produced by the model.

  Thin wrapper that forwards to `imagenet_utils.decode_predictions`.

  Arguments:
    preds: The prediction result returned by the model.
    top: Forwarded as the `top` argument (defaults to 5).

  Returns:
    Whatever `imagenet_utils.decode_predictions` returns for these arguments.
  """
  decode = imagenet_utils.decode_predictions
  return decode(preds, top=top)
496