# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=invalid-name
16"""MobileNet v1 models for Keras.
17
18MobileNet is a general architecture and can be used for multiple use cases.
19Depending on the use case, it can use different input layer size and
20different width factors. This allows different width models to reduce
21the number of multiply-adds and thereby
22reduce inference cost on mobile devices.
23
24MobileNets support any input size greater than 32 x 32, with larger image sizes
25offering better performance.
26The number of parameters and number of multiply-adds
27can be modified by using the `alpha` parameter,
28which increases/decreases the number of filters in each layer.
29By altering the image size and `alpha` parameter,
30all 16 models from the paper can be built, with ImageNet weights provided.
31
32The paper demonstrates the performance of MobileNets using `alpha` values of
331.0 (also called 100 % MobileNet), 0.75, 0.5 and 0.25.
34For each of these `alpha` values, weights for 4 different input image sizes
35are provided (224, 192, 160, 128).
36
37The following table describes the size and accuracy of the 100% MobileNet
38on size 224 x 224:
----------------------------------------------------------------------------
Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M)
----------------------------------------------------------------------------
|   1.0 MobileNet-224    |    70.6 %    |        569        |    4.2     |
|   0.75 MobileNet-224   |    68.4 %    |        325        |    2.6     |
|   0.50 MobileNet-224   |    63.7 %    |        149        |    1.3     |
|   0.25 MobileNet-224   |    50.6 %    |         41        |    0.5     |
----------------------------------------------------------------------------

The following table describes the performance of the 100% MobileNet
on various input sizes:
------------------------------------------------------------------------
      Resolution      | ImageNet Acc | Multiply-Adds (M) | Params (M)
------------------------------------------------------------------------
|  1.0 MobileNet-224  |    70.6 %    |        569        |    4.2     |
|  1.0 MobileNet-192  |    69.1 %    |        418        |    4.2     |
|  1.0 MobileNet-160  |    67.2 %    |        290        |    4.2     |
|  1.0 MobileNet-128  |    64.4 %    |        186        |    4.2     |
------------------------------------------------------------------------

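A minimal usage sketch (assuming a TensorFlow 2.x installation; the
0.75-width, 192 x 192 configuration below is just an illustration of one
of the 16 variants):

  import tensorflow as tf

  # Build a 0.75-width MobileNet classifier with pretrained ImageNet weights.
  model = tf.keras.applications.MobileNet(
      input_shape=(192, 192, 3), alpha=0.75, weights='imagenet')
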
Reference paper:
  - [MobileNets: Efficient Convolutional Neural Networks for
     Mobile Vision Applications](https://arxiv.org/abs/1704.04861)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

from tensorflow.python.keras import backend
from tensorflow.python.keras import layers
from tensorflow.python.keras.applications import imagenet_utils
from tensorflow.python.keras.engine import training
from tensorflow.python.keras.utils import data_utils
from tensorflow.python.keras.utils import layer_utils
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import keras_export

BASE_WEIGHT_PATH = ('https://storage.googleapis.com/tensorflow/'
                    'keras-applications/mobilenet/')


@keras_export('keras.applications.mobilenet.MobileNet',
              'keras.applications.MobileNet')
def MobileNet(input_shape=None,
              alpha=1.0,
              depth_multiplier=1,
              dropout=1e-3,
              include_top=True,
              weights='imagenet',
              input_tensor=None,
              pooling=None,
              classes=1000,
              **kwargs):
94  """Instantiates the MobileNet architecture.
95
96  Reference paper:
97  - [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision
98    Applications](https://arxiv.org/abs/1704.04861)
99
100  Optionally loads weights pre-trained on ImageNet.
101  Note that the data format convention used by the model is
102  the one specified in the `tf.keras.backend.image_data_format()`.
103
104  Arguments:
105    input_shape: Optional shape tuple, only to be specified if `include_top`
106      is False (otherwise the input shape has to be `(224, 224, 3)` (with
107      `channels_last` data format) or (3, 224, 224) (with `channels_first`
108      data format). It should have exactly 3 inputs channels, and width and
109      height should be no smaller than 32. E.g. `(200, 200, 3)` would be one
110      valid value. Default to `None`.
111      `input_shape` will be ignored if the `input_tensor` is provided.
112    alpha: Controls the width of the network. This is known as the width
113      multiplier in the MobileNet paper. - If `alpha` < 1.0, proportionally
114      decreases the number of filters in each layer. - If `alpha` > 1.0,
115      proportionally increases the number of filters in each layer. - If
116      `alpha` = 1, default number of filters from the paper are used at each
117      layer. Default to 1.0.
118    depth_multiplier: Depth multiplier for depthwise convolution. This is
119      called the resolution multiplier in the MobileNet paper. Default to 1.0.
120    dropout: Dropout rate. Default to 0.001.
121    include_top: Boolean, whether to include the fully-connected layer at the
122      top of the network. Default to `True`.
123    weights: One of `None` (random initialization), 'imagenet' (pre-training
124      on ImageNet), or the path to the weights file to be loaded. Default to
125      `imagenet`.
126    input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to
127      use as image input for the model. `input_tensor` is useful for sharing
128      inputs between multiple different networks. Default to None.
129    pooling: Optional pooling mode for feature extraction when `include_top`
130      is `False`.
131      - `None` (default) means that the output of the model will be
132          the 4D tensor output of the last convolutional block.
133      - `avg` means that global average pooling
134          will be applied to the output of the
135          last convolutional block, and thus
136          the output of the model will be a 2D tensor.
137      - `max` means that global max pooling will be applied.
138    classes: Optional number of classes to classify images into, only to be
139      specified if `include_top` is True, and if no `weights` argument is
140      specified. Defaults to 1000.
141    **kwargs: For backwards compatibility only.
142
143  Returns:
144    A `tf.keras.Model` instance.
145
146  Raises:
147    ValueError: in case of invalid argument for `weights`,
148      or invalid input shape.
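
  Usage example (a minimal sketch; the headless feature-extraction setup is
  illustrative, and the random array merely stands in for real images):

    import numpy as np
    import tensorflow as tf

    # Headless MobileNet with global average pooling -> 2D feature vectors.
    base = tf.keras.applications.MobileNet(
        include_top=False, pooling='avg', weights='imagenet')
    images = np.random.rand(1, 224, 224, 3) * 255.
    features = base.predict(
        tf.keras.applications.mobilenet.preprocess_input(images))
    # `features` has shape (1, 1024) when alpha=1.0.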
149  """
  if 'layers' in kwargs:
    global layers
    layers = kwargs.pop('layers')
  if kwargs:
    raise ValueError('Unknown argument(s): %s' % (kwargs,))
  if not (weights in {'imagenet', None} or os.path.exists(weights)):
    raise ValueError('The `weights` argument should be either '
                     '`None` (random initialization), `imagenet` '
                     '(pre-training on ImageNet), '
                     'or the path to the weights file to be loaded.')

  if weights == 'imagenet' and include_top and classes != 1000:
    raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                     'as true, `classes` should be 1000')

  # Determine proper input shape and default size.
  if input_shape is None:
    default_size = 224
  else:
    if backend.image_data_format() == 'channels_first':
      rows = input_shape[1]
      cols = input_shape[2]
    else:
      rows = input_shape[0]
      cols = input_shape[1]

    if rows == cols and rows in [128, 160, 192, 224]:
      default_size = rows
    else:
      default_size = 224

  input_shape = imagenet_utils.obtain_input_shape(
      input_shape,
      default_size=default_size,
      min_size=32,
      data_format=backend.image_data_format(),
      require_flatten=include_top,
      weights=weights)

  if backend.image_data_format() == 'channels_last':
    row_axis, col_axis = (0, 1)
  else:
    row_axis, col_axis = (1, 2)
  rows = input_shape[row_axis]
  cols = input_shape[col_axis]

  if weights == 'imagenet':
    if depth_multiplier != 1:
      raise ValueError('If imagenet weights are being loaded, '
                       'depth multiplier must be 1')

    if alpha not in [0.25, 0.50, 0.75, 1.0]:
      raise ValueError('If imagenet weights are being loaded, '
                       'alpha can be one of '
                       '`0.25`, `0.50`, `0.75` or `1.0` only.')

    if rows != cols or rows not in [128, 160, 192, 224]:
      rows = 224
      logging.warning('`input_shape` is undefined or non-square, '
                      'or `rows` is not in [128, 160, 192, 224]. '
                      'Weights for input shape (224, 224) will be '
                      'loaded as the default.')

  if input_tensor is None:
    img_input = layers.Input(shape=input_shape)
  else:
    if not backend.is_keras_tensor(input_tensor):
      img_input = layers.Input(tensor=input_tensor, shape=input_shape)
    else:
      img_input = input_tensor

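  # MobileNet body: a full 3x3 convolution followed by 13 depthwise-separable
  # blocks. Spatial resolution is halved by stride-2 convolutions in the stem
  # and in blocks 2, 4, 6 and 12, while the channel count grows from 32 to
  # 1024 (both scaled by `alpha`).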
  x = _conv_block(img_input, 32, alpha, strides=(2, 2))
  x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)

  x = _depthwise_conv_block(
      x, 128, alpha, depth_multiplier, strides=(2, 2), block_id=2)
  x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)

  x = _depthwise_conv_block(
      x, 256, alpha, depth_multiplier, strides=(2, 2), block_id=4)
  x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)

  x = _depthwise_conv_block(
      x, 512, alpha, depth_multiplier, strides=(2, 2), block_id=6)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)

  x = _depthwise_conv_block(
      x, 1024, alpha, depth_multiplier, strides=(2, 2), block_id=12)
  x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)

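  # The classifier head is a 1x1 convolution applied to the pooled 1x1
  # feature map, which is equivalent to a fully-connected layer over the
  # int(1024 * alpha) channels.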
  if include_top:
    if backend.image_data_format() == 'channels_first':
      shape = (int(1024 * alpha), 1, 1)
    else:
      shape = (1, 1, int(1024 * alpha))

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Reshape(shape, name='reshape_1')(x)
    x = layers.Dropout(dropout, name='dropout')(x)
    x = layers.Conv2D(classes, (1, 1), padding='same', name='conv_preds')(x)
    x = layers.Reshape((classes,), name='reshape_2')(x)
    x = layers.Activation('softmax', name='act_softmax')(x)
  else:
    if pooling == 'avg':
      x = layers.GlobalAveragePooling2D()(x)
    elif pooling == 'max':
      x = layers.GlobalMaxPooling2D()(x)

  # Ensure that the model takes into account
  # any potential predecessors of `input_tensor`.
  if input_tensor is not None:
    inputs = layer_utils.get_source_inputs(input_tensor)
  else:
    inputs = img_input

  # Create model.
  model = training.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows))

  # Load weights.
  if weights == 'imagenet':
    if alpha == 1.0:
      alpha_text = '1_0'
    elif alpha == 0.75:
      alpha_text = '7_5'
    elif alpha == 0.50:
      alpha_text = '5_0'
    else:
      alpha_text = '2_5'

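    # The pretrained files follow the pattern
    # 'mobilenet_<alpha_text>_<rows>_tf[_no_top].h5'; e.g. alpha=0.75 at
    # 192x192 without the classifier resolves to
    # 'mobilenet_7_5_192_tf_no_top.h5' under BASE_WEIGHT_PATH.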
    if include_top:
      model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows)
      weight_path = BASE_WEIGHT_PATH + model_name
      weights_path = data_utils.get_file(
          model_name, weight_path, cache_subdir='models')
    else:
      model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows)
      weight_path = BASE_WEIGHT_PATH + model_name
      weights_path = data_utils.get_file(
          model_name, weight_path, cache_subdir='models')
    model.load_weights(weights_path)
  elif weights is not None:
    model.load_weights(weights)

  return model


def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
  """Adds an initial convolution layer (with batch normalization and relu6).

  Arguments:
    inputs: Input tensor of shape `(rows, cols, 3)` (with `channels_last`
      data format) or `(3, rows, cols)` (with `channels_first` data format).
      It should have exactly 3 input channels, and width and height should
      be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value.
    filters: Integer, the dimensionality of the output space (i.e. the
      number of output filters in the convolution).
    alpha: controls the width of the network.
      - If `alpha` < 1.0, proportionally decreases the number of filters
        in each layer.
      - If `alpha` > 1.0, proportionally increases the number of filters
        in each layer.
      - If `alpha` = 1, the default number of filters from the paper is
        used at each layer.
    kernel: An integer or tuple/list of 2 integers, specifying the width and
      height of the 2D convolution window. Can be a single integer to
      specify the same value for all spatial dimensions.
    strides: An integer or tuple/list of 2 integers, specifying the strides
      of the convolution along the width and height. Can be a single integer
      to specify the same value for all spatial dimensions. Specifying any
      stride value != 1 is incompatible with specifying any `dilation_rate`
      value != 1.

  Input shape:
    4D tensor with shape `(samples, channels, rows, cols)` if
    data_format='channels_first', or 4D tensor with shape
    `(samples, rows, cols, channels)` if data_format='channels_last'.

  Output shape:
    4D tensor with shape `(samples, filters, new_rows, new_cols)` if
    data_format='channels_first', or 4D tensor with shape
    `(samples, new_rows, new_cols, filters)` if data_format='channels_last'.
    `rows` and `cols` values might have changed due to stride.

  Returns:
    Output tensor of block.
  """
  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1
  filters = int(filters * alpha)
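  # Explicit bottom/right zero padding plus a 'valid' convolution: for even
  # input sizes this matches TensorFlow's 'same' padding for a stride-2 3x3
  # convolution, while keeping the padding deterministic across backends.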
  x = layers.ZeroPadding2D(padding=((0, 1), (0, 1)), name='conv1_pad')(inputs)
  x = layers.Conv2D(
      filters,
      kernel,
      padding='valid',
      use_bias=False,
      strides=strides,
      name='conv1')(x)
  x = layers.BatchNormalization(axis=channel_axis, name='conv1_bn')(x)
  return layers.ReLU(6., name='conv1_relu')(x)


def _depthwise_conv_block(inputs,
                          pointwise_conv_filters,
                          alpha,
                          depth_multiplier=1,
                          strides=(1, 1),
                          block_id=1):
  """Adds a depthwise convolution block.

  A depthwise convolution block consists of a depthwise conv,
  batch normalization, relu6, pointwise convolution,
  batch normalization and relu6 activation.

  Arguments:
    inputs: Input tensor of shape `(rows, cols, channels)` (with
      `channels_last` data format) or `(channels, rows, cols)` (with
      `channels_first` data format).
    pointwise_conv_filters: Integer, the dimensionality of the output space
      (i.e. the number of output filters in the pointwise convolution).
    alpha: controls the width of the network.
      - If `alpha` < 1.0, proportionally decreases the number of filters
        in each layer.
      - If `alpha` > 1.0, proportionally increases the number of filters
        in each layer.
      - If `alpha` = 1, the default number of filters from the paper is
        used at each layer.
    depth_multiplier: The number of depthwise convolution output channels
      for each input channel. The total number of depthwise convolution
      output channels will be equal to `filters_in * depth_multiplier`.
    strides: An integer or tuple/list of 2 integers, specifying the strides
      of the convolution along the width and height. Can be a single integer
      to specify the same value for all spatial dimensions. Specifying any
      stride value != 1 is incompatible with specifying any `dilation_rate`
      value != 1.
    block_id: Integer, a unique identifier designating the block number.

  Input shape:
    4D tensor with shape `(batch, channels, rows, cols)` if
    data_format='channels_first', or 4D tensor with shape
    `(batch, rows, cols, channels)` if data_format='channels_last'.

  Output shape:
    4D tensor with shape `(batch, filters, new_rows, new_cols)` if
    data_format='channels_first', or 4D tensor with shape
    `(batch, new_rows, new_cols, filters)` if data_format='channels_last'.
    `rows` and `cols` values might have changed due to stride.

  Returns:
    Output tensor of block.
  """
  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1
  pointwise_conv_filters = int(pointwise_conv_filters * alpha)

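  # `alpha` scales only the pointwise (1x1) filters; the depthwise stage
  # produces `depth_multiplier` output channels per input channel. Stride-2
  # blocks pad explicitly and use 'valid' padding, mirroring `_conv_block`.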
  if strides == (1, 1):
    x = inputs
  else:
    x = layers.ZeroPadding2D(((0, 1), (0, 1)),
                             name='conv_pad_%d' % block_id)(inputs)
  x = layers.DepthwiseConv2D((3, 3),
                             padding='same' if strides == (1, 1) else 'valid',
                             depth_multiplier=depth_multiplier,
                             strides=strides,
                             use_bias=False,
                             name='conv_dw_%d' % block_id)(x)
  x = layers.BatchNormalization(
      axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x)
  x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x)

  x = layers.Conv2D(
      pointwise_conv_filters, (1, 1),
      padding='same',
      use_bias=False,
      strides=(1, 1),
      name='conv_pw_%d' % block_id)(x)
  x = layers.BatchNormalization(
      axis=channel_axis, name='conv_pw_%d_bn' % block_id)(x)
  return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x)


@keras_export('keras.applications.mobilenet.preprocess_input')
def preprocess_input(x, data_format=None):
  """Preprocesses images for MobileNet: scales pixel values to [-1, 1]."""
  return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf')


@keras_export('keras.applications.mobilenet.decode_predictions')
def decode_predictions(preds, top=5):
  """Decodes the `top` ImageNet classes from a batch of predictions."""
  return imagenet_utils.decode_predictions(preds, top=top)
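
# A hedged end-to-end sketch of the two helpers above (illustration only;
# assumes a TF 2.x installation and network access to download weights):
#
#   import numpy as np
#   import tensorflow as tf
#
#   model = tf.keras.applications.MobileNet(weights='imagenet')
#   x = tf.keras.applications.mobilenet.preprocess_input(
#       np.random.rand(1, 224, 224, 3) * 255.)
#   print(tf.keras.applications.mobilenet.decode_predictions(
#       model.predict(x), top=3))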