# -*- coding: utf-8 -*-
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# pylint: disable=g-short-docstring-punctuation
"""Higher level ops for building layers."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools
import six

from tensorflow.contrib.framework.python.ops import add_arg_scope
from tensorflow.contrib.framework.python.ops import variables
from tensorflow.contrib.layers.python.layers import initializers
from tensorflow.contrib.layers.python.layers import utils
from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import function
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_shape
from tensorflow.python.keras.engine import input_spec
from tensorflow.python.layers import base
from tensorflow.python.layers import convolutional as convolutional_layers
from tensorflow.python.layers import core as core_layers
from tensorflow.python.layers import normalization as normalization_layers
from tensorflow.python.layers import pooling as pooling_layers
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import standard_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables as tf_variables
from tensorflow.python.training import moving_averages

# TODO(b/28426988): Replace legacy_* fns migrated from slim.
# TODO(b/28426988): Remove legacy_* when all uses have migrated to new API.
__all__ = [
    'avg_pool2d', 'avg_pool3d', 'batch_norm', 'bias_add', 'conv1d', 'conv2d',
    'conv3d', 'conv2d_in_plane', 'conv2d_transpose', 'conv3d_transpose',
    'convolution', 'convolution1d', 'convolution2d', 'convolution2d_in_plane',
    'convolution2d_transpose', 'convolution3d', 'convolution3d_transpose',
    'dense_to_sparse', 'dropout', 'elu', 'flatten', 'fully_connected', 'GDN',
    'gdn', 'images_to_sequence', 'layer_norm', 'linear', 'pool', 'max_pool2d',
    'max_pool3d', 'one_hot_encoding', 'relu', 'relu6', 'repeat',
    'scale_gradient', 'separable_conv2d', 'separable_convolution2d',
    'sequence_to_images', 'softmax', 'spatial_softmax', 'stack', 'unit_norm',
    'legacy_fully_connected', 'legacy_linear', 'legacy_relu', 'maxout'
]

DATA_FORMAT_NCHW = 'NCHW'
DATA_FORMAT_NHWC = 'NHWC'
DATA_FORMAT_NCDHW = 'NCDHW'
DATA_FORMAT_NDHWC = 'NDHWC'


@add_arg_scope
def avg_pool2d(inputs,
               kernel_size,
               stride=2,
               padding='VALID',
               data_format=DATA_FORMAT_NHWC,
               outputs_collections=None,
               scope=None):
  """Adds a 2D average pooling op.

  It is assumed that the pooling is done per image but not in batch or channels.

  Args:
    inputs: A 4-D tensor of shape `[batch_size, height, width, channels]` if
      `data_format` is `NHWC`, and `[batch_size, channels, height, width]` if
      `data_format` is `NCHW`.
    kernel_size: A list of length 2: [kernel_height, kernel_width] of the
      pooling kernel over which the op is computed. Can be an int if both
      values are the same.
    stride: A list of length 2: [stride_height, stride_width].
      Can be an int if both strides are the same. Note that presently
      both strides must have the same value.
    padding: The padding method, either 'VALID' or 'SAME'.
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    outputs_collections: The collections to which the outputs are added.
    scope: Optional scope for name_scope.

  Returns:
    A `Tensor` representing the results of the pooling operation.

  Raises:
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
  """
  if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
    raise ValueError('data_format has to be either NCHW or NHWC.')
  with ops.name_scope(scope, 'AvgPool2D', [inputs]) as sc:
    inputs = ops.convert_to_tensor(inputs)
    df = ('channels_first'
          if data_format and data_format.startswith('NC') else 'channels_last')
    layer = pooling_layers.AveragePooling2D(
        pool_size=kernel_size,
        strides=stride,
        padding=padding,
        data_format=df,
        _scope=sc)
    outputs = layer.apply(inputs)
    return utils.collect_named_outputs(outputs_collections, sc, outputs)
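

# Illustrative usage sketch (not part of the original API): how `avg_pool2d`
# might be applied to an NHWC feature map. Wrapped in a private helper so
# nothing executes at import time; the shapes below are hypothetical.
def _example_avg_pool2d_usage():
  # A hypothetical batch of 8 RGB images of size 32x32 (NHWC).
  images = array_ops.ones([8, 32, 32, 3])
  # 2x2 average pooling with stride 2 halves the spatial dimensions: the
  # result has shape [8, 16, 16, 3].
  return avg_pool2d(images, kernel_size=[2, 2], stride=2, padding='VALID')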


@add_arg_scope
def avg_pool3d(inputs,
               kernel_size,
               stride=2,
               padding='VALID',
               data_format=DATA_FORMAT_NDHWC,
               outputs_collections=None,
               scope=None):
  """Adds a 3D average pooling op.

  It is assumed that the pooling is done per image but not in batch or channels.

  Args:
    inputs: A 5-D tensor of shape `[batch_size, depth, height, width, channels]`
      if `data_format` is `NDHWC`, and `[batch_size, channels, depth, height,
      width]` if `data_format` is `NCDHW`.
    kernel_size: A list of length 3: [kernel_depth, kernel_height, kernel_width]
      of the pooling kernel over which the op is computed. Can be an int if all
      values are the same.
    stride: A list of length 3: [stride_depth, stride_height, stride_width].
      Can be an int if all strides are the same. Note that presently all
      strides must have the same value.
    padding: The padding method, either 'VALID' or 'SAME'.
    data_format: A string. `NDHWC` (default) and `NCDHW` are supported.
    outputs_collections: The collections to which the outputs are added.
    scope: Optional scope for name_scope.

  Returns:
    A `Tensor` representing the results of the pooling operation.

  Raises:
    ValueError: If `data_format` is neither `NDHWC` nor `NCDHW`.
  """
  if data_format not in (DATA_FORMAT_NCDHW, DATA_FORMAT_NDHWC):
    raise ValueError('data_format has to be either NCDHW or NDHWC.')
  with ops.name_scope(scope, 'AvgPool3D', [inputs]) as sc:
    inputs = ops.convert_to_tensor(inputs)
    df = ('channels_first'
          if data_format and data_format.startswith('NC') else 'channels_last')
    layer = pooling_layers.AveragePooling3D(
        pool_size=kernel_size,
        strides=stride,
        padding=padding,
        data_format=df,
        _scope=sc)
    outputs = layer.apply(inputs)
    return utils.collect_named_outputs(outputs_collections, sc, outputs)


def _fused_batch_norm(inputs,
                      decay=0.999,
                      center=True,
                      scale=False,
                      epsilon=0.001,
                      activation_fn=None,
                      param_initializers=None,
                      param_regularizers=None,
                      updates_collections=ops.GraphKeys.UPDATE_OPS,
                      is_training=True,
                      reuse=None,
                      variables_collections=None,
                      outputs_collections=None,
                      trainable=True,
                      data_format=DATA_FORMAT_NHWC,
                      zero_debias_moving_mean=False,
                      scope=None):
  """Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167.

  "Batch Normalization: Accelerating Deep Network Training by Reducing
  Internal Covariate Shift"

  Sergey Ioffe, Christian Szegedy

  Can be used as a normalizer function for conv2d and fully_connected.

  Note: when training, the moving_mean and moving_variance need to be updated.
  By default the update ops are placed in `tf.GraphKeys.UPDATE_OPS`, so they
  need to be added as a dependency to the `train_op`. For example:

  ```python
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss)
  ```

  One can set updates_collections=None to force the updates in place, but that
  can have a speed penalty, especially in distributed settings.

  Args:
    inputs: A tensor with 2 or more dimensions, where the first dimension has
      `batch_size`. The normalization is over all but the last dimension if
      `data_format` is `NHWC` and the second dimension if `data_format` is
      `NCHW`.
    decay: Decay for the moving average. Reasonable values for `decay` are close
      to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9, etc.
      Lower `decay` value (recommend trying `decay`=0.9) if model experiences
      reasonably good training performance but poor validation and/or test
      performance.
    center: If True, add offset of `beta` to normalized tensor. If False,
      `beta` is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is
      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
      disabled since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta, gamma, moving mean and
      moving variance.
    param_regularizers: Optional regularizer for beta and gamma.
    updates_collections: Collections to collect the update ops for computation.
      The updates_ops need to be executed with the train_op.
      If None, a control dependency would be added to make sure the updates are
      computed in place.
    is_training: Whether or not the layer is in training mode. In training mode
      it would accumulate the statistics of the moments into `moving_mean` and
      `moving_variance` using an exponential moving average with the given
      `decay`. When it is not in training mode then it would use the values of
      the `moving_mean` and the `moving_variance`.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    zero_debias_moving_mean: Use zero_debias for moving_mean.
    scope: Optional scope for `variable_scope`.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If the rank of `inputs` is neither 2 nor 4.
    ValueError: If rank or `C` dimension of `inputs` is undefined.
  """
  if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
    raise ValueError('data_format has to be either NCHW or NHWC.')
  with variable_scope.variable_scope(
      scope, 'BatchNorm', [inputs], reuse=reuse) as sc:
    inputs = ops.convert_to_tensor(inputs)
    original_shape = inputs.get_shape()
    original_inputs = inputs
    original_rank = original_shape.ndims
    if original_rank is None:
      raise ValueError('Inputs %s has undefined rank' % inputs.name)
    elif original_rank not in [2, 4]:
      raise ValueError('Inputs %s has unsupported rank.'
                       ' Expected 2 or 4 but got %d' % (inputs.name,
                                                        original_rank))
    if original_rank == 2:
      channels = inputs.get_shape().dims[-1].value
      if channels is None:
        raise ValueError('`C` dimension must be known but is None')
      new_shape = [-1, 1, 1, channels]
      if data_format == DATA_FORMAT_NCHW:
        new_shape = [-1, channels, 1, 1]
      inputs = array_ops.reshape(inputs, new_shape)
    inputs_shape = inputs.get_shape()
    if data_format == DATA_FORMAT_NHWC:
      params_shape = inputs_shape[-1:]
    else:
      params_shape = inputs_shape[1:2]
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined `C` dimension %s.' %
                       (inputs.name, params_shape))

    # Allocate parameters for the beta and gamma of the normalization.
    beta_collections = utils.get_variable_collections(variables_collections,
                                                      'beta')
    # Float32 required to avoid precision-loss when using fp16 input/output
    variable_dtype = dtypes.float32
    if not param_initializers:
      param_initializers = {}
    if not param_regularizers:
      param_regularizers = {}
    beta_regularizer = param_regularizers.get('beta')
    gamma_regularizer = param_regularizers.get('gamma')

    if center:
      beta_initializer = param_initializers.get('beta',
                                                init_ops.zeros_initializer())
      beta = variables.model_variable(
          'beta',
          shape=params_shape,
          dtype=variable_dtype,
          initializer=beta_initializer,
          regularizer=beta_regularizer,
          collections=beta_collections,
          trainable=trainable)
    else:
      beta = array_ops.constant(0.0, dtype=variable_dtype, shape=params_shape)

    if scale:
      gamma_collections = utils.get_variable_collections(
          variables_collections, 'gamma')
      gamma_initializer = param_initializers.get('gamma',
                                                 init_ops.ones_initializer())
      gamma = variables.model_variable(
          'gamma',
          shape=params_shape,
          dtype=variable_dtype,
          initializer=gamma_initializer,
          regularizer=gamma_regularizer,
          collections=gamma_collections,
          trainable=trainable)
    else:
      gamma = array_ops.constant(1.0, dtype=variable_dtype, shape=params_shape)

    # Create moving_mean and moving_variance variables and add them to the
    # appropriate collections. We disable variable partitioning while creating
    # them, because assign_moving_average is not yet supported for partitioned
    # variables (this needs to be handled carefully, as it may break
    # the checkpoint backward compatibility).
    with variable_scope.variable_scope(
        variable_scope.get_variable_scope()) as local_scope:
      local_scope.set_partitioner(None)
      moving_mean_collections = utils.get_variable_collections(
          variables_collections, 'moving_mean')
      moving_mean_initializer = param_initializers.get(
          'moving_mean', init_ops.zeros_initializer())
      moving_mean = variables.model_variable(
          'moving_mean',
          shape=params_shape,
          dtype=variable_dtype,
          initializer=moving_mean_initializer,
          trainable=False,
          collections=moving_mean_collections)
      moving_variance_collections = utils.get_variable_collections(
          variables_collections, 'moving_variance')
      moving_variance_initializer = param_initializers.get(
          'moving_variance', init_ops.ones_initializer())
      moving_variance = variables.model_variable(
          'moving_variance',
          shape=params_shape,
          dtype=variable_dtype,
          initializer=moving_variance_initializer,
          trainable=False,
          collections=moving_variance_collections)

    def _fused_batch_norm_training():
      return nn.fused_batch_norm(
          inputs, gamma, beta, epsilon=epsilon, data_format=data_format)

    def _fused_batch_norm_inference():
      return nn.fused_batch_norm(
          inputs,
          gamma,
          beta,
          mean=moving_mean,
          variance=moving_variance,
          epsilon=epsilon,
          is_training=False,
          data_format=data_format)

    outputs, mean, variance = utils.smart_cond(
        is_training, _fused_batch_norm_training, _fused_batch_norm_inference)

    # If `is_training` doesn't have a constant value, because it is a `Tensor`,
    # a `Variable` or `Placeholder` then is_training_value will be None and
    # `need_updates` will be true.
    is_training_value = utils.constant_value(is_training)
    need_updates = is_training_value is None or is_training_value
    if need_updates:
      if updates_collections is None:
        no_updates = lambda: outputs

        def _force_updates():
          """Internal function forces updates moving_vars if is_training."""
          update_moving_mean = moving_averages.assign_moving_average(
              moving_mean, mean, decay, zero_debias=zero_debias_moving_mean)
          update_moving_variance = moving_averages.assign_moving_average(
              moving_variance, variance, decay, zero_debias=False)
          with ops.control_dependencies(
              [update_moving_mean, update_moving_variance]):
            return array_ops.identity(outputs)

        outputs = utils.smart_cond(is_training, _force_updates, no_updates)
      else:
        moving_vars_fn = lambda: (moving_mean, moving_variance)

        def _delay_updates():
          """Internal function that delay updates moving_vars if is_training."""
          update_moving_mean = moving_averages.assign_moving_average(
              moving_mean, mean, decay, zero_debias=zero_debias_moving_mean)
          update_moving_variance = moving_averages.assign_moving_average(
              moving_variance, variance, decay, zero_debias=False)
          return update_moving_mean, update_moving_variance

        update_mean, update_variance = utils.smart_cond(
            is_training, _delay_updates, moving_vars_fn)
        ops.add_to_collections(updates_collections, update_mean)
        ops.add_to_collections(updates_collections, update_variance)

    outputs.set_shape(inputs_shape)
    if original_shape.ndims == 2:
      outputs = array_ops.reshape(outputs, array_ops.shape(original_inputs))
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)


@add_arg_scope
def batch_norm(inputs,
               decay=0.999,
               center=True,
               scale=False,
               epsilon=0.001,
               activation_fn=None,
               param_initializers=None,
               param_regularizers=None,
               updates_collections=ops.GraphKeys.UPDATE_OPS,
               is_training=True,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               batch_weights=None,
               fused=None,
               data_format=DATA_FORMAT_NHWC,
               zero_debias_moving_mean=False,
               scope=None,
               renorm=False,
               renorm_clipping=None,
               renorm_decay=0.99,
               adjustment=None):
  """Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167.

  "Batch Normalization: Accelerating Deep Network Training by Reducing
  Internal Covariate Shift"

  Sergey Ioffe, Christian Szegedy

  Can be used as a normalizer function for conv2d and fully_connected. The
  normalization is over all but the last dimension if `data_format` is `NHWC`
  and all but the second dimension if `data_format` is `NCHW`. In case of a 2D
  tensor this corresponds to the batch dimension, while in case of a 4D tensor
  this corresponds to the batch and space dimensions.

  Note: when training, the moving_mean and moving_variance need to be updated.
  By default the update ops are placed in `tf.GraphKeys.UPDATE_OPS`, so they
  need to be added as a dependency to the `train_op`. For example:

  ```python
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss)
  ```

  One can set updates_collections=None to force the updates in place, but that
  can have a speed penalty, especially in distributed settings.

  Args:
    inputs: A tensor with 2 or more dimensions, where the first dimension has
      `batch_size`. The normalization is over all but the last dimension if
      `data_format` is `NHWC` and the second dimension if `data_format` is
      `NCHW`.
    decay: Decay for the moving average. Reasonable values for `decay` are close
      to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9, etc.
      Lower `decay` value (recommend trying `decay`=0.9) if model experiences
      reasonably good training performance but poor validation and/or test
      performance. Try zero_debias_moving_mean=True for improved stability.
    center: If True, add offset of `beta` to normalized tensor. If False, `beta`
      is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is
      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
      disabled since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta, gamma, moving mean and
      moving variance.
    param_regularizers: Optional regularizer for beta and gamma.
    updates_collections: Collections to collect the update ops for computation.
      The updates_ops need to be executed with the train_op.
      If None, a control dependency would be added to make sure the updates are
      computed in place.
    is_training: Whether or not the layer is in training mode. In training mode
      it would accumulate the statistics of the moments into `moving_mean` and
      `moving_variance` using an exponential moving average with the given
      `decay`. When it is not in training mode then it would use the values of
      the `moving_mean` and the `moving_variance`.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    batch_weights: An optional tensor of shape `[batch_size]`,
      containing a frequency weight for each batch item. If present,
      then the batch normalization uses weighted mean and
      variance. (This can be used to correct for bias in training
      example selection.)
    fused: if `None` or `True`, use a faster, fused implementation if possible.
      If `False`, use the system recommended implementation.
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    zero_debias_moving_mean: Use zero_debias for moving_mean. It creates a new
      pair of variables 'moving_mean/biased' and 'moving_mean/local_step'.
    scope: Optional scope for `variable_scope`.
    renorm: Whether to use Batch Renormalization
      (https://arxiv.org/abs/1702.03275). This adds extra variables during
      training. The inference is the same for either value of this parameter.
    renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to
      scalar `Tensors` used to clip the renorm correction. The correction
      `(r, d)` is used as `corrected_value = normalized_value * r + d`, with
      `r` clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin,
      dmax are set to inf, 0, inf, respectively.
    renorm_decay: Momentum used to update the moving means and standard
      deviations with renorm. Unlike `momentum`, this affects training
      and should be neither too small (which would add noise) nor too large
      (which would give stale estimates). Note that `decay` is still applied
      to get the means and variances for inference.
    adjustment: A function taking the `Tensor` containing the (dynamic) shape of
      the input tensor and returning a pair (scale, bias) to apply to the
      normalized values (before gamma and beta), only during training. For
      example,
        `adjustment = lambda shape: (
          tf.random_uniform(shape[-1:], 0.93, 1.07),
          tf.random_uniform(shape[-1:], -0.1, 0.1))`
      will scale the normalized value by up to 7% up or down, then shift the
      result by up to 0.1 (with independent scaling and bias for each feature
      but shared across all examples), and finally apply gamma and/or beta. If
      `None`, no adjustment is applied.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If rank or channels dimension of `inputs` is undefined.
  """
  if fused is None:
    fused = True

  # Only use _fused_batch_norm if all of the following three
  # conditions are true:
  # (1) fused is set True;
  # (2) it is possible to use (currently it doesn't support batch weights,
  #   renorm, and the case when rank is neither 2 nor 4);
  # (3) it is used with zero_debias_moving_mean, or an input shape of rank 2,
  #   or non-default updates_collections (not implemented in
  #   normalization_layers.BatchNormalization yet); otherwise use the fused
  #   implementation in normalization_layers.BatchNormalization.
  inputs = ops.convert_to_tensor(inputs)
  rank = inputs.get_shape().ndims
  possible_to_fuse = (
      batch_weights is None and not renorm and rank in [2, 4] and
      adjustment is None)
  if fused and possible_to_fuse and (
      zero_debias_moving_mean or rank == 2 or
      updates_collections is not ops.GraphKeys.UPDATE_OPS):
    return _fused_batch_norm(
        inputs,
        decay=decay,
        center=center,
        scale=scale,
        epsilon=epsilon,
        activation_fn=activation_fn,
        param_initializers=param_initializers,
        param_regularizers=param_regularizers,
        updates_collections=updates_collections,
        is_training=is_training,
        reuse=reuse,
        variables_collections=variables_collections,
        outputs_collections=outputs_collections,
        trainable=trainable,
        data_format=data_format,
        zero_debias_moving_mean=zero_debias_moving_mean,
        scope=scope)

  if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
    raise ValueError('data_format has to be either NCHW or NHWC.')

  layer_variable_getter = _build_variable_getter()
  with variable_scope.variable_scope(
      scope,
      'BatchNorm', [inputs],
      reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = ops.convert_to_tensor(inputs)

    # Determine whether we can use the core layer class.
    if (batch_weights is None and
        updates_collections is ops.GraphKeys.UPDATE_OPS and
        not zero_debias_moving_mean):
      # Use the core layer class.
      axis = 1 if data_format == DATA_FORMAT_NCHW else -1
      if not param_initializers:
        param_initializers = {}
      beta_initializer = param_initializers.get('beta',
                                                init_ops.zeros_initializer())
      gamma_initializer = param_initializers.get('gamma',
                                                 init_ops.ones_initializer())
      moving_mean_initializer = param_initializers.get(
          'moving_mean', init_ops.zeros_initializer())
      moving_variance_initializer = param_initializers.get(
          'moving_variance', init_ops.ones_initializer())
      if not param_regularizers:
        param_regularizers = {}
      beta_regularizer = param_regularizers.get('beta')
      gamma_regularizer = param_regularizers.get('gamma')
      layer = normalization_layers.BatchNormalization(
          axis=axis,
          momentum=decay,
          epsilon=epsilon,
          center=center,
          scale=scale,
          beta_initializer=beta_initializer,
          gamma_initializer=gamma_initializer,
          moving_mean_initializer=moving_mean_initializer,
          moving_variance_initializer=moving_variance_initializer,
          beta_regularizer=beta_regularizer,
          gamma_regularizer=gamma_regularizer,
          trainable=trainable,
          renorm=renorm,
          renorm_clipping=renorm_clipping,
          renorm_momentum=renorm_decay,
          adjustment=adjustment,
          name=sc.name,
          _scope=sc,
          _reuse=reuse,
          fused=fused)
      outputs = layer.apply(inputs, training=is_training)

      # Add variables to collections.
      _add_variable_to_collections(layer.moving_mean, variables_collections,
                                   'moving_mean')
      _add_variable_to_collections(layer.moving_variance, variables_collections,
                                   'moving_variance')
      if layer.beta is not None:
        _add_variable_to_collections(layer.beta, variables_collections, 'beta')
      if layer.gamma is not None:
        _add_variable_to_collections(layer.gamma, variables_collections,
                                     'gamma')

      if activation_fn is not None:
        outputs = activation_fn(outputs)
      return utils.collect_named_outputs(outputs_collections, sc.name, outputs)

    # Not supported by layer class: batch_weights argument,
    # and custom updates_collections. In that case, use the legacy BN
    # implementation.
    # Custom updates collections are not supported because the update logic
    # is different in this case, in particular w.r.t. "forced updates" and
    # update op reuse.
    if renorm:
      raise ValueError('renorm is not supported with batch_weights, '
                       'updates_collections or zero_debias_moving_mean')
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    if inputs_rank is None:
      raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    dtype = inputs.dtype.base_dtype
    if batch_weights is not None:
      batch_weights = ops.convert_to_tensor(batch_weights)
      inputs_shape[0:1].assert_is_compatible_with(batch_weights.get_shape())
      # Reshape batch weight values so they broadcast across inputs.
      nshape = [-1] + [1 for _ in range(inputs_rank - 1)]
      batch_weights = array_ops.reshape(batch_weights, nshape)

    if data_format == DATA_FORMAT_NCHW:
      moments_axes = [0] + list(range(2, inputs_rank))
      params_shape = inputs_shape[1:2]
      # For NCHW format, rather than relying on implicit broadcasting, we
      # explicitly reshape the params to params_shape_broadcast when computing
      # the moments and the batch normalization.
      params_shape_broadcast = list(
          [1, inputs_shape.dims[1].value] + [1 for _ in range(2, inputs_rank)])
    else:
      moments_axes = list(range(inputs_rank - 1))
      params_shape = inputs_shape[-1:]
      params_shape_broadcast = None
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined channels dimension %s.' %
                       (inputs.name, params_shape))

    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    if not param_initializers:
      param_initializers = {}
    if center:
      beta_collections = utils.get_variable_collections(variables_collections,
                                                        'beta')
      beta_initializer = param_initializers.get('beta',
                                                init_ops.zeros_initializer())
      beta = variables.model_variable(
          'beta',
          shape=params_shape,
          dtype=dtype,
          initializer=beta_initializer,
          collections=beta_collections,
          trainable=trainable)
    if scale:
      gamma_collections = utils.get_variable_collections(
          variables_collections, 'gamma')
      gamma_initializer = param_initializers.get('gamma',
                                                 init_ops.ones_initializer())
      gamma = variables.model_variable(
          'gamma',
          shape=params_shape,
          dtype=dtype,
          initializer=gamma_initializer,
          collections=gamma_collections,
          trainable=trainable)

    # Create moving_mean and moving_variance variables and add them to the
    # appropriate collections. We disable variable partitioning while creating
    # them, because assign_moving_average is not yet supported for partitioned
    # variables (this needs to be handled carefully, as it may break
    # the checkpoint backward compatibility).
    with variable_scope.variable_scope(
        variable_scope.get_variable_scope()) as local_scope:
      local_scope.set_partitioner(None)
      moving_mean_collections = utils.get_variable_collections(
          variables_collections, 'moving_mean')
      moving_mean_initializer = param_initializers.get(
          'moving_mean', init_ops.zeros_initializer())
      moving_mean = variables.model_variable(
          'moving_mean',
          shape=params_shape,
          dtype=dtype,
          initializer=moving_mean_initializer,
          trainable=False,
          collections=moving_mean_collections)
      moving_variance_collections = utils.get_variable_collections(
          variables_collections, 'moving_variance')
      moving_variance_initializer = param_initializers.get(
          'moving_variance', init_ops.ones_initializer())
      moving_variance = variables.model_variable(
          'moving_variance',
          shape=params_shape,
          dtype=dtype,
          initializer=moving_variance_initializer,
          trainable=False,
          collections=moving_variance_collections)

    # If `is_training` doesn't have a constant value, because it is a `Tensor`,
    # a `Variable` or `Placeholder` then is_training_value will be None and
    # `needs_moments` will be true.
    is_training_value = utils.constant_value(is_training)
    need_moments = is_training_value is None or is_training_value
    if need_moments:
      # Calculate the moments based on the individual batch.
      if batch_weights is None:
        if data_format == DATA_FORMAT_NCHW:
          mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)
          mean = array_ops.reshape(mean, [-1])
          variance = array_ops.reshape(variance, [-1])
        else:
          mean, variance = nn.moments(inputs, moments_axes)
      else:
        if data_format == DATA_FORMAT_NCHW:
          mean, variance = nn.weighted_moments(
              inputs, moments_axes, batch_weights, keepdims=True)
          mean = array_ops.reshape(mean, [-1])
          variance = array_ops.reshape(variance, [-1])
        else:
          mean, variance = nn.weighted_moments(inputs, moments_axes,
                                               batch_weights)

      moving_vars_fn = lambda: (moving_mean, moving_variance)
      if updates_collections is None:

        def _force_updates():
          """Internal function forces updates moving_vars if is_training."""
          update_moving_mean = moving_averages.assign_moving_average(
              moving_mean, mean, decay, zero_debias=zero_debias_moving_mean)
          update_moving_variance = moving_averages.assign_moving_average(
              moving_variance, variance, decay, zero_debias=False)
          with ops.control_dependencies(
              [update_moving_mean, update_moving_variance]):
            return array_ops.identity(mean), array_ops.identity(variance)

        mean, variance = utils.smart_cond(is_training, _force_updates,
                                          moving_vars_fn)
      else:

        def _delay_updates():
          """Internal function that delay updates moving_vars if is_training."""
          update_moving_mean = moving_averages.assign_moving_average(
              moving_mean, mean, decay, zero_debias=zero_debias_moving_mean)
          update_moving_variance = moving_averages.assign_moving_average(
              moving_variance, variance, decay, zero_debias=False)
          return update_moving_mean, update_moving_variance

        update_mean, update_variance = utils.smart_cond(
            is_training, _delay_updates, moving_vars_fn)
        ops.add_to_collections(updates_collections, update_mean)
        ops.add_to_collections(updates_collections, update_variance)
        # Use computed moments during training and moving_vars otherwise.
        vars_fn = lambda: (mean, variance)
        mean, variance = utils.smart_cond(is_training, vars_fn, moving_vars_fn)
    else:
      mean, variance = moving_mean, moving_variance
    if data_format == DATA_FORMAT_NCHW:
      mean = array_ops.reshape(mean, params_shape_broadcast)
      variance = array_ops.reshape(variance, params_shape_broadcast)
      if beta is not None:
        beta = array_ops.reshape(beta, params_shape_broadcast)
      if gamma is not None:
        gamma = array_ops.reshape(gamma, params_shape_broadcast)

    # Compute batch_normalization.
    outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma,
                                     epsilon)
    outputs.set_shape(inputs_shape)
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
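

# Illustrative usage sketch (not part of the original API): `batch_norm` is
# typically passed as the `normalizer_fn` of a convolutional layer, with the
# training/inference switch supplied through `normalizer_params`. Wrapped in a
# private helper so nothing runs at import time; shapes, the `is_training`
# flag, and the chosen `decay` are hypothetical.
def _example_batch_norm_as_normalizer(images, is_training):
  # `images` is assumed to be a 4-D NHWC tensor. `convolution2d` (defined
  # later in this module) then applies batch normalization to its output
  # instead of adding biases. When training, remember to run the update ops
  # collected in `tf.GraphKeys.UPDATE_OPS`, as described in the docstring
  # above.
  return convolution2d(
      images,
      num_outputs=16,
      kernel_size=[3, 3],
      normalizer_fn=batch_norm,
      normalizer_params={'is_training': is_training, 'decay': 0.9})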


@add_arg_scope
def bias_add(inputs,
             activation_fn=None,
             initializer=init_ops.zeros_initializer(),
             regularizer=None,
             reuse=None,
             variables_collections=None,
             outputs_collections=None,
             trainable=True,
             data_format=DATA_FORMAT_NHWC,
             scope=None):
  """Adds a bias to the inputs.

  Can be used as a normalizer function for conv2d and fully_connected.

  Args:
    inputs: A tensor with at least rank 2 and a known value for the last
      dimension, e.g. `[batch_size, depth]`, `[None, None, None, depth]`.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    initializer: An initializer for the bias, defaults to 0.
    regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    data_format: A string. 'NHWC' and 'NCHW' are supported.
    scope: Optional scope for variable_scope.

  Returns:
    A tensor representing the result of adding biases to the inputs.

  Raises:
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If `data_format` is `NCHW` and rank of `inputs` is not 4.
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If rank or `C` dimension of `inputs` is undefined.
  """
  if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
    raise ValueError('data_format has to be either NCHW or NHWC.')
  with variable_scope.variable_scope(
      scope, 'BiasAdd', [inputs], reuse=reuse) as sc:
    inputs = ops.convert_to_tensor(inputs)
    dtype = inputs.dtype.base_dtype
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    if inputs_rank is None:
      raise ValueError('Dims of shape must be known but is None')
    elif inputs_rank != 4 and data_format == DATA_FORMAT_NCHW:
      raise ValueError('Data format NCHW only supports 4D Tensor')
    axis = 1 if data_format == DATA_FORMAT_NCHW else -1
    num_features = inputs_shape.dims[axis].value
    if num_features is None:
      raise ValueError('`C` dimension must be known but is None')
    biases_collections = utils.get_variable_collections(variables_collections,
                                                        'biases')
    biases = variables.model_variable(
        'biases',
        shape=[
            num_features,
        ],
        dtype=dtype,
        initializer=initializer,
        regularizer=regularizer,
        collections=biases_collections,
        trainable=trainable)
    outputs = nn.bias_add(inputs, biases, data_format=data_format)
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
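

# Illustrative usage sketch (not part of the original API): `bias_add` adds a
# learned per-channel bias, optionally followed by an activation. Wrapped in a
# private helper so nothing runs at import time; the shape is hypothetical.
def _example_bias_add_usage():
  # A hypothetical batch of 4 feature vectors with 10 channels each.
  features = array_ops.ones([4, 10])
  # Creates a `biases` variable of shape [10], adds it to every row, and then
  # applies a ReLU non-linearity.
  return bias_add(features, activation_fn=nn.relu)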


# TODO(jbms): change `rate` parameter to `dilation_rate` for consistency with
# underlying op.
@add_arg_scope
def convolution(inputs,
                num_outputs,
                kernel_size,
                stride=1,
                padding='SAME',
                data_format=None,
                rate=1,
                activation_fn=nn.relu,
                normalizer_fn=None,
                normalizer_params=None,
                weights_initializer=initializers.xavier_initializer(),
                weights_regularizer=None,
                biases_initializer=init_ops.zeros_initializer(),
                biases_regularizer=None,
                reuse=None,
                variables_collections=None,
                outputs_collections=None,
                trainable=True,
                scope=None,
                conv_dims=None):
  """Adds an N-D convolution followed by an optional batch_norm layer.

  It is required that 1 <= N <= 3.

  `convolution` creates a variable called `weights`, representing the
  convolutional kernel, that is convolved (actually cross-correlated) with the
  `inputs` to produce a `Tensor` of activations. If a `normalizer_fn` is
  provided (such as `batch_norm`), it is then applied. Otherwise, if
  `normalizer_fn` is None and a `biases_initializer` is provided then a `biases`
  variable would be created and added to the activations. Finally, if
  `activation_fn` is not `None`, it is applied to the activations as well.

  Performs atrous convolution with input stride/dilation rate equal to `rate`
  if a value > 1 for any dimension of `rate` is specified. In this case
  `stride` values != 1 are not supported.

  Args:
    inputs: A Tensor of rank N+2 of shape
      `[batch_size] + input_spatial_shape + [in_channels]` if data_format does
      not start with "NC" (default), or
      `[batch_size, in_channels] + input_spatial_shape` if data_format starts
      with "NC".
    num_outputs: Integer, the number of output filters.
    kernel_size: A sequence of N positive integers specifying the spatial
      dimensions of the filters. Can be a single integer to specify the same
      value for all spatial dimensions.
    stride: A sequence of N positive integers specifying the stride at which to
      compute output. Can be a single integer to specify the same value for all
      spatial dimensions. Specifying any `stride` value != 1 is incompatible
      with specifying any `rate` value != 1.
    padding: One of `"VALID"` or `"SAME"`.
    data_format: A string or None. Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if `data_format`
      does not start with "NC"), or the second dimension (if `data_format`
      starts with "NC"). For N=1, the valid values are "NWC" (default) and
      "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW".
      For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    rate: A sequence of N positive integers specifying the dilation rate to use
      for atrous convolution. Can be a single integer to specify the same
      value for all spatial dimensions. Specifying any `rate` value != 1 is
      incompatible with specifying any `stride` value != 1.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      default set to None for no normalizer function
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collection per variable.
    outputs_collections: Collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_scope`.
    conv_dims: Optional convolution dimensionality, when set it would use the
      corresponding convolution (e.g. 2 for Conv 2D, 3 for Conv 3D, ..). When
      left to None it would select the convolution dimensionality based on
      the input rank (i.e. Conv ND, with N = input_rank - 2).

  Returns:
    A tensor representing the output of the operation.

  Raises:
    ValueError: If `data_format` is invalid.
    ValueError: If both `rate` and `stride` are not uniformly 1.
  """
  if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW']:
    raise ValueError('Invalid data_format: %r' % (data_format,))

  layer_variable_getter = _build_variable_getter({
      'bias': 'biases',
      'kernel': 'weights'
  })

  with variable_scope.variable_scope(
      scope, 'Conv', [inputs], reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = ops.convert_to_tensor(inputs)
    input_rank = inputs.get_shape().ndims

    if conv_dims is not None and conv_dims + 2 != input_rank:
      raise ValueError('Convolution expects input with rank %d, got %d' %
                       (conv_dims + 2, input_rank))
    if input_rank == 3:
      layer_class = convolutional_layers.Convolution1D
    elif input_rank == 4:
      layer_class = convolutional_layers.Convolution2D
    elif input_rank == 5:
      layer_class = convolutional_layers.Convolution3D
    else:
      raise ValueError('Convolution not supported for input with rank',
                       input_rank)

    df = ('channels_first'
          if data_format and data_format.startswith('NC') else 'channels_last')
    layer = layer_class(
        filters=num_outputs,
        kernel_size=kernel_size,
        strides=stride,
        padding=padding,
        data_format=df,
        dilation_rate=rate,
        activation=None,
        use_bias=not normalizer_fn and biases_initializer,
        kernel_initializer=weights_initializer,
        bias_initializer=biases_initializer,
        kernel_regularizer=weights_regularizer,
        bias_regularizer=biases_regularizer,
        activity_regularizer=None,
        trainable=trainable,
        name=sc.name,
        dtype=inputs.dtype.base_dtype,
        _scope=sc,
        _reuse=reuse)
    outputs = layer.apply(inputs)

    # Add variables to collections.
    _add_variable_to_collections(layer.kernel, variables_collections, 'weights')
    if layer.use_bias:
      _add_variable_to_collections(layer.bias, variables_collections, 'biases')

    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)


@add_arg_scope
def convolution1d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  padding='SAME',
                  data_format=None,
                  rate=1,
                  activation_fn=nn.relu,
                  normalizer_fn=None,
                  normalizer_params=None,
                  weights_initializer=initializers.xavier_initializer(),
                  weights_regularizer=None,
                  biases_initializer=init_ops.zeros_initializer(),
                  biases_regularizer=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  scope=None):
  return convolution(inputs,
                     num_outputs,
                     kernel_size,
                     stride,
                     padding,
                     data_format,
                     rate,
                     activation_fn,
                     normalizer_fn,
                     normalizer_params,
                     weights_initializer,
                     weights_regularizer,
                     biases_initializer,
                     biases_regularizer,
                     reuse,
                     variables_collections,
                     outputs_collections,
                     trainable,
                     scope,
                     conv_dims=1)


convolution1d.__doc__ = convolution.__doc__


@add_arg_scope
def convolution2d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  padding='SAME',
                  data_format=None,
                  rate=1,
                  activation_fn=nn.relu,
                  normalizer_fn=None,
                  normalizer_params=None,
                  weights_initializer=initializers.xavier_initializer(),
                  weights_regularizer=None,
                  biases_initializer=init_ops.zeros_initializer(),
                  biases_regularizer=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  scope=None):
  return convolution(inputs,
                     num_outputs,
                     kernel_size,
                     stride,
                     padding,
                     data_format,
                     rate,
                     activation_fn,
                     normalizer_fn,
                     normalizer_params,
                     weights_initializer,
                     weights_regularizer,
                     biases_initializer,
                     biases_regularizer,
                     reuse,
                     variables_collections,
                     outputs_collections,
                     trainable,
                     scope,
                     conv_dims=2)


convolution2d.__doc__ = convolution.__doc__


@add_arg_scope
def convolution3d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  padding='SAME',
                  data_format=None,
                  rate=1,
                  activation_fn=nn.relu,
                  normalizer_fn=None,
                  normalizer_params=None,
                  weights_initializer=initializers.xavier_initializer(),
                  weights_regularizer=None,
                  biases_initializer=init_ops.zeros_initializer(),
                  biases_regularizer=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  scope=None):
  return convolution(inputs,
                     num_outputs,
                     kernel_size,
                     stride,
                     padding,
                     data_format,
                     rate,
                     activation_fn,
                     normalizer_fn,
                     normalizer_params,
                     weights_initializer,
                     weights_regularizer,
                     biases_initializer,
                     biases_regularizer,
                     reuse,
                     variables_collections,
                     outputs_collections,
                     trainable,
                     scope,
                     conv_dims=3)


convolution3d.__doc__ = convolution.__doc__
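

# Illustrative usage sketch (not part of the original API): atrous (dilated)
# 2-D convolution via the `rate` argument. Wrapped in a private helper so
# nothing runs at import time; shapes and parameters are hypothetical.
def _example_atrous_convolution2d():
  # A hypothetical NHWC feature map: batch of 2, 64x64 spatial, 8 channels.
  features = array_ops.ones([2, 64, 64, 8])
  # A 3x3 kernel with dilation rate 2 covers a 5x5 receptive field while
  # keeping the weight count of a 3x3 kernel. `stride` must stay 1 whenever
  # any `rate` value is != 1.
  return convolution2d(features, num_outputs=16, kernel_size=[3, 3], rate=2)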


@add_arg_scope
def convolution2d_in_plane(
    inputs,
    kernel_size,
    stride=1,
    padding='SAME',
    activation_fn=nn.relu,
    normalizer_fn=None,
    normalizer_params=None,
    weights_initializer=initializers.xavier_initializer(),
    weights_regularizer=None,
    biases_initializer=init_ops.zeros_initializer(),
    biases_regularizer=None,
    reuse=None,
    variables_collections=None,
    outputs_collections=None,
    trainable=True,
    scope=None):
  """Performs the same in-plane convolution to each channel independently.

  This is useful for performing various simple channel-independent convolution
  operations such as image gradients:

    image = tf.constant(..., shape=(16, 240, 320, 3))
    vert_gradients = layers.conv2d_in_plane(image,
                                            kernel=[1, -1],
                                            kernel_size=[2, 1])
    horz_gradients = layers.conv2d_in_plane(image,
                                            kernel=[1, -1],
                                            kernel_size=[1, 2])

  Args:
    inputs: A 4-D tensor with dimensions [batch_size, height, width, channels].
    kernel_size: A list of length 2 holding the [kernel_height, kernel_width]
      of the filters. Can be an int if both values are the same.
    stride: A list of length 2 `[stride_height, stride_width]`.
      Can be an int if both strides are the same. Note that presently
      both strides must have the same value.
    padding: The padding type to use, either 'SAME' or 'VALID'.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      default set to None for no normalizer function
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collection per variable.
    outputs_collections: Collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_scope`.

  Returns:
    A `Tensor` representing the output of the operation.
1263 """ 1264 with variable_scope.variable_scope( 1265 scope, 'ConvInPlane', [inputs], reuse=reuse) as sc: 1266 dtype = inputs.dtype.base_dtype 1267 kernel_h, kernel_w = utils.two_element_tuple(kernel_size) 1268 stride_h, stride_w = utils.two_element_tuple(stride) 1269 num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4) 1270 weights_shape = [kernel_h, kernel_w, 1, 1] 1271 weights_collections = utils.get_variable_collections( 1272 variables_collections, 'weights') 1273 weights = variables.model_variable( 1274 'weights', 1275 shape=weights_shape, 1276 dtype=dtype, 1277 initializer=weights_initializer, 1278 regularizer=weights_regularizer, 1279 collections=weights_collections, 1280 trainable=trainable) 1281 depthwise_weights = array_ops.tile(weights, [1, 1, num_filters_in, 1]) 1282 outputs = nn.depthwise_conv2d(inputs, depthwise_weights, 1283 [1, stride_h, stride_w, 1], padding) 1284 if normalizer_fn is not None: 1285 normalizer_params = normalizer_params or {} 1286 outputs = normalizer_fn(outputs, **normalizer_params) 1287 else: 1288 if biases_initializer is not None: 1289 biases_collections = utils.get_variable_collections( 1290 variables_collections, 'biases') 1291 biases = variables.model_variable( 1292 'biases', 1293 shape=[ 1294 num_filters_in, 1295 ], 1296 dtype=dtype, 1297 initializer=biases_initializer, 1298 regularizer=biases_regularizer, 1299 collections=biases_collections, 1300 trainable=trainable) 1301 outputs = nn.bias_add(outputs, biases) 1302 1303 if activation_fn is not None: 1304 outputs = activation_fn(outputs) 1305 return utils.collect_named_outputs(outputs_collections, sc.name, outputs) 1306 1307 1308@add_arg_scope 1309def convolution2d_transpose( 1310 inputs, 1311 num_outputs, 1312 kernel_size, 1313 stride=1, 1314 padding='SAME', 1315 data_format=DATA_FORMAT_NHWC, 1316 activation_fn=nn.relu, 1317 normalizer_fn=None, 1318 normalizer_params=None, 1319 weights_initializer=initializers.xavier_initializer(), 1320 weights_regularizer=None, 1321 biases_initializer=init_ops.zeros_initializer(), 1322 biases_regularizer=None, 1323 reuse=None, 1324 variables_collections=None, 1325 outputs_collections=None, 1326 trainable=True, 1327 scope=None): 1328 """Adds a convolution2d_transpose with an optional batch normalization layer. 1329 1330 The function creates a variable called `weights`, representing the 1331 kernel, that is convolved with the input. If `normalizer_fn` is `None`, a 1332 second variable called 'biases' is added to the result of the operation. 1333 1334 Args: 1335 inputs: A 4-D `Tensor` of type `float` and shape 1336 `[batch, height, width, in_channels]` for `NHWC` data format or 1337 `[batch, in_channels, height, width]` for `NCHW` data format. 1338 num_outputs: Integer, the number of output filters. 1339 kernel_size: A list of length 2 holding the [kernel_height, kernel_width] of 1340 of the filters. Can be an int if both values are the same. 1341 stride: A list of length 2: [stride_height, stride_width]. 1342 Can be an int if both strides are the same. Note that presently 1343 both strides must have the same value. 1344 padding: One of 'VALID' or 'SAME'. 1345 data_format: A string. `NHWC` (default) and `NCHW` are supported. 1346 activation_fn: Activation function. The default value is a ReLU function. 1347 Explicitly set it to None to skip it and maintain a linear activation. 1348 normalizer_fn: Normalization function to use instead of `biases`. 


@add_arg_scope
def convolution2d_transpose(
    inputs,
    num_outputs,
    kernel_size,
    stride=1,
    padding='SAME',
    data_format=DATA_FORMAT_NHWC,
    activation_fn=nn.relu,
    normalizer_fn=None,
    normalizer_params=None,
    weights_initializer=initializers.xavier_initializer(),
    weights_regularizer=None,
    biases_initializer=init_ops.zeros_initializer(),
    biases_regularizer=None,
    reuse=None,
    variables_collections=None,
    outputs_collections=None,
    trainable=True,
    scope=None):
  """Adds a convolution2d_transpose with an optional batch normalization layer.

  The function creates a variable called `weights`, representing the
  kernel, that is convolved with the input. If `normalizer_fn` is `None`, a
  second variable called 'biases' is added to the result of the operation.

  Args:
    inputs: A 4-D `Tensor` of type `float` and shape
      `[batch, height, width, in_channels]` for `NHWC` data format or
      `[batch, in_channels, height, width]` for `NCHW` data format.
    num_outputs: Integer, the number of output filters.
    kernel_size: A list of length 2 holding the [kernel_height, kernel_width]
      of the filters. Can be an int if both values are the same.
    stride: A list of length 2: [stride_height, stride_width].
      Can be an int if both strides are the same. Note that presently
      both strides must have the same value.
    padding: One of 'VALID' or 'SAME'.
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      default set to None for no normalizer function
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collection per variable.
    outputs_collections: Collection to add the outputs.
    trainable: Whether or not the variables should be trainable.
    scope: Optional scope for variable_scope.

  Returns:
    A tensor representing the output of the operation.

  Raises:
    ValueError: If 'kernel_size' is not a list of length 2.
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If `C` dimension of `inputs` is None.
  """
  layer_variable_getter = _build_variable_getter({
      'bias': 'biases',
      'kernel': 'weights'
  })

  with variable_scope.variable_scope(
      scope,
      'Conv2d_transpose', [inputs],
      reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
      raise ValueError('data_format has to be either NCHW or NHWC.')

    inputs = ops.convert_to_tensor(inputs)

    df = ('channels_first'
          if data_format and data_format.startswith('NC') else 'channels_last')
    layer = convolutional_layers.Convolution2DTranspose(
        filters=num_outputs,
        kernel_size=kernel_size,
        strides=stride,
        padding=padding,
        data_format=df,
        activation=None,
        use_bias=not normalizer_fn and biases_initializer,
        kernel_initializer=weights_initializer,
        bias_initializer=biases_initializer,
        kernel_regularizer=weights_regularizer,
        bias_regularizer=biases_regularizer,
        activity_regularizer=None,
        trainable=trainable,
        name=sc.name,
        dtype=inputs.dtype.base_dtype,
        _scope=sc,
        _reuse=reuse)
    outputs = layer.apply(inputs)

    # Add variables to collections.
    _add_variable_to_collections(layer.kernel, variables_collections, 'weights')
    if layer.bias is not None:
      _add_variable_to_collections(layer.bias, variables_collections, 'biases')

    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)


@add_arg_scope
def convolution3d_transpose(
    inputs,
    num_outputs,
    kernel_size,
    stride=1,
    padding='SAME',
    data_format=DATA_FORMAT_NDHWC,
    activation_fn=nn.relu,
    normalizer_fn=None,
    normalizer_params=None,
    weights_initializer=initializers.xavier_initializer(),
    weights_regularizer=None,
    biases_initializer=init_ops.zeros_initializer(),
    biases_regularizer=None,
    reuse=None,
    variables_collections=None,
    outputs_collections=None,
    trainable=True,
    scope=None):
  """Adds a convolution3d_transpose with an optional batch normalization layer.

  The function creates a variable called `weights`, representing the
  kernel, that is convolved with the input. If `normalizer_fn` is `None`, a
  second variable called 'biases' is added to the result of the operation.

  Args:
    inputs: A 5-D `Tensor` of type `float` and shape
      `[batch, depth, height, width, in_channels]` for `NDHWC` data format or
      `[batch, in_channels, depth, height, width]` for `NCDHW` data format.
    num_outputs: Integer, the number of output filters.
    kernel_size: A list of length 3 holding the [kernel_depth, kernel_height,
      kernel_width] of the filters. Can be an int if all values are the same.
    stride: A list of length 3: [stride_depth, stride_height, stride_width].
      Can be an int if all strides are the same. Note that presently
      all strides must have the same value.
    padding: One of 'VALID' or 'SAME'.
    data_format: A string. `NDHWC` (default) and `NCDHW` are supported.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      default set to None for no normalizer function
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables
      or a dictionary containing a different list of collections per variable.
    outputs_collections: Collection to add the outputs.
    trainable: Whether or not the variables should be trainable.
    scope: Optional scope for variable_scope.

  Returns:
    A tensor representing the output of the operation.

  Raises:
    ValueError: If 'kernel_size' is not a list of length 3.
    ValueError: If `data_format` is neither `NDHWC` nor `NCDHW`.
1484 ValueError: If `C` dimension of `inputs` is None. 1485 """ 1486 layer_variable_getter = _build_variable_getter({ 1487 'bias': 'biases', 1488 'kernel': 'weights' 1489 }) 1490 1491 with variable_scope.variable_scope( 1492 scope, 1493 'Conv3d_transpose', [inputs], 1494 reuse=reuse, 1495 custom_getter=layer_variable_getter) as sc: 1496 if data_format not in (DATA_FORMAT_NCDHW, DATA_FORMAT_NDHWC): 1497 raise ValueError('data_format has to be either NCDHW or NDHWC.') 1498 1499 inputs = ops.convert_to_tensor(inputs) 1500 1501 df = ('channels_first' 1502 if data_format and data_format.startswith('NC') else 'channels_last') 1503 layer = convolutional_layers.Convolution3DTranspose( 1504 filters=num_outputs, 1505 kernel_size=kernel_size, 1506 strides=stride, 1507 padding=padding, 1508 data_format=df, 1509 activation=None, 1510 use_bias=not normalizer_fn and biases_initializer, 1511 kernel_initializer=weights_initializer, 1512 bias_initializer=biases_initializer, 1513 kernel_regularizer=weights_regularizer, 1514 bias_regularizer=biases_regularizer, 1515 activity_regularizer=None, 1516 trainable=trainable, 1517 name=sc.name, 1518 dtype=inputs.dtype.base_dtype, 1519 _scope=sc, 1520 _reuse=reuse) 1521 outputs = layer.apply(inputs) 1522 1523 # Add variables to collections. 1524 _add_variable_to_collections(layer.kernel, variables_collections, 'weights') 1525 if layer.bias is not None: 1526 _add_variable_to_collections(layer.bias, variables_collections, 'biases') 1527 1528 if normalizer_fn is not None: 1529 normalizer_params = normalizer_params or {} 1530 outputs = normalizer_fn(outputs, **normalizer_params) 1531 1532 if activation_fn is not None: 1533 outputs = activation_fn(outputs) 1534 return utils.collect_named_outputs(outputs_collections, sc.name, outputs) 1535 1536 1537@add_arg_scope 1538def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None): 1539 """Converts a dense tensor into a sparse tensor. 1540 1541 An example use would be to convert dense labels to sparse ones 1542 so that they can be fed to the ctc_loss. 1543 1544 Args: 1545 tensor: An `int` `Tensor` to be converted to a `Sparse`. 1546 eos_token: An integer. 1547 It is part of the target label that signifies the end of a sentence. 1548 outputs_collections: Collection to add the outputs. 1549 scope: Optional scope for name_scope. 1550 """ 1551 with variable_scope.variable_scope(scope, 'dense_to_sparse', [tensor]) as sc: 1552 tensor = ops.convert_to_tensor(tensor) 1553 indices = array_ops.where( 1554 math_ops.not_equal(tensor, constant_op.constant(eos_token, 1555 tensor.dtype))) 1556 values = array_ops.gather_nd(tensor, indices) 1557 shape = array_ops.shape(tensor, out_type=dtypes.int64) 1558 outputs = sparse_tensor.SparseTensor(indices, values, shape) 1559 return utils.collect_named_outputs(outputs_collections, sc.name, outputs) 1560 1561 1562@add_arg_scope 1563def dropout(inputs, 1564 keep_prob=0.5, 1565 noise_shape=None, 1566 is_training=True, 1567 outputs_collections=None, 1568 scope=None, 1569 seed=None): 1570 """Returns a dropout op applied to the input. 1571 1572 With probability `keep_prob`, outputs the input element scaled up by 1573 `1 / keep_prob`, otherwise outputs `0`. The scaling is so that the expected 1574 sum is unchanged. 1575 1576 Args: 1577 inputs: The tensor to pass to the nn.dropout op. 1578 keep_prob: A scalar `Tensor` with the same type as x. The probability 1579 that each element is kept. 
1580 noise_shape: A 1-D `Tensor` of type `int32`, representing the 1581 shape for randomly generated keep/drop flags. 1582 is_training: A bool `Tensor` indicating whether or not the model 1583 is in training mode. If so, dropout is applied and values scaled. 1584 Otherwise, inputs is returned. 1585 outputs_collections: Collection to add the outputs. 1586 scope: Optional scope for name_scope. 1587 seed: A Python integer. Used to create random seeds. See 1588 `tf.set_random_seed` for behavior. 1589 1590 Returns: 1591 A tensor representing the output of the operation. 1592 """ 1593 with variable_scope.variable_scope( 1594 scope, 'Dropout', [inputs], custom_getter=_model_variable_getter) as sc: 1595 inputs = ops.convert_to_tensor(inputs) 1596 layer = core_layers.Dropout( 1597 rate=1 - keep_prob, 1598 noise_shape=noise_shape, 1599 seed=seed, 1600 name=sc.name, 1601 _scope=sc) 1602 outputs = layer.apply(inputs, training=is_training) 1603 return utils.collect_named_outputs(outputs_collections, sc.name, outputs) 1604 1605 1606@add_arg_scope 1607def flatten(inputs, outputs_collections=None, scope=None): 1608 """Flattens the input while maintaining the batch_size. 1609 1610 Assumes that the first dimension represents the batch. 1611 1612 Args: 1613 inputs: A tensor of size [batch_size, ...]. 1614 outputs_collections: Collection to add the outputs. 1615 scope: Optional scope for name_scope. 1616 1617 Returns: 1618 A flattened tensor with shape [batch_size, k]. 1619 Raises: 1620 ValueError: If inputs rank is unknown or less than 2. 1621 """ 1622 with ops.name_scope(scope, 'Flatten', [inputs]) as sc: 1623 inputs = ops.convert_to_tensor(inputs) 1624 outputs = core_layers.flatten(inputs) 1625 return utils.collect_named_outputs(outputs_collections, sc, outputs) 1626 1627 1628def _sparse_inner_flatten(inputs, new_rank): 1629 """Helper function for `inner_flatten`.""" 1630 inputs_rank = inputs.dense_shape.get_shape().as_list()[0] 1631 if inputs_rank < new_rank: 1632 raise ValueError( 1633 'Inputs has rank less than new_rank. {} must have rank at least' 1634 ' {}. Received rank {}, shape {}'.format(inputs, new_rank, inputs_rank, 1635 inputs.get_shape())) 1636 1637 outer_dimensions = inputs.dense_shape[:new_rank - 1] 1638 inner_dimensions = inputs.dense_shape[new_rank - 1:] 1639 new_shape = array_ops.concat( 1640 (outer_dimensions, [math_ops.reduce_prod(inner_dimensions)]), 0) 1641 flattened = sparse_ops.sparse_reshape(inputs, new_shape) 1642 return flattened 1643 1644 1645def _dense_inner_flatten(inputs, new_rank): 1646 """Helper function for `inner_flatten`.""" 1647 rank_assertion = check_ops.assert_rank_at_least( 1648 inputs, new_rank, message='inputs has rank less than new_rank') 1649 with ops.control_dependencies([rank_assertion]): 1650 outer_dimensions = array_ops.strided_slice( 1651 array_ops.shape(inputs), [0], [new_rank - 1]) 1652 new_shape = array_ops.concat((outer_dimensions, [-1]), 0) 1653 reshaped = array_ops.reshape(inputs, new_shape) 1654 1655 # if `new_rank` is an integer, try to calculate new shape. 
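  # The reshape above already fixes the dynamic shape; the block below also
  # propagates any static shape information that is available. For example,
  # flattening a tensor of static shape [2, 3, 4, 5] with new_rank=2 sets the
  # result's static shape to [2, 60].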
1656 if isinstance(new_rank, six.integer_types): 1657 static_shape = inputs.get_shape() 1658 if static_shape is not None and static_shape.dims is not None: 1659 static_shape = static_shape.as_list() 1660 static_outer_dims = static_shape[:new_rank - 1] 1661 static_inner_dims = static_shape[new_rank - 1:] 1662 flattened_dimension = 1 1663 for inner_dim in static_inner_dims: 1664 if inner_dim is None: 1665 flattened_dimension = None 1666 break 1667 flattened_dimension *= inner_dim 1668 reshaped.set_shape(static_outer_dims + [flattened_dimension]) 1669 return reshaped 1670 1671 1672@add_arg_scope 1673def _inner_flatten(inputs, new_rank, output_collections=None, scope=None): 1674 """Flattens inner dimensions of `inputs`, returns a Tensor with `new_rank`. 1675 1676 For example: 1677 ''' 1678 x = tf.random_uniform(shape=[1, 2, 3, 4, 5, 6]) 1679 y = _inner_flatten(x, 4) 1680 assert y.get_shape().as_list() == [1, 2, 3, (4 * 5 * 6)] 1681 ''' 1682 This layer will fail at run time if `new_rank` is greater than the current 1683 rank of `inputs`. 1684 1685 Args: 1686 inputs: A `Tensor` or `SparseTensor`. 1687 new_rank: The desired rank of the returned `Tensor` or `SparseTensor`. 1688 output_collections: Collection to which the outputs will be added. 1689 scope: Optional scope for `name_scope`. 1690 Returns: 1691 A `Tensor` or `SparseTensor` containing the same values as `inputs`, but 1692 with innermost dimensions flattened to obtain rank `new_rank`. 1693 1694 Raises: 1695 TypeError: `inputs` is not a `Tensor` or `SparseTensor`. 1696 """ 1697 with ops.name_scope(scope, 'InnerFlatten', [inputs, new_rank]) as sc: 1698 if isinstance(inputs, sparse_tensor.SparseTensor): 1699 flattened = _sparse_inner_flatten(inputs, new_rank) 1700 else: 1701 inputs = ops.convert_to_tensor(inputs) 1702 flattened = _dense_inner_flatten(inputs, new_rank) 1703 return utils.collect_named_outputs(output_collections, sc, flattened) 1704 1705 1706def _model_variable_getter( 1707 getter, 1708 name, 1709 shape=None, 1710 dtype=None, 1711 initializer=None, 1712 regularizer=None, 1713 trainable=True, 1714 collections=None, 1715 caching_device=None, 1716 partitioner=None, 1717 rename=None, 1718 use_resource=None, 1719 synchronization=tf_variables.VariableSynchronization.AUTO, 1720 aggregation=tf_variables.VariableAggregation.NONE, 1721 **_): 1722 """Getter that uses model_variable for compatibility with core layers.""" 1723 short_name = name.split('/')[-1] 1724 if rename and short_name in rename: 1725 name_components = name.split('/') 1726 name_components[-1] = rename[short_name] 1727 name = '/'.join(name_components) 1728 return variables.model_variable( 1729 name, 1730 shape=shape, 1731 dtype=dtype, 1732 initializer=initializer, 1733 regularizer=regularizer, 1734 collections=collections, 1735 trainable=trainable, 1736 caching_device=caching_device, 1737 partitioner=partitioner, 1738 custom_getter=getter, 1739 use_resource=use_resource, 1740 synchronization=synchronization, 1741 aggregation=aggregation) 1742 1743 1744def _build_variable_getter(rename=None): 1745 """Build a model variable getter that respects scope getter and renames.""" 1746 1747 # VariableScope will nest the getters 1748 def layer_variable_getter(getter, *args, **kwargs): 1749 kwargs['rename'] = rename 1750 return _model_variable_getter(getter, *args, **kwargs) 1751 1752 return layer_variable_getter 1753 1754 1755def _add_variable_to_collections(variable, collections_set, collections_name): 1756 """Adds variable (or all its parts) to all collections with that name.""" 
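  # `variable` may be a `PartitionedVariable` (created when a partitioner is
  # in effect); in that case each partition is added to the requested
  # collections individually, skipping any that are already present.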
1757 collections = utils.get_variable_collections(collections_set, 1758 collections_name) or [] 1759 variables_list = [variable] 1760 if isinstance(variable, tf_variables.PartitionedVariable): 1761 variables_list = [v for v in variable] 1762 for collection in collections: 1763 for var in variables_list: 1764 if var not in ops.get_collection(collection): 1765 ops.add_to_collection(collection, var) 1766 1767 1768@add_arg_scope 1769def fully_connected(inputs, 1770 num_outputs, 1771 activation_fn=nn.relu, 1772 normalizer_fn=None, 1773 normalizer_params=None, 1774 weights_initializer=initializers.xavier_initializer(), 1775 weights_regularizer=None, 1776 biases_initializer=init_ops.zeros_initializer(), 1777 biases_regularizer=None, 1778 reuse=None, 1779 variables_collections=None, 1780 outputs_collections=None, 1781 trainable=True, 1782 scope=None): 1783 """Adds a fully connected layer. 1784 1785 `fully_connected` creates a variable called `weights`, representing a fully 1786 connected weight matrix, which is multiplied by the `inputs` to produce a 1787 `Tensor` of hidden units. If a `normalizer_fn` is provided (such as 1788 `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is 1789 None and a `biases_initializer` is provided then a `biases` variable would be 1790 created and added the hidden units. Finally, if `activation_fn` is not `None`, 1791 it is applied to the hidden units as well. 1792 1793 Note: that if `inputs` have a rank greater than 2, then `inputs` is flattened 1794 prior to the initial matrix multiply by `weights`. 1795 1796 Args: 1797 inputs: A tensor of at least rank 2 and static value for the last dimension; 1798 i.e. `[batch_size, depth]`, `[None, None, None, channels]`. 1799 num_outputs: Integer or long, the number of output units in the layer. 1800 activation_fn: Activation function. The default value is a ReLU function. 1801 Explicitly set it to None to skip it and maintain a linear activation. 1802 normalizer_fn: Normalization function to use instead of `biases`. If 1803 `normalizer_fn` is provided then `biases_initializer` and 1804 `biases_regularizer` are ignored and `biases` are not created nor added. 1805 default set to None for no normalizer function 1806 normalizer_params: Normalization function parameters. 1807 weights_initializer: An initializer for the weights. 1808 weights_regularizer: Optional regularizer for the weights. 1809 biases_initializer: An initializer for the biases. If None skip biases. 1810 biases_regularizer: Optional regularizer for the biases. 1811 reuse: Whether or not the layer and its variables should be reused. To be 1812 able to reuse the layer scope must be given. 1813 variables_collections: Optional list of collections for all the variables or 1814 a dictionary containing a different list of collections per variable. 1815 outputs_collections: Collection to add the outputs. 1816 trainable: If `True` also add variables to the graph collection 1817 `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). 1818 scope: Optional scope for variable_scope. 1819 1820 Returns: 1821 The tensor variable representing the result of the series of operations. 1822 1823 Raises: 1824 ValueError: If x has rank less than 2 or if its last dimension is not set. 1825 """ 1826 if not isinstance(num_outputs, six.integer_types): 1827 raise ValueError('num_outputs type should be one of %s, got %s.' 
% ( 1828 list(six.integer_types), type(num_outputs))) 1829 1830 layer_variable_getter = _build_variable_getter({ 1831 'bias': 'biases', 1832 'kernel': 'weights' 1833 }) 1834 1835 with variable_scope.variable_scope( 1836 scope, 1837 'fully_connected', [inputs], 1838 reuse=reuse, 1839 custom_getter=layer_variable_getter) as sc: 1840 inputs = ops.convert_to_tensor(inputs) 1841 layer = core_layers.Dense( 1842 units=num_outputs, 1843 activation=None, 1844 use_bias=not normalizer_fn and biases_initializer, 1845 kernel_initializer=weights_initializer, 1846 bias_initializer=biases_initializer, 1847 kernel_regularizer=weights_regularizer, 1848 bias_regularizer=biases_regularizer, 1849 activity_regularizer=None, 1850 trainable=trainable, 1851 name=sc.name, 1852 dtype=inputs.dtype.base_dtype, 1853 _scope=sc, 1854 _reuse=reuse) 1855 outputs = layer.apply(inputs) 1856 1857 # Add variables to collections. 1858 _add_variable_to_collections(layer.kernel, variables_collections, 'weights') 1859 if layer.bias is not None: 1860 _add_variable_to_collections(layer.bias, variables_collections, 'biases') 1861 1862 # Apply normalizer function / layer. 1863 if normalizer_fn is not None: 1864 if not normalizer_params: 1865 normalizer_params = {} 1866 outputs = normalizer_fn(outputs, **normalizer_params) 1867 1868 if activation_fn is not None: 1869 outputs = activation_fn(outputs) 1870 1871 return utils.collect_named_outputs(outputs_collections, sc.name, outputs) 1872 1873 1874class GDN(base.Layer): 1875 """Generalized divisive normalization layer. 1876 1877 Based on the papers: 1878 1879 "Density Modeling of Images using a Generalized Normalization 1880 Transformation" 1881 1882 Johannes Ballé, Valero Laparra, Eero P. Simoncelli 1883 1884 https://arxiv.org/abs/1511.06281 1885 1886 "End-to-end Optimized Image Compression" 1887 1888 Johannes Ballé, Valero Laparra, Eero P. Simoncelli 1889 1890 https://arxiv.org/abs/1611.01704 1891 1892 Implements an activation function that is essentially a multivariate 1893 generalization of a particular sigmoid-type function: 1894 1895 ``` 1896 y[i] = x[i] / sqrt(beta[i] + sum_j(gamma[j, i] * x[j])) 1897 ``` 1898 1899 where `i` and `j` run over channels. This implementation never sums across 1900 spatial dimensions. It is similar to local response normalization, but much 1901 more flexible, as `beta` and `gamma` are trainable parameters. 1902 1903 Arguments: 1904 inverse: If `False` (default), compute GDN response. If `True`, compute IGDN 1905 response (one step of fixed point iteration to invert GDN; the division 1906 is replaced by multiplication). 1907 beta_min: Lower bound for beta, to prevent numerical error from causing 1908 square root of zero or negative values. 1909 gamma_init: The gamma matrix will be initialized as the identity matrix 1910 multiplied with this value. If set to zero, the layer is effectively 1911 initialized to the identity operation, since beta is initialized as one. 1912 A good default setting is somewhere between 0 and 0.5. 1913 reparam_offset: Offset added to the reparameterization of beta and gamma. 1914 The reparameterization of beta and gamma as their square roots lets the 1915 training slow down when their values are close to zero, which is desirable 1916 as small values in the denominator can lead to a situation where gradient 1917 noise on beta/gamma leads to extreme amounts of noise in the GDN 1918 activations. 
However, without the offset, we would get zero gradients if 1919 any elements of beta or gamma were exactly zero, and thus the training 1920 could get stuck. To prevent this, we add this small constant. The default 1921 value was empirically determined as a good starting point. Making it 1922 bigger potentially leads to more gradient noise on the activations, making 1923 it too small may lead to numerical precision issues. 1924 data_format: Format of input tensor. Currently supports `'channels_first'` 1925 and `'channels_last'`. 1926 activity_regularizer: Regularizer function for the output. 1927 trainable: Boolean, if `True`, also add variables to the graph collection 1928 `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). 1929 name: String, the name of the layer. Layers with the same name will 1930 share weights, but to avoid mistakes we require `reuse=True` in such 1931 cases. 1932 1933 Properties: 1934 inverse: Boolean, whether GDN is computed (`True`) or IGDN (`False`). 1935 data_format: Format of input tensor. Currently supports `'channels_first'` 1936 and `'channels_last'`. 1937 beta: The beta parameter as defined above (1D `Tensor`). 1938 gamma: The gamma parameter as defined above (2D `Tensor`). 1939 """ 1940 1941 def __init__(self, 1942 inverse=False, 1943 beta_min=1e-6, 1944 gamma_init=.1, 1945 reparam_offset=2**-18, 1946 data_format='channels_last', 1947 activity_regularizer=None, 1948 trainable=True, 1949 name=None, 1950 **kwargs): 1951 super(GDN, self).__init__( 1952 trainable=trainable, 1953 name=name, 1954 activity_regularizer=activity_regularizer, 1955 **kwargs) 1956 self.inverse = inverse 1957 self._beta_min = beta_min 1958 self._gamma_init = gamma_init 1959 self._reparam_offset = reparam_offset 1960 self.data_format = data_format 1961 self._channel_axis() # trigger ValueError early 1962 self.input_spec = input_spec.InputSpec(min_ndim=3, max_ndim=5) 1963 1964 def _channel_axis(self): 1965 try: 1966 return {'channels_first': 1, 'channels_last': -1}[self.data_format] 1967 except KeyError: 1968 raise ValueError('Unsupported `data_format` for GDN layer: {}.'.format( 1969 self.data_format)) 1970 1971 @staticmethod 1972 def _lower_bound(inputs, bound, name=None): 1973 """Same as tf.maximum, but with helpful gradient for inputs < bound. 1974 1975 The gradient is overwritten so that it is passed through if the input is not 1976 hitting the bound. If it is, only gradients that push `inputs` higher than 1977 the bound are passed through. No gradients are passed through to the bound. 1978 1979 Args: 1980 inputs: input tensor 1981 bound: lower bound for the input tensor 1982 name: name for this op 1983 1984 Returns: 1985 tf.maximum(inputs, bound) 1986 """ 1987 with ops.name_scope(name, 'GDNLowerBound', [inputs, bound]) as scope: 1988 inputs = ops.convert_to_tensor(inputs, name='inputs') 1989 bound = ops.convert_to_tensor(bound, name='bound') 1990 with ops.get_default_graph().gradient_override_map({ 1991 'Maximum': 'GDNLowerBound' 1992 }): 1993 return math_ops.maximum(inputs, bound, name=scope) 1994 1995 @staticmethod 1996 def _lower_bound_grad(op, grad): 1997 """Gradient for `_lower_bound`. 
1998 1999 Args: 2000 op: the tensorflow op for which to calculate a gradient 2001 grad: gradient with respect to the output of the op 2002 2003 Returns: 2004 gradients with respect to the inputs of the op 2005 """ 2006 inputs = op.inputs[0] 2007 bound = op.inputs[1] 2008 pass_through_if = math_ops.logical_or(inputs >= bound, grad < 0) 2009 return [math_ops.cast(pass_through_if, grad.dtype) * grad, None] 2010 2011 def build(self, input_shape): 2012 channel_axis = self._channel_axis() 2013 input_shape = tensor_shape.TensorShape(input_shape) 2014 num_channels = input_shape.dims[channel_axis].value 2015 if num_channels is None: 2016 raise ValueError('The channel dimension of the inputs to `GDN` ' 2017 'must be defined.') 2018 self._input_rank = input_shape.ndims 2019 self.input_spec = input_spec.InputSpec( 2020 ndim=input_shape.ndims, axes={ 2021 channel_axis: num_channels 2022 }) 2023 2024 pedestal = array_ops.constant(self._reparam_offset**2, dtype=self.dtype) 2025 beta_bound = array_ops.constant( 2026 (self._beta_min + self._reparam_offset**2)**.5, dtype=self.dtype) 2027 gamma_bound = array_ops.constant(self._reparam_offset, dtype=self.dtype) 2028 2029 def beta_initializer(shape, dtype=None, partition_info=None): 2030 del partition_info # unused 2031 pedestal = array_ops.constant(self._reparam_offset**2, dtype=self.dtype) 2032 return math_ops.sqrt(array_ops.ones(shape, dtype=dtype) + pedestal) 2033 2034 def gamma_initializer(shape, dtype=None, partition_info=None): 2035 del partition_info # unused 2036 assert len(shape) == 2 2037 assert shape[0] == shape[1] 2038 eye = linalg_ops.eye(shape[0], dtype=dtype) 2039 pedestal = array_ops.constant(self._reparam_offset**2, dtype=self.dtype) 2040 return math_ops.sqrt(self._gamma_init * eye + pedestal) 2041 2042 beta = self.add_variable( 2043 'reparam_beta', 2044 shape=[num_channels], 2045 initializer=beta_initializer, 2046 dtype=self.dtype, 2047 trainable=True) 2048 beta = self._lower_bound(beta, beta_bound) 2049 self.beta = math_ops.square(beta) - pedestal 2050 2051 gamma = self.add_variable( 2052 'reparam_gamma', 2053 shape=[num_channels, num_channels], 2054 initializer=gamma_initializer, 2055 dtype=self.dtype, 2056 trainable=True) 2057 gamma = self._lower_bound(gamma, gamma_bound) 2058 self.gamma = math_ops.square(gamma) - pedestal 2059 2060 self.built = True 2061 2062 def call(self, inputs): 2063 inputs = ops.convert_to_tensor(inputs, dtype=self.dtype) 2064 ndim = self._input_rank 2065 2066 shape = self.gamma.get_shape().as_list() 2067 gamma = array_ops.reshape(self.gamma, (ndim - 2) * [1] + shape) 2068 2069 # Compute normalization pool. 
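    # The pool is a 1x1 convolution of the squared inputs with `gamma`, plus
    # `beta`, followed by an element-wise square root. The `channels_first`
    # branch temporarily reshapes 3-D and 5-D inputs to 4-D around the
    # `bias_add` call.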
    if self.data_format == 'channels_first':
      norm_pool = nn.convolution(
          math_ops.square(inputs),
          gamma,
          'VALID',
          data_format='NC' + 'DHW'[-(ndim - 2):])
      if ndim == 3:
        norm_pool = array_ops.expand_dims(norm_pool, 2)
        norm_pool = nn.bias_add(norm_pool, self.beta, data_format='NCHW')
        norm_pool = array_ops.squeeze(norm_pool, [2])
      elif ndim == 5:
        shape = array_ops.shape(norm_pool)
        # Flatten the trailing spatial dimensions so `bias_add` sees a 4-D
        # `NCHW` tensor, then restore the original shape afterwards.
        norm_pool = array_ops.reshape(norm_pool,
                                      array_ops.concat([shape[:3], [-1]], 0))
        norm_pool = nn.bias_add(norm_pool, self.beta, data_format='NCHW')
        norm_pool = array_ops.reshape(norm_pool, shape)
      else:  # ndim == 4
        norm_pool = nn.bias_add(norm_pool, self.beta, data_format='NCHW')
    else:  # channels_last
      norm_pool = nn.convolution(math_ops.square(inputs), gamma, 'VALID')
      norm_pool = nn.bias_add(norm_pool, self.beta, data_format='NHWC')
    norm_pool = math_ops.sqrt(norm_pool)

    if self.inverse:
      outputs = inputs * norm_pool
    else:
      outputs = inputs / norm_pool
    outputs.set_shape(inputs.get_shape())
    return outputs

  def compute_output_shape(self, input_shape):
    channel_axis = self._channel_axis()
    input_shape = tensor_shape.TensorShape(input_shape)
    if not 3 <= input_shape.ndims <= 5:
      raise ValueError('`input_shape` must be of rank 3 to 5, inclusive.')
    if input_shape.dims[channel_axis].value is None:
      raise ValueError(
          'The channel dimension of `input_shape` must be defined.')
    return input_shape


ops.RegisterGradient('GDNLowerBound')(GDN._lower_bound_grad)  # pylint:disable=protected-access


def gdn(inputs,
        inverse=False,
        beta_min=1e-6,
        gamma_init=.1,
        reparam_offset=2**-18,
        data_format='channels_last',
        activity_regularizer=None,
        trainable=True,
        name=None,
        reuse=None):
  """Functional interface for GDN layer.

  Based on the papers:

    "Density Modeling of Images using a Generalized Normalization
    Transformation"
    Johannes Ballé, Valero Laparra, Eero P. Simoncelli
    https://arxiv.org/abs/1511.06281

    "End-to-end Optimized Image Compression"
    Johannes Ballé, Valero Laparra, Eero P. Simoncelli
    https://arxiv.org/abs/1611.01704

  Implements an activation function that is essentially a multivariate
  generalization of a particular sigmoid-type function:

  ```
  y[i] = x[i] / sqrt(beta[i] + sum_j(gamma[j, i] * x[j]))
  ```

  where `i` and `j` run over channels. This implementation never sums across
  spatial dimensions. It is similar to local response normalization, but much
  more flexible, as `beta` and `gamma` are trainable parameters.

  Args:
    inputs: Tensor input.
    inverse: If `False` (default), compute GDN response. If `True`, compute
      IGDN response (one step of fixed point iteration to invert GDN; the
      division is replaced by multiplication).
    beta_min: Lower bound for beta, to prevent numerical error from causing
      square root of zero or negative values.
    gamma_init: The gamma matrix will be initialized as the identity matrix
      multiplied with this value. If set to zero, the layer is effectively
      initialized to the identity operation, since beta is initialized as one.
      A good default setting is somewhere between 0 and 0.5.
    reparam_offset: Offset added to the reparameterization of beta and gamma.
2159 The reparameterization of beta and gamma as their square roots lets the 2160 training slow down when their values are close to zero, which is desirable 2161 as small values in the denominator can lead to a situation where gradient 2162 noise on beta/gamma leads to extreme amounts of noise in the GDN 2163 activations. However, without the offset, we would get zero gradients if 2164 any elements of beta or gamma were exactly zero, and thus the training 2165 could get stuck. To prevent this, we add this small constant. The default 2166 value was empirically determined as a good starting point. Making it 2167 bigger potentially leads to more gradient noise on the activations, making 2168 it too small may lead to numerical precision issues. 2169 data_format: Format of input tensor. Currently supports `'channels_first'` 2170 and `'channels_last'`. 2171 activity_regularizer: Regularizer function for the output. 2172 trainable: Boolean, if `True`, also add variables to the graph collection 2173 `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). 2174 name: String, the name of the layer. Layers with the same name will 2175 share weights, but to avoid mistakes we require `reuse=True` in such 2176 cases. 2177 reuse: Boolean, whether to reuse the weights of a previous layer by the same 2178 name. 2179 2180 Returns: 2181 Output tensor. 2182 """ 2183 layer = GDN( 2184 inverse=inverse, 2185 beta_min=beta_min, 2186 gamma_init=gamma_init, 2187 reparam_offset=reparam_offset, 2188 data_format=data_format, 2189 activity_regularizer=activity_regularizer, 2190 trainable=trainable, 2191 name=name, 2192 dtype=inputs.dtype.base_dtype, 2193 _scope=name, 2194 _reuse=reuse) 2195 return layer.apply(inputs) 2196 2197 2198@add_arg_scope 2199def layer_norm(inputs, 2200 center=True, 2201 scale=True, 2202 activation_fn=None, 2203 reuse=None, 2204 variables_collections=None, 2205 outputs_collections=None, 2206 trainable=True, 2207 begin_norm_axis=1, 2208 begin_params_axis=-1, 2209 scope=None): 2210 """Adds a Layer Normalization layer. 2211 2212 Based on the paper: 2213 2214 "Layer Normalization" 2215 2216 Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton 2217 2218 https://arxiv.org/abs/1607.06450. 2219 2220 Can be used as a normalizer function for conv2d and fully_connected. 2221 2222 Given a tensor `inputs` of rank `R`, moments are calculated and normalization 2223 is performed over axes `begin_norm_axis ... R - 1`. Scaling and centering, 2224 if requested, is performed over axes `begin_params_axis .. R - 1`. 2225 2226 By default, `begin_norm_axis = 1` and `begin_params_axis = -1`, 2227 meaning that normalization is performed over all but the first axis 2228 (the `HWC` if `inputs` is `NHWC`), while the `beta` and `gamma` trainable 2229 parameters are calculated for the rightmost axis (the `C` if `inputs` is 2230 `NHWC`). Scaling and recentering is performed via broadcast of the 2231 `beta` and `gamma` parameters with the normalized tensor. 2232 2233 The shapes of `beta` and `gamma` are `inputs.shape[begin_params_axis:]`, 2234 and this part of the inputs' shape must be fully defined. 2235 2236 Args: 2237 inputs: A tensor having rank `R`. The normalization is performed over 2238 axes `begin_norm_axis ... R - 1` and centering and scaling parameters 2239 are calculated over `begin_params_axis ... R - 1`. 2240 center: If True, add offset of `beta` to normalized tensor. If False, `beta` 2241 is ignored. 2242 scale: If True, multiply by `gamma`. If False, `gamma` is 2243 not used. When the next layer is linear (also e.g. 
`nn.relu`), this can be 2244 disabled since the scaling can be done by the next layer. 2245 activation_fn: Activation function, default set to None to skip it and 2246 maintain a linear activation. 2247 reuse: Whether or not the layer and its variables should be reused. To be 2248 able to reuse the layer scope must be given. 2249 variables_collections: Optional collections for the variables. 2250 outputs_collections: Collections to add the outputs. 2251 trainable: If `True` also add variables to the graph collection 2252 `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). 2253 begin_norm_axis: The first normalization dimension: normalization will be 2254 performed along dimensions `begin_norm_axis : rank(inputs)` 2255 begin_params_axis: The first parameter (beta, gamma) dimension: scale 2256 and centering parameters will have dimensions 2257 `begin_params_axis : rank(inputs)` and will be broadcast with the 2258 normalized inputs accordingly. 2259 scope: Optional scope for `variable_scope`. 2260 2261 Returns: 2262 A `Tensor` representing the output of the operation, having the same 2263 shape and dtype as `inputs`. 2264 2265 Raises: 2266 ValueError: If the rank of `inputs` is not known at graph build time, 2267 or if `inputs.shape[begin_params_axis:]` is not fully defined at 2268 graph build time. 2269 """ 2270 with variable_scope.variable_scope( 2271 scope, 'LayerNorm', [inputs], reuse=reuse) as sc: 2272 inputs = ops.convert_to_tensor(inputs) 2273 inputs_shape = inputs.shape 2274 inputs_rank = inputs_shape.ndims 2275 if inputs_rank is None: 2276 raise ValueError('Inputs %s has undefined rank.' % inputs.name) 2277 dtype = inputs.dtype.base_dtype 2278 if begin_norm_axis < 0: 2279 begin_norm_axis = inputs_rank + begin_norm_axis 2280 if begin_params_axis >= inputs_rank or begin_norm_axis >= inputs_rank: 2281 raise ValueError('begin_params_axis (%d) and begin_norm_axis (%d) ' 2282 'must be < rank(inputs) (%d)' % 2283 (begin_params_axis, begin_norm_axis, inputs_rank)) 2284 params_shape = inputs_shape[begin_params_axis:] 2285 if not params_shape.is_fully_defined(): 2286 raise ValueError( 2287 'Inputs %s: shape(inputs)[%s:] is not fully defined: %s' % 2288 (inputs.name, begin_params_axis, inputs_shape)) 2289 # Allocate parameters for the beta and gamma of the normalization. 2290 beta, gamma = None, None 2291 if center: 2292 beta_collections = utils.get_variable_collections(variables_collections, 2293 'beta') 2294 beta = variables.model_variable( 2295 'beta', 2296 shape=params_shape, 2297 dtype=dtype, 2298 initializer=init_ops.zeros_initializer(), 2299 collections=beta_collections, 2300 trainable=trainable) 2301 if scale: 2302 gamma_collections = utils.get_variable_collections( 2303 variables_collections, 'gamma') 2304 gamma = variables.model_variable( 2305 'gamma', 2306 shape=params_shape, 2307 dtype=dtype, 2308 initializer=init_ops.ones_initializer(), 2309 collections=gamma_collections, 2310 trainable=trainable) 2311 # By default, compute the moments across all the dimensions except the one with index 0. 2312 norm_axes = list(range(begin_norm_axis, inputs_rank)) 2313 mean, variance = nn.moments(inputs, norm_axes, keep_dims=True) 2314 # Compute layer normalization using the batch_normalization function. 2315 # Note that epsilon must be increased for float16 due to the limited 2316 # representable range. 
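    # The smallest positive normal float16 value is about 6.1e-5, so an
    # epsilon of 1e-12 would underflow to zero; 1e-3 keeps the denominator
    # safely away from zero for float16 inputs.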
2317 variance_epsilon = 1e-12 if dtype != dtypes.float16 else 1e-3 2318 outputs = nn.batch_normalization( 2319 inputs, 2320 mean, 2321 variance, 2322 offset=beta, 2323 scale=gamma, 2324 variance_epsilon=variance_epsilon) 2325 outputs.set_shape(inputs_shape) 2326 if activation_fn is not None: 2327 outputs = activation_fn(outputs) 2328 return utils.collect_named_outputs(outputs_collections, sc.name, outputs) 2329 2330 2331@add_arg_scope 2332def images_to_sequence(inputs, 2333 data_format=DATA_FORMAT_NHWC, 2334 outputs_collections=None, 2335 scope=None): 2336 """Convert a batch of images into a batch of sequences. 2337 2338 Args: 2339 inputs: a (num_images, height, width, depth) tensor 2340 data_format: A string. `NHWC` (default) and `NCHW` are supported. 2341 outputs_collections: The collections to which the outputs are added. 2342 scope: Optional scope for name_scope. 2343 2344 Raises: 2345 ValueError: If `data_format` is not either NCHW or NHWC. 2346 2347 Returns: 2348 (width, num_images*height, depth) sequence tensor 2349 """ 2350 if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): 2351 raise ValueError('data_format has to be either NCHW or NHWC.') 2352 with ops.name_scope(scope, 'ImagesToSequence', [inputs]) as sc: 2353 inputs = ops.convert_to_tensor(inputs) 2354 df = ('channels_first' 2355 if data_format and data_format.startswith('NC') else 'channels_last') 2356 if df == 'channels_first': 2357 inputs = array_ops.transpose(inputs, [0, 2, 3, 1]) 2358 _, _, width, depth = inputs.get_shape().as_list() 2359 s = array_ops.shape(inputs) 2360 batch_size, height = s[0], s[1] 2361 transposed = array_ops.transpose(inputs, [2, 0, 1, 3]) 2362 outputs = array_ops.reshape(transposed, [width, batch_size * height, depth]) 2363 return utils.collect_named_outputs(outputs_collections, sc, outputs) 2364 2365 2366@add_arg_scope 2367def max_pool2d(inputs, 2368 kernel_size, 2369 stride=2, 2370 padding='VALID', 2371 data_format=DATA_FORMAT_NHWC, 2372 outputs_collections=None, 2373 scope=None): 2374 """Adds a 2D Max Pooling op. 2375 2376 It is assumed that the pooling is done per image but not in batch or channels. 2377 2378 Args: 2379 inputs: A 4-D tensor of shape `[batch_size, height, width, channels]` if 2380 `data_format` is `NHWC`, and `[batch_size, channels, height, width]` if 2381 `data_format` is `NCHW`. 2382 kernel_size: A list of length 2: [kernel_height, kernel_width] of the 2383 pooling kernel over which the op is computed. Can be an int if both 2384 values are the same. 2385 stride: A list of length 2: [stride_height, stride_width]. 2386 Can be an int if both strides are the same. Note that presently 2387 both strides must have the same value. 2388 padding: The padding method, either 'VALID' or 'SAME'. 2389 data_format: A string. `NHWC` (default) and `NCHW` are supported. 2390 outputs_collections: The collections to which the outputs are added. 2391 scope: Optional scope for name_scope. 2392 2393 Returns: 2394 A `Tensor` representing the results of the pooling operation. 2395 2396 Raises: 2397 ValueError: If `data_format` is neither `NHWC` nor `NCHW`. 
2398 ValueError: If 'kernel_size' is not a 2-D list 2399 """ 2400 if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): 2401 raise ValueError('data_format has to be either NCHW or NHWC.') 2402 with ops.name_scope(scope, 'MaxPool2D', [inputs]) as sc: 2403 inputs = ops.convert_to_tensor(inputs) 2404 df = ('channels_first' 2405 if data_format and data_format.startswith('NC') else 'channels_last') 2406 layer = pooling_layers.MaxPooling2D( 2407 pool_size=kernel_size, 2408 strides=stride, 2409 padding=padding, 2410 data_format=df, 2411 _scope=sc) 2412 outputs = layer.apply(inputs) 2413 return utils.collect_named_outputs(outputs_collections, sc, outputs) 2414 2415 2416@add_arg_scope 2417def max_pool3d(inputs, 2418 kernel_size, 2419 stride=2, 2420 padding='VALID', 2421 data_format=DATA_FORMAT_NDHWC, 2422 outputs_collections=None, 2423 scope=None): 2424 """Adds a 3D Max Pooling op. 2425 2426 It is assumed that the pooling is done per image but not in batch or channels. 2427 2428 Args: 2429 inputs: A 5-D tensor of shape `[batch_size, depth, height, width, channels]` 2430 if `data_format` is `NDHWC`, and `[batch_size, channels, depth, height, 2431 width]` if `data_format` is `NCDHW`. 2432 kernel_size: A list of length 3: [kernel_depth, kernel_height, kernel_width] 2433 of the pooling kernel over which the op is computed. Can be an int if both 2434 values are the same. 2435 stride: A list of length 3: [stride_depth, stride_height, stride_width]. 2436 Can be an int if both strides are the same. Note that presently 2437 both strides must have the same value. 2438 padding: The padding method, either 'VALID' or 'SAME'. 2439 data_format: A string. `NDHWC` (default) and `NCDHW` are supported. 2440 outputs_collections: The collections to which the outputs are added. 2441 scope: Optional scope for name_scope. 2442 2443 Returns: 2444 A `Tensor` representing the results of the pooling operation. 2445 2446 Raises: 2447 ValueError: If `data_format` is neither `NDHWC` nor `NCDHW`. 2448 ValueError: If 'kernel_size' is not a 3-D list 2449 """ 2450 if data_format not in (DATA_FORMAT_NCDHW, DATA_FORMAT_NDHWC): 2451 raise ValueError('data_format has to be either NCDHW or NDHWC.') 2452 with ops.name_scope(scope, 'MaxPool3D', [inputs]) as sc: 2453 inputs = ops.convert_to_tensor(inputs) 2454 df = ('channels_first' 2455 if data_format and data_format.startswith('NC') else 'channels_last') 2456 layer = pooling_layers.MaxPooling3D( 2457 pool_size=kernel_size, 2458 strides=stride, 2459 padding=padding, 2460 data_format=df, 2461 _scope=sc) 2462 outputs = layer.apply(inputs) 2463 return utils.collect_named_outputs(outputs_collections, sc, outputs) 2464 2465 2466@add_arg_scope 2467def pool(inputs, 2468 kernel_size, 2469 pooling_type, 2470 padding='VALID', 2471 data_format=None, 2472 dilation_rate=1, 2473 stride=1, 2474 outputs_collections=None, 2475 scope=None): 2476 # pylint: disable=line-too-long 2477 """Adds a pooling op. 2478 2479 2480 Args: 2481 inputs: Tensor of rank N+2, of shape 2482 `[batch_size] + input_spatial_shape + [num_channels]` if data_format does 2483 not start with "NC" (default), or 2484 `[batch_size, num_channels] + input_spatial_shape` if data_format starts 2485 with "NC". Pooling happens over the spatial dimensions only. 2486 kernel_size: Sequence of N ints >= 1. Can also be a single integer to 2487 specify the same value for all spatial dimensions. 2488 pooling_type: Specifies pooling operation, must be "AVG" or "MAX". 2489 padding: The padding algorithm, must be "SAME" or "VALID". 
2490 data_format: A string or None. Specifies whether the channel dimension of 2491 the `input` and output is the last dimension (default, or if `data_format` 2492 does not start with "NC"), or the second dimension (if `data_format` 2493 starts with "NC"). For N=1, the valid values are "NWC" (default) and 2494 "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". 2495 For N=3, the valid values are "NDHWC" (default) and "NCDHW". 2496 dilation_rate: Optional. Dilation rate. Sequence of N ints >= 1. Defaults 2497 to [1]*N. Can also be a single integer to specify the same value for all 2498 spatial dimensions. If any value of dilation_rate is > 1, then all values 2499 of stride must be 1. 2500 stride: Optional. Sequence of N ints >= 1. Defaults to [1]*N. Can also be 2501 a single integer to specify the same value for all spatial dimensions. If 2502 any value of stride is > 1, then all values of dilation_rate must be 1. 2503 outputs_collections: The collections to which the outputs are added. 2504 scope: Optional scope for name_scope. 2505 2506 Returns: 2507 A `Tensor` representing the results of the pooling operation. 2508 2509 Raises: 2510 ValueError: If arguments are invalid. 2511 2512 """ 2513 # pylint: enable=line-too-long 2514 with ops.name_scope(scope, '%s_pool' % (pooling_type.lower()), 2515 [inputs]) as sc: 2516 inputs = ops.convert_to_tensor(inputs) 2517 input_rank = inputs.get_shape().ndims 2518 if input_rank is None: 2519 raise ValueError('Rank of inputs must be known') 2520 if input_rank < 3: 2521 raise ValueError('Rank of inputs must be >= 3') 2522 num_spatial_dims = input_rank - 2 2523 output = nn.pool( 2524 input=inputs, 2525 window_shape=utils.n_positive_integers(num_spatial_dims, kernel_size), 2526 pooling_type=pooling_type, 2527 padding=padding, 2528 data_format=data_format, 2529 dilation_rate=utils.n_positive_integers(num_spatial_dims, 2530 dilation_rate), 2531 strides=utils.n_positive_integers(num_spatial_dims, stride), 2532 name=sc) 2533 return utils.collect_named_outputs(outputs_collections, sc, output) 2534 2535 2536@add_arg_scope 2537def one_hot_encoding(labels, 2538 num_classes, 2539 on_value=1.0, 2540 off_value=0.0, 2541 outputs_collections=None, 2542 scope=None): 2543 """Transform numeric labels into onehot_labels using `tf.one_hot`. 2544 2545 Args: 2546 labels: [batch_size] target labels. 2547 num_classes: Total number of classes. 2548 on_value: A scalar defining the on-value. 2549 off_value: A scalar defining the off-value. 2550 outputs_collections: Collection to add the outputs. 2551 scope: Optional scope for name_scope. 2552 2553 Returns: 2554 One-hot encoding of the labels. 2555 """ 2556 with ops.name_scope(scope, 'OneHotEncoding', [labels, num_classes]) as sc: 2557 labels = ops.convert_to_tensor(labels) 2558 if labels.dtype == dtypes.int32: 2559 labels = standard_ops.to_int64(labels) 2560 outputs = standard_ops.one_hot( 2561 labels, num_classes, on_value=on_value, off_value=off_value) 2562 return utils.collect_named_outputs(outputs_collections, sc, outputs) 2563 2564 2565def _apply_activation(y, activation_fn, output_collections): 2566 if activation_fn is not None: 2567 y = activation_fn(y) 2568 ops.add_to_collections( 2569 list(output_collections or []) + [ops.GraphKeys.ACTIVATIONS], y) 2570 return y 2571 2572 2573def repeat(inputs, repetitions, layer, *args, **kwargs): 2574 """Applies the same layer with the same arguments repeatedly. 
2575 2576 ```python 2577 y = repeat(x, 3, conv2d, 64, [3, 3], scope='conv1') 2578 # It is equivalent to: 2579 2580 x = conv2d(x, 64, [3, 3], scope='conv1/conv1_1') 2581 x = conv2d(x, 64, [3, 3], scope='conv1/conv1_2') 2582 y = conv2d(x, 64, [3, 3], scope='conv1/conv1_3') 2583 ``` 2584 2585 If the `scope` argument is not given in `kwargs`, it is set to 2586 `layer.__name__`, or `layer.func.__name__` (for `functools.partial` 2587 objects). If neither `__name__` nor `func.__name__` is available, the 2588 layers are called with `scope='stack'`. 2589 2590 Args: 2591 inputs: A `Tensor` suitable for layer. 2592 repetitions: Int, number of repetitions. 2593 layer: A layer with arguments `(inputs, *args, **kwargs)` 2594 *args: Extra args for the layer. 2595 **kwargs: Extra kwargs for the layer. 2596 2597 Returns: 2598 A tensor result of applying the layer, repetitions times. 2599 Raises: 2600 ValueError: If the op is unknown or wrong. 2601 """ 2602 scope = kwargs.pop('scope', None) 2603 with variable_scope.variable_scope(scope, 'Repeat', [inputs]): 2604 inputs = ops.convert_to_tensor(inputs) 2605 if scope is None: 2606 if hasattr(layer, '__name__'): 2607 scope = layer.__name__ 2608 elif hasattr(layer, 'func') and hasattr(layer.func, '__name__'): 2609 scope = layer.func.__name__ # In case layer is a functools.partial. 2610 else: 2611 scope = 'repeat' 2612 outputs = inputs 2613 for i in range(repetitions): 2614 kwargs['scope'] = scope + '_' + str(i + 1) 2615 outputs = layer(outputs, *args, **kwargs) 2616 return outputs 2617 2618 2619def _scale_gradient_shape(op): 2620 """Shape helper function for scale_gradient function below.""" 2621 return [op.inputs[0].shape] 2622 2623 2624def _scale_gradient_grad(op, grad): 2625 """Python gradient helper function for scale_gradient function below.""" 2626 return [grad * op.inputs[1], None] 2627 2628 2629@function.Defun( 2630 python_grad_func=_scale_gradient_grad, shape_func=_scale_gradient_shape) 2631def scale_gradient(inputs, gradient_multiplier): 2632 """Identity operation, but with the gradient multiplied by a tensor. 2633 2634 The TensorFlow gradient system will compute the gradient with respect to 2635 `inputs` as the product of the gradient with respect to the `output` 2636 multiplied by a specified `gradient_multiplier` tensor. If 2637 `gradient_multiplier` is equal to 1, then this results in the true gradient. 2638 Otherwise, it results in a scaled gradient. 2639 2640 This can be useful for adjusting the relative learning rate of different 2641 parameter tensors when performing gradient descent, and because this rescaling 2642 can be inserted at arbitrary locations within a graph, is often more 2643 convenient to apply than simply rescaling the final computed gradients. 2644 2645 Args: 2646 inputs: Tensor to be output. 2647 gradient_multiplier: Tensor by which to multiply the gradient with respect 2648 to `output` to compute the gradient with respect to `inputs`. Its shape 2649 must be broadcastable to the shape of `inputs`. 2650 2651 Returns: 2652 output Tensor, equal to `inputs`. 2653 """ 2654 # gradient_multiplier is implicitly saved by decorator, and only used for 2655 # gradient computation. 
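  # It reappears in `_scale_gradient_grad` as `op.inputs[1]`. Illustrative
  # usage (names are arbitrary):
  #   net = scale_gradient(net, constant_op.constant(0.1))
  # leaves the forward value of `net` unchanged but multiplies the gradient
  # flowing back into the subgraph that produced it by 0.1.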
2656 del gradient_multiplier 2657 2658 return inputs 2659 2660 2661@add_arg_scope 2662def separable_convolution2d( 2663 inputs, 2664 num_outputs, 2665 kernel_size, 2666 depth_multiplier=1, 2667 stride=1, 2668 padding='SAME', 2669 data_format=DATA_FORMAT_NHWC, 2670 rate=1, 2671 activation_fn=nn.relu, 2672 normalizer_fn=None, 2673 normalizer_params=None, 2674 weights_initializer=initializers.xavier_initializer(), 2675 pointwise_initializer=None, 2676 weights_regularizer=None, 2677 biases_initializer=init_ops.zeros_initializer(), 2678 biases_regularizer=None, 2679 reuse=None, 2680 variables_collections=None, 2681 outputs_collections=None, 2682 trainable=True, 2683 scope=None): 2684 """Adds a depth-separable 2D convolution with optional batch_norm layer. 2685 2686 This op first performs a depthwise convolution that acts separately on 2687 channels, creating a variable called `depthwise_weights`. If `num_outputs` 2688 is not None, it adds a pointwise convolution that mixes channels, creating a 2689 variable called `pointwise_weights`. Then, if `normalizer_fn` is None, 2690 it adds bias to the result, creating a variable called 'biases', otherwise, 2691 the `normalizer_fn` is applied. It finally applies an activation function 2692 to produce the end result. 2693 2694 Args: 2695 inputs: A tensor of size [batch_size, height, width, channels]. 2696 num_outputs: The number of pointwise convolution output filters. If is 2697 None, then we skip the pointwise convolution stage. 2698 kernel_size: A list of length 2: [kernel_height, kernel_width] of 2699 of the filters. Can be an int if both values are the same. 2700 depth_multiplier: The number of depthwise convolution output channels for 2701 each input channel. The total number of depthwise convolution output 2702 channels will be equal to `num_filters_in * depth_multiplier`. 2703 stride: A list of length 2: [stride_height, stride_width], specifying the 2704 depthwise convolution stride. Can be an int if both strides are the same. 2705 padding: One of 'VALID' or 'SAME'. 2706 data_format: A string. `NHWC` (default) and `NCHW` are supported. 2707 rate: A list of length 2: [rate_height, rate_width], specifying the dilation 2708 rates for atrous convolution. Can be an int if both rates are the same. 2709 If any value is larger than one, then both stride values need to be one. 2710 activation_fn: Activation function. The default value is a ReLU function. 2711 Explicitly set it to None to skip it and maintain a linear activation. 2712 normalizer_fn: Normalization function to use instead of `biases`. If 2713 `normalizer_fn` is provided then `biases_initializer` and 2714 `biases_regularizer` are ignored and `biases` are not created nor added. 2715 default set to None for no normalizer function 2716 normalizer_params: Normalization function parameters. 2717 weights_initializer: An initializer for the depthwise weights. 2718 pointwise_initializer: An initializer for the pointwise weights. 2719 default set to None, means use weights_initializer. 2720 weights_regularizer: Optional regularizer for the weights. 2721 biases_initializer: An initializer for the biases. If None skip biases. 2722 biases_regularizer: Optional regularizer for the biases. 2723 reuse: Whether or not the layer and its variables should be reused. To be 2724 able to reuse the layer scope must be given. 2725 variables_collections: Optional list of collections for all the variables or 2726 a dictionary containing a different list of collection per variable. 
2727 outputs_collections: Collection to add the outputs. 2728 trainable: Whether or not the variables should be trainable or not. 2729 scope: Optional scope for variable_scope. 2730 2731 Returns: 2732 A `Tensor` representing the output of the operation. 2733 Raises: 2734 ValueError: If `data_format` is invalid. 2735 """ 2736 if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): 2737 raise ValueError('data_format has to be either NCHW or NHWC.') 2738 layer_variable_getter = _build_variable_getter({ 2739 'bias': 'biases', 2740 'depthwise_kernel': 'depthwise_weights', 2741 'pointwise_kernel': 'pointwise_weights' 2742 }) 2743 2744 with variable_scope.variable_scope( 2745 scope, 2746 'SeparableConv2d', [inputs], 2747 reuse=reuse, 2748 custom_getter=layer_variable_getter) as sc: 2749 inputs = ops.convert_to_tensor(inputs) 2750 2751 if pointwise_initializer is None: 2752 pointwise_initializer = weights_initializer 2753 2754 df = ('channels_first' 2755 if data_format and data_format.startswith('NC') else 'channels_last') 2756 if num_outputs is not None: 2757 # Apply separable conv using the SeparableConvolution2D layer. 2758 layer = convolutional_layers.SeparableConvolution2D( 2759 filters=num_outputs, 2760 kernel_size=kernel_size, 2761 strides=stride, 2762 padding=padding, 2763 data_format=df, 2764 dilation_rate=utils.two_element_tuple(rate), 2765 activation=None, 2766 depth_multiplier=depth_multiplier, 2767 use_bias=not normalizer_fn and biases_initializer, 2768 depthwise_initializer=weights_initializer, 2769 pointwise_initializer=pointwise_initializer, 2770 bias_initializer=biases_initializer, 2771 depthwise_regularizer=weights_regularizer, 2772 pointwise_regularizer=weights_regularizer, 2773 bias_regularizer=biases_regularizer, 2774 activity_regularizer=None, 2775 trainable=trainable, 2776 name=sc.name, 2777 dtype=inputs.dtype.base_dtype, 2778 _scope=sc, 2779 _reuse=reuse) 2780 outputs = layer.apply(inputs) 2781 2782 # Add variables to collections. 2783 _add_variable_to_collections(layer.depthwise_kernel, 2784 variables_collections, 'weights') 2785 _add_variable_to_collections(layer.pointwise_kernel, 2786 variables_collections, 'weights') 2787 if layer.bias is not None: 2788 _add_variable_to_collections(layer.bias, variables_collections, 2789 'biases') 2790 2791 if normalizer_fn is not None: 2792 normalizer_params = normalizer_params or {} 2793 outputs = normalizer_fn(outputs, **normalizer_params) 2794 else: 2795 # Actually apply depthwise conv instead of separable conv. 
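      # With `num_outputs=None` the pointwise (1x1) stage is skipped: a
      # depthwise kernel of shape [kernel_h, kernel_w, in_channels,
      # depth_multiplier] is built directly and applied with
      # `nn.depthwise_conv2d`, yielding in_channels * depth_multiplier
      # output channels.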
2796 dtype = inputs.dtype.base_dtype 2797 kernel_h, kernel_w = utils.two_element_tuple(kernel_size) 2798 stride_h, stride_w = utils.two_element_tuple(stride) 2799 num_filters_in = utils.channel_dimension( 2800 inputs.get_shape(), df, min_rank=4) 2801 weights_collections = utils.get_variable_collections( 2802 variables_collections, 'weights') 2803 2804 depthwise_shape = [kernel_h, kernel_w, num_filters_in, depth_multiplier] 2805 depthwise_weights = variables.model_variable( 2806 'depthwise_weights', 2807 shape=depthwise_shape, 2808 dtype=dtype, 2809 initializer=weights_initializer, 2810 regularizer=weights_regularizer, 2811 trainable=trainable, 2812 collections=weights_collections) 2813 strides = [1, 1, stride_h, 2814 stride_w] if data_format.startswith('NC') else [ 2815 1, stride_h, stride_w, 1 2816 ] 2817 2818 outputs = nn.depthwise_conv2d( 2819 inputs, 2820 depthwise_weights, 2821 strides, 2822 padding, 2823 rate=utils.two_element_tuple(rate), 2824 data_format=data_format) 2825 num_outputs = depth_multiplier * num_filters_in 2826 2827 if normalizer_fn is not None: 2828 normalizer_params = normalizer_params or {} 2829 outputs = normalizer_fn(outputs, **normalizer_params) 2830 else: 2831 if biases_initializer is not None: 2832 biases_collections = utils.get_variable_collections( 2833 variables_collections, 'biases') 2834 biases = variables.model_variable( 2835 'biases', 2836 shape=[ 2837 num_outputs, 2838 ], 2839 dtype=dtype, 2840 initializer=biases_initializer, 2841 regularizer=biases_regularizer, 2842 trainable=trainable, 2843 collections=biases_collections) 2844 outputs = nn.bias_add(outputs, biases, data_format=data_format) 2845 2846 if activation_fn is not None: 2847 outputs = activation_fn(outputs) 2848 return utils.collect_named_outputs(outputs_collections, sc.name, outputs) 2849 2850 2851@add_arg_scope 2852def sequence_to_images(inputs, 2853 height, 2854 output_data_format='channels_last', 2855 outputs_collections=None, 2856 scope=None): 2857 """Convert a batch of sequences into a batch of images. 2858 2859 Args: 2860 inputs: (num_steps, num_batches, depth) sequence tensor 2861 height: the height of the images 2862 output_data_format: Format of output tensor. 2863 Currently supports `'channels_first'` and `'channels_last'`. 2864 outputs_collections: The collections to which the outputs are added. 2865 scope: Optional scope for name_scope. 2866 2867 Returns: 2868 A tensor representing the output of the operation. 2869 """ 2870 with ops.name_scope(scope, 'SequenceToImages', [inputs]) as sc: 2871 inputs = ops.convert_to_tensor(inputs) 2872 width, num_batches, depth = inputs.get_shape().as_list() 2873 if num_batches is None: 2874 num_batches = -1 2875 else: 2876 num_batches //= height 2877 reshaped = array_ops.reshape(inputs, 2878 [width, num_batches, height, depth]) 2879 if output_data_format == 'channels_first': 2880 outputs = array_ops.transpose(reshaped, [1, 3, 2, 0]) 2881 else: 2882 outputs = array_ops.transpose(reshaped, [1, 2, 0, 3]) 2883 return utils.collect_named_outputs(outputs_collections, sc, outputs) 2884 2885 2886@add_arg_scope 2887def softmax(logits, scope=None): 2888 """Performs softmax on Nth dimension of N-dimensional logit tensor. 2889 2890 For two-dimensional logits this reduces to tf.nn.softmax. The N-th dimension 2891 needs to have a specified number of elements (number of classes). 2892 2893 Args: 2894 logits: N-dimensional `Tensor` with logits, where N > 1. 2895 scope: Optional scope for variable_scope. 
2896 2897 Returns: 2898 A `Tensor` with the same shape and type as logits. 2899 """ 2900 # TODO(jrru): Add axis argument which defaults to last dimension. 2901 with variable_scope.variable_scope(scope, 'softmax', [logits]): 2902 num_logits = utils.last_dimension(logits.get_shape(), min_rank=2) 2903 logits_2d = array_ops.reshape(logits, [-1, num_logits]) 2904 predictions = nn.softmax(logits_2d) 2905 predictions = array_ops.reshape(predictions, array_ops.shape(logits)) 2906 if not context.executing_eagerly(): 2907 predictions.set_shape(logits.get_shape()) 2908 return predictions 2909 2910 2911@add_arg_scope 2912def spatial_softmax(features, 2913 temperature=None, 2914 name=None, 2915 variables_collections=None, 2916 trainable=True, 2917 data_format='NHWC'): 2918 """Computes the spatial softmax of a convolutional feature map. 2919 2920 First computes the softmax over the spatial extent of each channel of a 2921 convolutional feature map. Then computes the expected 2D position of the 2922 points of maximal activation for each channel, resulting in a set of 2923 feature keypoints [x1, y1, ... xN, yN] for all N channels. 2924 2925 Read more here: 2926 "Learning visual feature spaces for robotic manipulation with 2927 deep spatial autoencoders." Finn et al., http://arxiv.org/abs/1509.06113. 2928 2929 Args: 2930 features: A `Tensor` of size [batch_size, H, W, num_channels]; the 2931 convolutional feature map. 2932 temperature: Softmax temperature (optional). If None, a learnable 2933 temperature is created. 2934 name: A name for this operation (optional). 2935 variables_collections: Collections for the temperature variable. 2936 trainable: If `True` also add variables to the graph collection 2937 `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). 2938 data_format: A string. `NHWC` (default) and `NCHW` are supported. 2939 Returns: 2940 feature_keypoints: A `Tensor` with size [batch_size, num_channels * 2]; 2941 the expected 2D locations of each channel's feature keypoint (normalized 2942 to the range (-1,1)). The inner dimension is arranged as 2943 [x1, y1, ... xN, yN]. 2944 Raises: 2945 ValueError: If an unexpected `data_format` is specified. 2946 ValueError: If the num_channels dimension is unspecified. 2947 """ 2948 with variable_scope.variable_scope(name, 'spatial_softmax'): 2949 shape = array_ops.shape(features) 2950 static_shape = features.shape 2951 if data_format == DATA_FORMAT_NHWC: 2952 height, width, num_channels = shape[1], shape[2], static_shape[3] 2953 elif data_format == DATA_FORMAT_NCHW: 2954 num_channels, height, width = static_shape[1], shape[2], shape[3] 2955 else: 2956 raise ValueError('data_format has to be either NCHW or NHWC.') 2957 if tensor_shape.dimension_value(num_channels) is None: 2958 raise ValueError('The num_channels dimension of the inputs to ' 2959 '`spatial_softmax` should be defined. Found `None`.') 2960 2961 with ops.name_scope('spatial_softmax_op', 'spatial_softmax_op', [features]): 2962 # Create tensors for x and y coordinate values, scaled to range [-1, 1].
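      # pos_x/pos_y hold the normalized [-1, 1] coordinate of every one of the
      # height * width spatial positions. Each channel's activations are then
      # softmaxed over those positions (scaled by the temperature variable
      # below), and the expected coordinate under that distribution becomes
      # the channel's keypoint.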
2963 pos_x, pos_y = array_ops.meshgrid( 2964 math_ops.lin_space(-1., 1., num=height), 2965 math_ops.lin_space(-1., 1., num=width), 2966 indexing='ij') 2967 pos_x = array_ops.reshape(pos_x, [height * width]) 2968 pos_y = array_ops.reshape(pos_y, [height * width]) 2969 2970 if temperature is None: 2971 temp_initializer = init_ops.ones_initializer() 2972 else: 2973 temp_initializer = init_ops.constant_initializer(temperature) 2974 2975 if not trainable: 2976 temp_collections = None 2977 else: 2978 temp_collections = utils.get_variable_collections( 2979 variables_collections, 'temperature') 2980 2981 temperature = variables.model_variable( 2982 'temperature', 2983 shape=(), 2984 dtype=dtypes.float32, 2985 initializer=temp_initializer, 2986 collections=temp_collections, 2987 trainable=trainable) 2988 if data_format == 'NCHW': 2989 features = array_ops.reshape(features, [-1, height * width]) 2990 else: 2991 features = array_ops.reshape( 2992 array_ops.transpose(features, [0, 3, 1, 2]), [-1, height * width]) 2993 2994 softmax_attention = nn.softmax(features / temperature) 2995 expected_x = math_ops.reduce_sum( 2996 pos_x * softmax_attention, [1], keepdims=True) 2997 expected_y = math_ops.reduce_sum( 2998 pos_y * softmax_attention, [1], keepdims=True) 2999 expected_xy = array_ops.concat([expected_x, expected_y], 1) 3000 feature_keypoints = array_ops.reshape( 3001 expected_xy, 3002 [-1, tensor_shape.dimension_value(num_channels) * 2]) 3003 feature_keypoints.set_shape( 3004 [None, tensor_shape.dimension_value(num_channels) * 2]) 3005 return feature_keypoints 3006 3007 3008def stack(inputs, layer, stack_args, **kwargs): 3009 """Builds a stack of layers by applying layer repeatedly using stack_args. 3010 3011 `stack` allows you to repeatedly apply the same operation with different 3012 arguments `stack_args[i]`. For each application of the layer, `stack` creates 3013 a new scope appended with an increasing number. For example: 3014 3015 ```python 3016 y = stack(x, fully_connected, [32, 64, 128], scope='fc') 3017 # It is equivalent to: 3018 3019 x = fully_connected(x, 32, scope='fc/fc_1') 3020 x = fully_connected(x, 64, scope='fc/fc_2') 3021 y = fully_connected(x, 128, scope='fc/fc_3') 3022 ``` 3023 3024 If the `scope` argument is not given in `kwargs`, it is set to 3025 `layer.__name__`, or `layer.func.__name__` (for `functools.partial` 3026 objects). If neither `__name__` nor `func.__name__` is available, the 3027 layers are called with `scope='stack'`. 3028 3029 Args: 3030 inputs: A `Tensor` suitable for layer. 3031 layer: A layer with arguments `(inputs, *args, **kwargs)`. 3032 stack_args: A list/tuple of parameters for each call of layer. 3033 **kwargs: Extra kwargs for the layer. 3034 3035 Returns: 3036 A `Tensor` result of applying the stacked layers. 3037 3038 Raises: 3039 ValueError: If `stack_args` is not a list or a tuple. 3040 """ 3041 scope = kwargs.pop('scope', None) 3042 if not isinstance(stack_args, (list, tuple)): 3043 raise ValueError('stack_args needs to be a list or tuple') 3044 with variable_scope.variable_scope(scope, 'Stack', [inputs]): 3045 inputs = ops.convert_to_tensor(inputs) 3046 if scope is None: 3047 if hasattr(layer, '__name__'): 3048 scope = layer.__name__ 3049 elif hasattr(layer, 'func') and hasattr(layer.func, '__name__'): 3050 scope = layer.func.__name__ # In case layer is a functools.partial.
3051 else: 3052 scope = 'stack' 3053 outputs = inputs 3054 for i in range(len(stack_args)): 3055 kwargs['scope'] = scope + '_' + str(i + 1) 3056 layer_args = stack_args[i] 3057 if not isinstance(layer_args, (list, tuple)): 3058 layer_args = [layer_args] 3059 outputs = layer(outputs, *layer_args, **kwargs) 3060 return outputs 3061 3062 3063@add_arg_scope 3064def unit_norm(inputs, dim, epsilon=1e-7, scope=None): 3065 """Normalizes the given input across the specified dimension to unit length. 3066 3067 Note that the rank of `input` must be known. 3068 3069 Args: 3070 inputs: A `Tensor` of arbitrary size. 3071 dim: The dimension along which the input is normalized. 3072 epsilon: A small value added to the sum of squares to avoid dividing by zero. 3073 scope: Optional scope for variable_scope. 3074 3075 Returns: 3076 The normalized `Tensor`. 3077 3078 Raises: 3079 ValueError: If `dim` is negative or not smaller than the rank of `inputs`. 3080 """ 3081 with variable_scope.variable_scope(scope, 'UnitNorm', [inputs]): 3082 if not inputs.get_shape(): 3083 raise ValueError('The input rank must be known.') 3084 input_rank = len(inputs.get_shape().as_list()) 3085 if dim < 0 or dim >= input_rank: 3086 raise ValueError('dim must be non-negative and smaller than the input rank.') 3087 3088 lengths = math_ops.sqrt( 3089 epsilon + math_ops.reduce_sum(math_ops.square(inputs), dim, True)) 3090 multiples = [] 3091 if dim > 0: 3092 multiples.append(array_ops.ones([dim], dtypes.int32)) 3093 multiples.append( 3094 array_ops.strided_slice(array_ops.shape(inputs), [dim], [dim + 1])) 3095 if dim < (input_rank - 1): 3096 multiples.append(array_ops.ones([input_rank - 1 - dim], dtypes.int32)) 3097 multiples = array_ops.concat(multiples, 0) 3098 return math_ops.div(inputs, array_ops.tile(lengths, multiples)) 3099 3100 3101@add_arg_scope 3102def maxout(inputs, num_units, axis=-1, scope=None): 3103 """Adds a maxout op from https://arxiv.org/abs/1302.4389 3104 3105 "Maxout Networks" Ian J. Goodfellow, David Warde-Farley, Mehdi Mirza, Aaron 3106 Courville, 3107 Yoshua Bengio 3108 3109 Usually the operation is performed in the filter/channel dimension. This can 3110 also be 3111 used after fully-connected layers to reduce the number of features. 3112 3113 Arguments: 3114 inputs: A `Tensor` input. 3115 num_units: Specifies how many features will remain after maxout 3116 in the `axis` dimension (usually channel). 3117 This must be a factor of the number of features. 3118 axis: The dimension where max pooling will be performed. Default is the 3119 last dimension. 3120 scope: Optional scope for variable_scope. 3121 3122 Returns: 3123 A `Tensor` representing the results of the pooling operation. 3124 3125 Raises: 3126 ValueError: If the number of features is not a multiple of `num_units`.
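
  For example (an illustrative sketch; assuming `x` is a `Tensor` of shape
  `[batch, 6]`), `num_units=3` takes the max over consecutive pairs of
  features:

  ```python
  y = maxout(x, num_units=3)
  # If x = [[1., 4., 2., 3., 6., 5.]], then y = [[4., 3., 6.]] with shape
  # [batch, 3].
  ```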
3127 """ 3128 with variable_scope.variable_scope(scope, 'MaxOut', [inputs]): 3129 inputs = ops.convert_to_tensor(inputs) 3130 shape = inputs.get_shape().as_list() 3131 num_channels = shape[axis] 3132 if num_channels % num_units: 3133 raise ValueError('number of features({}) is not ' 3134 'a multiple of num_units({})'.format( 3135 num_channels, num_units)) 3136 shape[axis] = num_units 3137 shape += [num_channels // num_units] 3138 3139 # Dealing with batches with arbitrary sizes 3140 for i in range(len(shape)): 3141 if shape[i] is None: 3142 shape[i] = array_ops.shape(inputs)[i] 3143 outputs = math_ops.reduce_max( 3144 array_ops.reshape(inputs, shape), -1, keepdims=False) 3145 return outputs 3146 3147 3148def poincare_normalize(x, axis=1, epsilon=1e-5, name=None): 3149 """Project into the Poincare ball with norm <= 1.0 - epsilon. 3150 3151 https://en.wikipedia.org/wiki/Poincare_ball_model 3152 3153 Used in 3154 Poincare Embeddings for Learning Hierarchical Representations 3155 Maximilian Nickel, Douwe Kiela 3156 https://arxiv.org/pdf/1705.08039.pdf 3157 3158 For a 1-D tensor with `axis = 0`, computes 3159 3160 (x * (1 - epsilon)) / ||x|| if ||x|| > 1 - epsilon 3161 output = 3162 x otherwise 3163 3164 For `x` with more dimensions, independently normalizes each 1-D slice along 3165 dimension `axis`. 3166 3167 Args: 3168 x: A `Tensor`. 3169 axis: Axis along which to normalize. A scalar or a vector of 3170 integers. 3171 epsilon: A small deviation from the edge of the unit sphere for numerical 3172 stability. 3173 name: A name for this operation (optional). 3174 3175 Returns: 3176 A `Tensor` with the same shape as `x`. 3177 """ 3178 with ops.name_scope(name, 'poincare_normalize', [x]) as name: 3179 x = ops.convert_to_tensor(x, name='x') 3180 square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keepdims=True) 3181 x_inv_norm = math_ops.rsqrt(square_sum) 3182 x_inv_norm = math_ops.minimum((1. - epsilon) * x_inv_norm, 1.) 3183 return math_ops.multiply(x, x_inv_norm, name=name) 3184 3185 3186def legacy_fully_connected(x, 3187 num_output_units, 3188 activation_fn=None, 3189 weight_init=initializers.xavier_initializer(), 3190 bias_init=init_ops.zeros_initializer(), 3191 name=None, 3192 weight_collections=(ops.GraphKeys.WEIGHTS,), 3193 bias_collections=(ops.GraphKeys.BIASES,), 3194 output_collections=(ops.GraphKeys.ACTIVATIONS,), 3195 trainable=True, 3196 weight_regularizer=None, 3197 bias_regularizer=None): 3198 # pylint: disable=anomalous-backslash-in-string 3199 r"""Adds the parameters for a fully connected layer and returns the output. 3200 3201 A fully connected layer is generally defined as a matrix multiply: 3202 `y = f(w * x + b)` where `f` is given by `activation_fn`. If 3203 `activation_fn` is `None`, the result of `y = w * x + b` is 3204 returned. 3205 3206 If `x` has shape [\\(\text{dim}_0, \text{dim}_1, ..., \text{dim}_n\\)] 3207 with more than 2 dimensions (\\(n > 1\\)), then we repeat the matrix 3208 multiply along the first dimensions. The result r is a tensor of shape 3209 [\\(\text{dim}_0, ..., \text{dim}_{n-1},\\) `num_output_units`], 3210 where \\( r_{i_0, ..., i_{n-1}, k} = 3211 \sum_{0 \leq j < \text{dim}_n} x_{i_0, ... i_{n-1}, j} \cdot w_{j, k}\\). 3212 This is accomplished by reshaping `x` to 2-D 3213 [\\(\text{dim}_0 \cdot ... \cdot \text{dim}_{n-1}, \text{dim}_n\\)] 3214 before the matrix multiply and afterwards reshaping it to 3215 [\\(\text{dim}_0, ..., \text{dim}_{n-1},\\) `num_output_units`]. 3216 3217 This op creates `w` and optionally `b`. 
Bias (`b`) can be disabled by setting 3218 `bias_init` to `None`. 3219 3220 The variable creation is compatible with `tf.variable_scope` and so can be 3221 reused with `tf.variable_scope` or `tf.make_template`. 3222 3223 Most of the details of variable creation can be controlled by specifying the 3224 initializers (`weight_init` and `bias_init`) and in which collections to place 3225 the created variables (`weight_collections` and `bias_collections`; note that 3226 the variables are always added to the `VARIABLES` collection). The output of 3227 the layer can be placed in custom collections using `output_collections`. 3228 The collections arguments default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`, 3229 respectively. 3230 3231 A per layer regularization can be specified by setting `weight_regularizer` 3232 and `bias_regularizer`, which are applied to the weights and biases 3233 respectively, and whose output is added to the `REGULARIZATION_LOSSES` 3234 collection. 3235 3236 Args: 3237 x: The input `Tensor`. 3238 num_output_units: The size of the output. 3239 activation_fn: Activation function, default set to None to skip it and 3240 maintain a linear activation. 3241 weight_init: An optional weight initialization, defaults to 3242 `xavier_initializer`. 3243 bias_init: An initializer for the bias, defaults to 0. Set to `None` in 3244 order to disable bias. 3245 name: The name for this operation is used to name operations and to find 3246 variables. If specified it must be unique for this scope, otherwise a 3247 unique name starting with "fully_connected" will be created. See 3248 `tf.variable_scope` for details. 3249 weight_collections: List of graph collections to which weights are added. 3250 bias_collections: List of graph collections to which biases are added. 3251 output_collections: List of graph collections to which outputs are added. 3252 trainable: If `True` also add variables to the graph collection 3253 `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). 3254 weight_regularizer: A regularizer like the result of 3255 `l1_regularizer` or `l2_regularizer`. Used for weights. 3256 bias_regularizer: A regularizer like the result of 3257 `l1_regularizer` or `l2_regularizer`. Used for biases. 3258 3259 Returns: 3260 The output of the fully connected layer. 3261 3262 Raises: 3263 ValueError: If x has rank less than 2 or if its last dimension is not set. 
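
  For example (an illustrative sketch), for a rank-3 input only the last
  dimension is contracted against the weight matrix:

  ```python
  # x: a float `Tensor` of shape [batch, 5, 7]
  y = legacy_fully_connected(x, 3)
  # y has shape [batch, 5, 3]
  ```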
3264 """ 3265 with variable_scope.variable_scope(name, 'fully_connected', [x]): 3266 x = ops.convert_to_tensor(x) 3267 dims = x.get_shape().dims 3268 if dims is None: 3269 raise ValueError('dims of x must be known but is None') 3270 if len(dims) < 2: 3271 raise ValueError('rank of x must be at least 2 not: %d' % len(dims)) 3272 num_input_units = dims[-1].value 3273 if num_input_units is None: 3274 raise ValueError('last dimension of x must be known but is None') 3275 dtype = x.dtype.base_dtype 3276 3277 weight_collections = set( 3278 list(weight_collections or []) + [ops.GraphKeys.GLOBAL_VARIABLES]) 3279 w = variable_scope.get_variable( 3280 'weights', 3281 shape=[num_input_units, num_output_units], 3282 dtype=dtype, 3283 initializer=weight_init, 3284 collections=weight_collections, 3285 regularizer=weight_regularizer, 3286 trainable=trainable) 3287 x_2_dim = x if len(dims) <= 2 else array_ops.reshape( 3288 x, [-1, num_input_units]) 3289 y = standard_ops.matmul(x_2_dim, w) 3290 3291 if bias_init is not None: 3292 bias_collections = set( 3293 list(bias_collections or []) + [ops.GraphKeys.GLOBAL_VARIABLES]) 3294 b = variable_scope.get_variable( 3295 'bias', 3296 shape=[num_output_units], 3297 dtype=dtype, 3298 initializer=bias_init, 3299 collections=bias_collections, 3300 regularizer=bias_regularizer, 3301 trainable=trainable) 3302 3303 y = nn.bias_add(y, b) 3304 3305 if len(dims) > 2: 3306 out_shape = array_ops.unstack(array_ops.shape(x)) 3307 out_shape[-1] = num_output_units 3308 3309 y = array_ops.reshape(y, array_ops.stack(out_shape)) 3310 3311 static_shape = x.get_shape().as_list() 3312 static_shape[-1] = num_output_units 3313 y.set_shape(static_shape) 3314 3315 return _apply_activation(y, activation_fn, output_collections) 3316 3317 3318# TODO(eiderm): Verify and fix autocomplete in colab (also relu6). 3319# Simple aliases which remove the activation_fn parameter. 3320elu = functools.partial(fully_connected, activation_fn=nn.elu) 3321legacy_relu = functools.partial(legacy_fully_connected, activation_fn=nn.relu) 3322legacy_linear = functools.partial(legacy_fully_connected, activation_fn=None) 3323relu = functools.partial(fully_connected, activation_fn=nn.relu) 3324relu6 = functools.partial(fully_connected, activation_fn=nn.relu6) 3325linear = functools.partial(fully_connected, activation_fn=None) 3326 3327# Simple alias. 3328conv1d = convolution1d 3329conv2d = convolution2d 3330conv3d = convolution3d 3331conv2d_transpose = convolution2d_transpose 3332conv3d_transpose = convolution3d_transpose 3333conv2d_in_plane = convolution2d_in_plane 3334separable_conv2d = separable_convolution2d 3335