# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Wrappers for primitive Neural Net (NN) Operations."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import numbers
import os

import numpy as np

from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.python.ops.gen_nn_ops import *
# pylint: enable=wildcard-import
from tensorflow.python.platform import device_context
from tensorflow.python.util import deprecation
from tensorflow.python.util.compat import collections_abc
from tensorflow.python.util.deprecation import deprecated_args
from tensorflow.python.util.deprecation import deprecated_argument_lookup

from tensorflow.python.util.tf_export import tf_export

# Aliases for some automatically-generated names.
local_response_normalization = gen_nn_ops.lrn

# pylint: disable=protected-access


def _get_sequence(value, n, channel_index, name):
  """Formats a value input for gen_nn_ops."""
  if value is None:
    value = [1]
  elif not isinstance(value, collections_abc.Sized):
    value = [value]

  current_n = len(value)
  if current_n == n + 2:
    return value
  elif current_n == 1:
    value = list((value[0],) * n)
  elif current_n == n:
    value = list(value)
  else:
    raise ValueError("{} should be of length 1, {} or {} but was {}".format(
        name, n, n + 2, current_n))

  if channel_index == 1:
    return [1, 1] + value
  else:
    return [1] + value + [1]
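

# A minimal sketch (not part of the original module) of how `_get_sequence`
# normalizes stride/dilation arguments into the rank-4 form that gen_nn_ops
# kernels expect; the `_example_*` helpers in this file are illustrative only.
def _example_get_sequence():
  """Illustrative only; never called by library code."""
  # Scalar stride 2 for a 2-D op, channels-last (channel_index == 3).
  assert _get_sequence(2, 2, 3, "strides") == [1, 2, 2, 1]
  # Same stride, channels-first (channel_index == 1).
  assert _get_sequence(2, 2, 1, "strides") == [1, 1, 2, 2]
  # None defaults to all-ones.
  assert _get_sequence(None, 2, 3, "strides") == [1, 1, 1, 1]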


def _non_atrous_convolution(
    input,  # pylint: disable=redefined-builtin
    filter,  # pylint: disable=redefined-builtin
    padding,
    data_format=None,  # pylint: disable=redefined-builtin
    strides=None,
    name=None):
  """Computes sums of N-D convolutions (actually cross-correlation).

  It is required that 1 <= N <= 3.

  This is used to implement the more generic `convolution` function, which
  extends the interface of this function with a `dilation_rate` parameter.

  Args:
    input: Rank N+2 tensor of type T of shape
      `[batch_size] + input_spatial_shape + [in_channels]` if `data_format`
      does not start with `"NC"`, or
      `[batch_size, in_channels] + input_spatial_shape` if `data_format` starts
      with `"NC"`.
    filter: Rank N+2 tensor of type T of shape
      `filter_spatial_shape + [in_channels, out_channels]`. Rank of either
      `input` or `filter` must be known.
    padding: Padding method to use, must be either "VALID" or "SAME".
    data_format: A string or None. Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if
      `data_format` does not start with "NC"), or the second dimension (if
      `data_format` starts with "NC"). For N=1, the valid values are "NWC"
      (default) and "NCW". For N=2, the valid values are "NHWC" (default) and
      "NCHW". For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    strides: Sequence of N positive integers, defaults to `[1] * N`.
    name: Name prefix to use.

  Returns:
    Rank N+2 tensor of type T of shape
    `[batch_size] + output_spatial_shape + [out_channels]`, where
    if padding == "SAME":
      output_spatial_shape = input_spatial_shape
    if padding == "VALID":
      output_spatial_shape = input_spatial_shape - filter_spatial_shape + 1.

  Raises:
    ValueError: if ranks are incompatible.

  """
  with ops.name_scope(name, "non_atrous_convolution",
                      [input, filter]) as scope:
    input = ops.convert_to_tensor(input, name="input")  # pylint: disable=redefined-builtin
    input_shape = input.get_shape()
    filter = ops.convert_to_tensor(filter, name="filter")  # pylint: disable=redefined-builtin
    filter_shape = filter.get_shape()
    op = _NonAtrousConvolution(
        input_shape,
        filter_shape=filter_shape,
        padding=padding,
        data_format=data_format,
        strides=strides,
        name=scope)
    return op(input, filter)
159 """ 160 161 def __init__( 162 self, 163 input_shape, 164 filter_shape, # pylint: disable=redefined-builtin 165 padding, 166 data_format=None, 167 strides=None, 168 name=None): 169 filter_shape = filter_shape.with_rank(input_shape.ndims) 170 self.padding = padding 171 self.name = name 172 input_shape = input_shape.with_rank(filter_shape.ndims) 173 if input_shape.ndims is None: 174 raise ValueError("Rank of convolution must be known") 175 if input_shape.ndims < 3 or input_shape.ndims > 5: 176 raise ValueError( 177 "`input` and `filter` must have rank at least 3 and at most 5") 178 conv_dims = input_shape.ndims - 2 179 if strides is None: 180 strides = [1] * conv_dims 181 elif len(strides) != conv_dims: 182 raise ValueError("len(strides)=%d, but should be %d" % (len(strides), 183 conv_dims)) 184 if conv_dims == 1: 185 # conv1d uses the 2-d data format names 186 if data_format is None: 187 data_format = "NWC" 188 elif data_format not in {"NCW", "NWC", "NCHW", "NHWC"}: 189 raise ValueError("data_format must be \"NWC\" or \"NCW\".") 190 self.strides = strides[0] 191 self.data_format = data_format 192 self.conv_op = self._conv1d 193 elif conv_dims == 2: 194 if data_format is None or data_format == "NHWC": 195 data_format = "NHWC" 196 strides = [1] + list(strides) + [1] 197 elif data_format == "NCHW": 198 strides = [1, 1] + list(strides) 199 else: 200 raise ValueError("data_format must be \"NHWC\" or \"NCHW\".") 201 self.strides = strides 202 self.data_format = data_format 203 self.conv_op = conv2d 204 elif conv_dims == 3: 205 if data_format is None or data_format == "NDHWC": 206 strides = [1] + list(strides) + [1] 207 elif data_format == "NCDHW": 208 strides = [1, 1] + list(strides) 209 else: 210 raise ValueError("data_format must be \"NDHWC\" or \"NCDHW\". Have: %s" 211 % data_format) 212 self.strides = strides 213 self.data_format = data_format 214 self.conv_op = gen_nn_ops.conv3d 215 216 # Note that we need this adapter since argument names for conv1d don't match 217 # those for gen_nn_ops.conv2d and gen_nn_ops.conv3d. 218 # pylint: disable=redefined-builtin 219 def _conv1d(self, input, filter, strides, padding, data_format, name): 220 return conv1d( 221 value=input, 222 filters=filter, 223 stride=strides, 224 padding=padding, 225 data_format=data_format, 226 name=name) 227 228 # pylint: enable=redefined-builtin 229 230 def __call__(self, inp, filter): # pylint: disable=redefined-builtin 231 return self.conv_op( 232 input=inp, 233 filter=filter, 234 strides=self.strides, 235 padding=self.padding, 236 data_format=self.data_format, 237 name=self.name) 238 239 240@tf_export("nn.dilation2d", v1=[]) 241def dilation2d_v2( 242 input, # pylint: disable=redefined-builtin 243 filters, # pylint: disable=redefined-builtin 244 strides, 245 padding, 246 data_format, 247 dilations, 248 name=None): 249 """Computes the grayscale dilation of 4-D `input` and 3-D `filters` tensors. 250 251 The `input` tensor has shape `[batch, in_height, in_width, depth]` and the 252 `filters` tensor has shape `[filter_height, filter_width, depth]`, i.e., each 253 input channel is processed independently of the others with its own 254 structuring function. The `output` tensor has shape 255 `[batch, out_height, out_width, depth]`. The spatial dimensions of the output 256 tensor depend on the `padding` algorithm. We currently only support the 257 default "NHWC" `data_format`. 


@tf_export("nn.dilation2d", v1=[])
def dilation2d_v2(
    input,  # pylint: disable=redefined-builtin
    filters,  # pylint: disable=redefined-builtin
    strides,
    padding,
    data_format,
    dilations,
    name=None):
  """Computes the grayscale dilation of 4-D `input` and 3-D `filters` tensors.

  The `input` tensor has shape `[batch, in_height, in_width, depth]` and the
  `filters` tensor has shape `[filter_height, filter_width, depth]`, i.e., each
  input channel is processed independently of the others with its own
  structuring function. The `output` tensor has shape
  `[batch, out_height, out_width, depth]`. The spatial dimensions of the output
  tensor depend on the `padding` algorithm. We currently only support the
  default "NHWC" `data_format`.

  In detail, the grayscale morphological 2-D dilation is the max-sum
  correlation (for consistency with `conv2d`, we use unmirrored filters):

      output[b, y, x, c] =
          max_{dy, dx} input[b,
                             strides[1] * y + rates[1] * dy,
                             strides[2] * x + rates[2] * dx,
                             c] +
                       filters[dy, dx, c]

  Max-pooling is a special case when the filter has size equal to the pooling
  kernel size and contains all zeros.

  Note on duality: The dilation of `input` by the `filters` is equal to the
  negation of the erosion of `-input` by the reflected `filters`.

  Args:
    input: A `Tensor`. Must be one of the following types: `float32`,
      `float64`, `int32`, `uint8`, `int16`, `int8`, `int64`, `bfloat16`,
      `uint16`, `half`, `uint32`, `uint64`.
      4-D with shape `[batch, in_height, in_width, depth]`.
    filters: A `Tensor`. Must have the same type as `input`.
      3-D with shape `[filter_height, filter_width, depth]`.
    strides: A list of `ints` that has length `>= 4`.
      The stride of the sliding window for each dimension of the input
      tensor. Must be: `[1, stride_height, stride_width, 1]`.
    padding: A `string` from: `"SAME", "VALID"`.
      The type of padding algorithm to use.
    data_format: A `string`, only `"NHWC"` is currently supported.
    dilations: A list of `ints` that has length `>= 4`.
      The input stride for atrous morphological dilation. Must be:
      `[1, rate_height, rate_width, 1]`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `input`.
  """
  if data_format != "NHWC":
    raise ValueError("Data formats other than NHWC are not yet supported")

  return gen_nn_ops.dilation2d(input=input,
                               filter=filters,
                               strides=strides,
                               rates=dilations,
                               padding=padding,
                               name=name)
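

# Hedged usage sketch for `dilation2d_v2` (shapes and values are assumptions,
# not taken from this module): an all-zero structuring element makes the
# dilation behave like a same-size max-pool, per the "Max-pooling is a special
# case" note in the docstring above.
def _example_dilation2d():
  """Illustrative only; never called by library code."""
  image = constant_op.constant(np.random.rand(1, 5, 5, 1).astype(np.float32))
  zero_filter = array_ops.zeros([3, 3, 1], dtype=dtypes.float32)
  return dilation2d_v2(
      image, zero_filter, strides=[1, 1, 1, 1], padding="SAME",
      data_format="NHWC", dilations=[1, 1, 1, 1])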


@tf_export(v1=["nn.dilation2d"])
def dilation2d_v1(  # pylint: disable=missing-docstring
    input,  # pylint: disable=redefined-builtin
    filter=None,  # pylint: disable=redefined-builtin
    strides=None,
    rates=None,
    padding=None,
    name=None,
    filters=None,
    dilations=None):
  filter = deprecated_argument_lookup("filters", filters, "filter", filter)
  rates = deprecated_argument_lookup("dilations", dilations, "rates", rates)
  return gen_nn_ops.dilation2d(input, filter, strides, rates, padding, name)


dilation2d_v1.__doc__ = gen_nn_ops.dilation2d.__doc__


@tf_export("nn.with_space_to_batch")
def with_space_to_batch(
    input,  # pylint: disable=redefined-builtin
    dilation_rate,
    padding,
    op,
    filter_shape=None,
    spatial_dims=None,
    data_format=None):
  """Performs `op` on the space-to-batch representation of `input`.

  This has the effect of transforming sliding window operations into the
  corresponding "atrous" operation in which the input is sampled at the
  specified `dilation_rate`.

  In the special case that `dilation_rate` is uniformly 1, this simply returns:

    op(input, num_spatial_dims, padding)

  Otherwise, it returns:

    batch_to_space_nd(
        op(space_to_batch_nd(input, adjusted_dilation_rate, adjusted_paddings),
           num_spatial_dims,
           "VALID"),
        adjusted_dilation_rate,
        adjusted_crops)

  where:

    adjusted_dilation_rate is an int64 tensor of shape [max(spatial_dims)],
    adjusted_{paddings,crops} are int64 tensors of shape [max(spatial_dims), 2]

  defined as follows:

  We first define two int64 tensors `paddings` and `crops` of shape
  `[num_spatial_dims, 2]` based on the value of `padding` and the spatial
  dimensions of the `input`:

  If `padding = "VALID"`, then:

    paddings, crops = required_space_to_batch_paddings(
      input_shape[spatial_dims],
      dilation_rate)

  If `padding = "SAME"`, then:

    dilated_filter_shape =
      filter_shape + (filter_shape - 1) * (dilation_rate - 1)

    paddings, crops = required_space_to_batch_paddings(
      input_shape[spatial_dims],
      dilation_rate,
      [(dilated_filter_shape - 1) // 2,
       dilated_filter_shape - 1 - (dilated_filter_shape - 1) // 2])

  Because `space_to_batch_nd` and `batch_to_space_nd` assume that the spatial
  dimensions are contiguous starting at the second dimension, but the specified
  `spatial_dims` may not be, we must adjust `dilation_rate`, `paddings` and
  `crops` in order to be usable with these operations.  For a given dimension,
  if the block size is 1, and both the starting and ending padding and crop
  amounts are 0, then space_to_batch_nd effectively leaves that dimension
  alone, which is what is needed for dimensions not part of `spatial_dims`.
  Furthermore, `space_to_batch_nd` and `batch_to_space_nd` handle this case
  efficiently for any number of leading and trailing dimensions.

  For 0 <= i < len(spatial_dims), we assign:

    adjusted_dilation_rate[spatial_dims[i] - 1] = dilation_rate[i]
    adjusted_paddings[spatial_dims[i] - 1, :] = paddings[i, :]
    adjusted_crops[spatial_dims[i] - 1, :] = crops[i, :]

  All unassigned values of `adjusted_dilation_rate` default to 1, while all
  unassigned values of `adjusted_paddings` and `adjusted_crops` default to 0.

  Note in the case that `dilation_rate` is not uniformly 1, specifying "VALID"
  padding is equivalent to specifying `padding = "SAME"` with a filter_shape of
  `[1]*N`.

  Advanced usage. Note the following optimization: A sequence of
  `with_space_to_batch` operations with identical (not uniformly 1)
  `dilation_rate` parameters and "VALID" padding

    net = with_space_to_batch(net, dilation_rate, "VALID", op_1)
    ...
    net = with_space_to_batch(net, dilation_rate, "VALID", op_k)

  can be combined into a single `with_space_to_batch` operation as follows:

    def combined_op(converted_input, num_spatial_dims, _):
      result = op_1(converted_input, num_spatial_dims, "VALID")
      ...
      result = op_k(result, num_spatial_dims, "VALID")

    net = with_space_to_batch(net, dilation_rate, "VALID", combined_op)

  This eliminates the overhead of `k-1` calls to `space_to_batch_nd` and
  `batch_to_space_nd`.

  Similarly, a sequence of `with_space_to_batch` operations with identical (not
  uniformly 1) `dilation_rate` parameters, "SAME" padding, and odd filter
  dimensions

    net = with_space_to_batch(net, dilation_rate, "SAME", op_1, filter_shape_1)
    ...
    net = with_space_to_batch(net, dilation_rate, "SAME", op_k, filter_shape_k)

  can be combined into a single `with_space_to_batch` operation as follows:

    def combined_op(converted_input, num_spatial_dims, _):
      result = op_1(converted_input, num_spatial_dims, "SAME")
      ...
      result = op_k(result, num_spatial_dims, "SAME")

    net = with_space_to_batch(net, dilation_rate, "VALID", combined_op)

  Args:
    input: Tensor of rank > max(spatial_dims).
    dilation_rate: int32 Tensor of *known* shape [num_spatial_dims].
    padding: str constant equal to "VALID" or "SAME".
    op: Function that maps (input, num_spatial_dims, padding) -> output.
    filter_shape: If padding = "SAME", specifies the shape of the convolution
      kernel/pooling window as an integer Tensor of shape [>=num_spatial_dims].
      If padding = "VALID", filter_shape is ignored and need not be specified.
    spatial_dims: Monotonically increasing sequence of `num_spatial_dims`
      integers (which are >= 1) specifying the spatial dimensions of `input`
      and output.  Defaults to: `range(1, num_spatial_dims+1)`.
    data_format: A string or None.  Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if
      `data_format` does not start with "NC"), or the second dimension (if
      `data_format` starts with "NC").  For N=1, the valid values are "NWC"
      (default) and "NCW".  For N=2, the valid values are "NHWC" (default) and
      "NCHW".  For N=3, the valid values are "NDHWC" (default) and "NCDHW".

  Returns:
    The output Tensor as described above, dimensions will vary based on the op
    provided.

  Raises:
    ValueError: if `padding` is invalid or the arguments are incompatible.
    ValueError: if `spatial_dims` are invalid.

  """
  input = ops.convert_to_tensor(input, name="input")  # pylint: disable=redefined-builtin
  input_shape = input.get_shape()

  def build_op(num_spatial_dims, padding):
    return lambda inp, _: op(inp, num_spatial_dims, padding)

  new_op = _WithSpaceToBatch(
      input_shape,
      dilation_rate,
      padding,
      build_op,
      filter_shape=filter_shape,
      spatial_dims=spatial_dims,
      data_format=data_format)
  return new_op(input, None)
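

# Hedged sketch of `with_space_to_batch` turning a plain stride-1 conv2d into
# a rate-2 atrous convolution (shapes are assumptions, not from this module).
# The wrapped `op` receives the space-to-batch representation, so running it
# with stride 1 here samples the original input every 2 pixels.
def _example_with_space_to_batch():
  """Illustrative only; never called by library code."""
  x = constant_op.constant(np.ones([1, 8, 8, 1], np.float32))
  w = constant_op.constant(np.ones([3, 3, 1, 1], np.float32))

  def conv_op(converted_input, unused_num_spatial_dims, padding):
    return gen_nn_ops.conv2d(converted_input, w, [1, 1, 1, 1], padding)

  return with_space_to_batch(
      x, dilation_rate=[2, 2], padding="SAME", op=conv_op, filter_shape=[3, 3])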


class _WithSpaceToBatch(object):
  """Helper class for with_space_to_batch.

  Note that this class assumes that shapes of input and filter passed to
  __call__ are compatible with input_shape and filter_shape passed to the
  constructor.

  Arguments:
    input_shape: static shape of input, i.e. input.get_shape().
    dilation_rate: see with_space_to_batch.
    padding: see with_space_to_batch.
    build_op: Function that maps (num_spatial_dims, paddings) -> (function that
      maps (input, filter) -> output).
    filter_shape: see with_space_to_batch.
    spatial_dims: see with_space_to_batch.
    data_format: see with_space_to_batch.
  """

  def __init__(self,
               input_shape,
               dilation_rate,
               padding,
               build_op,
               filter_shape=None,
               spatial_dims=None,
               data_format=None):
    """Helper class for _with_space_to_batch."""
    dilation_rate = ops.convert_to_tensor(
        dilation_rate, dtypes.int32, name="dilation_rate")
    try:
      rate_shape = dilation_rate.get_shape().with_rank(1)
    except ValueError:
      raise ValueError("rate must be rank 1")

    if not dilation_rate.get_shape().is_fully_defined():
      raise ValueError("rate must have known shape")

    num_spatial_dims = rate_shape.dims[0].value

    if data_format is not None and data_format.startswith("NC"):
      starting_spatial_dim = 2
    else:
      starting_spatial_dim = 1

    if spatial_dims is None:
      spatial_dims = range(starting_spatial_dim,
                           num_spatial_dims + starting_spatial_dim)
    orig_spatial_dims = list(spatial_dims)
    spatial_dims = sorted(set(int(x) for x in orig_spatial_dims))
    if spatial_dims != orig_spatial_dims or any(x < 1 for x in spatial_dims):
      raise ValueError(
          "spatial_dims must be a monotonically increasing sequence of "
          "positive integers")

    if data_format is not None and data_format.startswith("NC"):
      expected_input_rank = spatial_dims[-1]
    else:
      expected_input_rank = spatial_dims[-1] + 1

    try:
      input_shape.with_rank_at_least(expected_input_rank)
    except ValueError:
      raise ValueError(
          "input tensor must have rank at least %d" % (expected_input_rank))

    const_rate = tensor_util.constant_value(dilation_rate)
    rate_or_const_rate = dilation_rate
    if const_rate is not None:
      rate_or_const_rate = const_rate
      if np.any(const_rate < 1):
        raise ValueError("dilation_rate must be positive")
      if np.all(const_rate == 1):
        self.call = build_op(num_spatial_dims, padding)
        return

    # We have two padding contributions. The first is used for converting
    # "SAME" to "VALID". The second is required so that the height and width
    # of the zero-padded value tensor are multiples of rate.

    # Padding required to reduce to "VALID" convolution
    if padding == "SAME":
      if filter_shape is None:
        raise ValueError("filter_shape must be specified for SAME padding")
      filter_shape = ops.convert_to_tensor(filter_shape, name="filter_shape")
      const_filter_shape = tensor_util.constant_value(filter_shape)
      if const_filter_shape is not None:
        filter_shape = const_filter_shape
        self.base_paddings = _with_space_to_batch_base_paddings(
            const_filter_shape, num_spatial_dims, rate_or_const_rate)
      else:
        self.num_spatial_dims = num_spatial_dims
        self.rate_or_const_rate = rate_or_const_rate
        self.base_paddings = None
    elif padding == "VALID":
      self.base_paddings = np.zeros([num_spatial_dims, 2], np.int32)
    else:
      raise ValueError("Invalid padding method %r" % padding)

    self.input_shape = input_shape
    self.spatial_dims = spatial_dims
    self.dilation_rate = dilation_rate
    self.data_format = data_format
    self.op = build_op(num_spatial_dims, "VALID")
    self.call = self._with_space_to_batch_call

  def _with_space_to_batch_call(self, inp, filter):  # pylint: disable=redefined-builtin
    """Call functionality for with_space_to_batch."""
    # Handle input whose shape is unknown during graph creation.
    input_spatial_shape = None
    input_shape = self.input_shape
    spatial_dims = self.spatial_dims
    if input_shape.ndims is not None:
      input_shape_list = input_shape.as_list()
      input_spatial_shape = [input_shape_list[i] for i in spatial_dims]
    if input_spatial_shape is None or None in input_spatial_shape:
      input_shape_tensor = array_ops.shape(inp)
      input_spatial_shape = array_ops.stack(
          [input_shape_tensor[i] for i in spatial_dims])

    base_paddings = self.base_paddings
    if base_paddings is None:
      # base_paddings could not be computed at build time since static filter
      # shape was not fully defined.
      filter_shape = array_ops.shape(filter)
      base_paddings = _with_space_to_batch_base_paddings(
          filter_shape, self.num_spatial_dims, self.rate_or_const_rate)
    paddings, crops = array_ops.required_space_to_batch_paddings(
        input_shape=input_spatial_shape,
        base_paddings=base_paddings,
        block_shape=self.dilation_rate)

    dilation_rate = _with_space_to_batch_adjust(self.dilation_rate, 1,
                                                spatial_dims)
    paddings = _with_space_to_batch_adjust(paddings, 0, spatial_dims)
    crops = _with_space_to_batch_adjust(crops, 0, spatial_dims)
    input_converted = array_ops.space_to_batch_nd(
        input=inp, block_shape=dilation_rate, paddings=paddings)

    result = self.op(input_converted, filter)

    result_converted = array_ops.batch_to_space_nd(
        input=result, block_shape=dilation_rate, crops=crops)

    # Recover channel information for output shape if channels are not last.
    if self.data_format is not None and self.data_format.startswith("NC"):
      if not result_converted.shape.dims[1].value and filter is not None:
        output_shape = result_converted.shape.as_list()
        output_shape[1] = filter.shape[-1]
        result_converted.set_shape(output_shape)

    return result_converted

  def __call__(self, inp, filter):  # pylint: disable=redefined-builtin
    return self.call(inp, filter)


def _with_space_to_batch_base_paddings(filter_shape, num_spatial_dims,
                                       rate_or_const_rate):
  """Helper function to compute base_paddings."""
  # Spatial dimensions of the filters and the upsampled filters in which we
  # introduce (rate - 1) zeros between consecutive filter values.
  filter_spatial_shape = filter_shape[:num_spatial_dims]
  dilated_filter_spatial_shape = (
      filter_spatial_shape + (filter_spatial_shape - 1) *
      (rate_or_const_rate - 1))
  pad_extra_shape = dilated_filter_spatial_shape - 1

  # When full_padding_shape is odd, we pad more at end, following the same
  # convention as conv2d.
  pad_extra_start = pad_extra_shape // 2
  pad_extra_end = pad_extra_shape - pad_extra_start
  base_paddings = array_ops.stack(
      [[pad_extra_start[i], pad_extra_end[i]]
       for i in range(num_spatial_dims)])
  return base_paddings
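

# Worked numeric check for `_with_space_to_batch_base_paddings` (illustrative;
# the values are assumptions): a 3x3 filter at rate 2 has a dilated extent of
# 3 + (3 - 1) * (2 - 1) = 5, so 5 - 1 = 4 extra elements are padded per
# spatial dimension, split evenly as [2, 2] (more at the end when odd).
def _example_base_paddings():
  """Illustrative only; never called by library code."""
  base_paddings = _with_space_to_batch_base_paddings(
      np.array([3, 3]), num_spatial_dims=2,
      rate_or_const_rate=np.array([2, 2]))
  return base_paddings  # Expected value: [[2, 2], [2, 2]].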


def _with_space_to_batch_adjust(orig, fill_value, spatial_dims):
  """Returns an `adjusted` version of `orig` based on `spatial_dims`.

  Tensor of the same type as `orig` and with shape
  `[max(spatial_dims), ...]` where:

    adjusted[spatial_dims[i] - 1, ...] = orig[i, ...]

  for 0 <= i < len(spatial_dims), and

    adjusted[j, ...] = fill_value

  for j != spatial_dims[i] - 1 for some i.

  If `orig` is a constant value, then the result will be a constant value.

  Args:
    orig: Tensor of rank > max(spatial_dims).
    fill_value: Numpy scalar (of same data type as `orig`) specifying the fill
      value for non-spatial dimensions.
    spatial_dims: See with_space_to_batch.

  Returns:
    `adjusted` tensor.
  """
  fill_dims = orig.get_shape().as_list()[1:]
  dtype = orig.dtype.as_numpy_dtype
  parts = []
  const_orig = tensor_util.constant_value(orig)
  const_or_orig = const_orig if const_orig is not None else orig
  prev_spatial_dim = 0
  i = 0
  while i < len(spatial_dims):
    start_i = i
    start_spatial_dim = spatial_dims[i]
    if start_spatial_dim > 1:
      # Fill in any gap from the previous spatial dimension (or dimension 1 if
      # this is the first spatial dimension) with `fill_value`.
      parts.append(
          np.full(
              [start_spatial_dim - 1 - prev_spatial_dim] + fill_dims,
              fill_value,
              dtype=dtype))
    # Find the largest value of i such that:
    #   [spatial_dims[start_i], ..., spatial_dims[i]]
    #     == [start_spatial_dim, ..., start_spatial_dim + i - start_i],
    # i.e. the end of a contiguous group of spatial dimensions.
    while (i + 1 < len(spatial_dims) and
           spatial_dims[i + 1] == spatial_dims[i] + 1):
      i += 1
    parts.append(const_or_orig[start_i:i + 1])
    prev_spatial_dim = spatial_dims[i]
    i += 1
  if const_orig is not None:
    return np.concatenate(parts)
  else:
    return array_ops.concat(parts, 0)
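

# Small constant-folding sketch for `_with_space_to_batch_adjust`
# (illustrative): with spatial_dims=[2, 3], index 0 of the adjusted result is
# filled with `fill_value` and the original entries land at indices 1 and 2.
def _example_adjust():
  """Illustrative only; never called by library code."""
  orig = constant_op.constant([7, 9], dtypes.int32)
  # Constant input, so the result constant-folds to the numpy array [1, 7, 9].
  return _with_space_to_batch_adjust(orig, 1, spatial_dims=[2, 3])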


def _get_strides_and_dilation_rate(num_spatial_dims, strides, dilation_rate):
  """Helper function for verifying strides and dilation_rate arguments.

  This is used by `convolution` and `pool`.

  Args:
    num_spatial_dims: int
    strides: Optional. List of N ints >= 1. Defaults to [1]*N. If any value
      of strides is > 1, then all values of dilation_rate must be 1.
    dilation_rate: Optional. List of N ints >= 1. Defaults to [1]*N. If any
      value of dilation_rate is > 1, then all values of strides must be 1.

  Returns:
    Normalized (strides, dilation_rate) as int32 numpy arrays of shape
    [num_spatial_dims].

  Raises:
    ValueError: if the parameters are invalid.
  """
  if dilation_rate is None:
    dilation_rate = [1] * num_spatial_dims
  elif len(dilation_rate) != num_spatial_dims:
    raise ValueError("len(dilation_rate)=%d but should be %d" %
                     (len(dilation_rate), num_spatial_dims))
  dilation_rate = np.array(dilation_rate, dtype=np.int32)
  if np.any(dilation_rate < 1):
    raise ValueError("all values of dilation_rate must be positive")

  if strides is None:
    strides = [1] * num_spatial_dims
  elif len(strides) != num_spatial_dims:
    raise ValueError("len(strides)=%d but should be %d" % (len(strides),
                                                           num_spatial_dims))
  strides = np.array(strides, dtype=np.int32)
  if np.any(strides < 1):
    raise ValueError("all values of strides must be positive")

  if np.any(strides > 1) and np.any(dilation_rate > 1):
    raise ValueError(
        "strides > 1 not supported in conjunction with dilation_rate > 1")
  return strides, dilation_rate
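

# Hedged sketch of the strides/dilation_rate normalization (illustrative):
def _example_strides_and_dilation_rate():
  """Illustrative only; never called by library code."""
  strides, dilation_rate = _get_strides_and_dilation_rate(
      2, strides=None, dilation_rate=[2, 2])
  # strides == [1, 1] and dilation_rate == [2, 2]; passing strides > 1
  # together with dilation_rate > 1 would raise ValueError instead.
  return strides, dilation_rate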


@tf_export(v1=["nn.convolution"])
def convolution(
    input,  # pylint: disable=redefined-builtin
    filter,  # pylint: disable=redefined-builtin
    padding,
    strides=None,
    dilation_rate=None,
    name=None,
    data_format=None,
    filters=None,
    dilations=None):
  """Computes sums of N-D convolutions (actually cross-correlation).

  This also supports either output striding via the optional `strides`
  parameter or atrous convolution (also known as convolution with holes or
  dilated convolution, based on the French word "trous" meaning holes in
  English) via the optional `dilation_rate` parameter.  Currently, however,
  output striding is not supported for atrous convolutions.

  Specifically, in the case that `data_format` does not start with "NC", given
  a rank (N+2) `input` Tensor of shape

    [num_batches,
     input_spatial_shape[0],
     ...,
     input_spatial_shape[N-1],
     num_input_channels],

  a rank (N+2) `filter` Tensor of shape

    [spatial_filter_shape[0],
     ...,
     spatial_filter_shape[N-1],
     num_input_channels,
     num_output_channels],

  an optional `dilation_rate` tensor of shape [N] (defaulting to [1]*N)
  specifying the filter upsampling/input downsampling rate, and an optional
  list of N `strides` (defaulting to [1]*N), this computes for each N-D
  spatial output position (x[0], ..., x[N-1]):

  ```
  output[b, x[0], ..., x[N-1], k] =
      sum_{z[0], ..., z[N-1], q}
          filter[z[0], ..., z[N-1], q, k] *
          padded_input[b,
                       x[0]*strides[0] + dilation_rate[0]*z[0],
                       ...,
                       x[N-1]*strides[N-1] + dilation_rate[N-1]*z[N-1],
                       q]
  ```
  where b is the index into the batch, k is the output channel number, q is
  the input channel number, and z is the N-D spatial offset within the filter.
  Here, `padded_input` is obtained by zero padding the input using an
  effective spatial filter shape of
  `(spatial_filter_shape-1) * dilation_rate + 1` and output striding `strides`
  as described in the
  [comment here](https://tensorflow.org/api_guides/python/nn#Convolution).

  In the case that `data_format` does start with `"NC"`, the `input` and
  output (but not the `filter`) are simply transposed as follows:

    convolution(input, data_format, **kwargs) =
      tf.transpose(convolution(tf.transpose(input, [0] + range(2,N+2) + [1]),
                               **kwargs),
                   [0, N+1] + range(1, N+1))

  It is required that 1 <= N <= 3.

  Args:
    input: An (N+2)-D `Tensor` of type `T`, of shape
      `[batch_size] + input_spatial_shape + [in_channels]` if data_format does
      not start with "NC" (default), or
      `[batch_size, in_channels] + input_spatial_shape` if data_format starts
      with "NC".
    filter: An (N+2)-D `Tensor` with the same type as `input` and shape
      `spatial_filter_shape + [in_channels, out_channels]`.
    padding: A string, either `"VALID"` or `"SAME"`. The padding algorithm.
    strides: Optional.  Sequence of N ints >= 1.  Specifies the output stride.
      Defaults to [1]*N.  If any value of strides is > 1, then all values of
      dilation_rate must be 1.
    dilation_rate: Optional.  Sequence of N ints >= 1.  Specifies the filter
      upsampling/input downsampling rate.  In the literature, the same
      parameter is sometimes called `input stride` or `dilation`.  The
      effective filter size used for the convolution will be
      `spatial_filter_shape + (spatial_filter_shape - 1) * (rate - 1)`,
      obtained by inserting (dilation_rate[i]-1) zeros between consecutive
      elements of the original filter in each spatial dimension i.  If any
      value of dilation_rate is > 1, then all values of strides must be 1.
    name: Optional name for the returned tensor.
    data_format: A string or None.  Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if
      `data_format` does not start with "NC"), or the second dimension (if
      `data_format` starts with "NC").  For N=1, the valid values are "NWC"
      (default) and "NCW".  For N=2, the valid values are "NHWC" (default) and
      "NCHW".  For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    filters: Alias of filter.
    dilations: Alias of dilation_rate.

  Returns:
    A `Tensor` with the same type as `input` of shape

        `[batch_size] + output_spatial_shape + [out_channels]`

    if data_format is None or does not start with "NC", or

        `[batch_size, out_channels] + output_spatial_shape`

    if data_format starts with "NC",
    where `output_spatial_shape` depends on the value of `padding`.

    If padding == "SAME":
      output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides[i])

    If padding == "VALID":
      output_spatial_shape[i] =
        ceil((input_spatial_shape[i] -
              (spatial_filter_shape[i]-1) * dilation_rate[i])
             / strides[i]).

  Raises:
    ValueError: If input/output depth does not match `filter` shape, if padding
      is other than `"VALID"` or `"SAME"`, or if data_format is invalid.

  """
  filter = deprecated_argument_lookup("filters", filters, "filter", filter)
  dilation_rate = deprecated_argument_lookup(
      "dilations", dilations, "dilation_rate", dilation_rate)
  return convolution_internal(
      input,
      filter,
      strides=strides,
      padding=padding,
      data_format=data_format,
      dilations=dilation_rate,
      name=name)
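

# Hedged usage sketch for `convolution` (shapes are assumptions, not from this
# module): a rate-2 dilated 2-D convolution in the default NHWC layout.
def _example_convolution():
  """Illustrative only; never called by library code."""
  x = constant_op.constant(np.ones([1, 10, 10, 3], np.float32))
  w = constant_op.constant(np.ones([3, 3, 3, 8], np.float32))
  # Per the "VALID" formula above: ceil((10 - (3 - 1) * 2) / 1) = 6, so the
  # output shape is [1, 6, 6, 8].
  return convolution(x, w, padding="VALID", dilation_rate=[2, 2])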
"dilations") 961 962 scopes = {1: "conv1d", 2: "Conv2D", 3: "Conv3D"} 963 if not call_from_convolution and device_context.enclosing_tpu_context( 964 ) is not None: 965 scope = scopes[n] 966 else: 967 scope = "convolution" 968 969 with ops.name_scope(name, scope, [input, filters]) as name: 970 conv_ops = {1: conv1d, 2: gen_nn_ops.conv2d, 3: gen_nn_ops.conv3d} 971 972 if device_context.enclosing_tpu_context() is not None or all( 973 i == 1 for i in dilations): 974 # fast path for TPU or if no dilation as gradient only supported on GPU 975 # for dilations 976 op = conv_ops[n] 977 return op( 978 input, 979 filters, 980 strides, 981 padding=padding, 982 data_format=data_format, 983 dilations=dilations, 984 name=name) 985 else: 986 if channel_index == 1: 987 strides = strides[2:] 988 dilations = dilations[2:] 989 else: 990 strides = strides[1:-1] 991 dilations = dilations[1:-1] 992 993 op = Convolution( 994 tensor_shape.as_shape(input.shape), 995 tensor_shape.as_shape(filters.shape), 996 padding, 997 strides=strides, 998 dilation_rate=dilations, 999 name=name, 1000 data_format=data_format) 1001 return op(input, filters) 1002 1003 1004class Convolution(object): 1005 """Helper class for convolution. 1006 1007 Note that this class assumes that shapes of input and filter passed to 1008 __call__ are compatible with input_shape and filter_shape passed to the 1009 constructor. 1010 1011 Arguments 1012 input_shape: static shape of input. i.e. input.get_shape(). 1013 filter_shape: static shape of the filter. i.e. filter.get_shape(). 1014 padding: see convolution. 1015 strides: see convolution. 1016 dilation_rate: see convolution. 1017 name: see convolution. 1018 data_format: see convolution. 1019 """ 1020 1021 def __init__(self, 1022 input_shape, 1023 filter_shape, 1024 padding, 1025 strides=None, 1026 dilation_rate=None, 1027 name=None, 1028 data_format=None): 1029 """Helper function for convolution.""" 1030 num_total_dims = filter_shape.ndims 1031 if num_total_dims is None: 1032 num_total_dims = input_shape.ndims 1033 if num_total_dims is None: 1034 raise ValueError("rank of input or filter must be known") 1035 1036 num_spatial_dims = num_total_dims - 2 1037 1038 try: 1039 input_shape.with_rank(num_spatial_dims + 2) 1040 except ValueError: 1041 raise ValueError( 1042 "input tensor must have rank %d" % (num_spatial_dims + 2)) 1043 1044 try: 1045 filter_shape.with_rank(num_spatial_dims + 2) 1046 except ValueError: 1047 raise ValueError( 1048 "filter tensor must have rank %d" % (num_spatial_dims + 2)) 1049 1050 if data_format is None or not data_format.startswith("NC"): 1051 input_channels_dim = tensor_shape.dimension_at_index( 1052 input_shape, num_spatial_dims + 1) 1053 spatial_dims = range(1, num_spatial_dims + 1) 1054 else: 1055 input_channels_dim = tensor_shape.dimension_at_index(input_shape, 1) 1056 spatial_dims = range(2, num_spatial_dims + 2) 1057 1058 if not input_channels_dim.is_compatible_with( 1059 filter_shape[num_spatial_dims]): 1060 raise ValueError( 1061 "number of input channels does not match corresponding dimension of " 1062 "filter, {} != {}".format(input_channels_dim, 1063 filter_shape[num_spatial_dims])) 1064 1065 strides, dilation_rate = _get_strides_and_dilation_rate( 1066 num_spatial_dims, strides, dilation_rate) 1067 1068 self.input_shape = input_shape 1069 self.filter_shape = filter_shape 1070 self.data_format = data_format 1071 self.strides = strides 1072 self.padding = padding 1073 self.name = name 1074 self.dilation_rate = dilation_rate 1075 self.conv_op = _WithSpaceToBatch( 


def convolution_internal(
    input,  # pylint: disable=redefined-builtin
    filters,
    strides=None,
    padding="VALID",
    data_format=None,
    dilations=None,
    name=None,
    call_from_convolution=True):
  """Internal function which performs rank agnostic convolution."""
  if isinstance(input.shape, tensor_shape.TensorShape) and \
      input.shape.rank is not None:
    n = len(input.shape) - 2
  elif not isinstance(input.shape, tensor_shape.TensorShape) and \
      input.shape is not None:
    n = len(input.shape) - 2
  elif isinstance(filters.shape, tensor_shape.TensorShape) and \
      filters.shape.rank is not None:
    n = len(filters.shape) - 2
  elif not isinstance(filters.shape, tensor_shape.TensorShape) and \
      filters.shape is not None:
    n = len(filters.shape) - 2
  else:
    raise ValueError("rank of input or filter must be known")

  if not 1 <= n <= 3:
    raise ValueError(
        "Input tensor must be of rank 3, 4 or 5 but was {}.".format(n + 2))

  if data_format is None:
    channel_index = n + 1
  else:
    channel_index = 1 if data_format.startswith("NC") else n + 1

  strides = _get_sequence(strides, n, channel_index, "strides")
  dilations = _get_sequence(dilations, n, channel_index, "dilations")

  scopes = {1: "conv1d", 2: "Conv2D", 3: "Conv3D"}
  if not call_from_convolution and device_context.enclosing_tpu_context(
  ) is not None:
    scope = scopes[n]
  else:
    scope = "convolution"

  with ops.name_scope(name, scope, [input, filters]) as name:
    conv_ops = {1: conv1d, 2: gen_nn_ops.conv2d, 3: gen_nn_ops.conv3d}

    if device_context.enclosing_tpu_context() is not None or all(
        i == 1 for i in dilations):
      # Fast path for TPU, or when there is no dilation (the gradient for
      # dilations is only supported on GPU).
      op = conv_ops[n]
      return op(
          input,
          filters,
          strides,
          padding=padding,
          data_format=data_format,
          dilations=dilations,
          name=name)
    else:
      if channel_index == 1:
        strides = strides[2:]
        dilations = dilations[2:]
      else:
        strides = strides[1:-1]
        dilations = dilations[1:-1]

      op = Convolution(
          tensor_shape.as_shape(input.shape),
          tensor_shape.as_shape(filters.shape),
          padding,
          strides=strides,
          dilation_rate=dilations,
          name=name,
          data_format=data_format)
      return op(input, filters)


class Convolution(object):
  """Helper class for convolution.

  Note that this class assumes that shapes of input and filter passed to
  __call__ are compatible with input_shape and filter_shape passed to the
  constructor.

  Arguments:
    input_shape: static shape of input, i.e. input.get_shape().
    filter_shape: static shape of the filter, i.e. filter.get_shape().
    padding: see convolution.
    strides: see convolution.
    dilation_rate: see convolution.
    name: see convolution.
    data_format: see convolution.
  """

  def __init__(self,
               input_shape,
               filter_shape,
               padding,
               strides=None,
               dilation_rate=None,
               name=None,
               data_format=None):
    """Helper function for convolution."""
    num_total_dims = filter_shape.ndims
    if num_total_dims is None:
      num_total_dims = input_shape.ndims
    if num_total_dims is None:
      raise ValueError("rank of input or filter must be known")

    num_spatial_dims = num_total_dims - 2

    try:
      input_shape.with_rank(num_spatial_dims + 2)
    except ValueError:
      raise ValueError(
          "input tensor must have rank %d" % (num_spatial_dims + 2))

    try:
      filter_shape.with_rank(num_spatial_dims + 2)
    except ValueError:
      raise ValueError(
          "filter tensor must have rank %d" % (num_spatial_dims + 2))

    if data_format is None or not data_format.startswith("NC"):
      input_channels_dim = tensor_shape.dimension_at_index(
          input_shape, num_spatial_dims + 1)
      spatial_dims = range(1, num_spatial_dims + 1)
    else:
      input_channels_dim = tensor_shape.dimension_at_index(input_shape, 1)
      spatial_dims = range(2, num_spatial_dims + 2)

    if not input_channels_dim.is_compatible_with(
        filter_shape[num_spatial_dims]):
      raise ValueError(
          "number of input channels does not match corresponding dimension of "
          "filter, {} != {}".format(input_channels_dim,
                                    filter_shape[num_spatial_dims]))

    strides, dilation_rate = _get_strides_and_dilation_rate(
        num_spatial_dims, strides, dilation_rate)

    self.input_shape = input_shape
    self.filter_shape = filter_shape
    self.data_format = data_format
    self.strides = strides
    self.padding = padding
    self.name = name
    self.dilation_rate = dilation_rate
    self.conv_op = _WithSpaceToBatch(
        input_shape,
        dilation_rate=dilation_rate,
        padding=padding,
        build_op=self._build_op,
        filter_shape=filter_shape,
        spatial_dims=spatial_dims,
        data_format=data_format)

  def _build_op(self, _, padding):
    return _NonAtrousConvolution(
        self.input_shape,
        filter_shape=self.filter_shape,
        padding=padding,
        data_format=self.data_format,
        strides=self.strides,
        name=self.name)

  def __call__(self, inp, filter):  # pylint: disable=redefined-builtin
    # TPU convolution supports dilations greater than 1.
    if device_context.enclosing_tpu_context() is not None:
      return convolution_internal(
          inp,
          filter,
          strides=self.strides,
          padding=self.padding,
          data_format=self.data_format,
          dilations=self.dilation_rate,
          name=self.name,
          call_from_convolution=False)
    else:
      return self.conv_op(inp, filter)


@tf_export(v1=["nn.pool"])
def pool(
    input,  # pylint: disable=redefined-builtin
    window_shape,
    pooling_type,
    padding,
    dilation_rate=None,
    strides=None,
    name=None,
    data_format=None,
    dilations=None):
  # pylint: disable=line-too-long
  """Performs an N-D pooling operation.

  In the case that `data_format` does not start with "NC", computes for
      0 <= b < batch_size,
      0 <= x[i] < output_spatial_shape[i],
      0 <= c < num_channels:

  ```
  output[b, x[0], ..., x[N-1], c] =
    REDUCE_{z[0], ..., z[N-1]}
      input[b,
            x[0] * strides[0] - pad_before[0] + dilation_rate[0]*z[0],
            ...
            x[N-1]*strides[N-1] - pad_before[N-1] + dilation_rate[N-1]*z[N-1],
            c],
  ```

  where the reduction function REDUCE depends on the value of `pooling_type`,
  and pad_before is defined based on the value of `padding` as described in
  the "returns" section of `tf.nn.convolution`.  The reduction never includes
  out-of-bounds positions.

  In the case that `data_format` starts with `"NC"`, the `input` and output are
  simply transposed as follows:

  ```
  pool(input, data_format, **kwargs) =
    tf.transpose(pool(tf.transpose(input, [0] + range(2,N+2) + [1]),
                      **kwargs),
                 [0, N+1] + range(1, N+1))
  ```

  Args:
    input: Tensor of rank N+2, of shape
      `[batch_size] + input_spatial_shape + [num_channels]` if data_format does
      not start with "NC" (default), or
      `[batch_size, num_channels] + input_spatial_shape` if data_format starts
      with "NC".  Pooling happens over the spatial dimensions only.
    window_shape: Sequence of N ints >= 1.
    pooling_type: Specifies pooling operation, must be "AVG" or "MAX".
    padding: The padding algorithm, must be "SAME" or "VALID".
      See the "returns" section of `tf.nn.convolution` for details.
    dilation_rate: Optional.  Dilation rate.  List of N ints >= 1.
      Defaults to [1]*N.  If any value of dilation_rate is > 1, then all values
      of strides must be 1.
    strides: Optional.  Sequence of N ints >= 1.  Defaults to [1]*N.
      If any value of strides is > 1, then all values of dilation_rate must be
      1.
    name: Optional. Name of the op.
    data_format: A string or None.  Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if
      `data_format` does not start with "NC"), or the second dimension (if
      `data_format` starts with "NC").  For N=1, the valid values are "NWC"
      (default) and "NCW".  For N=2, the valid values are "NHWC" (default) and
      "NCHW".  For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    dilations: Alias for dilation_rate.

  Returns:
    Tensor of rank N+2, of shape
      [batch_size] + output_spatial_shape + [num_channels]

    if data_format is None or does not start with "NC", or

      [batch_size, num_channels] + output_spatial_shape

    if data_format starts with "NC",
    where `output_spatial_shape` depends on the value of padding:

    If padding = "SAME":
      output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides[i])

    If padding = "VALID":
      output_spatial_shape[i] =
        ceil((input_spatial_shape[i] - (window_shape[i] - 1) * dilation_rate[i])
             / strides[i]).

  Raises:
    ValueError: if arguments are invalid.

  """
  dilation_rate = deprecated_argument_lookup(
      "dilations", dilations, "dilation_rate", dilation_rate)
  # pylint: enable=line-too-long
  with ops.name_scope(name, "%s_pool" % (pooling_type.lower()),
                      [input]) as scope:
    input = ops.convert_to_tensor(input, name="input")  # pylint: disable=redefined-builtin

    num_spatial_dims = len(window_shape)
    if num_spatial_dims < 1 or num_spatial_dims > 3:
      raise ValueError("It is required that 1 <= num_spatial_dims <= 3.")

    input.get_shape().with_rank(num_spatial_dims + 2)

    strides, dilation_rate = _get_strides_and_dilation_rate(
        num_spatial_dims, strides, dilation_rate)

    if padding == "SAME" and np.any(dilation_rate > 1):
      raise ValueError(
          "pooling with SAME padding is not implemented for dilation_rate > 1")

    if np.any(strides > window_shape):
      raise ValueError(
          "strides > window_shape not supported due to inconsistency between "
          "CPU and GPU implementations")

    pooling_ops = {
        ("MAX", 1): max_pool,
        ("MAX", 2): max_pool,
        ("MAX", 3): max_pool3d,  # pylint: disable=undefined-variable
        ("AVG", 1): avg_pool,
        ("AVG", 2): avg_pool,
        ("AVG", 3): avg_pool3d,  # pylint: disable=undefined-variable
    }
    op_key = (pooling_type, num_spatial_dims)
    if op_key not in pooling_ops:
      raise ValueError("%d-D %s pooling is not supported." % (op_key[1],
                                                              op_key[0]))

    if data_format is None or not data_format.startswith("NC"):
      adjusted_window_shape = [1] + list(window_shape) + [1]
      adjusted_strides = [1] + list(strides) + [1]
      spatial_dims = range(1, num_spatial_dims + 1)
    else:
      adjusted_window_shape = [1, 1] + list(window_shape)
      adjusted_strides = [1, 1] + list(strides)
      spatial_dims = range(2, num_spatial_dims + 2)

    if num_spatial_dims == 1:
      if data_format is None or data_format == "NWC":
        data_format_kwargs = dict(data_format="NHWC")
      elif data_format == "NCW":
        data_format_kwargs = dict(data_format="NCHW")
      else:
        raise ValueError("data_format must be either \"NWC\" or \"NCW\".")
      adjusted_window_shape = [1] + adjusted_window_shape
      adjusted_strides = [1] + adjusted_strides
    else:
      data_format_kwargs = dict(data_format=data_format)

    def op(converted_input, _, converted_padding):  # pylint: disable=missing-docstring
      if num_spatial_dims == 1:
        converted_input = array_ops.expand_dims(converted_input,
                                                spatial_dims[0])
      result = pooling_ops[op_key](
          converted_input,
          adjusted_window_shape,
          adjusted_strides,
          converted_padding,
          name=scope,
          **data_format_kwargs)
      if num_spatial_dims == 1:
        result = array_ops.squeeze(result, [spatial_dims[0]])
      return result

    return with_space_to_batch(
        input=input,
        dilation_rate=dilation_rate,
        padding=padding,
        op=op,
        spatial_dims=spatial_dims,
        filter_shape=window_shape)
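

# Hedged usage sketch for `pool` (shapes are assumptions, not from this
# module): a 2x2 max pool with stride 2 over an NHWC input.
def _example_pool():
  """Illustrative only; never called by library code."""
  x = constant_op.constant(np.ones([1, 4, 4, 1], np.float32))
  # Per the "VALID" formula above: ceil((4 - (2 - 1) * 1) / 2) = 2, so the
  # output shape is [1, 2, 2, 1].
  return pool(
      x, window_shape=[2, 2], pooling_type="MAX", padding="VALID",
      strides=[2, 2])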


@tf_export("nn.pool", v1=[])
def pool_v2(
    input,  # pylint: disable=redefined-builtin
    window_shape,
    pooling_type,
    strides=None,
    padding="VALID",
    data_format=None,
    dilations=None,
    name=None):
  # pylint: disable=line-too-long
  """Performs an N-D pooling operation.

  In the case that `data_format` does not start with "NC", computes for
      0 <= b < batch_size,
      0 <= x[i] < output_spatial_shape[i],
      0 <= c < num_channels:

  ```
  output[b, x[0], ..., x[N-1], c] =
    REDUCE_{z[0], ..., z[N-1]}
      input[b,
            x[0] * strides[0] - pad_before[0] + dilation_rate[0]*z[0],
            ...
            x[N-1]*strides[N-1] - pad_before[N-1] + dilation_rate[N-1]*z[N-1],
            c],
  ```

  where the reduction function REDUCE depends on the value of `pooling_type`,
  and pad_before is defined based on the value of `padding` as described in
  the "returns" section of `tf.nn.convolution`.  The reduction never includes
  out-of-bounds positions.

  In the case that `data_format` starts with `"NC"`, the `input` and output are
  simply transposed as follows:

  ```
  pool(input, data_format, **kwargs) =
    tf.transpose(pool(tf.transpose(input, [0] + range(2,N+2) + [1]),
                      **kwargs),
                 [0, N+1] + range(1, N+1))
  ```

  Args:
    input: Tensor of rank N+2, of shape `[batch_size] + input_spatial_shape +
      [num_channels]` if data_format does not start with "NC" (default), or
      `[batch_size, num_channels] + input_spatial_shape` if data_format starts
      with "NC".  Pooling happens over the spatial dimensions only.
    window_shape: Sequence of N ints >= 1.
    pooling_type: Specifies pooling operation, must be "AVG" or "MAX".
    strides: Optional.  Sequence of N ints >= 1.  Defaults to [1]*N.  If any
      value of strides is > 1, then all values of dilation_rate must be 1.
    padding: The padding algorithm, must be "SAME" or "VALID".  Defaults to
      "VALID".  See the "returns" section of `tf.nn.convolution` for details.
    data_format: A string or None.  Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if
      `data_format` does not start with "NC"), or the second dimension (if
      `data_format` starts with "NC").  For N=1, the valid values are "NWC"
      (default) and "NCW".  For N=2, the valid values are "NHWC" (default) and
      "NCHW".  For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    dilations: Optional.  Dilation rate.  List of N ints >= 1.  Defaults to
      [1]*N.  If any value of dilation_rate is > 1, then all values of strides
      must be 1.
    name: Optional. Name of the op.

  Returns:
    Tensor of rank N+2, of shape
      [batch_size] + output_spatial_shape + [num_channels]

    if data_format is None or does not start with "NC", or

      [batch_size, num_channels] + output_spatial_shape

    if data_format starts with "NC",
    where `output_spatial_shape` depends on the value of padding:

    If padding = "SAME":
      output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides[i])

    If padding = "VALID":
      output_spatial_shape[i] =
        ceil((input_spatial_shape[i] - (window_shape[i] - 1) * dilation_rate[i])
             / strides[i]).

  Raises:
    ValueError: if arguments are invalid.

  """
  return pool(
      input=input,
      window_shape=window_shape,
      pooling_type=pooling_type,
      padding=padding,
      dilation_rate=dilations,
      strides=strides,
      name=name,
      data_format=data_format)


@tf_export("nn.atrous_conv2d")
def atrous_conv2d(value, filters, rate, padding, name=None):
  """Atrous convolution (a.k.a. convolution with holes or dilated convolution).

  This function is a simpler wrapper around the more general
  `tf.nn.convolution`, and exists only for backwards compatibility. You can
  use `tf.nn.convolution` to perform 1-D, 2-D, or 3-D atrous convolution.

  Computes a 2-D atrous convolution, also known as convolution with holes or
  dilated convolution, given 4-D `value` and `filters` tensors. If the `rate`
  parameter is equal to one, it performs regular 2-D convolution. If the `rate`
  parameter is greater than one, it performs convolution with holes, sampling
  the input values every `rate` pixels in the `height` and `width` dimensions.
  This is equivalent to convolving the input with a set of upsampled filters,
  produced by inserting `rate - 1` zeros between two consecutive values of the
  filters along the `height` and `width` dimensions, hence the name atrous
  convolution or convolution with holes (the French word trous means holes in
  English).

  More specifically:

  ```
  output[batch, height, width, out_channel] =
      sum_{dheight, dwidth, in_channel} (
          filters[dheight, dwidth, in_channel, out_channel] *
          value[batch, height + rate*dheight, width + rate*dwidth, in_channel]
      )
  ```

  Atrous convolution allows us to explicitly control how densely to compute
  feature responses in fully convolutional networks. Used in conjunction with
  bilinear interpolation, it offers an alternative to `conv2d_transpose` in
  dense prediction tasks such as semantic image segmentation, optical flow
  computation, or depth estimation. It also allows us to effectively enlarge
  the field of view of filters without increasing the number of parameters or
  the amount of computation.

  For a description of atrous convolution and how it can be used for dense
  feature extraction, please see: (Chen et al., 2015). The same operation is
  investigated further in (Yu et al., 2016). Previous works that effectively
  use atrous convolution in different ways are, among others,
  (Sermanet et al., 2014) and (Giusti et al., 2013).
  Atrous convolution is also closely related to the so-called noble identities
  in multi-rate signal processing.

  There are many different ways to implement atrous convolution (see the refs
  above). The implementation here reduces

  ```python
  atrous_conv2d(value, filters, rate, padding=padding)
  ```

  to the following three operations:

  ```python
  paddings = ...
  net = space_to_batch(value, paddings, block_size=rate)
  net = conv2d(net, filters, strides=[1, 1, 1, 1], padding="VALID")
  crops = ...
  net = batch_to_space(net, crops, block_size=rate)
  ```

  Advanced usage. Note the following optimization: A sequence of
  `atrous_conv2d` operations with identical `rate` parameters, 'SAME'
  `padding`, and filters with odd heights/widths:

  ```python
  net = atrous_conv2d(net, filters1, rate, padding="SAME")
  net = atrous_conv2d(net, filters2, rate, padding="SAME")
  ...
  net = atrous_conv2d(net, filtersK, rate, padding="SAME")
  ```

  can be performed equivalently, and more cheaply in terms of computation and
  memory, as:

  ```python
  pad = ...  # padding so that the input dims are multiples of rate
  net = space_to_batch(net, paddings=pad, block_size=rate)
  net = conv2d(net, filters1, strides=[1, 1, 1, 1], padding="SAME")
  net = conv2d(net, filters2, strides=[1, 1, 1, 1], padding="SAME")
  ...
  net = conv2d(net, filtersK, strides=[1, 1, 1, 1], padding="SAME")
  net = batch_to_space(net, crops=pad, block_size=rate)
  ```

  because a pair of consecutive `space_to_batch` and `batch_to_space` ops with
  the same `block_size` cancel out when their respective `paddings` and `crops`
  inputs are identical.

  Args:
    value: A 4-D `Tensor` of type `float`. It needs to be in the default "NHWC"
      format. Its shape is `[batch, in_height, in_width, in_channels]`.
    filters: A 4-D `Tensor` with the same type as `value` and shape
      `[filter_height, filter_width, in_channels, out_channels]`. `filters`'
      `in_channels` dimension must match that of `value`. Atrous convolution is
      equivalent to standard convolution with upsampled filters with effective
      height `filter_height + (filter_height - 1) * (rate - 1)` and effective
      width `filter_width + (filter_width - 1) * (rate - 1)`, produced by
      inserting `rate - 1` zeros along consecutive elements across the
      `filters`' spatial dimensions.
    rate: A positive int32. The stride with which we sample input values across
      the `height` and `width` dimensions. Equivalently, the rate by which we
      upsample the filter values by inserting zeros across the `height` and
      `width` dimensions. In the literature, the same parameter is sometimes
      called `input stride` or `dilation`.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
    name: Optional name for the returned tensor.

  Returns:
    A `Tensor` with the same type as `value`.
    Output shape with `'VALID'` padding is:

        [batch, height - rate * (filter_height - 1),
         width - rate * (filter_width - 1), out_channels].

    Output shape with `'SAME'` padding is:

        [batch, height, width, out_channels].

  Raises:
    ValueError: If input/output depth does not match `filters`' shape, or if
      padding is other than `'VALID'` or `'SAME'`.

  References:
    Multi-Scale Context Aggregation by Dilated Convolutions:
      [Yu et al., 2016](https://arxiv.org/abs/1511.07122)
      ([pdf](https://arxiv.org/pdf/1511.07122.pdf))
    Semantic Image Segmentation with Deep Convolutional Nets and Fully
    Connected CRFs:
      [Chen et al., 2015](http://arxiv.org/abs/1412.7062)
      ([pdf](https://arxiv.org/pdf/1412.7062))
    OverFeat - Integrated Recognition, Localization and Detection using
    Convolutional Networks:
      [Sermanet et al., 2014](https://arxiv.org/abs/1312.6229)
      ([pdf](https://arxiv.org/pdf/1312.6229.pdf))
    Fast Image Scanning with Deep Max-Pooling Convolutional Neural Networks:
      [Giusti et al., 2013](https://ieeexplore.ieee.org/abstract/document/6738831)
      ([pdf](https://arxiv.org/pdf/1302.1700.pdf))
  """
  return convolution(
      input=value,
      filter=filters,
      padding=padding,
      dilation_rate=np.broadcast_to(rate, (2,)),
      name=name)
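

# Hedged usage sketch for `atrous_conv2d` (shapes are assumptions, not from
# this module): at rate 2, the 3x3 filter below covers an effective 5x5
# receptive field without adding parameters.
def _example_atrous_conv2d():
  """Illustrative only; never called by library code."""
  value = constant_op.constant(np.ones([1, 16, 16, 3], np.float32))
  filters = constant_op.constant(np.ones([3, 3, 3, 8], np.float32))
  # "SAME" padding keeps the spatial shape: the result is [1, 16, 16, 8].
  return atrous_conv2d(value, filters, rate=2, padding="SAME")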


def _convert_padding(padding):
  """Converts Python padding to C++ padding for ops which take EXPLICIT padding.

  Args:
    padding: the `padding` argument for a Python op which supports EXPLICIT
      padding.

  Returns:
    (padding, explicit_paddings) pair, which should be passed as attributes to
    a C++ op.

  Raises:
    ValueError: If padding is invalid.
  """
  explicit_paddings = []
  if padding == "EXPLICIT":
    # Give a better error message if EXPLICIT is passed.
    raise ValueError('"EXPLICIT" is not a valid value for the padding '
                     "parameter. To use explicit padding, the padding "
                     "parameter must be a list.")
  if isinstance(padding, (list, tuple)):
    for i, dim_paddings in enumerate(padding):
      if not isinstance(dim_paddings, (list, tuple)):
        raise ValueError("When padding is a list, each element of padding must "
                         "be a list/tuple of size 2. Element with index %d of "
                         "padding is not a list/tuple" % i)
      if len(dim_paddings) != 2:
        raise ValueError("When padding is a list, each element of padding must "
                         "be a list/tuple of size 2. Element with index %d of "
                         "padding has size %d" % (i, len(dim_paddings)))
      explicit_paddings.extend(dim_paddings)
    if len(padding) != 4:
      raise ValueError("When padding is a list, it must be of size 4. Got "
                       "padding of size: %d" % len(padding))
    padding = "EXPLICIT"
  return padding, explicit_paddings
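

# Worked sketch for `_convert_padding` (illustrative): a list of four
# (pad_before, pad_after) pairs is flattened into `explicit_paddings` and the
# padding attribute becomes "EXPLICIT"; plain strings pass through unchanged.
def _example_convert_padding():
  """Illustrative only; never called by library code."""
  assert _convert_padding("SAME") == ("SAME", [])
  assert _convert_padding([[0, 0], [1, 2], [3, 4], [0, 0]]) == (
      "EXPLICIT", [0, 0, 1, 2, 3, 4, 0, 0])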
Got " 1564 "padding of size: %d" % len(padding)) 1565 padding = "EXPLICIT" 1566 return padding, explicit_paddings 1567 1568 1569@tf_export(v1=["nn.conv1d"]) 1570@deprecation.deprecated_arg_values( 1571 None, 1572 "`NCHW` for data_format is deprecated, use `NCW` instead", 1573 warn_once=True, 1574 data_format="NCHW") 1575@deprecation.deprecated_arg_values( 1576 None, 1577 "`NHWC` for data_format is deprecated, use `NWC` instead", 1578 warn_once=True, 1579 data_format="NHWC") 1580def conv1d( 1581 value=None, 1582 filters=None, 1583 stride=None, 1584 padding=None, 1585 use_cudnn_on_gpu=None, 1586 data_format=None, 1587 name=None, 1588 input=None, # pylint: disable=redefined-builtin 1589 dilations=None): 1590 r"""Computes a 1-D convolution given 3-D input and filter tensors. 1591 1592 Given an input tensor of shape 1593 [batch, in_width, in_channels] 1594 if data_format is "NWC", or 1595 [batch, in_channels, in_width] 1596 if data_format is "NCW", 1597 and a filter / kernel tensor of shape 1598 [filter_width, in_channels, out_channels], this op reshapes 1599 the arguments to pass them to conv2d to perform the equivalent 1600 convolution operation. 1601 1602 Internally, this op reshapes the input tensors and invokes `tf.nn.conv2d`. 1603 For example, if `data_format` does not start with "NC", a tensor of shape 1604 [batch, in_width, in_channels] 1605 is reshaped to 1606 [batch, 1, in_width, in_channels], 1607 and the filter is reshaped to 1608 [1, filter_width, in_channels, out_channels]. 1609 The result is then reshaped back to 1610 [batch, out_width, out_channels] 1611 \(where out_width is a function of the stride and padding as in conv2d\) and 1612 returned to the caller. 1613 1614 Args: 1615 value: A 3D `Tensor`. Must be of type `float16`, `float32`, or `float64`. 1616 filters: A 3D `Tensor`. Must have the same type as `value`. 1617 stride: An int or list of `ints` that has length `1` or `3`. The number of 1618 entries by which the filter is moved right at each step. 1619 padding: 'SAME' or 'VALID' 1620 use_cudnn_on_gpu: An optional `bool`. Defaults to `True`. 1621 data_format: An optional `string` from `"NWC", "NCW"`. Defaults to `"NWC"`, 1622 the data is stored in the order of [batch, in_width, in_channels]. The 1623 `"NCW"` format stores data as [batch, in_channels, in_width]. 1624 name: A name for the operation (optional). 1625 input: Alias for value. 1626 dilations: An int or list of `ints` that has length `1` or `3` which 1627 defaults to 1. The dilation factor for each dimension of input. If set to 1628 k > 1, there will be k-1 skipped cells between each filter element on that 1629 dimension. Dilations in the batch and depth dimensions must be 1. 1630 1631 Returns: 1632 A `Tensor`. Has the same type as input. 1633 1634 Raises: 1635 ValueError: if `data_format` is invalid. 
1636 """ 1637 value = deprecation.deprecated_argument_lookup("input", input, "value", value) 1638 with ops.name_scope(name, "conv1d", [value, filters]) as name: 1639 # Reshape the input tensor to [batch, 1, in_width, in_channels] 1640 if data_format is None or data_format == "NHWC" or data_format == "NWC": 1641 data_format = "NHWC" 1642 spatial_start_dim = 1 1643 channel_index = 2 1644 elif data_format == "NCHW" or data_format == "NCW": 1645 data_format = "NCHW" 1646 spatial_start_dim = 2 1647 channel_index = 1 1648 else: 1649 raise ValueError("data_format must be \"NWC\" or \"NCW\".") 1650 strides = [1] + _get_sequence(stride, 1, channel_index, "stride") 1651 dilations = [1] + _get_sequence(dilations, 1, channel_index, "dilations") 1652 1653 value = array_ops.expand_dims(value, spatial_start_dim) 1654 filters = array_ops.expand_dims(filters, 0) 1655 result = gen_nn_ops.conv2d( 1656 value, 1657 filters, 1658 strides, 1659 padding, 1660 use_cudnn_on_gpu=use_cudnn_on_gpu, 1661 data_format=data_format, 1662 dilations=dilations, 1663 name=name) 1664 return array_ops.squeeze(result, [spatial_start_dim]) 1665 1666 1667@tf_export("nn.conv1d", v1=[]) 1668def conv1d_v2( 1669 input, # pylint: disable=redefined-builtin 1670 filters, 1671 stride, 1672 padding, 1673 data_format="NWC", 1674 dilations=None, 1675 name=None): 1676 r"""Computes a 1-D convolution given 3-D input and filter tensors. 1677 1678 Given an input tensor of shape 1679 [batch, in_width, in_channels] 1680 if data_format is "NWC", or 1681 [batch, in_channels, in_width] 1682 if data_format is "NCW", 1683 and a filter / kernel tensor of shape 1684 [filter_width, in_channels, out_channels], this op reshapes 1685 the arguments to pass them to conv2d to perform the equivalent 1686 convolution operation. 1687 1688 Internally, this op reshapes the input tensors and invokes `tf.nn.conv2d`. 1689 For example, if `data_format` does not start with "NC", a tensor of shape 1690 [batch, in_width, in_channels] 1691 is reshaped to 1692 [batch, 1, in_width, in_channels], 1693 and the filter is reshaped to 1694 [1, filter_width, in_channels, out_channels]. 1695 The result is then reshaped back to 1696 [batch, out_width, out_channels] 1697 \(where out_width is a function of the stride and padding as in conv2d\) and 1698 returned to the caller. 1699 1700 Args: 1701 input: A 3D `Tensor`. Must be of type `float16`, `float32`, or `float64`. 1702 filters: A 3D `Tensor`. Must have the same type as `input`. 1703 stride: An int or list of `ints` that has length `1` or `3`. The number of 1704 entries by which the filter is moved right at each step. 1705 padding: 'SAME' or 'VALID' 1706 data_format: An optional `string` from `"NWC", "NCW"`. Defaults to `"NWC"`, 1707 the data is stored in the order of [batch, in_width, in_channels]. The 1708 `"NCW"` format stores data as [batch, in_channels, in_width]. 1709 dilations: An int or list of `ints` that has length `1` or `3` which 1710 defaults to 1. The dilation factor for each dimension of input. If set to 1711 k > 1, there will be k-1 skipped cells between each filter element on that 1712 dimension. Dilations in the batch and depth dimensions must be 1. 1713 name: A name for the operation (optional). 1714 1715 Returns: 1716 A `Tensor`. Has the same type as input. 1717 1718 Raises: 1719 ValueError: if `data_format` is invalid. 
1720 """ 1721 return conv1d( 1722 input, # pylint: disable=redefined-builtin 1723 filters, 1724 stride, 1725 padding, 1726 use_cudnn_on_gpu=True, 1727 data_format=data_format, 1728 name=name, 1729 dilations=dilations) 1730 1731 1732@tf_export("nn.conv1d_transpose") 1733def conv1d_transpose( 1734 input, # pylint: disable=redefined-builtin 1735 filters, 1736 output_shape, 1737 strides, 1738 padding="SAME", 1739 data_format="NWC", 1740 dilations=None, 1741 name=None): 1742 """The transpose of `conv1d`. 1743 1744 This operation is sometimes called "deconvolution" after 1745 (Zeiler et al., 2010), but is actually the transpose (gradient) of `conv1d` 1746 rather than an actual deconvolution. 1747 1748 Args: 1749 input: A 3-D `Tensor` of type `float` and shape 1750 `[batch, in_width, in_channels]` for `NWC` data format or 1751 `[batch, in_channels, in_width]` for `NCW` data format. 1752 filters: A 3-D `Tensor` with the same type as `value` and shape 1753 `[filter_width, output_channels, in_channels]`. `filter`'s 1754 `in_channels` dimension must match that of `value`. 1755 output_shape: A 1-D `Tensor`, containing three elements, representing the 1756 output shape of the deconvolution op. 1757 strides: An int or list of `ints` that has length `1` or `3`. The number of 1758 entries by which the filter is moved right at each step. 1759 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 1760 See the "returns" section of `tf.nn.convolution` for details. 1761 data_format: A string. `'NWC'` and `'NCW'` are supported. 1762 dilations: An int or list of `ints` that has length `1` or `3` which 1763 defaults to 1. The dilation factor for each dimension of input. If set to 1764 k > 1, there will be k-1 skipped cells between each filter element on that 1765 dimension. Dilations in the batch and depth dimensions must be 1. 1766 name: Optional name for the returned tensor. 1767 1768 Returns: 1769 A `Tensor` with the same type as `value`. 1770 1771 Raises: 1772 ValueError: If input/output depth does not match `filter`'s shape, if 1773 `output_shape` is not at 3-element vector, if `padding` is other than 1774 `'VALID'` or `'SAME'`, or if `data_format` is invalid. 
1775 1776 References: 1777 Deconvolutional Networks: 1778 [Zeiler et al., 2010] 1779 (https://ieeexplore.ieee.org/abstract/document/5539957) 1780 ([pdf] 1781 (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.232.4023&rep=rep1&type=pdf)) 1782 """ 1783 with ops.name_scope(name, "conv1d_transpose", 1784 [input, filters, output_shape]) as name: 1785 # The format could be either NWC or NCW, map to NHWC or NCHW 1786 if data_format is None or data_format == "NWC": 1787 data_format = "NHWC" 1788 spatial_start_dim = 1 1789 channel_index = 2 1790 elif data_format == "NCW": 1791 data_format = "NCHW" 1792 spatial_start_dim = 2 1793 channel_index = 1 1794 else: 1795 raise ValueError("data_format must be \"NWC\" or \"NCW\".") 1796 1797 # Reshape the input tensor to [batch, 1, in_width, in_channels] 1798 strides = [1] + _get_sequence(strides, 1, channel_index, "stride") 1799 dilations = [1] + _get_sequence(dilations, 1, channel_index, "dilations") 1800 1801 input = array_ops.expand_dims(input, spatial_start_dim) 1802 filters = array_ops.expand_dims(filters, 0) 1803 output_shape = list(output_shape) if not isinstance( 1804 output_shape, ops.Tensor) else output_shape 1805 output_shape = array_ops.concat([output_shape[: spatial_start_dim], [1], 1806 output_shape[spatial_start_dim:]], 0) 1807 1808 result = gen_nn_ops.conv2d_backprop_input( 1809 input_sizes=output_shape, 1810 filter=filters, 1811 out_backprop=input, 1812 strides=strides, 1813 padding=padding, 1814 data_format=data_format, 1815 dilations=dilations, 1816 name=name) 1817 return array_ops.squeeze(result, spatial_start_dim) 1818 1819 1820@tf_export("nn.conv2d", v1=[]) 1821def conv2d_v2(input, # pylint: disable=redefined-builtin 1822 filters, 1823 strides, 1824 padding, 1825 data_format="NHWC", 1826 dilations=None, 1827 name=None): 1828 # pylint: disable=line-too-long 1829 r"""Computes a 2-D convolution given 4-D `input` and `filters` tensors. 1830 1831 Given an input tensor of shape `[batch, in_height, in_width, in_channels]` 1832 and a filter / kernel tensor of shape 1833 `[filter_height, filter_width, in_channels, out_channels]`, this op 1834 performs the following: 1835 1836 1. Flattens the filter to a 2-D matrix with shape 1837 `[filter_height * filter_width * in_channels, output_channels]`. 1838 2. Extracts image patches from the input tensor to form a *virtual* 1839 tensor of shape `[batch, out_height, out_width, 1840 filter_height * filter_width * in_channels]`. 1841 3. For each patch, right-multiplies the filter matrix and the image patch 1842 vector. 1843 1844 In detail, with the default NHWC format, 1845 1846 output[b, i, j, k] = 1847 sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] * 1848 filter[di, dj, q, k] 1849 1850 Must have `strides[0] = strides[3] = 1`. For the most common case of the same 1851 horizontal and vertical strides, `strides = [1, stride, stride, 1]`. 1852 1853 Usage Example: 1854 1855 >>> x_in = np.array([[ 1856 ... [[2], [1], [2], [0], [1]], 1857 ... [[1], [3], [2], [2], [3]], 1858 ... [[1], [1], [3], [3], [0]], 1859 ... [[2], [2], [0], [1], [1]], 1860 ... [[0], [0], [3], [1], [2]], ]]) 1861 >>> kernel_in = np.array([ 1862 ... [ [[2, 0.1]], [[3, 0.2]] ], 1863 ... [ [[0, 0.3]],[[1, 0.4]] ], ]) 1864 >>> x = tf.constant(x_in, dtype=tf.float32) 1865 >>> kernel = tf.constant(kernel_in, dtype=tf.float32) 1866 >>> tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID') 1867 <tf.Tensor: shape=(1, 4, 4, 2), dtype=float32, numpy=..., dtype=float32)> 1868 1869 Args: 1870 input: A `Tensor`. 
      Must be one of the following types:
      `half`, `bfloat16`, `float32`, `float64`.
      A 4-D tensor. The dimension order is interpreted according to the value
      of `data_format`, see below for details.
    filters: A `Tensor`. Must have the same type as `input`.
      A 4-D tensor of shape
      `[filter_height, filter_width, in_channels, out_channels]`
    strides: An int or list of `ints` that has length `1`, `2` or `4`. The
      stride of the sliding window for each dimension of `input`. If a single
      value is given it is replicated in the `H` and `W` dimension. By default
      the `N` and `C` dimensions are set to 1. The dimension order is
      determined by the value of `data_format`, see below for details.
    padding: Either the `string` `"SAME"` or `"VALID"` indicating the type of
      padding algorithm to use, or a list indicating the explicit paddings at
      the start and end of each dimension. When explicit padding is used and
      data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top,
      pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding is
      used and data_format is `"NCHW"`, this should be in the form `[[0, 0],
      [0, 0], [pad_top, pad_bottom], [pad_left, pad_right]]`.
    data_format: An optional `string` from: `"NHWC", "NCHW"`.
      Defaults to `"NHWC"`.
      Specify the data format of the input and output data. With the
      default format "NHWC", the data is stored in the order of:
      [batch, height, width, channels].
      Alternatively, the format could be "NCHW", the data storage order of:
      [batch, channels, height, width].
    dilations: An int or list of `ints` that has length `1`, `2` or `4`,
      defaults to 1. The dilation factor for each dimension of `input`. If a
      single value is given it is replicated in the `H` and `W` dimension. By
      default the `N` and `C` dimensions are set to 1. If set to k > 1, there
      will be k-1 skipped cells between each filter element on that dimension.
      The dimension order is determined by the value of `data_format`, see
      above for details. If given as a 4-d list, the dilations in the batch
      and depth dimensions must be 1.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `input`.
  """
  # pylint: enable=line-too-long
  return conv2d(input,  # pylint: disable=redefined-builtin
                filters,
                strides,
                padding,
                use_cudnn_on_gpu=True,
                data_format=data_format,
                dilations=dilations,
                name=name)


@tf_export(v1=["nn.conv2d"])
def conv2d(  # pylint: disable=redefined-builtin,dangerous-default-value
    input,
    filter=None,
    strides=None,
    padding=None,
    use_cudnn_on_gpu=True,
    data_format="NHWC",
    dilations=[1, 1, 1, 1],
    name=None,
    filters=None):
  r"""Computes a 2-D convolution given 4-D `input` and `filter` tensors.

  Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
  and a filter / kernel tensor of shape
  `[filter_height, filter_width, in_channels, out_channels]`, this op
  performs the following:

  1. Flattens the filter to a 2-D matrix with shape
     `[filter_height * filter_width * in_channels, output_channels]`.
  2. Extracts image patches from the input tensor to form a *virtual*
     tensor of shape `[batch, out_height, out_width,
     filter_height * filter_width * in_channels]`.
  3. For each patch, right-multiplies the filter matrix and the image patch
     vector.

  In detail, with the default NHWC format,

      output[b, i, j, k] =
          sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q]
                          * filter[di, dj, q, k]

  Must have `strides[0] = strides[3] = 1`. For the most common case of the same
  horizontal and vertical strides, `strides = [1, stride, stride, 1]`.

  Args:
    input: A `Tensor`. Must be one of the following types:
      `half`, `bfloat16`, `float32`, `float64`.
      A 4-D tensor. The dimension order is interpreted according to the value
      of `data_format`, see below for details.
    filter: A `Tensor`. Must have the same type as `input`.
      A 4-D tensor of shape
      `[filter_height, filter_width, in_channels, out_channels]`
    strides: An int or list of `ints` that has length `1`, `2` or `4`. The
      stride of the sliding window for each dimension of `input`. If a single
      value is given it is replicated in the `H` and `W` dimension. By default
      the `N` and `C` dimensions are set to 1. The dimension order is
      determined by the value of `data_format`, see below for details.
    padding: Either the `string` `"SAME"` or `"VALID"` indicating the type of
      padding algorithm to use, or a list indicating the explicit paddings at
      the start and end of each dimension. When explicit padding is used and
      data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top,
      pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding is
      used and data_format is `"NCHW"`, this should be in the form `[[0, 0],
      [0, 0], [pad_top, pad_bottom], [pad_left, pad_right]]`.
    use_cudnn_on_gpu: An optional `bool`. Defaults to `True`.
    data_format: An optional `string` from: `"NHWC", "NCHW"`.
      Defaults to `"NHWC"`.
      Specify the data format of the input and output data. With the
      default format "NHWC", the data is stored in the order of:
      [batch, height, width, channels].
      Alternatively, the format could be "NCHW", the data storage order of:
      [batch, channels, height, width].
    dilations: An int or list of `ints` that has length `1`, `2` or `4`,
      defaults to 1. The dilation factor for each dimension of `input`. If a
      single value is given it is replicated in the `H` and `W` dimension. By
      default the `N` and `C` dimensions are set to 1. If set to k > 1, there
      will be k-1 skipped cells between each filter element on that dimension.
      The dimension order is determined by the value of `data_format`, see
      above for details. If given as a 4-d list, the dilations in the batch
      and depth dimensions must be 1.
    name: A name for the operation (optional).
    filters: Alias for filter.

  Returns:
    A `Tensor`. Has the same type as `input`.
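
  For example, a minimal sketch of explicit padding (shapes and pad amounts
  chosen only for illustration):

  ```python
  x = tf.ones([1, 3, 3, 1])
  k = tf.ones([2, 2, 1, 1])
  # Pad one row on top and one column on the left (NHWC order).
  y = tf.nn.conv2d(x, k, strides=[1, 1, 1, 1],
                   padding=[[0, 0], [1, 0], [1, 0], [0, 0]])
  # The padded input is 4x4, so y has shape [1, 3, 3, 1].
  ```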
1996 """ 1997 filter = deprecation.deprecated_argument_lookup( 1998 "filters", filters, "filter", filter) 1999 padding, explicit_paddings = _convert_padding(padding) 2000 if data_format is None: 2001 data_format = "NHWC" 2002 channel_index = 1 if data_format.startswith("NC") else 3 2003 2004 strides = _get_sequence(strides, 2, channel_index, "strides") 2005 dilations = _get_sequence(dilations, 2, channel_index, "dilations") 2006 return gen_nn_ops.conv2d(input, # pylint: disable=redefined-builtin 2007 filter, 2008 strides, 2009 padding, 2010 use_cudnn_on_gpu=use_cudnn_on_gpu, 2011 explicit_paddings=explicit_paddings, 2012 data_format=data_format, 2013 dilations=dilations, 2014 name=name) 2015 2016 2017@tf_export(v1=["nn.conv2d_backprop_filter"]) 2018def conv2d_backprop_filter( # pylint: disable=redefined-builtin,dangerous-default-value 2019 input, 2020 filter_sizes, 2021 out_backprop, 2022 strides, 2023 padding, 2024 use_cudnn_on_gpu=True, 2025 data_format="NHWC", 2026 dilations=[1, 1, 1, 1], 2027 name=None): 2028 r"""Computes the gradients of convolution with respect to the filter. 2029 2030 Args: 2031 input: A `Tensor`. Must be one of the following types: 2032 `half`, `bfloat16`, `float32`, `float64`. 2033 4-D with shape `[batch, in_height, in_width, in_channels]`. 2034 filter_sizes: A `Tensor` of type `int32`. 2035 An integer vector representing the tensor shape of `filter`, 2036 where `filter` is a 4-D 2037 `[filter_height, filter_width, in_channels, out_channels]` tensor. 2038 out_backprop: A `Tensor`. Must have the same type as `input`. 2039 4-D with shape `[batch, out_height, out_width, out_channels]`. 2040 Gradients w.r.t. the output of the convolution. 2041 strides: A list of `ints`. 2042 The stride of the sliding window for each dimension of the input 2043 of the convolution. Must be in the same order as the dimension specified 2044 with format. 2045 padding: Either the `string `"SAME"` or `"VALID"` indicating the type of 2046 padding algorithm to use, or a list indicating the explicit paddings at 2047 the start and end of each dimension. When explicit padding is used and 2048 data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top, 2049 pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding used 2050 and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0], 2051 [pad_top, pad_bottom], [pad_left, pad_right]]`. 2052 use_cudnn_on_gpu: An optional `bool`. Defaults to `True`. 2053 data_format: An optional `string` from: `"NHWC", "NCHW"`. 2054 Defaults to `"NHWC"`. 2055 Specify the data format of the input and output data. With the 2056 default format "NHWC", the data is stored in the order of: 2057 [batch, in_height, in_width, in_channels]. 2058 Alternatively, the format could be "NCHW", the data storage order of: 2059 [batch, in_channels, in_height, in_width]. 2060 dilations: An optional list of `ints`. Defaults to `[1, 1, 1, 1]`. 2061 1-D tensor of length 4. The dilation factor for each dimension of 2062 `input`. If set to k > 1, there will be k-1 skipped cells between each 2063 filter element on that dimension. The dimension order is determined by 2064 the value of `data_format`, see above for details. Dilations in the batch 2065 and depth dimensions must be 1. 2066 name: A name for the operation (optional). 2067 2068 Returns: 2069 A `Tensor`. Has the same type as `input`. 
2070 """ 2071 padding, explicit_paddings = _convert_padding(padding) 2072 return gen_nn_ops.conv2d_backprop_filter( 2073 input, filter_sizes, out_backprop, strides, padding, use_cudnn_on_gpu, 2074 explicit_paddings, data_format, dilations, name) 2075 2076 2077@tf_export(v1=["nn.conv2d_backprop_input"]) 2078def conv2d_backprop_input( # pylint: disable=redefined-builtin,dangerous-default-value 2079 input_sizes, 2080 filter=None, 2081 out_backprop=None, 2082 strides=None, 2083 padding=None, 2084 use_cudnn_on_gpu=True, 2085 data_format="NHWC", 2086 dilations=[1, 1, 1, 1], 2087 name=None, 2088 filters=None): 2089 r"""Computes the gradients of convolution with respect to the input. 2090 2091 Args: 2092 input_sizes: A `Tensor` of type `int32`. 2093 An integer vector representing the shape of `input`, 2094 where `input` is a 4-D `[batch, height, width, channels]` tensor. 2095 filter: A `Tensor`. Must be one of the following types: 2096 `half`, `bfloat16`, `float32`, `float64`. 2097 4-D with shape 2098 `[filter_height, filter_width, in_channels, out_channels]`. 2099 out_backprop: A `Tensor`. Must have the same type as `filter`. 2100 4-D with shape `[batch, out_height, out_width, out_channels]`. 2101 Gradients w.r.t. the output of the convolution. 2102 strides: A list of `ints`. 2103 The stride of the sliding window for each dimension of the input 2104 of the convolution. Must be in the same order as the dimension specified 2105 with format. 2106 padding: Either the `string `"SAME"` or `"VALID"` indicating the type of 2107 padding algorithm to use, or a list indicating the explicit paddings at 2108 the start and end of each dimension. When explicit padding is used and 2109 data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top, 2110 pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding used 2111 and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0], 2112 [pad_top, pad_bottom], [pad_left, pad_right]]`. 2113 use_cudnn_on_gpu: An optional `bool`. Defaults to `True`. 2114 data_format: An optional `string` from: `"NHWC", "NCHW"`. 2115 Defaults to `"NHWC"`. 2116 Specify the data format of the input and output data. With the 2117 default format "NHWC", the data is stored in the order of: 2118 [batch, in_height, in_width, in_channels]. 2119 Alternatively, the format could be "NCHW", the data storage order of: 2120 [batch, in_channels, in_height, in_width]. 2121 dilations: An optional list of `ints`. Defaults to `[1, 1, 1, 1]`. 2122 1-D tensor of length 4. The dilation factor for each dimension of 2123 `input`. If set to k > 1, there will be k-1 skipped cells between each 2124 filter element on that dimension. The dimension order is determined by 2125 the value of `data_format`, see above for details. Dilations in the batch 2126 and depth dimensions must be 1. 2127 name: A name for the operation (optional). 2128 filters: Alias for filter. 2129 2130 Returns: 2131 A `Tensor`. Has the same type as `filter`. 
2132 """ 2133 filter = deprecation.deprecated_argument_lookup( 2134 "filters", filters, "filter", filter) 2135 padding, explicit_paddings = _convert_padding(padding) 2136 return gen_nn_ops.conv2d_backprop_input( 2137 input_sizes, filter, out_backprop, strides, padding, use_cudnn_on_gpu, 2138 explicit_paddings, data_format, dilations, name) 2139 2140 2141@tf_export(v1=["nn.conv2d_transpose"]) 2142def conv2d_transpose( 2143 value=None, 2144 filter=None, # pylint: disable=redefined-builtin 2145 output_shape=None, 2146 strides=None, 2147 padding="SAME", 2148 data_format="NHWC", 2149 name=None, 2150 input=None, # pylint: disable=redefined-builtin 2151 filters=None, 2152 dilations=None): 2153 """The transpose of `conv2d`. 2154 2155 This operation is sometimes called "deconvolution" after 2156 (Zeiler et al., 2010), but is really the transpose (gradient) of `conv2d` 2157 rather than an actual deconvolution. 2158 2159 Args: 2160 value: A 4-D `Tensor` of type `float` and shape 2161 `[batch, height, width, in_channels]` for `NHWC` data format or 2162 `[batch, in_channels, height, width]` for `NCHW` data format. 2163 filter: A 4-D `Tensor` with the same type as `value` and shape 2164 `[height, width, output_channels, in_channels]`. `filter`'s 2165 `in_channels` dimension must match that of `value`. 2166 output_shape: A 1-D `Tensor` representing the output shape of the 2167 deconvolution op. 2168 strides: An int or list of `ints` that has length `1`, `2` or `4`. The 2169 stride of the sliding window for each dimension of `input`. If a single 2170 value is given it is replicated in the `H` and `W` dimension. By default 2171 the `N` and `C` dimensions are set to 0. The dimension order is determined 2172 by the value of `data_format`, see below for details. 2173 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 2174 See the "returns" section of `tf.nn.convolution` for details. 2175 data_format: A string. 'NHWC' and 'NCHW' are supported. 2176 name: Optional name for the returned tensor. 2177 input: Alias for value. 2178 filters: Alias for filter. 2179 dilations: An int or list of `ints` that has length `1`, `2` or `4`, 2180 defaults to 1. The dilation factor for each dimension of`input`. If a 2181 single value is given it is replicated in the `H` and `W` dimension. By 2182 default the `N` and `C` dimensions are set to 1. If set to k > 1, there 2183 will be k-1 skipped cells between each filter element on that dimension. 2184 The dimension order is determined by the value of `data_format`, see above 2185 for details. Dilations in the batch and depth dimensions if a 4-d tensor 2186 must be 1. 2187 2188 Returns: 2189 A `Tensor` with the same type as `value`. 2190 2191 Raises: 2192 ValueError: If input/output depth does not match `filter`'s shape, or if 2193 padding is other than `'VALID'` or `'SAME'`. 

  References:
    Deconvolutional Networks:
      [Zeiler et al., 2010]
      (https://ieeexplore.ieee.org/abstract/document/5539957)
      ([pdf]
      (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.232.4023&rep=rep1&type=pdf))
  """
  value = deprecated_argument_lookup("input", input, "value", value)
  filter = deprecated_argument_lookup("filters", filters, "filter", filter)
  with ops.name_scope(name, "conv2d_transpose",
                      [value, filter, output_shape]) as name:
    return conv2d_transpose_v2(
        value,
        filter,
        output_shape,
        strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name)


@tf_export("nn.conv2d_transpose", v1=[])
def conv2d_transpose_v2(
    input,  # pylint: disable=redefined-builtin
    filters,  # pylint: disable=redefined-builtin
    output_shape,
    strides,
    padding="SAME",
    data_format="NHWC",
    dilations=None,
    name=None):
  """The transpose of `conv2d`.

  This operation is sometimes called "deconvolution" after
  (Zeiler et al., 2010), but is really the transpose (gradient) of `conv2d`
  rather than an actual deconvolution.

  Args:
    input: A 4-D `Tensor` of type `float` and shape `[batch, height, width,
      in_channels]` for `NHWC` data format or `[batch, in_channels, height,
      width]` for `NCHW` data format.
    filters: A 4-D `Tensor` with the same type as `input` and shape `[height,
      width, output_channels, in_channels]`. `filters`' `in_channels`
      dimension must match that of `input`.
    output_shape: A 1-D `Tensor` representing the output shape of the
      deconvolution op.
    strides: An int or list of `ints` that has length `1`, `2` or `4`. The
      stride of the sliding window for each dimension of `input`. If a single
      value is given it is replicated in the `H` and `W` dimension. By default
      the `N` and `C` dimensions are set to 1. The dimension order is
      determined by the value of `data_format`, see below for details.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
      the "returns" section of `tf.nn.convolution` for details.
    data_format: A string. 'NHWC' and 'NCHW' are supported.
    dilations: An int or list of `ints` that has length `1`, `2` or `4`,
      defaults to 1. The dilation factor for each dimension of `input`. If a
      single value is given it is replicated in the `H` and `W` dimension. By
      default the `N` and `C` dimensions are set to 1. If set to k > 1, there
      will be k-1 skipped cells between each filter element on that dimension.
      The dimension order is determined by the value of `data_format`, see
      above for details. If given as a 4-d list, the dilations in the batch
      and depth dimensions must be 1.
    name: Optional name for the returned tensor.

  Returns:
    A `Tensor` with the same type as `input`.

  Raises:
    ValueError: If input/output depth does not match `filters`' shape, or if
      padding is other than `'VALID'` or `'SAME'`.
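
  For example, a minimal sketch (shapes chosen only for illustration) of the
  common upsampling pattern, where `'SAME'` padding with stride 2 doubles each
  spatial dimension:

  ```python
  x = tf.ones([1, 4, 4, 8])
  f = tf.ones([3, 3, 16, 8])  # [height, width, output_channels, in_channels]
  y = tf.nn.conv2d_transpose(x, f, output_shape=[1, 8, 8, 16],
                             strides=[1, 2, 2, 1], padding="SAME")
  # y has shape [1, 8, 8, 16].
  ```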

  References:
    Deconvolutional Networks:
      [Zeiler et al., 2010]
      (https://ieeexplore.ieee.org/abstract/document/5539957)
      ([pdf]
      (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.232.4023&rep=rep1&type=pdf))
  """
  with ops.name_scope(name, "conv2d_transpose",
                      [input, filters, output_shape]) as name:
    if data_format is None:
      data_format = "NHWC"
    channel_index = 1 if data_format.startswith("NC") else 3

    strides = _get_sequence(strides, 2, channel_index, "strides")
    dilations = _get_sequence(dilations, 2, channel_index, "dilations")

    return gen_nn_ops.conv2d_backprop_input(
        input_sizes=output_shape,
        filter=filters,
        out_backprop=input,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name)


@tf_export("nn.atrous_conv2d_transpose")
def atrous_conv2d_transpose(value,
                            filters,
                            output_shape,
                            rate,
                            padding,
                            name=None):
  """The transpose of `atrous_conv2d`.

  This operation is sometimes called "deconvolution" after
  (Zeiler et al., 2010), but is really the transpose (gradient) of
  `atrous_conv2d` rather than an actual deconvolution.

  Args:
    value: A 4-D `Tensor` of type `float`. It needs to be in the default `NHWC`
      format. Its shape is `[batch, in_height, in_width, in_channels]`.
    filters: A 4-D `Tensor` with the same type as `value` and shape
      `[filter_height, filter_width, out_channels, in_channels]`. `filters`'
      `in_channels` dimension must match that of `value`. Atrous convolution is
      equivalent to standard convolution with upsampled filters with effective
      height `filter_height + (filter_height - 1) * (rate - 1)` and effective
      width `filter_width + (filter_width - 1) * (rate - 1)`, produced by
      inserting `rate - 1` zeros along consecutive elements across the
      `filters`' spatial dimensions.
    output_shape: A 1-D `Tensor` representing the output shape of the
      deconvolution op.
    rate: A positive int32. The stride with which we sample input values across
      the `height` and `width` dimensions. Equivalently, the rate by which we
      upsample the filter values by inserting zeros across the `height` and
      `width` dimensions. In the literature, the same parameter is sometimes
      called `input stride` or `dilation`.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
    name: Optional name for the returned tensor.

  Returns:
    A `Tensor` with the same type as `value`.

  Raises:
    ValueError: If input/output depth does not match `filters`' shape, or if
      padding is other than `'VALID'` or `'SAME'`, or if the `rate` is less
      than one, or if the output_shape is not a tensor with 4 elements.
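
  For example, a minimal sketch (shapes chosen only for illustration); with
  `'SAME'` padding the spatial dimensions of `output_shape` match those of
  `value`:

  ```python
  y = tf.ones([1, 8, 8, 5])  # in_channels = 5
  f = tf.ones([3, 3, 2, 5])  # [height, width, out_channels, in_channels]
  x = tf.nn.atrous_conv2d_transpose(
      y, f, output_shape=[1, 8, 8, 2], rate=2, padding="SAME")
  # x has shape [1, 8, 8, 2].
  ```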
2335 2336 References: 2337 Deconvolutional Networks: 2338 [Zeiler et al., 2010] 2339 (https://ieeexplore.ieee.org/abstract/document/5539957) 2340 ([pdf] 2341 (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.232.4023&rep=rep1&type=pdf)) 2342 """ 2343 with ops.name_scope(name, "atrous_conv2d_transpose", 2344 [value, filters, output_shape]) as name: 2345 value = ops.convert_to_tensor(value, name="value") 2346 filters = ops.convert_to_tensor(filters, name="filters") 2347 if not value.get_shape().dims[3].is_compatible_with(filters.get_shape()[3]): 2348 raise ValueError( 2349 "value's input channels does not match filters' input channels, " 2350 "{} != {}".format(value.get_shape()[3], 2351 filters.get_shape()[3])) 2352 if rate < 1: 2353 raise ValueError("rate {} cannot be less than one".format(rate)) 2354 2355 if rate == 1: 2356 return conv2d_transpose( 2357 value, 2358 filters, 2359 output_shape, 2360 strides=[1, 1, 1, 1], 2361 padding=padding, 2362 data_format="NHWC") 2363 2364 output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") 2365 if not output_shape_.get_shape().is_compatible_with( 2366 tensor_shape.TensorShape([4])): 2367 raise ValueError("output_shape must have shape (4,), got {}".format( 2368 output_shape_.get_shape())) 2369 2370 if isinstance(output_shape, tuple): 2371 output_shape = list(output_shape) 2372 2373 if isinstance(output_shape, (list, np.ndarray)): 2374 # output_shape's shape should be == [4] if reached this point. 2375 if not filters.get_shape().dims[2].is_compatible_with(output_shape[3]): 2376 raise ValueError( 2377 "output_shape does not match filter's output channels, " 2378 "{} != {}".format(output_shape[3], 2379 filters.get_shape()[2])) 2380 2381 # We have two padding contributions. The first is used for converting "SAME" 2382 # to "VALID". The second is required so that the height and width of the 2383 # zero-padded value tensor are multiples of rate. 2384 2385 # Padding required to reduce to "VALID" convolution 2386 if padding == "SAME": 2387 # Handle filters whose shape is unknown during graph creation. 2388 if filters.get_shape().is_fully_defined(): 2389 filter_shape = filters.get_shape().as_list() 2390 else: 2391 filter_shape = array_ops.shape(filters) 2392 filter_height, filter_width = filter_shape[0], filter_shape[1] 2393 2394 # Spatial dimensions of the filters and the upsampled filters in which we 2395 # introduce (rate - 1) zeros between consecutive filter values. 2396 filter_height_up = filter_height + (filter_height - 1) * (rate - 1) 2397 filter_width_up = filter_width + (filter_width - 1) * (rate - 1) 2398 2399 pad_height = filter_height_up - 1 2400 pad_width = filter_width_up - 1 2401 2402 # When pad_height (pad_width) is odd, we pad more to bottom (right), 2403 # following the same convention as conv2d(). 2404 pad_top = pad_height // 2 2405 pad_bottom = pad_height - pad_top 2406 pad_left = pad_width // 2 2407 pad_right = pad_width - pad_left 2408 elif padding == "VALID": 2409 pad_top = 0 2410 pad_bottom = 0 2411 pad_left = 0 2412 pad_right = 0 2413 else: 2414 raise ValueError("padding must be either VALID or SAME:" 2415 " {}".format(padding)) 2416 2417 in_height = output_shape[1] + pad_top + pad_bottom 2418 in_width = output_shape[2] + pad_left + pad_right 2419 2420 # More padding so that rate divides the height and width of the input. 
2421 pad_bottom_extra = (rate - in_height % rate) % rate 2422 pad_right_extra = (rate - in_width % rate) % rate 2423 2424 # The paddings argument to space_to_batch is just the extra padding 2425 # component. 2426 space_to_batch_pad = [[0, pad_bottom_extra], [0, pad_right_extra]] 2427 2428 value = array_ops.space_to_batch( 2429 input=value, paddings=space_to_batch_pad, block_size=rate) 2430 2431 input_sizes = [ 2432 rate * rate * output_shape[0], (in_height + pad_bottom_extra) // rate, 2433 (in_width + pad_right_extra) // rate, output_shape[3] 2434 ] 2435 2436 value = gen_nn_ops.conv2d_backprop_input( 2437 input_sizes=input_sizes, 2438 filter=filters, 2439 out_backprop=value, 2440 strides=[1, 1, 1, 1], 2441 padding="VALID", 2442 data_format="NHWC") 2443 2444 # The crops argument to batch_to_space includes both padding components. 2445 batch_to_space_crop = [[pad_top, pad_bottom + pad_bottom_extra], 2446 [pad_left, pad_right + pad_right_extra]] 2447 2448 return array_ops.batch_to_space( 2449 input=value, crops=batch_to_space_crop, block_size=rate) 2450 2451 2452@tf_export("nn.conv3d", v1=[]) 2453def conv3d_v2(input, # pylint: disable=redefined-builtin,missing-docstring 2454 filters, 2455 strides, 2456 padding, 2457 data_format="NDHWC", 2458 dilations=None, 2459 name=None): 2460 if dilations is None: 2461 dilations = [1, 1, 1, 1, 1] 2462 return gen_nn_ops.conv3d(input, 2463 filters, 2464 strides, 2465 padding, 2466 data_format=data_format, 2467 dilations=dilations, 2468 name=name) 2469 2470 2471@tf_export(v1=["nn.conv3d"]) 2472def conv3d_v1( # pylint: disable=missing-docstring,dangerous-default-value 2473 input, # pylint: disable=redefined-builtin 2474 filter=None, # pylint: disable=redefined-builtin 2475 strides=None, 2476 padding=None, 2477 data_format="NDHWC", 2478 dilations=[1, 1, 1, 1, 1], 2479 name=None, 2480 filters=None): 2481 filter = deprecated_argument_lookup("filters", filters, "filter", filter) 2482 return gen_nn_ops.conv3d( 2483 input, filter, strides, padding, data_format, dilations, name) 2484 2485 2486conv3d_v2.__doc__ = deprecation.rewrite_argument_docstring( 2487 gen_nn_ops.conv3d.__doc__, "filter", "filters") 2488conv3d_v1.__doc__ = gen_nn_ops.conv3d.__doc__ 2489 2490 2491@tf_export(v1=["nn.conv3d_transpose"]) 2492def conv3d_transpose( 2493 value, 2494 filter=None, # pylint: disable=redefined-builtin 2495 output_shape=None, 2496 strides=None, 2497 padding="SAME", 2498 data_format="NDHWC", 2499 name=None, 2500 input=None, # pylint: disable=redefined-builtin 2501 filters=None, 2502 dilations=None): 2503 """The transpose of `conv3d`. 2504 2505 This operation is sometimes called "deconvolution" after 2506 (Zeiler et al., 2010), but is really the transpose (gradient) of `conv3d` 2507 rather than an actual deconvolution. 2508 2509 Args: 2510 value: A 5-D `Tensor` of type `float` and shape 2511 `[batch, depth, height, width, in_channels]`. 2512 filter: A 5-D `Tensor` with the same type as `value` and shape 2513 `[depth, height, width, output_channels, in_channels]`. `filter`'s 2514 `in_channels` dimension must match that of `value`. 2515 output_shape: A 1-D `Tensor` representing the output shape of the 2516 deconvolution op. 2517 strides: A list of ints. The stride of the sliding window for each 2518 dimension of the input tensor. 2519 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 2520 See the "returns" section of `tf.nn.convolution` for details. 
    data_format: A string, either `'NDHWC'` or `'NCDHW'` specifying the layout
      of the input and output tensors. Defaults to `'NDHWC'`.
    name: Optional name for the returned tensor.
    input: Alias of value.
    filters: Alias of filter.
    dilations: An int or list of `ints` that has length `1`, `3` or `5`,
      defaults to 1. The dilation factor for each dimension of `input`. If a
      single value is given it is replicated in the `D`, `H` and `W`
      dimension. By default the `N` and `C` dimensions are set to 1. If set to
      k > 1, there will be k-1 skipped cells between each filter element on
      that dimension. The dimension order is determined by the value of
      `data_format`, see above for details. If given as a 5-d list, the
      dilations in the batch and depth dimensions must be 1.

  Returns:
    A `Tensor` with the same type as `value`.

  Raises:
    ValueError: If input/output depth does not match `filter`'s shape, or if
      padding is other than `'VALID'` or `'SAME'`.

  References:
    Deconvolutional Networks:
      [Zeiler et al., 2010]
      (https://ieeexplore.ieee.org/abstract/document/5539957)
      ([pdf]
      (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.232.4023&rep=rep1&type=pdf))
  """
  filter = deprecated_argument_lookup("filters", filters, "filter", filter)
  value = deprecated_argument_lookup("input", input, "value", value)
  return conv3d_transpose_v2(
      value,
      filter,
      output_shape,
      strides,
      padding=padding,
      data_format=data_format,
      dilations=dilations,
      name=name)


@tf_export("nn.conv3d_transpose", v1=[])
def conv3d_transpose_v2(input,  # pylint: disable=redefined-builtin
                        filters,
                        output_shape,
                        strides,
                        padding="SAME",
                        data_format="NDHWC",
                        dilations=None,
                        name=None):
  """The transpose of `conv3d`.

  This operation is sometimes called "deconvolution" after
  (Zeiler et al., 2010), but is really the transpose (gradient) of `conv3d`
  rather than an actual deconvolution.

  Args:
    input: A 5-D `Tensor` of type `float` and shape
      `[batch, depth, height, width, in_channels]` for `NDHWC` data format or
      `[batch, in_channels, depth, height, width]` for `NCDHW` data format.
    filters: A 5-D `Tensor` with the same type as `input` and shape
      `[depth, height, width, output_channels, in_channels]`. `filters`'
      `in_channels` dimension must match that of `input`.
    output_shape: A 1-D `Tensor` representing the output shape of the
      deconvolution op.
    strides: An int or list of `ints` that has length `1`, `3` or `5`. The
      stride of the sliding window for each dimension of `input`. If a single
      value is given it is replicated in the `D`, `H` and `W` dimension. By
      default the `N` and `C` dimensions are set to 1. The dimension order is
      determined by the value of `data_format`, see below for details.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
      the "returns" section of `tf.nn.convolution` for details.
    data_format: A string. 'NDHWC' and 'NCDHW' are supported.
    dilations: An int or list of `ints` that has length `1`, `3` or `5`,
      defaults to 1. The dilation factor for each dimension of `input`. If a
      single value is given it is replicated in the `D`, `H` and `W`
      dimension. By default the `N` and `C` dimensions are set to 1. If set to
      k > 1, there will be k-1 skipped cells between each filter element on
      that dimension.
      The dimension order is determined by the value of `data_format`, see
      above for details. If given as a 5-d list, the dilations in the batch
      and depth dimensions must be 1.
    name: Optional name for the returned tensor.

  Returns:
    A `Tensor` with the same type as `input`.

  References:
    Deconvolutional Networks:
      [Zeiler et al., 2010]
      (https://ieeexplore.ieee.org/abstract/document/5539957)
      ([pdf]
      (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.232.4023&rep=rep1&type=pdf))
  """
  with ops.name_scope(name, "conv3d_transpose",
                      [input, filters, output_shape]) as name:
    if data_format is None:
      data_format = "NDHWC"
    channel_index = 1 if data_format.startswith("NC") else 4

    strides = _get_sequence(strides, 3, channel_index, "strides")
    dilations = _get_sequence(dilations, 3, channel_index, "dilations")

    return gen_nn_ops.conv3d_backprop_input_v2(
        input_sizes=output_shape,
        filter=filters,
        out_backprop=input,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name)


CONV_TRANSPOSE_OPS = (
    conv1d_transpose,
    conv2d_transpose_v2,
    conv3d_transpose_v2,
)


@tf_export("nn.conv_transpose")
def conv_transpose(input,  # pylint: disable=redefined-builtin
                   filters,
                   output_shape,
                   strides,
                   padding="SAME",
                   data_format=None,
                   dilations=None,
                   name=None):
  """The transpose of `convolution`.

  This operation is sometimes called "deconvolution" after
  (Zeiler et al., 2010), but is really the transpose (gradient) of
  `convolution` rather than an actual deconvolution.

  Args:
    input: An N+2 dimensional `Tensor` of shape
      `[batch_size] + input_spatial_shape + [in_channels]` if data_format does
      not start with "NC" (default), or
      `[batch_size, in_channels] + input_spatial_shape` if data_format starts
      with "NC". It must be one of the following types:
      `half`, `bfloat16`, `float32`, `float64`.
    filters: An N+2 dimensional `Tensor` with the same type as `input` and
      shape `spatial_filter_shape + [in_channels, out_channels]`.
    output_shape: A 1-D `Tensor` representing the output shape of the
      deconvolution op.
    strides: An int or list of `ints` that has length `1`, `N` or `N+2`. The
      stride of the sliding window for each dimension of `input`. If a single
      value is given it is replicated in the spatial dimensions. By default
      the `N` and `C` dimensions are set to 1. The dimension order is
      determined by the value of `data_format`, see below for details.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
      the "returns" section of `tf.nn.convolution` for details.
    data_format: A string or None. Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if
      `data_format` does not start with "NC"), or the second dimension (if
      `data_format` starts with "NC"). For N=1, the valid values are "NWC"
      (default) and "NCW". For N=2, the valid values are "NHWC" (default) and
      "NCHW". For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    dilations: An int or list of `ints` that has length `1`, `N` or `N+2`,
      defaults to 1. The dilation factor for each dimension of `input`. If a
      single value is given it is replicated in the spatial dimensions.
      By default the `N` and `C` dimensions are set to 1. If set to k > 1,
      there will be k-1 skipped cells between each filter element on that
      dimension. The dimension order is determined by the value of
      `data_format`, see above for details.
    name: A name for the operation (optional). If not specified
      "conv_transpose" is used.

  Returns:
    A `Tensor` with the same type as `input`.

  References:
    Deconvolutional Networks:
      [Zeiler et al., 2010]
      (https://ieeexplore.ieee.org/abstract/document/5539957)
      ([pdf]
      (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.232.4023&rep=rep1&type=pdf))
  """
  with ops.name_scope(name, "conv_transpose",
                      [input, filters, output_shape]) as name:
    if tensor_util.is_tensor(output_shape):
      n = output_shape.shape[0] - 2
    elif isinstance(output_shape, collections_abc.Sized):
      n = len(output_shape) - 2
    else:
      raise ValueError("output_shape must be a tensor or sized collection.")

    if not 1 <= n <= 3:
      raise ValueError(
          "output_shape must be of length 3, 4 or 5 but was {}.".format(n + 2))

    op = CONV_TRANSPOSE_OPS[n-1]
    return op(
        input,
        filters,
        output_shape,
        strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name)


def _tf_deterministic_ops():
  if _tf_deterministic_ops.value is None:
    tf_deterministic_ops = os.environ.get("TF_DETERMINISTIC_OPS")
    if tf_deterministic_ops is not None:
      tf_deterministic_ops = tf_deterministic_ops.lower()
    _tf_deterministic_ops.value = (
        tf_deterministic_ops == "true" or tf_deterministic_ops == "1")
  return _tf_deterministic_ops.value


_tf_deterministic_ops.value = None


@tf_export("nn.bias_add")
def bias_add(value, bias, data_format=None, name=None):
  """Adds `bias` to `value`.

  This is (mostly) a special case of `tf.add` where `bias` is restricted to
  1-D. Broadcasting is supported, so `value` may have any number of
  dimensions. Unlike `tf.add`, the type of `bias` is allowed to differ from
  `value` in the case where both types are quantized.

  Args:
    value: A `Tensor` with type `float`, `double`, `int64`, `int32`, `uint8`,
      `int16`, `int8`, `complex64`, or `complex128`.
    bias: A 1-D `Tensor` with size matching the channel dimension of `value`.
      Must be the same type as `value` unless `value` is a quantized type,
      in which case a different quantized type may be used.
    data_format: A string. 'N...C' and 'NC...' are supported. If `None` (the
      default) is specified then 'N...C' is assumed.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` with the same type as `value`.

  Raises:
    ValueError: If the data format is unrecognized, if `value` has fewer than
      two dimensions when `data_format` is 'N...C'/`None` or fewer than three
      dimensions when `data_format` is 'NC...', if `bias` does not have
      exactly one dimension (i.e. is not a vector), or if the size of `bias`
      does not match the size of the channel dimension of `value`.
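
  For example (values chosen only for illustration), the 1-D bias is
  broadcast along the channel dimension:

  ```python
  x = tf.reshape(tf.range(6.), [1, 3, 2])
  y = tf.nn.bias_add(x, tf.constant([1., -1.]))
  # With the default 'N...C' format the bias is applied to the last axis:
  # y[..., 0] == x[..., 0] + 1.0 and y[..., 1] == x[..., 1] - 1.0.
  ```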
2766 """ 2767 with ops.name_scope(name, "BiasAdd", [value, bias]) as name: 2768 if data_format is not None: 2769 if data_format.startswith("NC"): 2770 data_format = "NCHW" 2771 elif data_format.startswith("N") and data_format.endswith("C"): 2772 data_format = "NHWC" 2773 else: 2774 raise ValueError("data_format must be of the form `N...C` or `NC...`") 2775 2776 if not context.executing_eagerly(): 2777 value = ops.convert_to_tensor(value, name="input") 2778 bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") 2779 2780 # TODO(duncanriach): Implement deterministic functionality at CUDA kernel 2781 # level. 2782 if _tf_deterministic_ops(): 2783 # Note that this code does not implement the same error checks as the 2784 # pre-existing C++ ops. 2785 if data_format == "NCHW": 2786 broadcast_shape_head = [1, array_ops.size(bias)] 2787 broadcast_shape_tail = array_ops.ones( 2788 array_ops.rank(value) - 2, dtype=dtypes.int32) 2789 broadcast_shape = array_ops.concat( 2790 [broadcast_shape_head, broadcast_shape_tail], 0) 2791 return math_ops.add( 2792 value, array_ops.reshape(bias, broadcast_shape), name=name) 2793 else: # data_format == 'NHWC' or data_format == None 2794 return math_ops.add(value, bias, name=name) 2795 else: 2796 return gen_nn_ops.bias_add( 2797 value, bias, data_format=data_format, name=name) 2798 2799 2800def bias_add_v1(value, bias, name=None): 2801 """Adds `bias` to `value`. 2802 2803 This is a deprecated version of bias_add and will soon to be removed. 2804 2805 This is (mostly) a special case of `tf.add` where `bias` is restricted to 1-D. 2806 Broadcasting is supported, so `value` may have any number of dimensions. 2807 Unlike `tf.add`, the type of `bias` is allowed to differ from `value` in the 2808 case where both types are quantized. 2809 2810 Args: 2811 value: A `Tensor` with type `float`, `double`, `int64`, `int32`, `uint8`, 2812 `int16`, `int8`, `complex64`, or `complex128`. 2813 bias: A 1-D `Tensor` with size matching the last dimension of `value`. 2814 Must be the same type as `value` unless `value` is a quantized type, 2815 in which case a different quantized type may be used. 2816 name: A name for the operation (optional). 2817 2818 Returns: 2819 A `Tensor` with the same type as `value`. 2820 """ 2821 with ops.name_scope(name, "BiasAddV1", [value, bias]) as name: 2822 value = ops.convert_to_tensor(value, name="input") 2823 bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") 2824 return gen_nn_ops.bias_add_v1(value, bias, name=name) 2825 2826 2827@tf_export(v1=["nn.crelu"]) 2828def crelu(features, name=None, axis=-1): 2829 """Computes Concatenated ReLU. 2830 2831 Concatenates a ReLU which selects only the positive part of the activation 2832 with a ReLU which selects only the *negative* part of the activation. 2833 Note that as a result this non-linearity doubles the depth of the activations. 2834 Source: [Understanding and Improving Convolutional Neural Networks via 2835 Concatenated Rectified Linear Units. W. Shang, et 2836 al.](https://arxiv.org/abs/1603.05201) 2837 2838 Args: 2839 features: A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, 2840 `int16`, or `int8`. 2841 name: A name for the operation (optional). 2842 axis: The axis that the output values are concatenated along. Default is -1. 2843 2844 Returns: 2845 A `Tensor` with the same type as `features`. 

  References:
    Understanding and Improving Convolutional Neural Networks via Concatenated
    Rectified Linear Units:
      [Shang et al., 2016](http://proceedings.mlr.press/v48/shang16)
      ([pdf](http://proceedings.mlr.press/v48/shang16.pdf))
  """
  with ops.name_scope(name, "CRelu", [features]) as name:
    features = ops.convert_to_tensor(features, name="features")
    c = array_ops.concat([features, -features], axis, name=name)
    return gen_nn_ops.relu(c)


@tf_export("nn.crelu", v1=[])
def crelu_v2(features, axis=-1, name=None):
  return crelu(features, name=name, axis=axis)
crelu_v2.__doc__ = crelu.__doc__


@tf_export("nn.relu6")
def relu6(features, name=None):
  """Computes Rectified Linear 6: `min(max(features, 0), 6)`.

  Args:
    features: A `Tensor` with type `float`, `double`, `int32`, `int64`,
      `uint8`, `int16`, or `int8`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` with the same type as `features`.

  References:
    Convolutional Deep Belief Networks on CIFAR-10:
      Krizhevsky et al., 2010
      ([pdf](http://www.cs.utoronto.ca/~kriz/conv-cifar10-aug2010.pdf))
  """
  with ops.name_scope(name, "Relu6", [features]) as name:
    features = ops.convert_to_tensor(features, name="features")
    return gen_nn_ops.relu6(features, name=name)


@tf_export("nn.leaky_relu")
def leaky_relu(features, alpha=0.2, name=None):
  """Compute the Leaky ReLU activation function.

  Source: [Rectifier Nonlinearities Improve Neural Network Acoustic Models.
  AL Maas, AY Hannun, AY Ng - Proc. ICML, 2013]
  (https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf).

  Args:
    features: A `Tensor` representing preactivation values. Must be one of
      the following types: `float16`, `float32`, `float64`, `int32`, `int64`.
    alpha: Slope of the activation function at x < 0.
    name: A name for the operation (optional).

  Returns:
    The activation value.

  References:
    Rectifier Nonlinearities Improve Neural Network Acoustic Models:
      [Maas et al., 2013]
      (http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.693.1422)
      ([pdf]
      (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.693.1422&rep=rep1&type=pdf))
  """
  with ops.name_scope(name, "LeakyRelu", [features, alpha]) as name:
    features = ops.convert_to_tensor(features, name="features")
    if features.dtype.is_integer:
      features = math_ops.cast(features, dtypes.float32)
    if isinstance(alpha, np.ndarray):
      alpha = alpha.item()
    return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name)


def _flatten_outer_dims(logits):
  """Flattens logits' outer dimensions and keeps its last dimension."""
  rank = array_ops.rank(logits)
  last_dim_size = array_ops.slice(
      array_ops.shape(logits), [math_ops.subtract(rank, 1)], [1])
  output = array_ops.reshape(logits,
                             array_ops.concat([[-1], last_dim_size], 0))

  # Set output shape if known.
def _flatten_outer_dims(logits):
  """Flattens logits' outer dimensions and keeps its last dimension."""
  rank = array_ops.rank(logits)
  last_dim_size = array_ops.slice(
      array_ops.shape(logits), [math_ops.subtract(rank, 1)], [1])
  output = array_ops.reshape(logits, array_ops.concat([[-1], last_dim_size], 0))

  # Set output shape if known.
  if not context.executing_eagerly():
    shape = logits.get_shape()
    if shape is not None and shape.dims is not None:
      shape = shape.as_list()
      product = 1
      product_valid = True
      for d in shape[:-1]:
        if d is None:
          product_valid = False
          break
        else:
          product *= d
      if product_valid:
        output_shape = [product, shape[-1]]
        output.set_shape(output_shape)

  return output


def _softmax(logits, compute_op, dim=-1, name=None):
  """Helper function for softmax and log_softmax.

  If `dim` is not the last dimension, it transposes `logits` so that `dim`
  becomes the last dimension, invokes `compute_op` on the transposed tensor,
  and transposes the output back. Otherwise it invokes `compute_op` directly.

  Args:
    logits: A non-empty `Tensor`. Must be one of the following types: `half`,
      `float32`, `float64`.
    compute_op: Either gen_nn_ops.softmax or gen_nn_ops.log_softmax.
    dim: The dimension softmax would be performed on. The default is -1 which
      indicates the last dimension.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `logits`. Same shape as `logits`.

  Raises:
    InvalidArgumentError: if `logits` is empty or `dim` is beyond the last
      dimension of `logits`.
  """

  def _swap_axis(logits, dim_index, last_index, name=None):
    """Swaps logits' dim_index and last_index."""
    return array_ops.transpose(
        logits,
        array_ops.concat([
            math_ops.range(dim_index), [last_index],
            math_ops.range(dim_index + 1, last_index), [dim_index]
        ], 0),
        name=name)

  logits = ops.convert_to_tensor(logits)

  # We need its original shape for shape inference.
  shape = logits.get_shape()
  is_last_dim = (dim == -1) or (dim == shape.ndims - 1)

  if is_last_dim:
    return compute_op(logits, name=name)

  dim_val = dim
  if isinstance(dim, ops.Tensor):
    dim_val = tensor_util.constant_value(dim)
  if dim_val is not None and not -shape.ndims <= dim_val < shape.ndims:
    raise errors_impl.InvalidArgumentError(
        None, None,
        "Dimension (%d) must be in the range [%d, %d) where %d is the number of"
        " dimensions in the input." % (dim_val, -shape.ndims, shape.ndims,
                                       shape.ndims))

  # If dim is not the last dimension, we have to do a transpose so that we can
  # still perform softmax on its last dimension.

  # In case dim is negative (and is not last dimension -1), add shape.ndims.
  ndims = array_ops.rank(logits)
  if not isinstance(dim, ops.Tensor):
    if dim < 0:
      dim += ndims
  else:
    dim = array_ops.where(math_ops.less(dim, 0), dim + ndims, dim)

  # Swap logits' dimension of dim and its last dimension.
  input_rank = array_ops.rank(logits)
  dim_axis = dim % shape.ndims
  logits = _swap_axis(logits, dim_axis, math_ops.subtract(input_rank, 1))

  # Do the actual softmax on its last dimension.
  output = compute_op(logits)

  output = _swap_axis(
      output, dim_axis, math_ops.subtract(input_rank, 1), name=name)

  # Make shape inference work since transpose may erase its static shape.
  output.set_shape(shape)

  return output
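

# An illustrative sketch (hypothetical helper, assuming eager execution): the
# transpose trick used by `_softmax` above — softmax over a non-last axis is
# the same as swapping that axis to the end, applying softmax, and swapping
# back.
def _example_softmax_axis_transpose():
  """Checks softmax over axis 0 via the transpose-to-last-axis trick."""
  logits = constant_op.constant([[1., 2., 3.], [4., 5., 6.]])
  direct = _softmax(logits, gen_nn_ops.softmax, dim=0)
  # Swap axis 0 to the end, softmax over the last axis, swap back.
  transposed = array_ops.transpose(logits, [1, 0])
  manual = array_ops.transpose(gen_nn_ops.softmax(transposed), [1, 0])
  np.testing.assert_allclose(direct.numpy(), manual.numpy(), rtol=1e-6)

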
@tf_export(v1=["nn.softmax", "math.softmax"])
@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
def softmax(logits, axis=None, name=None, dim=None):
  """Computes softmax activations.

  This function performs the equivalent of

      softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis)

  See: https://en.wikipedia.org/wiki/Softmax_function

  Example usage:

  >>> tf.nn.softmax([-1, 0., 1.])
  <tf.Tensor: shape=(3,), dtype=float32,
  numpy=array([0.09003057, 0.24472848, 0.66524094], dtype=float32)>

  Args:
    logits: A non-empty `Tensor`, or an object whose type has a registered
      `Tensor` conversion function. Must be one of the following types:
      `half`, `float32`, `float64`. See also `convert_to_tensor`.
    axis: The dimension softmax would be performed on. The default is -1 which
      indicates the last dimension.
    name: A name for the operation (optional).
    dim: Deprecated alias for `axis`.

  Returns:
    A `Tensor`. Has the same type and shape as `logits`.

  Raises:
    InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
      dimension of `logits`.
    TypeError: If no conversion function is registered for `logits` to
      Tensor.
    RuntimeError: If a registered conversion function returns an invalid
      value.
  """
  axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim)
  if axis is None:
    axis = -1
  return _softmax(logits, gen_nn_ops.softmax, axis, name)


@tf_export("nn.softmax", "math.softmax", v1=[])
def softmax_v2(logits, axis=None, name=None):
  """Computes softmax activations.

  This function performs the equivalent of

      softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis)

  Args:
    logits: A non-empty `Tensor`. Must be one of the following types: `half`,
      `float32`, `float64`.
    axis: The dimension softmax would be performed on. The default is -1 which
      indicates the last dimension.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type and shape as `logits`.

  Raises:
    InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
      dimension of `logits`.
  """
  if axis is None:
    axis = -1
  return _softmax(logits, gen_nn_ops.softmax, axis, name)


@tf_export(v1=["nn.log_softmax", "math.log_softmax"])
@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
def log_softmax(logits, axis=None, name=None, dim=None):
  """Computes log softmax activations.

  For each batch `i` and class `j` we have

      logsoftmax = logits - log(reduce_sum(exp(logits), axis))

  Args:
    logits: A non-empty `Tensor`. Must be one of the following types: `half`,
      `float32`, `float64`.
    axis: The dimension softmax would be performed on. The default is -1 which
      indicates the last dimension.
    name: A name for the operation (optional).
    dim: Deprecated alias for `axis`.

  Returns:
    A `Tensor`. Has the same type as `logits`. Same shape as `logits`.

  Raises:
    InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
      dimension of `logits`.
  """
  axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim)
  if axis is None:
    axis = -1
  return _softmax(logits, gen_nn_ops.log_softmax, axis, name)
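

# An illustrative sketch (hypothetical helper, assuming eager execution):
# verifies the identity in the docstring above,
# log_softmax(x) = x - log(sum(exp(x))), along the last axis.
def _example_log_softmax_identity():
  """Checks log_softmax against its defining formula."""
  logits = constant_op.constant([[2., 0., -1.], [1., 1., 1.]])
  out = log_softmax(logits)
  # reduce_logsumexp computes log(sum(exp(...))) in a numerically stable way.
  manual = logits - math_ops.reduce_logsumexp(logits, axis=-1, keepdims=True)
  np.testing.assert_allclose(out.numpy(), manual.numpy(), rtol=1e-6)

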
@tf_export("nn.log_softmax", "math.log_softmax", v1=[])
def log_softmax_v2(logits, axis=None, name=None):
  """Computes log softmax activations.

  For each batch `i` and class `j` we have

      logsoftmax = logits - log(reduce_sum(exp(logits), axis))

  Args:
    logits: A non-empty `Tensor`. Must be one of the following types: `half`,
      `float32`, `float64`.
    axis: The dimension softmax would be performed on. The default is -1 which
      indicates the last dimension.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `logits`. Same shape as `logits`.

  Raises:
    InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
      dimension of `logits`.
  """
  if axis is None:
    axis = -1
  return _softmax(logits, gen_nn_ops.log_softmax, axis, name)


def _ensure_xent_args(name, sentinel, labels, logits):
  # Make sure that all arguments were passed as named arguments.
  if sentinel is not None:
    raise ValueError("Only call `%s` with "
                     "named arguments (labels=..., logits=..., ...)" % name)
  if labels is None or logits is None:
    raise ValueError("Both labels and logits must be provided.")


@tf_export("nn.softmax_cross_entropy_with_logits", v1=[])
def softmax_cross_entropy_with_logits_v2(labels, logits, axis=-1, name=None):
  """Computes softmax cross entropy between `logits` and `labels`.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class). For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:** While the classes are mutually exclusive, their probabilities
  need not be. All that is required is that each row of `labels` is
  a valid probability distribution. If they are not, the computation of the
  gradient will be incorrect.

  If using exclusive `labels` (wherein one and only
  one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`.

  Usage:

  >>> logits = [[4.0, 2.0, 1.0], [0.0, 5.0, 1.0]]
  >>> labels = [[1.0, 0.0, 0.0], [0.0, 0.8, 0.2]]
  >>> tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
  <tf.Tensor: shape=(2,), dtype=float32,
  numpy=array([0.16984604, 0.82474494], dtype=float32)>

  **WARNING:** This op expects unscaled logits, since it performs a `softmax`
  on `logits` internally for efficiency. Do not call this op with the
  output of `softmax`, as it will produce incorrect results.

  A common use case is to have logits and labels of shape
  `[batch_size, num_classes]`, but higher dimensions are supported, with
  the `axis` argument specifying the class dimension.

  `logits` and `labels` must have the same dtype (either `float16`, `float32`,
  or `float64`).

  Backpropagation will happen into both `logits` and `labels`. To disallow
  backpropagation into `labels`, pass label tensors through `tf.stop_gradient`
  before feeding it to this function.

  **Note that to avoid confusion, it is required to pass only named arguments
  to this function.**

  Args:
    labels: Each vector along the class dimension should hold a valid
      probability distribution e.g. for the case in which labels are of shape
      `[batch_size, num_classes]`, each row of `labels[i]` must be a valid
      probability distribution.
    logits: Per-label activations, typically a linear output. These activation
      energies are interpreted as unnormalized log probabilities.
    axis: The class dimension. Defaulted to -1 which is the last dimension.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` that contains the softmax cross entropy loss. Its type is the
    same as `logits` and its shape is the same as `labels` except that it does
    not have the last dimension of `labels`.
  """
  return softmax_cross_entropy_with_logits_v2_helper(
      labels=labels, logits=logits, axis=axis, name=name)
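

# An illustrative sketch (hypothetical helper, assuming eager execution):
# checks the op above against the textbook formula
# loss = -sum(labels * log_softmax(logits)) along the class axis.
def _example_softmax_cross_entropy():
  """Compares softmax_cross_entropy_with_logits_v2 with a manual computation."""
  logits = constant_op.constant([[4.0, 2.0, 1.0], [0.0, 5.0, 1.0]])
  labels = constant_op.constant([[1.0, 0.0, 0.0], [0.0, 0.8, 0.2]])
  loss = softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)
  manual = -math_ops.reduce_sum(labels * log_softmax_v2(logits), axis=-1)
  np.testing.assert_allclose(loss.numpy(), manual.numpy(), rtol=1e-6)

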
@tf_export(v1=["nn.softmax_cross_entropy_with_logits_v2"])
@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
def softmax_cross_entropy_with_logits_v2_helper(
    labels, logits, axis=None, name=None, dim=None):
  """Computes softmax cross entropy between `logits` and `labels`.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class). For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:** While the classes are mutually exclusive, their probabilities
  need not be. All that is required is that each row of `labels` is
  a valid probability distribution. If they are not, the computation of the
  gradient will be incorrect.

  If using exclusive `labels` (wherein one and only
  one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`.

  **WARNING:** This op expects unscaled logits, since it performs a `softmax`
  on `logits` internally for efficiency. Do not call this op with the
  output of `softmax`, as it will produce incorrect results.

  A common use case is to have logits and labels of shape
  `[batch_size, num_classes]`, but higher dimensions are supported, with
  the `axis` argument specifying the class dimension.

  `logits` and `labels` must have the same dtype (either `float16`, `float32`,
  or `float64`).

  Backpropagation will happen into both `logits` and `labels`. To disallow
  backpropagation into `labels`, pass label tensors through `tf.stop_gradient`
  before feeding it to this function.

  **Note that to avoid confusion, it is required to pass only named arguments
  to this function.**

  Args:
    labels: Each vector along the class dimension should hold a valid
      probability distribution e.g. for the case in which labels are of shape
      `[batch_size, num_classes]`, each row of `labels[i]` must be a valid
      probability distribution.
    logits: Unscaled log probabilities.
    axis: The class dimension. Defaulted to -1 which is the last dimension.
    name: A name for the operation (optional).
    dim: Deprecated alias for axis.

  Returns:
    A `Tensor` that contains the softmax cross entropy loss. Its type is the
    same as `logits` and its shape is the same as `labels` except that it does
    not have the last dimension of `labels`.
  """
  # TODO(pcmurray) Raise an error when the labels do not sum to 1. Note: This
  # could break users who call this with bad labels, but disregard the bad
  # results.
  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
  del dim
  if axis is None:
    axis = -1

  with ops.name_scope(name, "softmax_cross_entropy_with_logits",
                      [logits, labels]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    labels = ops.convert_to_tensor(labels, name="labels")
    convert_to_float32 = (
        logits.dtype == dtypes.float16 or logits.dtype == dtypes.bfloat16)
    precise_logits = math_ops.cast(
        logits, dtypes.float32) if convert_to_float32 else logits
    # labels and logits must be of the same type
    labels = math_ops.cast(labels, precise_logits.dtype)
    input_rank = array_ops.rank(precise_logits)
    # For shape inference.
    shape = logits.get_shape()

    # Move the dim to the end if dim is not the last dimension.
    if axis != -1:

      def _move_dim_to_end(tensor, dim_index, rank):
        return array_ops.transpose(
            tensor,
            array_ops.concat([
                math_ops.range(dim_index),
                math_ops.range(dim_index + 1, rank), [dim_index]
            ], 0))

      precise_logits = _move_dim_to_end(precise_logits, axis, input_rank)
      labels = _move_dim_to_end(labels, axis, input_rank)

    input_shape = array_ops.shape(precise_logits)

    # Make precise_logits and labels into matrices.
    precise_logits = _flatten_outer_dims(precise_logits)
    labels = _flatten_outer_dims(labels)

    # Do the actual op computation.
    # The second output tensor contains the gradients. We use it in
    # CrossEntropyGrad() in nn_grad but not here.
    cost, unused_backprop = gen_nn_ops.softmax_cross_entropy_with_logits(
        precise_logits, labels, name=name)

    # The output cost shape should be the input minus axis.
    output_shape = array_ops.slice(input_shape, [0],
                                   [math_ops.subtract(input_rank, 1)])
    cost = array_ops.reshape(cost, output_shape)

    # Make shape inference work since reshape and transpose may erase its
    # static shape.
    if not context.executing_eagerly(
    ) and shape is not None and shape.dims is not None:
      shape = shape.as_list()
      del shape[axis]
      cost.set_shape(shape)

    if convert_to_float32:
      return math_ops.cast(cost, logits.dtype)
    else:
      return cost


_XENT_DEPRECATION = """
Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.
"""


@tf_export(v1=["nn.softmax_cross_entropy_with_logits"])
@deprecation.deprecated(date=None, instructions=_XENT_DEPRECATION)
def softmax_cross_entropy_with_logits(
    _sentinel=None,  # pylint: disable=invalid-name
    labels=None,
    logits=None,
    dim=-1,
    name=None,
    axis=None):
  """Computes softmax cross entropy between `logits` and `labels`.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class). For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:** While the classes are mutually exclusive, their probabilities
  need not be. All that is required is that each row of `labels` is
  a valid probability distribution. If they are not, the computation of the
  gradient will be incorrect.
  If using exclusive `labels` (wherein one and only
  one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`.

  **WARNING:** This op expects unscaled logits, since it performs a `softmax`
  on `logits` internally for efficiency. Do not call this op with the
  output of `softmax`, as it will produce incorrect results.

  A common use case is to have logits and labels of shape
  `[batch_size, num_classes]`, but higher dimensions are supported, with
  the `dim` argument specifying the class dimension.

  Backpropagation will happen only into `logits`. To calculate a cross entropy
  loss that allows backpropagation into both `logits` and `labels`, see
  `tf.nn.softmax_cross_entropy_with_logits_v2`.

  **Note that to avoid confusion, it is required to pass only named arguments
  to this function.**

  Args:
    _sentinel: Used to prevent positional parameters. Internal, do not use.
    labels: Each vector along the class dimension should hold a valid
      probability distribution e.g. for the case in which labels are of shape
      `[batch_size, num_classes]`, each row of `labels[i]` must be a valid
      probability distribution.
    logits: Per-label activations, typically a linear output. These activation
      energies are interpreted as unnormalized log probabilities.
    dim: The class dimension. Defaulted to -1 which is the last dimension.
    name: A name for the operation (optional).
    axis: Alias for dim.

  Returns:
    A `Tensor` that contains the softmax cross entropy loss. Its type is the
    same as `logits` and its shape is the same as `labels` except that it does
    not have the last dimension of `labels`.
  """
  dim = deprecated_argument_lookup("axis", axis, "dim", dim)
  _ensure_xent_args("softmax_cross_entropy_with_logits", _sentinel, labels,
                    logits)

  with ops.name_scope(name, "softmax_cross_entropy_with_logits_sg",
                      [logits, labels]) as name:
    labels = array_ops.stop_gradient(labels, name="labels_stop_gradient")

  return softmax_cross_entropy_with_logits_v2(
      labels=labels, logits=logits, axis=dim, name=name)


@tf_export(v1=["nn.sparse_softmax_cross_entropy_with_logits"])
def sparse_softmax_cross_entropy_with_logits(
    _sentinel=None,  # pylint: disable=invalid-name
    labels=None,
    logits=None,
    name=None):
  """Computes sparse softmax cross entropy between `logits` and `labels`.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class). For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:** For this operation, the probability of a given label is considered
  exclusive. That is, soft classes are not allowed, and the `labels` vector
  must provide a single specific index for the true class for each row of
  `logits` (each minibatch entry). For soft softmax classification with
  a probability distribution for each entry, see
  `softmax_cross_entropy_with_logits_v2`.

  **WARNING:** This op expects unscaled logits, since it performs a `softmax`
  on `logits` internally for efficiency. Do not call this op with the
  output of `softmax`, as it will produce incorrect results.
  A common use case is to have logits of shape
  `[batch_size, num_classes]` and have labels of shape
  `[batch_size]`, but higher dimensions are supported, in which
  case the `dim`-th dimension is assumed to be of size `num_classes`.
  `logits` must have the dtype of `float16`, `float32`, or `float64`, and
  `labels` must have the dtype of `int32` or `int64`.

  **Note that to avoid confusion, it is required to pass only named arguments
  to this function.**

  Args:
    _sentinel: Used to prevent positional parameters. Internal, do not use.
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
      `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
      must be an index in `[0, num_classes)`. Other values will raise an
      exception when this op is run on CPU, and return `NaN` for corresponding
      loss and gradient rows on GPU.
    logits: Per-label activations (typically a linear output) of shape
      `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32`,
      or `float64`. These activation energies are interpreted as unnormalized
      log probabilities.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `labels` and of the same type as `logits`
    with the softmax cross entropy loss.

  Raises:
    ValueError: If logits are scalars (need to have rank >= 1) or if the rank
      of the labels is not equal to the rank of the logits minus one.
  """
  _ensure_xent_args("sparse_softmax_cross_entropy_with_logits", _sentinel,
                    labels, logits)

  # TODO(pcmurray) Raise an error when the label is not an index in
  # [0, num_classes). Note: This could break users who call this with bad
  # labels, but disregard the bad results.

  # Reshape logits and labels to rank 2.
  with ops.name_scope(name, "SparseSoftmaxCrossEntropyWithLogits",
                      [labels, logits]):
    labels = ops.convert_to_tensor(labels)
    logits = ops.convert_to_tensor(logits)
    precise_logits = math_ops.cast(logits, dtypes.float32) if (dtypes.as_dtype(
        logits.dtype) == dtypes.float16) else logits

    # Store label shape for result later.
    labels_static_shape = labels.get_shape()
    labels_shape = array_ops.shape(labels)
    static_shapes_fully_defined = (
        labels_static_shape.is_fully_defined() and
        logits.get_shape()[:-1].is_fully_defined())
    if logits.get_shape().ndims is not None and logits.get_shape().ndims == 0:
      raise ValueError(
          "Logits cannot be scalars - received shape %s." % logits.get_shape())
    if logits.get_shape().ndims is not None and (
        labels_static_shape.ndims is not None and
        labels_static_shape.ndims != logits.get_shape().ndims - 1):
      raise ValueError("Rank mismatch: Rank of labels (received %s) should "
                       "equal rank of logits minus 1 (received %s)." %
                       (labels_static_shape.ndims, logits.get_shape().ndims))
    if (static_shapes_fully_defined and
        labels_static_shape != logits.get_shape()[:-1]):
      raise ValueError("Shape mismatch: The shape of labels (received %s) "
                       "should equal the shape of logits except for the last "
                       "dimension (received %s)." % (labels_static_shape,
                                                     logits.get_shape()))
    # Check if no reshapes are required.
    if logits.get_shape().ndims == 2:
      cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
          precise_logits, labels, name=name)
      if logits.dtype == dtypes.float16:
        return math_ops.cast(cost, dtypes.float16)
      else:
        return cost

    # Perform a check of the dynamic shapes if the static shapes are not fully
    # defined.
    shape_checks = []
    if not static_shapes_fully_defined:
      shape_checks.append(
          check_ops.assert_equal(
              array_ops.shape(labels),
              array_ops.shape(logits)[:-1]))
    with ops.control_dependencies(shape_checks):
      # Reshape logits to 2 dim, labels to 1 dim.
      num_classes = array_ops.shape(logits)[array_ops.rank(logits) - 1]
      precise_logits = array_ops.reshape(precise_logits, [-1, num_classes])
      labels = array_ops.reshape(labels, [-1])
      # The second output tensor contains the gradients. We use it in
      # _CrossEntropyGrad() in nn_grad but not here.
      cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
          precise_logits, labels, name=name)
      cost = array_ops.reshape(cost, labels_shape)
      cost.set_shape(labels_static_shape)
      if logits.dtype == dtypes.float16:
        return math_ops.cast(cost, dtypes.float16)
      else:
        return cost
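

# An illustrative sketch (hypothetical helper, assuming eager execution):
# sparse cross entropy with integer class indices matches dense cross entropy
# with the equivalent one-hot labels.
def _example_sparse_vs_dense_xent():
  """Compares sparse and dense softmax cross entropy on one-hot labels."""
  logits = constant_op.constant([[4.0, 2.0, 1.0], [0.0, 5.0, 1.0]])
  sparse_labels = constant_op.constant([0, 1], dtype=dtypes.int64)
  dense_labels = constant_op.constant([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
  sparse_loss = sparse_softmax_cross_entropy_with_logits(
      labels=sparse_labels, logits=logits)
  dense_loss = softmax_cross_entropy_with_logits_v2(
      labels=dense_labels, logits=logits)
  np.testing.assert_allclose(
      sparse_loss.numpy(), dense_loss.numpy(), rtol=1e-6)

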
@tf_export("nn.sparse_softmax_cross_entropy_with_logits", v1=[])
def sparse_softmax_cross_entropy_with_logits_v2(labels, logits, name=None):
  """Computes sparse softmax cross entropy between `logits` and `labels`.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class). For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:** For this operation, the probability of a given label is considered
  exclusive. That is, soft classes are not allowed, and the `labels` vector
  must provide a single specific index for the true class for each row of
  `logits` (each minibatch entry). For soft softmax classification with
  a probability distribution for each entry, see
  `softmax_cross_entropy_with_logits_v2`.

  **WARNING:** This op expects unscaled logits, since it performs a `softmax`
  on `logits` internally for efficiency. Do not call this op with the
  output of `softmax`, as it will produce incorrect results.

  A common use case is to have logits of shape
  `[batch_size, num_classes]` and have labels of shape
  `[batch_size]`, but higher dimensions are supported, in which
  case the `dim`-th dimension is assumed to be of size `num_classes`.
  `logits` must have the dtype of `float16`, `float32`, or `float64`, and
  `labels` must have the dtype of `int32` or `int64`.

  **Note that to avoid confusion, it is required to pass only named arguments
  to this function.**

  Args:
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
      `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
      must be an index in `[0, num_classes)`. Other values will raise an
      exception when this op is run on CPU, and return `NaN` for corresponding
      loss and gradient rows on GPU.
    logits: Unscaled log probabilities of shape `[d_0, d_1, ..., d_{r-1},
      num_classes]` and dtype `float16`, `float32`, or `float64`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `labels` and of the same type as `logits`
    with the softmax cross entropy loss.

  Raises:
    ValueError: If logits are scalars (need to have rank >= 1) or if the rank
      of the labels is not equal to the rank of the logits minus one.
  """
  return sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits, name=name)


@tf_export("nn.avg_pool", v1=["nn.avg_pool_v2"])
def avg_pool_v2(input, ksize, strides, padding, data_format=None, name=None):  # pylint: disable=redefined-builtin
  """Performs the average pooling on the input.

  Each entry in `output` is the mean of the corresponding size `ksize`
  window in `input`.

  Args:
    input: Tensor of rank N+2, of shape `[batch_size] + input_spatial_shape +
      [num_channels]` if `data_format` does not start with "NC" (default), or
      `[batch_size, num_channels] + input_spatial_shape` if data_format starts
      with "NC". Pooling happens over the spatial dimensions only.
    ksize: An int or list of `ints` that has length `1`, `N` or `N+2`. The size
      of the window for each dimension of the input tensor.
    strides: An int or list of `ints` that has length `1`, `N` or `N+2`. The
      stride of the sliding window for each dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
      the "returns" section of `tf.nn.convolution` for details.
    data_format: A string. Specifies the channel dimension. For N=1 it can be
      either "NWC" (default) or "NCW", for N=2 it can be either "NHWC"
      (default) or "NCHW" and for N=3 either "NDHWC" (default) or "NCDHW".
    name: Optional name for the operation.

  Returns:
    A `Tensor` of format specified by `data_format`.
    The average pooled output tensor.
  """
  if input.shape is not None:
    n = len(input.shape) - 2
  elif data_format is not None:
    n = len(data_format) - 2
  else:
    raise ValueError(
        "The input must have a rank or a data format must be given.")
  if not 1 <= n <= 3:
    raise ValueError(
        "Input tensor must be of rank 3, 4 or 5 but was {}.".format(n + 2))

  if data_format is None:
    channel_index = n + 1
  else:
    channel_index = 1 if data_format.startswith("NC") else n + 1

  ksize = _get_sequence(ksize, n, channel_index, "ksize")
  strides = _get_sequence(strides, n, channel_index, "strides")

  avg_pooling_ops = {
      1: avg_pool1d,
      2: gen_nn_ops.avg_pool,
      3: gen_nn_ops.avg_pool3d
  }

  op = avg_pooling_ops[n]
  return op(
      input,
      ksize=ksize,
      strides=strides,
      padding=padding,
      data_format=data_format,
      name=name)
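

# An illustrative sketch (hypothetical helper, assuming eager execution):
# avg_pool_v2 infers the spatial rank N from the input and dispatches to the
# 1-D, 2-D or 3-D kernel; a 2x2 window with stride 2 halves each spatial dim.
def _example_avg_pool():
  """Average-pools a 1x4x4x1 NHWC tensor down to 1x2x2x1."""
  x = constant_op.constant(np.arange(16, dtype=np.float32).reshape(1, 4, 4, 1))
  y = avg_pool_v2(x, ksize=2, strides=2, padding="VALID")
  # Each output entry is the mean of a non-overlapping 2x2 input window.
  np.testing.assert_allclose(
      y.numpy().reshape(2, 2), [[2.5, 4.5], [10.5, 12.5]])

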
@tf_export(v1=["nn.avg_pool", "nn.avg_pool2d"])
def avg_pool(value, ksize, strides, padding, data_format="NHWC",
             name=None, input=None):  # pylint: disable=redefined-builtin
  """Performs the average pooling on the input.

  Each entry in `output` is the mean of the corresponding size `ksize`
  window in `value`.

  Args:
    value: A 4-D `Tensor` of shape `[batch, height, width, channels]` and type
      `float32`, `float64`, `qint8`, `quint8`, or `qint32`.
    ksize: An int or list of `ints` that has length `1`, `2` or `4`. The size
      of the window for each dimension of the input tensor.
    strides: An int or list of `ints` that has length `1`, `2` or `4`. The
      stride of the sliding window for each dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
      See the "returns" section of `tf.nn.convolution` for details.
    data_format: A string. 'NHWC' and 'NCHW' are supported.
    name: Optional name for the operation.
    input: Alias for value.

  Returns:
    A `Tensor` with the same type as `value`. The average pooled output tensor.
  """
  with ops.name_scope(name, "AvgPool", [value]) as name:
    value = deprecation.deprecated_argument_lookup(
        "input", input, "value", value)

    if data_format is None:
      data_format = "NHWC"
    channel_index = 1 if data_format.startswith("NC") else 3

    ksize = _get_sequence(ksize, 2, channel_index, "ksize")
    strides = _get_sequence(strides, 2, channel_index, "strides")

    return gen_nn_ops.avg_pool(
        value,
        ksize=ksize,
        strides=strides,
        padding=padding,
        data_format=data_format,
        name=name)


@tf_export("nn.avg_pool2d", v1=[])
def avg_pool2d(input, ksize, strides, padding, data_format="NHWC", name=None):  # pylint: disable=redefined-builtin
  """Performs the average pooling on the input.

  Each entry in `output` is the mean of the corresponding size `ksize`
  window in `input`.

  Args:
    input: A 4-D `Tensor` of shape `[batch, height, width, channels]` and type
      `float32`, `float64`, `qint8`, `quint8`, or `qint32`.
    ksize: An int or list of `ints` that has length `1`, `2` or `4`. The size
      of the window for each dimension of the input tensor.
    strides: An int or list of `ints` that has length `1`, `2` or `4`. The
      stride of the sliding window for each dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
      See the "returns" section of `tf.nn.convolution` for details.
    data_format: A string. 'NHWC' and 'NCHW' are supported.
    name: Optional name for the operation.

  Returns:
    A `Tensor` with the same type as `input`. The average pooled output tensor.
  """
  with ops.name_scope(name, "AvgPool2D", [input]) as name:
    if data_format is None:
      data_format = "NHWC"
    channel_index = 1 if data_format.startswith("NC") else 3

    ksize = _get_sequence(ksize, 2, channel_index, "ksize")
    strides = _get_sequence(strides, 2, channel_index, "strides")

    return gen_nn_ops.avg_pool(
        input,
        ksize=ksize,
        strides=strides,
        padding=padding,
        data_format=data_format,
        name=name)


@tf_export("nn.avg_pool1d")
def avg_pool1d(input, ksize, strides, padding, data_format="NWC", name=None):  # pylint: disable=redefined-builtin
  """Performs the average pooling on the input.

  Each entry in `output` is the mean of the corresponding size `ksize`
  window in `input`.

  Note internally this op reshapes and uses the underlying 2d operation.

  Args:
    input: A 3-D `Tensor` of the format specified by `data_format`.
    ksize: An int or list of `ints` that has length `1` or `3`. The size of the
      window for each dimension of the input tensor.
    strides: An int or list of `ints` that has length `1` or `3`. The stride of
      the sliding window for each dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
      the "returns" section of `tf.nn.convolution` for details.
    data_format: An optional string from: "NWC", "NCW". Defaults to "NWC".
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of format specified by `data_format`.
    The average pooled output tensor.
  """
  with ops.name_scope(name, "AvgPool1D", [input]) as name:
    if data_format is None:
      data_format = "NWC"
    channel_index = 1 if data_format.startswith("NC") else 2
    ksize = [1] + _get_sequence(ksize, 1, channel_index, "ksize")
    strides = [1] + _get_sequence(strides, 1, channel_index, "strides")

    expanding_dim = 1 if data_format == "NWC" else 2
    data_format = "NHWC" if data_format == "NWC" else "NCHW"

    input = array_ops.expand_dims_v2(input, expanding_dim)
    result = gen_nn_ops.avg_pool(
        input,
        ksize=ksize,
        strides=strides,
        padding=padding,
        data_format=data_format,
        name=name)
    return array_ops.squeeze(result, expanding_dim)


@tf_export("nn.avg_pool3d")
def avg_pool3d(input, ksize, strides, padding, data_format="NDHWC", name=None):  # pylint: disable=redefined-builtin
  """Performs the average pooling on the input.

  Each entry in `output` is the mean of the corresponding size `ksize`
  window in `input`.

  Args:
    input: A 5-D `Tensor` of shape `[batch, depth, height, width, channels]`
      and type `float32`, `float64`, `qint8`, `quint8`, or `qint32`.
    ksize: An int or list of `ints` that has length `1`, `3` or `5`. The size
      of the window for each dimension of the input tensor.
    strides: An int or list of `ints` that has length `1`, `3` or `5`. The
      stride of the sliding window for each dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
      See the "returns" section of `tf.nn.convolution` for details.
    data_format: A string. 'NDHWC' and 'NCDHW' are supported.
    name: Optional name for the operation.

  Returns:
    A `Tensor` with the same type as `input`. The average pooled output tensor.
  """
  with ops.name_scope(name, "AvgPool3D", [input]) as name:
    if data_format is None:
      data_format = "NDHWC"
    channel_index = 1 if data_format.startswith("NC") else 3

    ksize = _get_sequence(ksize, 3, channel_index, "ksize")
    strides = _get_sequence(strides, 3, channel_index, "strides")

    return gen_nn_ops.avg_pool3d(
        input,
        ksize=ksize,
        strides=strides,
        padding=padding,
        data_format=data_format,
        name=name)
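

# An illustrative sketch (hypothetical helper, assuming eager execution): the
# reshape trick used by avg_pool1d/max_pool1d above — a 1-D pool is a 2-D pool
# over an inserted height-1 dimension.
def _example_pool1d_via_2d():
  """Checks avg_pool1d against an explicit expand_dims/2-D pool/squeeze."""
  x = constant_op.constant([[[1.], [2.], [3.], [4.]]])  # Shape [1, 4, 1], NWC.
  direct = avg_pool1d(x, ksize=2, strides=2, padding="VALID")
  expanded = array_ops.expand_dims_v2(x, 1)  # Shape [1, 1, 4, 1], NHWC.
  pooled = avg_pool2d(expanded, ksize=[1, 2], strides=[1, 2], padding="VALID")
  manual = array_ops.squeeze(pooled, 1)
  np.testing.assert_allclose(direct.numpy(), manual.numpy())

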
# pylint: disable=redefined-builtin
@tf_export("nn.max_pool", v1=["nn.max_pool_v2"])
def max_pool_v2(input, ksize, strides, padding, data_format=None, name=None):
  """Performs the max pooling on the input.

  Args:
    input: Tensor of rank N+2, of shape `[batch_size] + input_spatial_shape +
      [num_channels]` if `data_format` does not start with "NC" (default), or
      `[batch_size, num_channels] + input_spatial_shape` if data_format starts
      with "NC". Pooling happens over the spatial dimensions only.
    ksize: An int or list of `ints` that has length `1`, `N` or `N+2`. The size
      of the window for each dimension of the input tensor.
    strides: An int or list of `ints` that has length `1`, `N` or `N+2`. The
      stride of the sliding window for each dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
      the "returns" section of `tf.nn.convolution` for details.
    data_format: A string. Specifies the channel dimension. For N=1 it can be
      either "NWC" (default) or "NCW", for N=2 it can be either "NHWC"
      (default) or "NCHW" and for N=3 either "NDHWC" (default) or "NCDHW".
    name: Optional name for the operation.

  Returns:
    A `Tensor` of format specified by `data_format`.
    The max pooled output tensor.
  """
  if input.shape is not None:
    n = len(input.shape) - 2
  elif data_format is not None:
    n = len(data_format) - 2
  else:
    raise ValueError(
        "The input must have a rank or a data format must be given.")
  if not 1 <= n <= 3:
    raise ValueError(
        "Input tensor must be of rank 3, 4 or 5 but was {}.".format(n + 2))

  if data_format is None:
    channel_index = n + 1
  else:
    channel_index = 1 if data_format.startswith("NC") else n + 1

  ksize = _get_sequence(ksize, n, channel_index, "ksize")
  strides = _get_sequence(strides, n, channel_index, "strides")

  max_pooling_ops = {
      1: max_pool1d,
      2: gen_nn_ops.max_pool,
      3: gen_nn_ops.max_pool3d
  }

  op = max_pooling_ops[n]
  return op(
      input,
      ksize=ksize,
      strides=strides,
      padding=padding,
      data_format=data_format,
      name=name)
# pylint: enable=redefined-builtin


@tf_export(v1=["nn.max_pool"])
def max_pool(value,
             ksize,
             strides,
             padding,
             data_format="NHWC",
             name=None,
             input=None):  # pylint: disable=redefined-builtin
  """Performs the max pooling on the input.

  Args:
    value: A 4-D `Tensor` of the format specified by `data_format`.
    ksize: An int or list of `ints` that has length `1`, `2` or `4`.
      The size of the window for each dimension of the input tensor.
    strides: An int or list of `ints` that has length `1`, `2` or `4`.
      The stride of the sliding window for each dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
      See the "returns" section of `tf.nn.convolution` for details.
    data_format: A string. 'NHWC', 'NCHW' and 'NCHW_VECT_C' are supported.
    name: Optional name for the operation.
    input: Alias for value.

  Returns:
    A `Tensor` of format specified by `data_format`.
    The max pooled output tensor.
  """
  value = deprecation.deprecated_argument_lookup("input", input, "value", value)
  with ops.name_scope(name, "MaxPool", [value]) as name:
    if data_format is None:
      data_format = "NHWC"
    channel_index = 1 if data_format.startswith("NC") else 3

    ksize = _get_sequence(ksize, 2, channel_index, "ksize")
    strides = _get_sequence(strides, 2, channel_index, "strides")
    if ((np.isscalar(ksize) and ksize == 0) or
        (isinstance(ksize,
                    (list, tuple, np.ndarray)) and any(v == 0 for v in ksize))):
      raise ValueError("ksize cannot be zero.")

    return gen_nn_ops.max_pool(
        value,
        ksize=ksize,
        strides=strides,
        padding=padding,
        data_format=data_format,
        name=name)


# pylint: disable=redefined-builtin
@tf_export("nn.max_pool1d")
def max_pool1d(input, ksize, strides, padding, data_format="NWC", name=None):
  """Performs the max pooling on the input.

  Note internally this op reshapes and uses the underlying 2d operation.

  Args:
    input: A 3-D `Tensor` of the format specified by `data_format`.
    ksize: An int or list of `ints` that has length `1` or `3`. The size of the
      window for each dimension of the input tensor.
    strides: An int or list of `ints` that has length `1` or `3`. The stride of
      the sliding window for each dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
      the "returns" section of `tf.nn.convolution` for details.
    data_format: An optional string from: "NWC", "NCW". Defaults to "NWC".
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of format specified by `data_format`.
    The max pooled output tensor.
  """
  with ops.name_scope(name, "MaxPool1d", [input]) as name:
    if data_format is None:
      data_format = "NWC"
    channel_index = 1 if data_format.startswith("NC") else 2
    ksize = [1] + _get_sequence(ksize, 1, channel_index, "ksize")
    strides = [1] + _get_sequence(strides, 1, channel_index, "strides")

    expanding_dim = 1 if data_format == "NWC" else 2
    data_format = "NHWC" if data_format == "NWC" else "NCHW"

    input = array_ops.expand_dims_v2(input, expanding_dim)
    result = gen_nn_ops.max_pool(
        input,
        ksize=ksize,
        strides=strides,
        padding=padding,
        data_format=data_format,
        name=name)
    return array_ops.squeeze(result, expanding_dim)
# pylint: enable=redefined-builtin


# pylint: disable=redefined-builtin
@tf_export("nn.max_pool2d")
def max_pool2d(input, ksize, strides, padding, data_format="NHWC", name=None):
  """Performs the max pooling on the input.

  Args:
    input: A 4-D `Tensor` of the format specified by `data_format`.
    ksize: An int or list of `ints` that has length `1`, `2` or `4`. The size
      of the window for each dimension of the input tensor.
    strides: An int or list of `ints` that has length `1`, `2` or `4`. The
      stride of the sliding window for each dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
      the "returns" section of `tf.nn.convolution` for details.
    data_format: A string. 'NHWC', 'NCHW' and 'NCHW_VECT_C' are supported.
    name: Optional name for the operation.

  Returns:
    A `Tensor` of format specified by `data_format`.
    The max pooled output tensor.
  """
  with ops.name_scope(name, "MaxPool2d", [input]) as name:
    if data_format is None:
      data_format = "NHWC"
    channel_index = 1 if data_format.startswith("NC") else 3

    ksize = _get_sequence(ksize, 2, channel_index, "ksize")
    strides = _get_sequence(strides, 2, channel_index, "strides")

    return gen_nn_ops.max_pool(
        input,
        ksize=ksize,
        strides=strides,
        padding=padding,
        data_format=data_format,
        name=name)
# pylint: enable=redefined-builtin


# pylint: disable=redefined-builtin
@tf_export("nn.max_pool3d")
def max_pool3d(input, ksize, strides, padding, data_format="NDHWC", name=None):
  """Performs the max pooling on the input.

  Args:
    input: A 5-D `Tensor` of the format specified by `data_format`.
    ksize: An int or list of `ints` that has length `1`, `3` or `5`. The size
      of the window for each dimension of the input tensor.
    strides: An int or list of `ints` that has length `1`, `3` or `5`. The
      stride of the sliding window for each dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See
      the "returns" section of `tf.nn.convolution` for details.
    data_format: An optional string from: "NDHWC", "NCDHW". Defaults to
      "NDHWC". The data format of the input and output data. With the default
      format "NDHWC", the data is stored in the order of: [batch, in_depth,
      in_height, in_width, in_channels]. Alternatively, the format could be
      "NCDHW", the data storage order is: [batch, in_channels, in_depth,
      in_height, in_width].
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of format specified by `data_format`.
    The max pooled output tensor.
  """
  with ops.name_scope(name, "MaxPool3D", [input]) as name:
    if data_format is None:
      data_format = "NDHWC"
    channel_index = 1 if data_format.startswith("NC") else 4

    ksize = _get_sequence(ksize, 3, channel_index, "ksize")
    strides = _get_sequence(strides, 3, channel_index, "strides")

    return gen_nn_ops.max_pool3d(
        input,
        ksize=ksize,
        strides=strides,
        padding=padding,
        data_format=data_format,
        name=name)
# pylint: enable=redefined-builtin


@tf_export("nn.max_pool_with_argmax", v1=[])
def max_pool_with_argmax_v2(
    input,  # pylint: disable=redefined-builtin
    ksize,
    strides,
    padding,
    data_format="NHWC",
    output_dtype=dtypes.int64,
    include_batch_in_index=False,
    name=None):
  """Performs max pooling on the input and outputs both max values and indices.

  The indices in `argmax` are flattened, so that a maximum value at position
  `[b, y, x, c]` becomes flattened index: `(y * width + x) * channels + c` if
  `include_batch_in_index` is False;
  `((b * height + y) * width + x) * channels + c`
  if `include_batch_in_index` is True.

  The indices returned are always in `[0, height) x [0, width)` before
  flattening, even if padding is involved and the mathematically correct answer
  is outside (either negative or too large). This is a bug, but fixing it is
  difficult to do in a safe backwards compatible way, especially due to
  flattening.

  Args:
    input: A `Tensor`. Must be one of the following types: `float32`,
      `float64`, `int32`, `uint8`, `int16`, `int8`, `int64`, `bfloat16`,
      `uint16`, `half`, `uint32`, `uint64`.
      4-D with shape `[batch, height, width, channels]`. Input to pool over.
    ksize: An int or list of `ints` that has length `1`, `2` or `4`.
      The size of the window for each dimension of the input tensor.
    strides: An int or list of `ints` that has length `1`, `2` or `4`.
      The stride of the sliding window for each dimension of the
      input tensor.
    padding: A `string` from: `"SAME", "VALID"`.
      The type of padding algorithm to use.
    data_format: An optional `string`, must be set to `"NHWC"`. Defaults to
      `"NHWC"`.
      Specify the data format of the input and output data.
    output_dtype: An optional `tf.DType` from: `tf.int32, tf.int64`.
      Defaults to `tf.int64`.
      The dtype of the returned argmax tensor.
    include_batch_in_index: An optional `boolean`. Defaults to `False`.
      Whether to include batch dimension in flattened index of `argmax`.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (output, argmax).

    output: A `Tensor`. Has the same type as `input`.
    argmax: A `Tensor` of type `output_dtype`.
  """

  if data_format != "NHWC":
    raise ValueError("Data formats other than 'NHWC' are not yet supported")

  ksize = _get_sequence(ksize, 2, 3, "ksize")
  strides = _get_sequence(strides, 2, 3, "strides")

  return gen_nn_ops.max_pool_with_argmax(
      input=input,
      ksize=ksize,
      strides=strides,
      padding=padding,
      Targmax=output_dtype,
      include_batch_in_index=include_batch_in_index,
      name=name)


@tf_export(v1=["nn.max_pool_with_argmax"])
def max_pool_with_argmax_v1(  # pylint: disable=missing-docstring,invalid-name
    input,  # pylint: disable=redefined-builtin
    ksize,
    strides,
    padding,
    data_format="NHWC",
    Targmax=None,
    name=None,
    output_dtype=None,
    include_batch_in_index=False):
  if data_format != "NHWC":
    raise ValueError("Data formats other than 'NHWC' are not yet supported")

  Targmax = deprecated_argument_lookup(
      "output_dtype", output_dtype, "Targmax", Targmax)
  if Targmax is None:
    Targmax = dtypes.int64
  return gen_nn_ops.max_pool_with_argmax(
      input=input,
      ksize=ksize,
      strides=strides,
      padding=padding,
      Targmax=Targmax,
      include_batch_in_index=include_batch_in_index,
      name=name)


max_pool_with_argmax_v1.__doc__ = gen_nn_ops.max_pool_with_argmax.__doc__
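

# An illustrative sketch (hypothetical helper, assuming eager execution):
# decodes the flattened indices returned by max_pool_with_argmax back to
# (row, col, channel) coordinates for include_batch_in_index=False.
def _example_decode_argmax():
  """Recovers 2-D coordinates from the flattened argmax of a 1x2x2x1 input."""
  x = constant_op.constant([[[[1.], [3.]], [[2.], [4.]]]])  # NHWC, width 2.
  _, argmax = max_pool_with_argmax_v2(x, ksize=2, strides=2, padding="VALID")
  flat = int(argmax.numpy().ravel()[0])  # (y * width + x) * channels + c
  channels, width = 1, 2
  c = flat % channels
  col = (flat // channels) % width
  row = flat // (channels * width)
  assert (row, col, c) == (1, 1, 0)  # The max (4.) sits at y=1, x=1, c=0.

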
@ops.RegisterStatistics("Conv3D", "flops")
def _calc_conv3d_flops(graph, node):
  """Calculates the compute resources needed for Conv3D."""
  input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  input_shape.assert_is_fully_defined()
  filter_shape = graph_util.tensor_shape_from_node_def_name(
      graph, node.input[1])
  filter_shape.assert_is_fully_defined()
  output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  output_shape.assert_is_fully_defined()
  filter_time = int(filter_shape[0])
  filter_height = int(filter_shape[1])
  filter_width = int(filter_shape[2])
  filter_in_depth = int(filter_shape[3])
  output_count = np.prod(output_shape.as_list(), dtype=np.int64)
  return ops.OpStats("flops", (output_count * filter_in_depth * filter_time *
                               filter_height * filter_width * 2))


@ops.RegisterStatistics("Conv2D", "flops")
def _calc_conv_flops(graph, node):
  """Calculates the compute resources needed for Conv2D."""
  input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  input_shape.assert_is_fully_defined()
  filter_shape = graph_util.tensor_shape_from_node_def_name(
      graph, node.input[1])
  filter_shape.assert_is_fully_defined()
  output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  output_shape.assert_is_fully_defined()
  filter_height = int(filter_shape[0])
  filter_width = int(filter_shape[1])
  filter_in_depth = int(filter_shape[2])
  output_count = np.prod(output_shape.as_list(), dtype=np.int64)
  return ops.OpStats(
      "flops",
      (output_count * filter_in_depth * filter_height * filter_width * 2))


@ops.RegisterStatistics("DepthwiseConv2dNative", "flops")
def _calc_depthwise_conv_flops(graph, node):
  """Calculates the compute resources needed for DepthwiseConv2dNative."""
  input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  input_shape.assert_is_fully_defined()
  filter_shape = graph_util.tensor_shape_from_node_def_name(
      graph, node.input[1])
  filter_shape.assert_is_fully_defined()
  output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  output_shape.assert_is_fully_defined()
  filter_height = int(filter_shape[0])
  filter_width = int(filter_shape[1])
  output_count = np.prod(output_shape.as_list(), dtype=np.int64)
  return ops.OpStats("flops", (output_count * filter_height * filter_width * 2))


@ops.RegisterStatistics("BiasAdd", "flops")
def _calc_bias_add_flops(graph, node):
  """Calculates the computing needed for BiasAdd."""
  input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  input_shape.assert_is_fully_defined()
  input_count = np.prod(input_shape.as_list())
  return ops.OpStats("flops", input_count)


@tf_export(v1=["nn.xw_plus_b"])
def xw_plus_b(x, weights, biases, name=None):  # pylint: disable=invalid-name
  """Computes matmul(x, weights) + biases.

  Args:
    x: a 2D tensor. Dimensions typically: batch, in_units
    weights: a 2D tensor. Dimensions typically: in_units, out_units
    biases: a 1D tensor. Dimensions: out_units
    name: A name for the operation (optional). If not specified
      "xw_plus_b" is used.

  Returns:
    A 2-D Tensor computing matmul(x, weights) + biases.
    Dimensions typically: batch, out_units.
  """
  with ops.name_scope(name, "xw_plus_b", [x, weights, biases]) as name:
    x = ops.convert_to_tensor(x, name="x")
    weights = ops.convert_to_tensor(weights, name="weights")
    biases = ops.convert_to_tensor(biases, name="biases")
    mm = math_ops.matmul(x, weights)
    return bias_add(mm, biases, name=name)


def xw_plus_b_v1(x, weights, biases, name=None):
  """Computes matmul(x, weights) + biases.

  This is a deprecated version of `xw_plus_b` that will soon be removed.

  Args:
    x: a 2D tensor. Dimensions typically: batch, in_units
    weights: a 2D tensor. Dimensions typically: in_units, out_units
    biases: a 1D tensor. Dimensions: out_units
    name: A name for the operation (optional). If not specified
      "xw_plus_b_v1" is used.

  Returns:
    A 2-D Tensor computing matmul(x, weights) + biases.
    Dimensions typically: batch, out_units.
  """
  with ops.name_scope(name, "xw_plus_b_v1", [x, weights, biases]) as name:
    x = ops.convert_to_tensor(x, name="x")
    weights = ops.convert_to_tensor(weights, name="weights")
    biases = ops.convert_to_tensor(biases, name="biases")
    mm = math_ops.matmul(x, weights)
    return bias_add_v1(mm, biases, name=name)
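

# An illustrative sketch (hypothetical helper): the Conv2D statistic above
# counts one multiply plus one add (hence the factor of 2) per filter tap per
# output element: flops = output_count * in_depth * filter_h * filter_w * 2.
def _example_conv2d_flops():
  """Works the Conv2D flops formula for a 3x3x16 filter and a 32x32x32 output."""
  output_count = 1 * 32 * 32 * 32  # batch * out_height * out_width * out_depth
  filter_height, filter_width, filter_in_depth = 3, 3, 16
  flops = output_count * filter_in_depth * filter_height * filter_width * 2
  assert flops == 9437184  # ~9.4 MFLOPs for this one layer.

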
def _get_noise_shape(x, noise_shape):
  # If noise_shape is none return immediately.
  if noise_shape is None:
    return array_ops.shape(x)

  try:
    # Best effort to figure out the intended shape.
    # If not possible, let the op handle it.
    # In eager mode an exception will show up.
    noise_shape_ = tensor_shape.as_shape(noise_shape)
  except (TypeError, ValueError):
    return noise_shape

  if x.shape.dims is not None and len(x.shape.dims) == len(noise_shape_.dims):
    new_dims = []
    for i, dim in enumerate(x.shape.dims):
      if noise_shape_.dims[i].value is None and dim.value is not None:
        new_dims.append(dim.value)
      else:
        new_dims.append(noise_shape_.dims[i].value)
    return tensor_shape.TensorShape(new_dims)

  return noise_shape


@tf_export(v1=["nn.dropout"])
@deprecation.deprecated_args(None, "Please use `rate` instead of `keep_prob`. "
                             "Rate should be set to `rate = 1 - keep_prob`.",
                             "keep_prob")
def dropout(x, keep_prob=None, noise_shape=None, seed=None, name=None,
            rate=None):
  """Computes dropout.

  For each element of `x`, with probability `rate`, outputs `0`, and otherwise
  scales up the input by `1 / (1 - rate)`. The scaling is such that the
  expected sum is unchanged.

  By default, each element is kept or dropped independently. If `noise_shape`
  is specified, it must be
  [broadcastable](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
  to the shape of `x`, and only dimensions with `noise_shape[i] == shape(x)[i]`
  will make independent decisions. For example, if `shape(x) = [k, l, m, n]`
  and `noise_shape = [k, 1, 1, n]`, each batch and channel component will be
  kept independently and each row and column will be kept or not kept together.

  Args:
    x: A floating point tensor.
    keep_prob: (deprecated) A deprecated alias for `(1-rate)`.
    noise_shape: A 1-D `Tensor` of type `int32`, representing the
      shape for randomly generated keep/drop flags.
    seed: A Python integer. Used to create random seeds. See
      `tf.random.set_seed` for behavior.
    name: A name for this operation (optional).
    rate: A scalar `Tensor` with the same type as `x`. The probability that
      each element of `x` is discarded.

  Returns:
    A Tensor of the same shape of `x`.

  Raises:
    ValueError: If `rate` is not in `[0, 1)` or if `x` is not a floating
      point tensor.
  """
  try:
    keep = 1. - keep_prob if keep_prob is not None else None
  except TypeError:
    raise ValueError("keep_prob must be a floating point number or Tensor "
                     "(got %r)" % keep_prob)

  rate = deprecation.deprecated_argument_lookup(
      "rate", rate,
      "keep_prob", keep)

  if rate is None:
    raise ValueError("You must provide a rate to dropout.")

  return dropout_v2(x, rate, noise_shape=noise_shape, seed=seed, name=name)
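

# An illustrative sketch (hypothetical helper, assuming eager execution):
# demonstrates the `noise_shape` broadcasting described above — with
# noise_shape `[3, 1]`, one keep/drop decision is drawn per row and shared
# across that row's columns.
def _example_dropout_noise_shape():
  """Checks that rows are kept or dropped as a unit under noise_shape=[3, 1]."""
  x = array_ops.ones([3, 4])
  y = dropout(x, rate=0.5, noise_shape=[3, 1], seed=1)
  # Every row is either all zeros or all 1 / (1 - rate) = 2.
  for row in y.numpy():
    assert len(set(row.tolist())) == 1

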
@tf_export("nn.dropout", v1=[])
def dropout_v2(x, rate, noise_shape=None, seed=None, name=None):
  """Computes dropout: randomly sets elements to zero to prevent overfitting.

  Note: The behavior of dropout has changed between TensorFlow 1.x and 2.x.
  When converting 1.x code, please use named arguments to ensure behavior stays
  consistent.

  See also: `tf.keras.layers.Dropout` for a dropout layer.

  [Dropout](https://arxiv.org/abs/1207.0580) is useful for regularizing DNN
  models. Input elements are randomly set to zero (and the other elements are
  rescaled). This encourages each node to be independently useful, as it
  cannot rely on the output of other nodes.

  More precisely: With probability `rate` elements of `x` are set to `0`.
  The remaining elements are scaled up by `1.0 / (1 - rate)`, so that the
  expected value is preserved.

  >>> tf.random.set_seed(0)
  >>> x = tf.ones([3,5])
  >>> tf.nn.dropout(x, rate = 0.5, seed = 1).numpy()
  array([[2., 0., 0., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 0., 2., 0., 2.]], dtype=float32)

  >>> tf.random.set_seed(0)
  >>> x = tf.ones([3,5])
  >>> tf.nn.dropout(x, rate = 0.8, seed = 1).numpy()
  array([[0., 0., 0., 5., 5.],
         [0., 5., 0., 5., 0.],
         [5., 0., 5., 0., 5.]], dtype=float32)

  >>> tf.nn.dropout(x, rate = 0.0) == x
  <tf.Tensor: shape=(3, 5), dtype=bool, numpy=
  array([[ True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True]])>

  By default, each element is kept or dropped independently. If `noise_shape`
  is specified, it must be
  [broadcastable](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
  to the shape of `x`, and only dimensions with `noise_shape[i] == shape(x)[i]`
  will make independent decisions. This is useful for dropping whole
  channels from an image or sequence. For example:

  >>> tf.random.set_seed(0)
  >>> x = tf.ones([3,10])
  >>> tf.nn.dropout(x, rate = 2/3, noise_shape=[1,10], seed=1).numpy()
  array([[0., 0., 0., 3., 3., 0., 3., 3., 3., 0.],
         [0., 0., 0., 3., 3., 0., 3., 3., 3., 0.],
         [0., 0., 0., 3., 3., 0., 3., 3., 3., 0.]], dtype=float32)

  Args:
    x: A floating point tensor.
    rate: A scalar `Tensor` with the same type as `x`. The probability that
      each element is dropped. For example, setting `rate=0.1` would drop
      10% of input elements.
    noise_shape: A 1-D `Tensor` of type `int32`, representing the
      shape for randomly generated keep/drop flags.
    seed: A Python integer. Used to create random seeds. See
      `tf.random.set_seed` for behavior.
    name: A name for this operation (optional).

  Returns:
    A Tensor of the same shape as `x`.

  Raises:
    ValueError: If `rate` is not in `[0, 1)` or if `x` is not a floating point
      tensor. `rate=1` is disallowed, because the output would be all zeros,
      which is likely not what was intended.
  """
  with ops.name_scope(name, "dropout", [x]) as name:
    is_rate_number = isinstance(rate, numbers.Real)
    if is_rate_number and (rate < 0 or rate >= 1):
      raise ValueError("rate must be a scalar tensor or a float in the "
                       "range [0, 1), got %g" % rate)
    x = ops.convert_to_tensor(x, name="x")
    x_dtype = x.dtype
    if not x_dtype.is_floating:
      raise ValueError("x has to be a floating point tensor since it's going "
                       "to be scaled. Got a %s tensor instead." % x_dtype)
    is_executing_eagerly = context.executing_eagerly()
    if not tensor_util.is_tensor(rate):
      if is_rate_number:
        keep_prob = 1 - rate
        scale = 1 / keep_prob
        scale = ops.convert_to_tensor(scale, dtype=x_dtype)
        ret = gen_math_ops.mul(x, scale)
      else:
        raise ValueError("rate is neither a scalar nor a scalar tensor: "
                         "%r" % rate)
    else:
      rate.get_shape().assert_has_rank(0)
      rate_dtype = rate.dtype
      if rate_dtype != x_dtype:
        if not rate_dtype.is_compatible_with(x_dtype):
          raise ValueError(
              "Tensor dtype %s is incompatible with Tensor dtype %s: %r" %
              (x_dtype.name, rate_dtype.name, rate))
        rate = gen_math_ops.cast(rate, x_dtype, name="rate")
      one_tensor = constant_op.constant(1, dtype=x_dtype)
      ret = gen_math_ops.real_div(x, gen_math_ops.sub(one_tensor, rate))

    noise_shape = _get_noise_shape(x, noise_shape)
    # Sample a uniform distribution on [0.0, 1.0) and select values larger
    # than rate.
    #
    # NOTE: Random uniform can only generate 2^23 distinct floats on
    # [1.0, 2.0); the op subtracts 1.0 to shift them into [0.0, 1.0).
    random_tensor = random_ops.random_uniform(
        noise_shape, seed=seed, dtype=x_dtype)
    # NOTE: if (1.0 + rate) - 1 is equal to rate, then that float is selected,
    # hence a >= comparison is used.
    keep_mask = random_tensor >= rate
    ret = gen_math_ops.mul(ret, gen_math_ops.cast(keep_mask, x_dtype))
    if not is_executing_eagerly:
      ret.set_shape(x.get_shape())
    return ret


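# Illustrative sketch (a hypothetical helper, not part of the public API):
# the `1 / (1 - rate)` rescaling above keeps the expected sum unchanged, so
# for a large tensor the post-dropout mean stays close to the pre-dropout
# mean (only approximately, since dropout is random).
def _dropout_mean_preservation_example():
  x = array_ops.ones([10000])
  y = dropout_v2(x, rate=0.5)
  # About half the elements of `y` are 0. and the rest are 2., so the mean
  # is close to 1.
  return math_ops.reduce_mean(y)

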
@tf_export("math.top_k", "nn.top_k")
def top_k(input, k=1, sorted=True, name=None):  # pylint: disable=redefined-builtin
  """Finds values and indices of the `k` largest entries for the last dimension.

  If the input is a vector (rank=1), finds the `k` largest entries in the
  vector and outputs their values and indices as vectors. Thus `values[j]` is
  the `j`-th largest entry in `input`, and its index is `indices[j]`.

  For matrices (resp. higher rank input), computes the top `k` entries in each
  row (resp. vector along the last dimension). Thus,

      values.shape = indices.shape = input.shape[:-1] + [k]

  If two elements are equal, the lower-index element appears first.

  Args:
    input: 1-D or higher `Tensor` with last dimension at least `k`.
    k: 0-D `int32` `Tensor`. Number of top elements to look for along the last
      dimension (along each row for matrices).
    sorted: If true the resulting `k` elements will be sorted by the values in
      descending order.
    name: Optional name for the operation.

  Returns:
    values: The `k` largest elements along each last dimensional slice.
    indices: The indices of `values` within the last dimension of `input`.
  """
  return gen_nn_ops.top_kv2(input, k=k, sorted=sorted, name=name)


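# Illustrative sketch (a hypothetical helper, not part of the public API):
# for a rank-1 input, `top_k` returns the `k` largest values and their
# indices, sorted in descending order by default.
def _top_k_example():
  x = constant_op.constant([1., 3., 2., 5., 4.])
  values, indices = top_k(x, k=2)
  # values  == [5., 4.]
  # indices == [3, 4]
  return values, indices

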
def nth_element(input, n, reverse=False, name=None):  # pylint: disable=redefined-builtin
  r"""Finds values of the `n`-th smallest value for the last dimension.

  Note that n is zero-indexed.

  If the input is a vector (rank-1), finds the entry which is the n-th
  smallest value in the vector and outputs its value as a scalar tensor.

  For matrices (resp. higher rank input), computes the entry which is the
  n-th smallest value in each row (resp. vector along the last dimension).
  Thus,

      values.shape = input.shape[:-1]

  Args:
    input: 1-D or higher `Tensor` with last dimension at least `n+1`.
    n: A `Tensor` of type `int32`.
      0-D. Position of sorted vector to select along the last dimension (along
      each row for matrices). Valid range of n is `[0, input.shape[-1])`.
    reverse: An optional `bool`. Defaults to `False`.
      When set to `True`, finds the n-th largest value instead of the n-th
      smallest.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `input`.
    The `n`-th order statistic along each last dimensional slice.
  """
  return gen_nn_ops.nth_element(input, n, reverse=reverse, name=name)


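# Illustrative sketch (a hypothetical helper, not part of the public API):
# `n` indexes into the ascending sort of the last dimension, so n=2 selects
# the third-smallest entry, and `reverse=True` counts from the largest
# value instead.
def _nth_element_example():
  x = constant_op.constant([1., 3., 2., 5., 4.])
  third_smallest = nth_element(x, n=2)         # == 3.0
  largest = nth_element(x, n=0, reverse=True)  # == 5.0
  return third_smallest, largest

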
@tf_export(v1=["nn.fractional_max_pool"])
@deprecation.deprecated(date=None, instructions="`seed2` and `deterministic` "
                        "args are deprecated. Use fractional_max_pool_v2.")
def fractional_max_pool(value,
                        pooling_ratio,
                        pseudo_random=False,
                        overlapping=False,
                        deterministic=False,
                        seed=0,
                        seed2=0,
                        name=None):  # pylint: disable=redefined-builtin
  r"""Performs fractional max pooling on the input.

  This is a deprecated version of `fractional_max_pool`; use
  `fractional_max_pool_v2` instead.

  Fractional max pooling is slightly different from regular max pooling. In
  regular max pooling, you downsize an input set by taking the maximum value of
  smaller N x N subsections of the set (often 2x2), and try to reduce the set
  by a factor of N, where N is an integer. Fractional max pooling, as you
  might expect from the word "fractional", means that the overall reduction
  ratio N does not have to be an integer.

  The sizes of the pooling regions are generated randomly but are fairly
  uniform. For example, let's look at the height dimension, and the
  constraints on the list of rows that will be pool boundaries.

  First we define the following:

  1. input_row_length : the number of rows from the input set
  2. output_row_length : which will be smaller than the input
  3. alpha = input_row_length / output_row_length : our reduction ratio
  4. K = floor(alpha)
  5. row_pooling_sequence : this is the result list of pool boundary rows

  Then, row_pooling_sequence should satisfy:

  1. a[0] = 0 : the first value of the sequence is 0
  2. a[end] = input_row_length : the last value of the sequence is the size
  3. K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size
  4. length(row_pooling_sequence) = output_row_length+1

  A worked example of these constraints follows the function body below.

  Args:
    value: A `Tensor`. 4-D with shape `[batch, height, width, channels]`.
    pooling_ratio: A list of `floats` that has length >= 4. Pooling ratio for
      each dimension of `value`, currently only supports row and col dimension
      and should be >= 1.0. For example, a valid pooling ratio looks like
      [1.0, 1.44, 1.73, 1.0]. The first and last elements must be 1.0 because
      we don't allow pooling on batch and channels dimensions. 1.44 and 1.73
      are pooling ratio on height and width dimensions respectively.
    pseudo_random: An optional `bool`. Defaults to `False`. When set to
      `True`, generates the pooling sequence in a pseudorandom fashion,
      otherwise, in a random fashion. See (Graham, 2015) for the difference
      between pseudorandom and random.
    overlapping: An optional `bool`. Defaults to `False`. When set to `True`,
      it means when pooling, the values at the boundary of adjacent pooling
      cells are used by both cells. For example:
      `index  0  1  2  3  4`
      `value  20 5  16 3  7`
      If the pooling sequence is [0, 2, 4], then 16, at index 2, will be used
      twice. The result would be [20, 16] for fractional max pooling.
    deterministic: An optional `bool`. Deprecated; use
      `fractional_max_pool_v2` instead.
    seed: An optional `int`. Defaults to `0`. If set to be non-zero, the
      random number generator is seeded by the given seed. Otherwise it is
      seeded by a random seed.
    seed2: An optional `int`. Deprecated; use `fractional_max_pool_v2`
      instead.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (`output`, `row_pooling_sequence`,
    `col_pooling_sequence`).
    output: Output `Tensor` after fractional max pooling. Has the same type as
      `value`.
    row_pooling_sequence: A `Tensor` of type `int64`.
    col_pooling_sequence: A `Tensor` of type `int64`.

  References:
    Fractional Max-Pooling:
      [Graham, 2015](https://arxiv.org/abs/1412.6071)
      ([pdf](https://arxiv.org/pdf/1412.6071.pdf))
  """
  return gen_nn_ops.fractional_max_pool(value, pooling_ratio, pseudo_random,
                                        overlapping, deterministic, seed,
                                        seed2, name)


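# A worked example (illustrative) of the row_pooling_sequence constraints
# described in the docstring above. With input_row_length = 10 and
# output_row_length = 4, alpha = 2.5 and K = 2, so every interval must have
# size K or K+1:
#
#   row_pooling_sequence = [0, 2, 5, 7, 10]
#   # a[0] == 0 and a[end] == 10 == input_row_length
#   # intervals: 2, 3, 2, 3 -- each is K or K+1
#   # length: 5 == output_row_length + 1

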
@tf_export("nn.fractional_max_pool", v1=[])
def fractional_max_pool_v2(value,
                           pooling_ratio,
                           pseudo_random=False,
                           overlapping=False,
                           seed=0,
                           name=None):  # pylint: disable=redefined-builtin
  r"""Performs fractional max pooling on the input.

  Fractional max pooling is slightly different from regular max pooling. In
  regular max pooling, you downsize an input set by taking the maximum value of
  smaller N x N subsections of the set (often 2x2), and try to reduce the set
  by a factor of N, where N is an integer. Fractional max pooling, as you
  might expect from the word "fractional", means that the overall reduction
  ratio N does not have to be an integer.

  The sizes of the pooling regions are generated randomly but are fairly
  uniform. For example, let's look at the height dimension, and the
  constraints on the list of rows that will be pool boundaries.

  First we define the following:

  1. input_row_length : the number of rows from the input set
  2. output_row_length : which will be smaller than the input
  3. alpha = input_row_length / output_row_length : our reduction ratio
  4. K = floor(alpha)
  5. row_pooling_sequence : this is the result list of pool boundary rows

  Then, row_pooling_sequence should satisfy:

  1. a[0] = 0 : the first value of the sequence is 0
  2. a[end] = input_row_length : the last value of the sequence is the size
  3. K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size
  4. length(row_pooling_sequence) = output_row_length+1

  Args:
    value: A `Tensor`. 4-D with shape `[batch, height, width, channels]`.
    pooling_ratio: A float or list of `floats` that has length `1`, `2` or
      `4`. Pooling ratio for each dimension of `value`, currently only
      supports row and col dimension and should be >= 1.0. For example, a
      valid pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and
      last elements must be 1.0 because we don't allow pooling on batch and
      channels dimensions. 1.44 and 1.73 are pooling ratio on height and
      width dimensions respectively.
    pseudo_random: An optional `bool`. Defaults to `False`. When set to
      `True`, generates the pooling sequence in a pseudorandom fashion,
      otherwise, in a random fashion. See (Graham, 2015) for the difference
      between pseudorandom and random.
    overlapping: An optional `bool`. Defaults to `False`. When set to `True`,
      it means when pooling, the values at the boundary of adjacent pooling
      cells are used by both cells. For example:
      `index  0  1  2  3  4`
      `value  20 5  16 3  7`
      If the pooling sequence is [0, 2, 4], then 16, at index 2, will be used
      twice. The result would be [20, 16] for fractional max pooling.
    seed: An optional `int`. Defaults to `0`. If set to be non-zero, the
      random number generator is seeded by the given seed. Otherwise it is
      seeded by a random seed.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (`output`, `row_pooling_sequence`,
    `col_pooling_sequence`).
    output: Output `Tensor` after fractional max pooling. Has the same type as
      `value`.
    row_pooling_sequence: A `Tensor` of type `int64`.
    col_pooling_sequence: A `Tensor` of type `int64`.

  References:
    Fractional Max-Pooling:
      [Graham, 2015](https://arxiv.org/abs/1412.6071)
      ([pdf](https://arxiv.org/pdf/1412.6071.pdf))
  """
  pooling_ratio = _get_sequence(pooling_ratio, 2, 3, "pooling_ratio")

  if seed == 0:
    return gen_nn_ops.fractional_max_pool(value, pooling_ratio, pseudo_random,
                                          overlapping, deterministic=False,
                                          seed=0, seed2=0, name=name)
  else:
    seed1, seed2 = random_seed.get_seed(seed)
    return gen_nn_ops.fractional_max_pool(value, pooling_ratio, pseudo_random,
                                          overlapping, deterministic=True,
                                          seed=seed1, seed2=seed2, name=name)


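# Illustrative sketch (a hypothetical helper, not part of the public API):
# fractional max pooling with a non-integer reduction ratio of 1.5 on both
# spatial dimensions. The scalar `pooling_ratio` is expanded by
# `_get_sequence` to [1, 1.5, 1.5, 1].
def _fractional_max_pool_example():
  value = array_ops.ones([1, 6, 6, 1])
  output, rows, cols = fractional_max_pool_v2(value, pooling_ratio=1.5)
  # `output` has spatial shape 4 x 4, since 6 / 1.5 = 4; `rows` and `cols`
  # hold the randomly drawn pooling boundaries.
  return output, rows, cols

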
@tf_export(v1=["nn.fractional_avg_pool"])
@deprecation.deprecated(date=None, instructions="`seed2` and `deterministic` "
                        "args are deprecated. Use fractional_avg_pool_v2.")
def fractional_avg_pool(value,
                        pooling_ratio,
                        pseudo_random=False,
                        overlapping=False,
                        deterministic=False,
                        seed=0,
                        seed2=0,
                        name=None):  # pylint: disable=redefined-builtin
  r"""Performs fractional average pooling on the input.

  This is a deprecated version of `fractional_avg_pool`; use
  `fractional_avg_pool_v2` instead.

  Fractional average pooling is similar to fractional max pooling in the
  pooling region generation step. The only difference is that after pooling
  regions are generated, a mean operation is performed instead of a max
  operation in each pooling region.

  Args:
    value: A `Tensor`. 4-D with shape `[batch, height, width, channels]`.
    pooling_ratio: A list of `floats` that has length >= 4. Pooling ratio for
      each dimension of `value`, currently only supports row and col dimension
      and should be >= 1.0. For example, a valid pooling ratio looks like
      [1.0, 1.44, 1.73, 1.0]. The first and last elements must be 1.0 because
      we don't allow pooling on batch and channels dimensions. 1.44 and 1.73
      are pooling ratio on height and width dimensions respectively.
    pseudo_random: An optional `bool`. Defaults to `False`. When set to
      `True`, generates the pooling sequence in a pseudorandom fashion,
      otherwise, in a random fashion. See (Graham, 2015) for the difference
      between pseudorandom and random.
    overlapping: An optional `bool`. Defaults to `False`. When set to `True`,
      it means when pooling, the values at the boundary of adjacent pooling
      cells are used by both cells. For example:
      `index  0  1  2  3  4`
      `value  20 5  16 3  7`
      If the pooling sequence is [0, 2, 4], then 16, at index 2, will be used
      twice. The result would be [41/3, 26/3] (i.e. roughly [13.67, 8.67])
      for fractional avg pooling.
    deterministic: An optional `bool`. Deprecated; use
      `fractional_avg_pool_v2` instead.
    seed: An optional `int`. Defaults to `0`. If set to be non-zero, the
      random number generator is seeded by the given seed. Otherwise it is
      seeded by a random seed.
    seed2: An optional `int`. Deprecated; use `fractional_avg_pool_v2`
      instead.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (`output`, `row_pooling_sequence`,
    `col_pooling_sequence`).
    output: Output `Tensor` after fractional avg pooling. Has the same type as
      `value`.
    row_pooling_sequence: A `Tensor` of type `int64`.
    col_pooling_sequence: A `Tensor` of type `int64`.

  References:
    Fractional Max-Pooling:
      [Graham, 2015](https://arxiv.org/abs/1412.6071)
      ([pdf](https://arxiv.org/pdf/1412.6071.pdf))
  """
  return gen_nn_ops.fractional_avg_pool(value, pooling_ratio, pseudo_random,
                                        overlapping, deterministic, seed,
                                        seed2, name=name)


@tf_export("nn.fractional_avg_pool", v1=[])
def fractional_avg_pool_v2(value,
                           pooling_ratio,
                           pseudo_random=False,
                           overlapping=False,
                           seed=0,
                           name=None):  # pylint: disable=redefined-builtin
  r"""Performs fractional average pooling on the input.

  Fractional average pooling is similar to fractional max pooling in the
  pooling region generation step. The only difference is that after pooling
  regions are generated, a mean operation is performed instead of a max
  operation in each pooling region.

  Args:
    value: A `Tensor`. 4-D with shape `[batch, height, width, channels]`.
    pooling_ratio: A list of `floats` that has length >= 4. Pooling ratio for
      each dimension of `value`, currently only supports row and col dimension
      and should be >= 1.0. For example, a valid pooling ratio looks like
      [1.0, 1.44, 1.73, 1.0]. The first and last elements must be 1.0 because
      we don't allow pooling on batch and channels dimensions. 1.44 and 1.73
      are pooling ratio on height and width dimensions respectively.
    pseudo_random: An optional `bool`. Defaults to `False`. When set to
      `True`, generates the pooling sequence in a pseudorandom fashion,
      otherwise, in a random fashion. See (Graham, 2015) for the difference
      between pseudorandom and random.
    overlapping: An optional `bool`. Defaults to `False`. When set to `True`,
      it means when pooling, the values at the boundary of adjacent pooling
      cells are used by both cells. For example:
      `index  0  1  2  3  4`
      `value  20 5  16 3  7`
      If the pooling sequence is [0, 2, 4], then 16, at index 2, will be used
      twice. The result would be [41/3, 26/3] (i.e. roughly [13.67, 8.67])
      for fractional avg pooling.
    seed: An optional `int`. Defaults to `0`. If set to be non-zero, the
      random number generator is seeded by the given seed. Otherwise it is
      seeded by a random seed.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (`output`, `row_pooling_sequence`,
    `col_pooling_sequence`).
    output: Output `Tensor` after fractional avg pooling. Has the same type as
      `value`.
    row_pooling_sequence: A `Tensor` of type `int64`.
    col_pooling_sequence: A `Tensor` of type `int64`.

  References:
    Fractional Max-Pooling:
      [Graham, 2015](https://arxiv.org/abs/1412.6071)
      ([pdf](https://arxiv.org/pdf/1412.6071.pdf))
  """
  if seed == 0:
    return gen_nn_ops.fractional_avg_pool(value, pooling_ratio, pseudo_random,
                                          overlapping, deterministic=False,
                                          seed=0, seed2=0, name=name)
  else:
    seed1, seed2 = random_seed.get_seed(seed)
    return gen_nn_ops.fractional_avg_pool(value, pooling_ratio, pseudo_random,
                                          overlapping, deterministic=True,
                                          seed=seed1, seed2=seed2, name=name)


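# Illustrative sketch (a hypothetical helper, not part of the public API):
# with a non-zero `seed`, the wrapper above switches the underlying op into
# deterministic mode, so repeated calls draw the same pooling regions; with
# the default seed of 0, each call draws fresh random regions.
def _fractional_avg_pool_seed_example(value):
  a = fractional_avg_pool_v2(value, pooling_ratio=[1.0, 1.5, 1.5, 1.0],
                             seed=7)
  b = fractional_avg_pool_v2(value, pooling_ratio=[1.0, 1.5, 1.5, 1.0],
                             seed=7)
  # `a` and `b` use identical row/col pooling sequences, so their outputs
  # match element-for-element.
  return a, b

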
@ops.RegisterStatistics("Dilation2D", "flops")
def _calc_dilation2d_flops(graph, node):
  """Calculates the compute resources needed for Dilation2D."""
  input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  input_shape.assert_is_fully_defined()
  filter_shape = graph_util.tensor_shape_from_node_def_name(
      graph, node.input[1])
  filter_shape.assert_is_fully_defined()
  output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  output_shape.assert_is_fully_defined()
  filter_height = int(filter_shape[0])
  filter_width = int(filter_shape[1])
  output_count = np.prod(output_shape.as_list(), dtype=np.int64)
  return ops.OpStats("flops",
                     (output_count * filter_height * filter_width * 2))


@tf_export(v1=["nn.erosion2d"])
def erosion2d(value, kernel, strides, rates, padding, name=None):
  """Computes the grayscale erosion of 4-D `value` and 3-D `kernel` tensors.

  The `value` tensor has shape `[batch, in_height, in_width, depth]` and the
  `kernel` tensor has shape `[kernel_height, kernel_width, depth]`, i.e., each
  input channel is processed independently of the others with its own
  structuring function. The `output` tensor has shape
  `[batch, out_height, out_width, depth]`. The spatial dimensions of the
  output tensor depend on the `padding` algorithm. We currently only support
  the default "NHWC" `data_format`.

  In detail, the grayscale morphological 2-D erosion is given by:

      output[b, y, x, c] =
         min_{dy, dx} value[b,
                            strides[1] * y - rates[1] * dy,
                            strides[2] * x - rates[2] * dx,
                            c] -
                      kernel[dy, dx, c]

  Duality: The erosion of `value` by the `kernel` is equal to the negation of
  the dilation of `-value` by the reflected `kernel`.

  Args:
    value: A `Tensor`. 4-D with shape `[batch, in_height, in_width, depth]`.
    kernel: A `Tensor`. Must have the same type as `value`.
      3-D with shape `[kernel_height, kernel_width, depth]`.
    strides: A list of `ints` that has length `>= 4`.
      1-D of length 4. The stride of the sliding window for each dimension of
      the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
    rates: A list of `ints` that has length `>= 4`.
      1-D of length 4. The input stride for atrous morphological dilation.
      Must be: `[1, rate_height, rate_width, 1]`.
    padding: A `string` from: `"SAME", "VALID"`.
      The type of padding algorithm to use.
    name: A name for the operation (optional). If not specified "erosion2d"
      is used.

  Returns:
    A `Tensor`. Has the same type as `value`.
    4-D with shape `[batch, out_height, out_width, depth]`.

  Raises:
    ValueError: If the `value` depth does not match the shape of `kernel`, or
      if `padding` is other than `'VALID'` or `'SAME'`.
  """
  with ops.name_scope(name, "erosion2d", [value, kernel]) as name:
    # Reduce erosion to dilation by duality.
    return math_ops.negative(
        gen_nn_ops.dilation2d(
            input=math_ops.negative(value),
            filter=array_ops.reverse_v2(kernel, [0, 1]),
            strides=strides,
            rates=rates,
            padding=padding,
            name=name))


@tf_export("nn.erosion2d", v1=[])
def erosion2d_v2(value,
                 filters,
                 strides,
                 padding,
                 data_format,
                 dilations,
                 name=None):
  """Computes the grayscale erosion of 4-D `value` and 3-D `filters` tensors.

  The `value` tensor has shape `[batch, in_height, in_width, depth]` and the
  `filters` tensor has shape `[filters_height, filters_width, depth]`, i.e.,
  each input channel is processed independently of the others with its own
  structuring function. The `output` tensor has shape
  `[batch, out_height, out_width, depth]`. The spatial dimensions of the
  output tensor depend on the `padding` algorithm. We currently only support
  the default "NHWC" `data_format`.

  In detail, the grayscale morphological 2-D erosion is given by:

      output[b, y, x, c] =
         min_{dy, dx} value[b,
                            strides[1] * y - dilations[1] * dy,
                            strides[2] * x - dilations[2] * dx,
                            c] -
                      filters[dy, dx, c]

  Duality: The erosion of `value` by the `filters` is equal to the negation of
  the dilation of `-value` by the reflected `filters`.

  Args:
    value: A `Tensor`. 4-D with shape `[batch, in_height, in_width, depth]`.
    filters: A `Tensor`. Must have the same type as `value`.
      3-D with shape `[filters_height, filters_width, depth]`.
    strides: A list of `ints` that has length `>= 4`.
      1-D of length 4. The stride of the sliding window for each dimension of
      the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
    padding: A `string` from: `"SAME", "VALID"`.
      The type of padding algorithm to use.
    data_format: A `string`, only `"NHWC"` is currently supported.
    dilations: A list of `ints` that has length `>= 4`.
      1-D of length 4. The input stride for atrous morphological dilation.
      Must be: `[1, rate_height, rate_width, 1]`.
    name: A name for the operation (optional). If not specified "erosion2d"
      is used.

  Returns:
    A `Tensor`. Has the same type as `value`.
    4-D with shape `[batch, out_height, out_width, depth]`.

  Raises:
    ValueError: If the `value` depth does not match the shape of `filters`, or
      if `padding` is other than `'VALID'` or `'SAME'`.
  """
  if data_format != "NHWC":
    raise ValueError("Data formats other than NHWC are not yet supported")

  with ops.name_scope(name, "erosion2d", [value, filters]) as name:
    # Reduce erosion to dilation by duality.
    return math_ops.negative(
        gen_nn_ops.dilation2d(
            input=math_ops.negative(value),
            filter=array_ops.reverse_v2(filters, [0, 1]),
            strides=strides,
            rates=dilations,
            padding=padding,
            name=name))


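# Illustrative sketch (a hypothetical helper, not part of the public API) of
# the duality used above: erosion is implemented as a negated dilation of the
# negated input against a spatially reflected filter, i.e.
# erosion(value) == -dilation2d(-value, reverse(filters)).
def _erosion2d_duality_example():
  value = array_ops.ones([1, 5, 5, 1])
  filters = array_ops.zeros([2, 2, 1])
  # With an all-zero structuring function the erosion reduces to a plain
  # 2x2 min-pool, so every element of the [1, 4, 4, 1] output equals 1.
  return erosion2d_v2(value, filters, strides=[1, 1, 1, 1],
                      padding="VALID", data_format="NHWC",
                      dilations=[1, 1, 1, 1])

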
@tf_export(v1=["math.in_top_k", "nn.in_top_k"])
def in_top_k(predictions, targets, k, name=None):
  r"""Says whether the targets are in the top `K` predictions.

  This outputs a `batch_size` bool array; an entry `out[i]` is `True` if the
  prediction for the target class is finite (not inf, -inf, or nan) and among
  the top `k` predictions among all predictions for example `i`. Note that
  the behavior of `InTopK` differs from the `TopK` op in its handling of ties;
  if multiple classes have the same prediction value and straddle the top-`k`
  boundary, all of those classes are considered to be in the top `k`.

  More formally, let

    \\(predictions_i\\) be the predictions for all classes for example `i`,
    \\(targets_i\\) be the target class for example `i`,
    \\(out_i\\) be the output for example `i`,

  $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$

  Args:
    predictions: A `Tensor` of type `float32`.
      A `batch_size` x `classes` tensor.
    targets: A `Tensor`. Must be one of the following types: `int32`, `int64`.
      A `batch_size` vector of class ids.
    k: An `int`. Number of top elements to look at for computing precision.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `bool`. Computed Precision at `k` as a `bool Tensor`.
  """
  with ops.name_scope(name, "in_top_k"):
    return gen_nn_ops.in_top_kv2(predictions, targets, k, name=name)


@tf_export("math.in_top_k", "nn.in_top_k", v1=[])
def in_top_k_v2(targets, predictions, k, name=None):
  return in_top_k(predictions, targets, k, name)


in_top_k_v2.__doc__ = in_top_k.__doc__


tf_export(v1=["nn.quantized_avg_pool"])(gen_nn_ops.quantized_avg_pool)
tf_export(v1=["nn.quantized_conv2d"])(gen_nn_ops.quantized_conv2d)
tf_export(v1=["nn.quantized_relu_x"])(gen_nn_ops.quantized_relu_x)
tf_export(v1=["nn.quantized_max_pool"])(gen_nn_ops.quantized_max_pool)
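

# Illustrative sketch (a hypothetical helper, not part of the public API):
# `in_top_k` counts ties at the top-`k` boundary as "in the top k", unlike
# `top_k`, which breaks ties by index.
def _in_top_k_ties_example():
  predictions = constant_op.constant([[0.1, 0.8, 0.8],
                                      [0.9, 0.05, 0.05]])
  targets = constant_op.constant([2, 1])
  # Row 0: class 2 ties class 1 for the top value -> True.
  # Row 1: class 1 is not the top prediction -> False.
  return in_top_k_v2(targets, predictions, k=1)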