• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_FRAMEWORK_KERNEL_SHAPE_UTIL_H_
17 #define TENSORFLOW_CORE_FRAMEWORK_KERNEL_SHAPE_UTIL_H_
18 
19 #include <array>
20 
21 #include "tensorflow/core/platform/status.h"
22 #include "tensorflow/core/util/padding.h"
23 
24 namespace tensorflow {
25 // GetWindowedOutputSize(): Given the input size, kernel size, stride and
26 // padding type along one dimension, computes the output size and padding.
27 //
28 // For example, ignoring batches or multiple features, a 1D convolution
29 // takes as input a 1D tensor of shape (H), and convolves it with a filter of
30 // shape (K).
31 //
32 // It also takes in a few additional parameters:
33 //
34 // Stride (S): the stride with which we apply the filters. This is the offset
35 // between locations where we apply the filters. A larger stride
36 // means that the output will be spatially smaller.
37 //
38 // Padding (P): the padding we apply to the input tensor along each
39 // dimension. This is usually used to make sure that the spatial dimensions
40 // do not shrink when we progress with convolutions. This function supports two
41 // types of padding.
42 //   SAME: the pad value is computed so that the output has size ceil(H/S).
43 //   VALID: no padding is carried out.
44 // If you want to use EXPLICIT padding, GetWindowedOutputSizeVerbose must be
45 // called instead. Note the padded area is zero-filled.
46 //
47 // The output dimensions for convolution and many other operations, when given
48 // all the parameters above, are as follows:
49 // - When Padding = SAME: the output size is (H'), where
50 //     H' = ceil(float(H) / float(S))
51 //   where ceil is the ceiling function. The number of padded cells (Pc)
52 //   is computed as:
53 //     Pc = ((H' - 1) * S + K - H) / 2
54 //   When the stride is 1, the expression simplifies to
55 //     H' = H, Pc = (K-1)/2.
56 //   This is where the name SAME comes from: the output has the same size as
57 //   the input.
58 //
59 // - When Padding = VALID: the output size is computed as
60 //     H' = ceil(float(H - K + 1) / float(S))
61 //   and the number of padded cells is always zero.
62 //   When the stride is 1, the expression simplifies to
63 //     H' = H-K+1.
64 //
65 // For convolution, mathematically, the output value at location (r')
66 // is the inner product of two vectors: the chunk of input at
67 //    ((r'*S-Pc) : (r'*S-Pc+K)),
68 // and the filter.
69 //
70 // For 2D and 3D convolutions, the spatial dimensions are orthogonal, so the
71 // size and padding of each spatial dimension can be computed by calling
72 // GetWindowedOutputSize separately for each dimension.
73 // The results are written to *output_size and *padding_size.
74 Status GetWindowedOutputSize(int64 input_size, int64 filter_size, int64 stride,
75                              Padding padding_type, int64* output_size,
76                              int64* padding_size);
77 
78 // The V2 version computes the same outputs as GetWindowedOutputSize(), with an
79 // arbitrary dilation_rate. The output dimensions are computed as follows:
80 // - With dilation_rate (D), the filter (K) has an effective size (K'):
81 //     K' = (K - 1) * D + 1
82 // - When Padding = SAME: the output size is (H'), where
83 //     H' = ceil(float(H) / float(S))
84 //   where ceil is the ceiling function. The number of padded cells (Pc)
85 //   is computed as:
86 //     Pc = ((H' - 1) * S + K' - H) / 2
87 //   When the stride is 1, the expression simplifies to
88 //     H' = H, Pc = (K'-1)/2.
89 //   This is where the name SAME comes from: the output has the same size as
90 //   the input.
91 //
92 // - When Padding = VALID: the output size is computed as
93 //     H' = ceil(float(H - K' + 1) / float(S))
94 //   and the number of padded cells is always zero.
95 //   When the stride is 1, the expression simplifies to
96 //     H' = H-K'+1.
97 //
98 // If you want to use EXPLICIT padding, GetWindowedOutputSizeVerboseV2 must be
99 // called instead.
100 //
101 // TODO(b/67112639): Merge V2 versions and the original versions eventually.
102 Status GetWindowedOutputSizeV2(int64 input_size, int64 filter_size,
103                                int64 dilation_rate, int64 stride,
104                                Padding padding_type, int64* output_size,
105                                int64* padding_size);
106 
107 // Computes the same output size as GetWindowedOutputSize, but reports the
108 // padding separately as before/after amounts, and supports EXPLICIT padding.
109 // When padding_type is EXPLICIT, padding_before and padding_after must
110 // already point to initialized integers with the padding amounts. Otherwise,
111 // *padding_before and *padding_after are set by this function, and any
112 // excess padding (caused by an odd total padding amount) is added to
113 // *padding_after.
114 Status GetWindowedOutputSizeVerbose(int64 input_size, int64 filter_size,
115                                     int64 stride, Padding padding_type,
116                                     int64* output_size, int64* padding_before,
117                                     int64* padding_after);
118 
119 // Same as GetWindowedOutputSizeVerbose(), with an arbitrary dilation_rate. For
120 // the detailed equations, refer to the comments for GetWindowedOutputSizeV2().
121 Status GetWindowedOutputSizeVerboseV2(int64 input_size, int64 filter_size,
122                                       int64 dilation_rate, int64 stride,
123                                       Padding padding_type, int64* output_size,
124                                       int64* padding_before,
125                                       int64* padding_after);
126 
127 // Given the 3D input size, window size, strides and padding type, populates
128 // *output_ptr with the 3D size of the output tensor and *padding_ptr with the
129 // padding to be applied to the input tensor at the lower end of every
130 // dimension. Use this for 3D convolutions (input padded with zeros) and for 3D
131 // avg/max pooling (input padded with invalid values that are not considered
132 // for pooling). EXPLICIT padding is not supported.
133 Status Get3dOutputSize(const std::array<int64, 3>& input,
134                        const std::array<int64, 3>& window,
135                        const std::array<int64, 3>& strides,
136                        Padding padding_type, std::array<int64, 3>* output_ptr,
137                        std::array<int64, 3>* padding_ptr);
138 
139 // Same as Get3dOutputSize(), with arbitrary per-dimension dilations. For the
140 // detailed equations, refer to the comments for GetWindowedOutputSizeV2().
141 Status Get3dOutputSizeV2(const std::array<int64, 3>& input,
142                          const std::array<int64, 3>& window,
143                          const std::array<int64, 3>& dilations,
144                          const std::array<int64, 3>& strides,
145                          Padding padding_type, std::array<int64, 3>* output_ptr,
146                          std::array<int64, 3>* padding_ptr);
147 
148 }  // namespace tensorflow
149 #endif  // TENSORFLOW_CORE_FRAMEWORK_KERNEL_SHAPE_UTIL_H_
150