1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_CORE_KERNELS_CONV_2D_H_
17 #define TENSORFLOW_CORE_KERNELS_CONV_2D_H_
18
19 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
20 #include "tensorflow/core/framework/tensor_types.h"
21 #include "tensorflow/core/kernels/eigen_backward_spatial_convolutions.h"
22 #include "tensorflow/core/kernels/eigen_spatial_convolutions.h"
23 #include "tensorflow/core/util/tensor_format.h"
24
25 namespace tensorflow {
26 namespace functor {
27
28 // TODO(yangke): revisit these operations and in particular, see if we can
29 // combine all of them into just one operation without causing nvcc to
30 // timeout.
31 template <typename Device, typename T, int Dims, typename IndexType>
32 struct ShuffleAndReverse {
operatorShuffleAndReverse33 void operator()(const Device& d,
34 typename TTypes<T, Dims, IndexType>::ConstTensor input,
35 const Eigen::DSizes<IndexType, Dims>& order,
36 const Eigen::array<bool, Dims>& reverse_dims,
37 typename TTypes<T, Dims, IndexType>::Tensor output) {
38 output.device(d) = input.shuffle(order).reverse(reverse_dims);
39 }
40 };
41
42 template <typename Device, typename T, int Dims, typename IndexType>
43 struct InflatePadAndShuffle {
operatorInflatePadAndShuffle44 void operator()(
45 const Device& d, typename TTypes<T, Dims, IndexType>::ConstTensor input,
46 const Eigen::DSizes<IndexType, Dims>& strides,
47 const Eigen::array<Eigen::IndexPair<IndexType>, Dims>& pad_dims,
48 const Eigen::DSizes<IndexType, Dims>& order,
49 typename TTypes<T, Dims, IndexType>::Tensor output) {
50 output.device(d) = input.inflate(strides).pad(pad_dims).shuffle(order);
51 }
52 };
53
54 template <typename Device, typename Input, typename Filter, typename Output,
55 typename OutputKernel>
SpatialConvolutionFunc(const Device & d,Output output,Input input,Filter filter,int row_stride,int col_stride,int row_dilation,int col_dilation,const Eigen::PaddingType & padding,const OutputKernel & output_kernel)56 void SpatialConvolutionFunc(const Device& d, Output output, Input input,
57 Filter filter, int row_stride, int col_stride,
58 int row_dilation, int col_dilation,
59 const Eigen::PaddingType& padding,
60 const OutputKernel& output_kernel) {
61 // Need to swap row/col when calling Eigen.
62 output.device(d) =
63 Eigen::SpatialConvolution(input, filter, col_stride, row_stride, padding,
64 col_dilation, row_dilation, output_kernel);
65 }
66
67 template <typename Device, typename T,
68 typename OutputKernel = const Eigen::NoOpOutputKernel>
69 struct SpatialConvolution {
operatorSpatialConvolution70 void operator()(const Device& d, typename TTypes<T, 4>::Tensor output,
71 typename TTypes<T, 4>::ConstTensor input,
72 typename TTypes<T, 4>::ConstTensor filter, int row_stride,
73 int col_stride, int row_dilation, int col_dilation,
74 const Eigen::PaddingType& padding,
75 const OutputKernel& output_kernel = OutputKernel()) {
76 SpatialConvolutionFunc(d, output, input, filter, row_stride, col_stride,
77 row_dilation, col_dilation, padding, output_kernel);
78 }
79 };
80
81 template <typename Device, typename OutputKernel>
82 struct SpatialConvolution<Device, Eigen::half, OutputKernel> {
83 void operator()(const Device& d,
84 typename TTypes<Eigen::half, 4>::Tensor output,
85 typename TTypes<Eigen::half, 4>::ConstTensor input,
86 typename TTypes<Eigen::half, 4>::ConstTensor filter,
87 int row_stride, int col_stride, int row_dilation,
88 int col_dilation, const Eigen::PaddingType& padding,
89 const OutputKernel& output_kernel = OutputKernel()) {
90 output.device(d) =
91 Eigen::SpatialConvolution(input.cast<float>(), filter.cast<float>(),
92 col_stride, row_stride, padding, col_dilation,
93 row_dilation, output_kernel)
94 .template cast<Eigen::half>();
95 }
96 };
97
98 template <typename Device, typename T>
99 struct SpatialConvolutionBackwardInput {
100 void operator()(const Device& d, typename TTypes<T, 4>::Tensor input_backward,
101 typename TTypes<T, 4>::ConstTensor kernel,
102 typename TTypes<T, 4>::ConstTensor output_backward,
103 int row_stride, int col_stride, int row_dilation,
104 int col_dilation) {
105 // Need to swap row/col when calling Eigen.
106 input_backward.device(d) = Eigen::SpatialConvolutionBackwardInput(
107 kernel, output_backward, input_backward.dimension(2),
108 input_backward.dimension(1), col_stride, row_stride, col_dilation,
109 row_dilation);
110 }
111 };
112
113 template <typename Device, typename T>
114 struct SpatialConvolutionBackwardFilter {
115 void operator()(const Device& d,
116 typename TTypes<T, 4>::Tensor kernel_backward,
117 typename TTypes<T, 4>::ConstTensor input,
118 typename TTypes<T, 4>::ConstTensor output_backward,
119 int row_stride, int col_stride, int row_dilation,
120 int col_dilation) {
121 // Need to swap row/col when calling Eigen.
122 kernel_backward.device(d) = Eigen::SpatialConvolutionBackwardKernel(
123 input, output_backward, kernel_backward.dimension(1),
124 kernel_backward.dimension(0), col_stride, row_stride, col_dilation,
125 row_dilation);
126 }
127 };
128
129 // TODO(vrv): Figure out how to use the MatMulFunctor in matmul_op.h.
130 // My initial attempt to do this compiled but failed in the pytest
131 // due to a swigdeps error.
132 template <typename Device, typename T,
133 typename OutputKernel = const Eigen::NoOpOutputKernel>
134 struct MatMulConvFunctor {
135 // Computes on device "d": out = in0 * in1, where * is matrix
136 // multiplication.
137 void operator()(
138 const Device& d, typename TTypes<T, 2>::Tensor out,
139 typename TTypes<T, 2>::ConstTensor in0,
140 typename TTypes<T, 2>::ConstTensor in1,
141 const Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1>& dim_pair,
142 const OutputKernel& output_kernel = OutputKernel()) {
143 out.device(d) = in0.contract(in1, dim_pair, output_kernel);
144 }
145 };
146
147 // Shuffles a filter tensor from TensorFlow format HWIO to dst_filter_format.
148 //
149 // Note: Currently OIHW is the only supported destination format. Support for
150 // OHWI format will be added in a follow-up change.
151 template <typename Device, typename T, typename IndexType, int NDIMS>
152 struct TransformFilter {
153 void operator()(const Device& d, FilterTensorFormat dst_filter_format,
154 typename TTypes<T, NDIMS, IndexType>::ConstTensor in,
155 typename TTypes<T, NDIMS, IndexType>::Tensor out) {
156 // Merge the spatial dimensions together to speed up the shuffle operation.
157 Eigen::DSizes<IndexType, 3> merged_dims;
158 merged_dims[0] = in.dimension(0); // spatial dimensions
159 for (int i = 1; i < NDIMS - 2; ++i) {
160 merged_dims[0] *= in.dimension(i);
161 }
162 merged_dims[1] = in.dimension(NDIMS - 2); // input filters
163 merged_dims[2] = in.dimension(NDIMS - 1); // output filters
164
165 DCHECK(dst_filter_format == FORMAT_OIHW)
166 << "Unsupported destination filter format: "
167 << ToString(dst_filter_format);
168 // Source filter format is FORMAT_HWIO and spatial dimensions HW are merged
169 // in the beginning.
170 Eigen::DSizes<IndexType, 3> shuffling_perm =
171 Eigen::DSizes<IndexType, 3>(2, 1, 0);
172
173 Eigen::DSizes<IndexType, NDIMS> expanded_dims;
174 int out_index = 0;
175 for (int merged_dim = 0; merged_dim < merged_dims.rank(); ++merged_dim) {
176 if (shuffling_perm[merged_dim] == 0) {
177 for (int spatial_dim = 0; spatial_dim < NDIMS - 2; ++spatial_dim) {
178 expanded_dims[out_index++] = in.dimension(spatial_dim);
179 }
180 } else {
181 constexpr int kLastSpatialDim = NDIMS - 3;
182 expanded_dims[out_index++] =
183 in.dimension(kLastSpatialDim + shuffling_perm[merged_dim]);
184 }
185 }
186
187 out.device(d) =
188 in.reshape(merged_dims).shuffle(shuffling_perm).reshape(expanded_dims);
189 }
190 };
191
192 template <typename Device, typename T, typename IndexType>
193 struct TransformDepth {
194 void operator()(const Device& d,
195 typename TTypes<T, 4, IndexType>::ConstTensor in,
196 const Eigen::DSizes<IndexType, 4>& shuffle,
197 typename TTypes<T, 4, IndexType>::Tensor out) {
198 Eigen::DSizes<IndexType, 3> merged_dims;
199 Eigen::DSizes<IndexType, 4> expanded_dims;
200 Eigen::DSizes<IndexType, 3> new_shuffle;
201
202 // Merge dimensions that won't be shuffled together to speed things up.
203 if (shuffle[1] == 2 && shuffle[2] == 3) {
204 merged_dims[0] = in.dimension(0);
205 merged_dims[1] = in.dimension(1);
206 merged_dims[2] = in.dimension(2) * in.dimension(3);
207 new_shuffle[0] = shuffle[0];
208 new_shuffle[1] = 2;
209 new_shuffle[2] = shuffle[3];
210 expanded_dims[0] = in.dimension(shuffle[0]);
211 expanded_dims[1] = in.dimension(2);
212 expanded_dims[2] = in.dimension(3);
213 expanded_dims[3] = in.dimension(shuffle[3]);
214 } else if (shuffle[0] == 2 && shuffle[1] == 3) {
215 merged_dims[0] = in.dimension(0);
216 merged_dims[1] = in.dimension(1);
217 merged_dims[2] = in.dimension(2) * in.dimension(3);
218 new_shuffle[0] = 2;
219 new_shuffle[1] = shuffle[2];
220 new_shuffle[2] = shuffle[3];
221 expanded_dims[0] = in.dimension(2);
222 expanded_dims[1] = in.dimension(3);
223 expanded_dims[2] = in.dimension(shuffle[2]);
224 expanded_dims[3] = in.dimension(shuffle[3]);
225 } else if (shuffle[0] == 0 && shuffle[1] == 3 && shuffle[2] == 1 &&
226 shuffle[3] == 2) {
227 merged_dims[0] = in.dimension(0);
228 merged_dims[1] = in.dimension(1) * in.dimension(2);
229 merged_dims[2] = in.dimension(3);
230 new_shuffle[0] = 0;
231 new_shuffle[1] = 2;
232 new_shuffle[2] = 1;
233 expanded_dims[0] = in.dimension(0);
234 expanded_dims[1] = in.dimension(3);
235 expanded_dims[2] = in.dimension(1);
236 expanded_dims[3] = in.dimension(2);
237 } else {
238 assert(false && "unexpected shuffle");
239 }
240
241 out.device(d) =
242 in.reshape(merged_dims).shuffle(new_shuffle).reshape(expanded_dims);
243 }
244 };
245
246 template <typename Device, typename T, typename IndexType, int NDIMS>
247 struct PadInput {
248 void operator()(const Device& d,
249 typename TTypes<T, NDIMS, IndexType>::ConstTensor in,
250 const std::array<int, NDIMS - 2>& padding_left,
251 const std::array<int, NDIMS - 2>& padding_right,
252 typename TTypes<T, NDIMS, IndexType>::Tensor out,
253 TensorFormat format) {
254 Eigen::array<Eigen::IndexPair<IndexType>, NDIMS> padding;
255 padding[GetTensorDimIndex<NDIMS - 2>(format, 'N')] = {0, 0};
256 for (int i = 0; i < NDIMS - 2; ++i) {
257 padding[GetTensorDimIndex<NDIMS - 2>(format, '0' + i)] = {
258 padding_left[i], padding_right[i]};
259 }
260 padding[GetTensorDimIndex<NDIMS - 2>(format, 'C')] = {0, 0};
261 out.device(d) = in.pad(padding);
262 }
263 };
264
265 // Converts a tensor from:
266 // [batch, <spatial>, filters]
267 // to:
268 // [batch, filters, <spatial>]
269 template <typename Device, typename T, int NDIMS>
270 struct NHWCToNCHW {
271 void operator()(const Device& d, typename TTypes<T, NDIMS>::ConstTensor in,
272 typename TTypes<T, NDIMS>::Tensor out);
273 };
274
275 // Converts a tensor from:
276 // [batch, filters, <spatial>]
277 // to:
278 // [batch, <spatial>, filters]
279 template <typename Device, typename T, int NDIMS>
280 struct NCHWToNHWC {
281 void operator()(const Device& d, typename TTypes<T, NDIMS>::ConstTensor in,
282 typename TTypes<T, NDIMS>::Tensor out);
283 };
284
285 // Converts a tensor from:
286 // [dim0, dim1, dim2]
287 // to:
288 // [dim0, dim2, dim1]
289 template <typename Device, typename T, bool conjugate = false>
290 struct SwapDimension1And2InTensor3 {
291 void operator()(const Device& d, const T* in,
292 const gtl::ArraySlice<int64>& input_dims, T* out);
293 };
294
295 // Converts a tensor from:
296 // [dim0, dim1, dim2]
297 // to:
298 // [dim2, dim1, dim0]
299 template <typename Device, typename T, bool conjugate = false>
300 struct SwapDimension0And2InTensor3 {
301 void operator()(const Device& d, const T* in,
302 const gtl::ArraySlice<int64>& input_dims, T* out);
303 };
304
305 // Transforms back filter from OIHW to HWOI format to reverse effect of
306 // TransformFilter above.
307 // TODO(hinsu): Support reverse transformation from filter format OHWI as well.
308 template <typename Device, typename T, int NDIMS>
309 struct ReverseTransformFilter {
310 void operator()(const Device& d, typename TTypes<T, NDIMS>::ConstTensor in,
311 typename TTypes<T, NDIMS>::Tensor out);
312 };
313
314 } // namespace functor
315
316 template <class T>
317 class ConvAlgorithmMap;
318
319 template <>
320 class ConvAlgorithmMap<Eigen::ThreadPoolDevice> {};
321 } // namespace tensorflow
322
323 #endif // TENSORFLOW_CORE_KERNELS_CONV_2D_H_
324