/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_CONV_2D_H_
#define TENSORFLOW_CORE_KERNELS_CONV_2D_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/kernels/eigen_backward_spatial_convolutions.h"
#include "tensorflow/core/kernels/eigen_spatial_convolutions.h"
#include "tensorflow/core/util/tensor_format.h"

namespace tensorflow {
namespace functor {

template <typename Device, typename Input, typename Filter, typename Output,
          typename OutputKernel>
void SpatialConvolutionFunc(const Device& d, Output output, Input input,
                            Filter filter, int row_stride, int col_stride,
                            int row_dilation, int col_dilation,
                            const Eigen::PaddingType& padding,
                            const OutputKernel& output_kernel,
                            int padding_top = 0, int padding_bottom = 0,
                            int padding_left = 0, int padding_right = 0) {
  // Need to swap row/col, padding_top/padding_left, and
  // padding_bottom/padding_right when calling Eigen. Eigen expects the tensor
  // in NWHC format, but the tensor given is in NHWC.
  output.device(d) = Eigen::SpatialConvolution(
      input, filter, col_stride, row_stride, padding, col_dilation,
      row_dilation, output_kernel, padding_left, padding_right, padding_top,
      padding_bottom);
}

template <typename Device, typename T,
          typename OutputKernel = const Eigen::NoOpOutputKernel>
struct SpatialConvolution {
  void operator()(const Device& d, typename TTypes<T, 4>::Tensor output,
                  typename TTypes<T, 4>::ConstTensor input,
                  typename TTypes<T, 4>::ConstTensor filter, int row_stride,
                  int col_stride, int row_dilation, int col_dilation,
                  const Eigen::PaddingType& padding,
                  const OutputKernel& output_kernel = OutputKernel()) {
    SpatialConvolutionFunc(d, output, input, filter, row_stride, col_stride,
                           row_dilation, col_dilation, padding, output_kernel);
  }

  void operator()(const Device& d, typename TTypes<T, 4>::Tensor output,
                  typename TTypes<T, 4>::ConstTensor input,
                  typename TTypes<T, 4>::ConstTensor filter, int row_stride,
                  int col_stride, int row_dilation, int col_dilation,
                  int padding_top, int padding_bottom, int padding_left,
                  int padding_right,
                  const OutputKernel& output_kernel = OutputKernel()) {
    SpatialConvolutionFunc(
        d, output, input, filter, row_stride, col_stride, row_dilation,
        col_dilation, Eigen::PaddingType::PADDING_VALID, output_kernel,
        padding_top, padding_bottom, padding_left, padding_right);
  }
};

template <typename Device, typename OutputKernel>
struct SpatialConvolution<Device, Eigen::half, OutputKernel> {
  void operator()(const Device& d,
                  typename TTypes<Eigen::half, 4>::Tensor output,
                  typename TTypes<Eigen::half, 4>::ConstTensor input,
                  typename TTypes<Eigen::half, 4>::ConstTensor filter,
                  int row_stride, int col_stride, int row_dilation,
                  int col_dilation, const Eigen::PaddingType& padding,
                  const OutputKernel& output_kernel = OutputKernel()) {
    output.device(d) =
        Eigen::SpatialConvolution(input.cast<float>(), filter.cast<float>(),
                                  col_stride, row_stride, padding, col_dilation,
                                  row_dilation, output_kernel)
            .template cast<Eigen::half>();
  }

  void operator()(const Device& d,
                  typename TTypes<Eigen::half, 4>::Tensor output,
                  typename TTypes<Eigen::half, 4>::ConstTensor input,
                  typename TTypes<Eigen::half, 4>::ConstTensor filter,
                  int row_stride, int col_stride, int row_dilation,
                  int col_dilation, int padding_top, int padding_bottom,
                  int padding_left, int padding_right,
                  const OutputKernel& output_kernel = OutputKernel()) {
    output.device(d) =
        Eigen::SpatialConvolution(
            input.cast<float>(), filter.cast<float>(), col_stride, row_stride,
            Eigen::PaddingType::PADDING_VALID, col_dilation, row_dilation,
            output_kernel, padding_left, padding_right, padding_top,
            padding_bottom)
            .template cast<Eigen::half>();
  }
};

template <typename Device, typename T>
struct SpatialConvolutionBackwardInputFunc {
  void operator()(const Device& d,
                  typename TTypes<T, 4>::Tensor input_backward,
                  typename TTypes<T, 4>::ConstTensor filter,
                  typename TTypes<T, 4>::ConstTensor output_backward,
                  Eigen::DenseIndex col_stride, Eigen::DenseIndex row_stride,
                  Eigen::DenseIndex col_dilation,
                  Eigen::DenseIndex row_dilation) {
    input_backward.device(d) = Eigen::SpatialConvolutionBackwardInput(
        filter, output_backward, input_backward.dimension(2),
        input_backward.dimension(1), col_stride, row_stride, col_dilation,
        row_dilation);
  }
};
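// Illustrative usage (a sketch, not part of this header's contract): a kernel
// computing the gradient w.r.t. the convolution input could invoke the
// functor above roughly as follows, where `ctx`, `in_backprop`, `filter`,
// `out_backprop` and the stride/dilation variables are hypothetical
// kernel-local values:
//
//   functor::SpatialConvolutionBackwardInputFunc<Eigen::ThreadPoolDevice,
//                                                float>()(
//       ctx->eigen_device<Eigen::ThreadPoolDevice>(),
//       in_backprop->tensor<float, 4>(), filter.tensor<float, 4>(),
//       out_backprop.tensor<float, 4>(), col_stride, row_stride,
//       col_dilation, row_dilation);
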
// GPU version requires all tensors to be indexable by int32.
template <typename T>
struct SpatialConvolutionBackwardInputFunc<Eigen::GpuDevice, T> {
  void operator()(const Eigen::GpuDevice& d,
                  typename TTypes<T, 4>::Tensor input_backward,
                  typename TTypes<T, 4>::ConstTensor filter,
                  typename TTypes<T, 4>::ConstTensor output_backward,
                  Eigen::DenseIndex col_stride, Eigen::DenseIndex row_stride,
                  Eigen::DenseIndex col_dilation,
                  Eigen::DenseIndex row_dilation) {
    To32Bit(input_backward).device(d) = Eigen::SpatialConvolutionBackwardInput(
        To32Bit(filter), To32Bit(output_backward), input_backward.dimension(2),
        input_backward.dimension(1), col_stride, row_stride, col_dilation,
        row_dilation);
  }
};

template <typename Device, typename T>
struct SpatialConvolutionBackwardInputWithExplicitPaddingFunc {
  void operator()(const Device& d,
                  typename TTypes<T, 4>::Tensor input_backward,
                  typename TTypes<T, 4>::ConstTensor filter,
                  typename TTypes<T, 4>::ConstTensor output_backward,
                  Eigen::DenseIndex padded_cols, Eigen::DenseIndex padded_rows,
                  Eigen::DenseIndex col_stride, Eigen::DenseIndex row_stride,
                  Eigen::DenseIndex col_dilation,
                  Eigen::DenseIndex row_dilation, Eigen::DenseIndex pad_left,
                  Eigen::DenseIndex pad_top) {
    // We have to slice the result of the spatial convolution backward input
    // to remove the padding, before assigning it to `input_backward`.
    //
    // TODO(ezhulenev): Pass explicit paddings to Eigen and do not materialize
    // the intermediate result in memory before slicing.
    input_backward.device(d) =
        Eigen::SpatialConvolutionBackwardInput(
            filter, output_backward, padded_cols, padded_rows, col_stride,
            row_stride, col_dilation, row_dilation)
            .eval()
            .slice(Eigen::DSizes<Eigen::DenseIndex, 4>{0, pad_left, pad_top, 0},
                   input_backward.dimensions());
  }
};

// GPU version requires all tensors to be indexable by int32.
template <typename T>
struct SpatialConvolutionBackwardInputWithExplicitPaddingFunc<Eigen::GpuDevice,
                                                              T> {
  void operator()(const Eigen::GpuDevice& d,
                  typename TTypes<T, 4>::Tensor input_backward,
                  typename TTypes<T, 4>::ConstTensor filter,
                  typename TTypes<T, 4>::ConstTensor output_backward,
                  Eigen::DenseIndex padded_cols, Eigen::DenseIndex padded_rows,
                  Eigen::DenseIndex col_stride, Eigen::DenseIndex row_stride,
                  Eigen::DenseIndex col_dilation,
                  Eigen::DenseIndex row_dilation, Eigen::DenseIndex pad_left,
                  Eigen::DenseIndex pad_top) {
    To32Bit(input_backward).device(d) =
        Eigen::SpatialConvolutionBackwardInput(
            To32Bit(filter), To32Bit(output_backward), padded_cols,
            padded_rows, col_stride, row_stride, col_dilation, row_dilation)
            .eval()
            .slice(Eigen::DSizes<Eigen::DenseIndex, 4>{0, pad_left, pad_top, 0},
                   input_backward.dimensions());
  }
};

// TODO(vrv): Figure out how to use the MatMulFunctor in matmul_op.h.
// My initial attempt to do this compiled but failed in the pytest
// due to a swigdeps error.
template <typename Device, typename T,
          typename OutputKernel = const Eigen::NoOpOutputKernel>
struct MatMulConvFunctor {
  // Computes on device "d": out = in0 * in1, where * is matrix
  // multiplication.
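  //
  // For example, for a plain [m, k] x [k, n] matrix product, `dim_pair` is
  // {Eigen::IndexPair<Eigen::DenseIndex>(1, 0)}, i.e. contract dimension 1 of
  // `in0` with dimension 0 of `in1`.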
  void operator()(
      const Device& d, typename TTypes<T, 2>::Tensor out,
      typename TTypes<T, 2>::ConstTensor in0,
      typename TTypes<T, 2>::ConstTensor in1,
      const Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1>& dim_pair,
      const OutputKernel& output_kernel = OutputKernel()) {
    out.device(d) = in0.contract(in1, dim_pair, output_kernel);
  }
};

// Shuffles a filter tensor from TensorFlow format HWIO to dst_filter_format.
//
// Note: Currently supports OIHW and OHWI destination formats.
template <typename Device, typename T, typename IndexType, int NDIMS>
struct TransformFilter {
  void operator()(const Device& d, FilterTensorFormat dst_filter_format,
                  typename TTypes<T, NDIMS, IndexType>::ConstTensor in,
                  typename TTypes<T, NDIMS, IndexType>::Tensor out) {
    // NOTE: Source filter format is always HWIO.
    Eigen::DSizes<IndexType, NDIMS - 2> spatial_dims;
    for (int i = 0; i < spatial_dims.rank(); ++i) {
      spatial_dims[i] = in.dimension(i);
    }

    // Merge the spatial dimensions together to speed up the shuffle operation.
    Eigen::DSizes<IndexType, 3> merged_dims;
    merged_dims[0] = spatial_dims.TotalSize();  // product of spatial dims [H*W]
    merged_dims[1] = in.dimension(NDIMS - 2);   // input filters [I]
    merged_dims[2] = in.dimension(NDIMS - 1);   // output filters [O]

    // Shuffle tensor with merged spatial dimensions.
    Eigen::DSizes<IndexType, 3> shuffling_perm;
    // Expand shuffled tensor into final dimensions.
    Eigen::DSizes<IndexType, NDIMS> expanded_dims;

    if (dst_filter_format == FORMAT_OIHW) {
      shuffling_perm = Eigen::DSizes<IndexType, 3>(2, 1, 0);

      expanded_dims[0] = merged_dims[2];  // [O]
      expanded_dims[1] = merged_dims[1];  // [I]
      for (int i = 0; i < spatial_dims.rank(); ++i) {
        expanded_dims[2 + i] = spatial_dims[i];
      }

    } else if (dst_filter_format == FORMAT_OHWI) {
      shuffling_perm = Eigen::DSizes<IndexType, 3>(2, 0, 1);

      expanded_dims[0] = merged_dims[2];          // [O]
      expanded_dims[NDIMS - 1] = merged_dims[1];  // [I]
      for (int i = 0; i < spatial_dims.rank(); ++i) {
        expanded_dims[1 + i] = spatial_dims[i];
      }

    } else {
      DCHECK(false) << "Unsupported destination filter format: "
                    << ToString(dst_filter_format);
    }

    out.device(d) =
        in.reshape(merged_dims).shuffle(shuffling_perm).reshape(expanded_dims);
  }
};

// TODO: This functor is not used anywhere and should be removed,
// but it defines some Eigen templates that are referenced in other kernels.
template <typename Device, typename T, typename IndexType>
struct TransformDepth {
  void operator()(const Device& d,
                  typename TTypes<T, 4, IndexType>::ConstTensor in,
                  const Eigen::DSizes<IndexType, 4>& shuffle,
                  typename TTypes<T, 4, IndexType>::Tensor out) {
    Eigen::DSizes<IndexType, 3> merged_dims;
    Eigen::DSizes<IndexType, 4> expanded_dims;
    Eigen::DSizes<IndexType, 3> new_shuffle;

    // Merge dimensions that won't be shuffled together to speed things up.
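    // For example, for the NHWC->NCHW transpose (shuffle = {0, 3, 1, 2}) the
    // input dimensions 1 and 2 stay adjacent after the shuffle, so they can
    // be collapsed into a single dimension and the 4-D shuffle is performed
    // as the cheaper 3-D shuffle {0, 2, 1}.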
    if (shuffle[1] == 2 && shuffle[2] == 3) {
      merged_dims[0] = in.dimension(0);
      merged_dims[1] = in.dimension(1);
      merged_dims[2] = in.dimension(2) * in.dimension(3);
      new_shuffle[0] = shuffle[0];
      new_shuffle[1] = 2;
      new_shuffle[2] = shuffle[3];
      expanded_dims[0] = in.dimension(shuffle[0]);
      expanded_dims[1] = in.dimension(2);
      expanded_dims[2] = in.dimension(3);
      expanded_dims[3] = in.dimension(shuffle[3]);
    } else if (shuffle[0] == 2 && shuffle[1] == 3) {
      merged_dims[0] = in.dimension(0);
      merged_dims[1] = in.dimension(1);
      merged_dims[2] = in.dimension(2) * in.dimension(3);
      new_shuffle[0] = 2;
      new_shuffle[1] = shuffle[2];
      new_shuffle[2] = shuffle[3];
      expanded_dims[0] = in.dimension(2);
      expanded_dims[1] = in.dimension(3);
      expanded_dims[2] = in.dimension(shuffle[2]);
      expanded_dims[3] = in.dimension(shuffle[3]);
    } else if (shuffle[0] == 0 && shuffle[1] == 3 && shuffle[2] == 1 &&
               shuffle[3] == 2) {
      merged_dims[0] = in.dimension(0);
      merged_dims[1] = in.dimension(1) * in.dimension(2);
      merged_dims[2] = in.dimension(3);
      new_shuffle[0] = 0;
      new_shuffle[1] = 2;
      new_shuffle[2] = 1;
      expanded_dims[0] = in.dimension(0);
      expanded_dims[1] = in.dimension(3);
      expanded_dims[2] = in.dimension(1);
      expanded_dims[3] = in.dimension(2);
    } else {
      assert(false && "unexpected shuffle");
    }

    out.device(d) =
        in.reshape(merged_dims).shuffle(new_shuffle).reshape(expanded_dims);
  }
};

// Note on the use of a const reference for the "padding_value" argument:
//
// In the ROCm TF build,
// ++ the call(s) to the functor are in files (conv_*.cc) that are compiled by
//    the "CPU" compiler, while
// ++ the GPUDevice-specific template instantiations are in files that are
//    compiled by the "GPU" compiler.
//
// For T == Eigen::half, the value of the "padding_value" argument (when it was
// passed by value) was getting corrupted, leading to regressions in the
// convolution unit tests.
//
// I do not understand the exact reason for this, but based on similar past
// issues, it is likely due to a combination of
// ++ an ABI incompatibility between the "old" CPU compiler (gcc 5.4 for
//    Ubuntu 16.04, gcc 7.5 for Ubuntu 18.04) and the "new" ROCm GPU compiler
//    (hipclang, which is based on the latest clang), AND
// ++ Eigen::half having the same size but different internals on the CPU and
//    GPU sides (unsigned short on CPU, union {unsigned short, _Float16} on
//    GPU).
//
// Changing the "padding_value" argument to be a const reference type seems to
// suppress the bug.
template <typename Device, typename T, typename IndexType, int NDIMS>
struct PadInput {
  void operator()(const Device& d,
                  typename TTypes<T, NDIMS, IndexType>::ConstTensor in,
                  const std::array<int, NDIMS - 2>& padding_left,
                  const std::array<int, NDIMS - 2>& padding_right,
                  typename TTypes<T, NDIMS, IndexType>::Tensor out,
                  TensorFormat format, const T& padding_value) {
    Eigen::array<Eigen::IndexPair<IndexType>, NDIMS> padding;
    padding[GetTensorDimIndex<NDIMS - 2>(format, 'N')] = {0, 0};
    for (int i = 0; i < NDIMS - 2; ++i) {
      padding[GetTensorDimIndex<NDIMS - 2>(format, '0' + i)] = {
          padding_left[i], padding_right[i]};
    }
    padding[GetTensorDimIndex<NDIMS - 2>(format, 'C')] = {0, 0};
    out.device(d) = in.pad(padding, padding_value);
  }
};

// Converts a tensor from:
//   [batch, <spatial>, filters]
// to:
//   [batch, filters, <spatial>]
template <typename Device, typename T, int NDIMS>
struct NHWCToNCHW {
  void operator()(const Device& d, typename TTypes<T, NDIMS>::ConstTensor in,
                  typename TTypes<T, NDIMS>::Tensor out);
};

// Converts a tensor from:
//   [batch, filters, <spatial>]
// to:
//   [batch, <spatial>, filters]
template <typename Device, typename T, int NDIMS>
struct NCHWToNHWC {
  void operator()(const Device& d, typename TTypes<T, NDIMS>::ConstTensor in,
                  typename TTypes<T, NDIMS>::Tensor out);
};

// Converts a tensor from:
//   [dim0, dim1, dim2]
// to:
//   [dim0, dim2, dim1]
template <typename Device, typename T, bool conjugate = false>
struct SwapDimension1And2InTensor3 {
  void operator()(const Device& d, const T* in,
                  const gtl::ArraySlice<int64>& input_dims, T* out);
};

// Converts a tensor from:
//   [dim0, dim1, dim2]
// to:
//   [dim2, dim1, dim0]
template <typename Device, typename T, bool conjugate = false>
struct SwapDimension0And2InTensor3 {
  void operator()(const Device& d, const T* in,
                  const gtl::ArraySlice<int64>& input_dims, T* out);
};

// Transforms a filter back from OIHW or OHWI to HWIO format, to reverse the
// effect of TransformFilter above.
template <typename Device, typename T, int NDIMS>
struct ReverseTransformFilter {
  void operator()(const Device& d, FilterTensorFormat src_filter_format,
                  typename TTypes<T, NDIMS>::ConstTensor in,
                  typename TTypes<T, NDIMS>::Tensor out);
};

}  // namespace functor

template <class T>
class ConvAlgorithmMap;

template <>
class ConvAlgorithmMap<Eigen::ThreadPoolDevice> {};

}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_CONV_2D_H_