/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/nn_ops.cc.

#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/avgpooling_op.h"

#include <vector>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_slice.h"
#include "tensorflow/core/kernels/eigen_pooling.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/kernels/pooling_ops_common.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/util/tensor_format.h"

#if GOOGLE_CUDA
#include "tensorflow/core/kernels/maxpooling_op_gpu.h"
#include "tensorflow/core/kernels/pooling_ops_common_gpu.h"
#endif  // GOOGLE_CUDA

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

template <typename Device, typename T>
class AvgPoolingOp : public UnaryOp<T> {
 public:
  explicit AvgPoolingOp(OpKernelConstruction* context) : UnaryOp<T>(context) {
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    OP_REQUIRES(
        context, data_format_ == FORMAT_NHWC,
        errors::InvalidArgument("Default AvgPoolingOp only supports NHWC ",
                                "on device type ",
                                DeviceTypeString(context->device_type())));
    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
    OP_REQUIRES(context, ksize_.size() == 4,
                errors::InvalidArgument("Sliding window ksize field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
    OP_REQUIRES(context, stride_.size() == 4,
                errors::InvalidArgument("Sliding window stride field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
    OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
                errors::Unimplemented(
                    "Pooling is not yet supported on the batch dimension."));
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& tensor_in = context->input(0);
    PoolParameters params{context, ksize_, stride_,
                          padding_, data_format_, tensor_in.shape()};
    if (!context->status().ok()) {
      return;
    }
    OP_REQUIRES(context, params.depth_window == 1,
                errors::Unimplemented("Non-spatial pooling is not "
                                      "yet supported. Volunteers? :)"));

    // For avgpooling, tensor_in should have 4 dimensions.
    OP_REQUIRES(context, tensor_in.dims() == 4,
                errors::InvalidArgument("tensor_in must be 4-dimensional"));

    Tensor* output = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(
                                0, params.forward_output_shape(), &output));

    SpatialAvgPool<Device, T>(context, output, tensor_in, params, padding_);
  }

 private:
  std::vector<int32> ksize_;
  std::vector<int32> stride_;
  Padding padding_;
  TensorFormat data_format_;
};

REGISTER_KERNEL_BUILDER(
    Name("AvgPool").Device(DEVICE_CPU).TypeConstraint<double>("T"),
    AvgPoolingOp<CPUDevice, double>);
REGISTER_KERNEL_BUILDER(
    Name("AvgPool").Device(DEVICE_CPU).TypeConstraint<float>("T"),
    AvgPoolingOp<CPUDevice, float>);
REGISTER_KERNEL_BUILDER(
    Name("AvgPool").Device(DEVICE_CPU).TypeConstraint<Eigen::half>("T"),
    AvgPoolingOp<CPUDevice, Eigen::half>);

#if GOOGLE_CUDA
template <typename T>
class AvgPoolingOp<GPUDevice, T> : public UnaryOp<T> {
 public:
  typedef GPUDevice Device;
  explicit AvgPoolingOp(OpKernelConstruction* context) : UnaryOp<T>(context) {
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
    OP_REQUIRES(context, ksize_.size() == 4,
                errors::InvalidArgument("Sliding window ksize field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
    OP_REQUIRES(context, stride_.size() == 4,
                errors::InvalidArgument("Sliding window stride field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
    const int32 ksize_n = GetTensorDim(ksize_, data_format_, 'N');
    const int32 stride_n = GetTensorDim(stride_, data_format_, 'N');
    OP_REQUIRES(context, ksize_n == 1 && stride_n == 1,
                errors::Unimplemented(
                    "Pooling is not yet supported on the batch dimension."));
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& tensor_in = context->input(0);
    PoolParameters params{context, ksize_, stride_,
                          padding_, data_format_, tensor_in.shape()};
    if (!context->status().ok()) {
      return;
    }
    OP_REQUIRES(context, params.depth_window == 1,
                errors::Unimplemented("Non-spatial pooling is not "
                                      "yet supported. Volunteers? :)"));

    // For avgpooling, tensor_in should have 4 dimensions.
    OP_REQUIRES(context, tensor_in.dims() == 4,
                errors::InvalidArgument("tensor_in must be 4-dimensional"));

    TensorShape output_shape = params.forward_output_shape();

    if (data_format_ == FORMAT_NCHW) {
      DnnPoolingOp<T>::Compute(context, se::dnn::PoolingMode::kAverage, ksize_,
                               stride_, padding_, data_format_, tensor_in,
                               output_shape,
                               /*propagate_nans=*/false);
    } else {
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context,
                     context->allocate_output(0, output_shape, &output));
      Eigen::PaddingType pt = BrainPadding2EigenPadding(padding_);
      functor::SpatialAvgPooling<Device, T>()(
          context->eigen_device<Device>(), output->tensor<T, 4>(),
          tensor_in.tensor<T, 4>(), params.window_rows, params.window_cols,
          params.row_stride, params.col_stride, pt);
    }
  }

 private:
  std::vector<int32> ksize_;
  std::vector<int32> stride_;
  Padding padding_;
  TensorFormat data_format_;
};

// Forward declarations of the functor specializations for GPU.
namespace functor {
#define DECLARE_GPU_SPEC(T)                                      \
  template <>                                                    \
  void SpatialAvgPooling<GPUDevice, T>::operator()(              \
      const GPUDevice& d, typename TTypes<T, 4>::Tensor output,  \
      typename TTypes<T, 4>::ConstTensor input, int window_rows, \
      int window_cols, int row_stride, int col_stride,           \
      const Eigen::PaddingType& padding);                        \
  extern template struct SpatialAvgPooling<GPUDevice, T>;

DECLARE_GPU_SPEC(Eigen::half);
DECLARE_GPU_SPEC(float);
DECLARE_GPU_SPEC(double);
#undef DECLARE_GPU_SPEC
}  // namespace functor

REGISTER_KERNEL_BUILDER(
    Name("AvgPool").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
    AvgPoolingOp<GPUDevice, Eigen::half>);
REGISTER_KERNEL_BUILDER(
    Name("AvgPool").Device(DEVICE_GPU).TypeConstraint<float>("T"),
    AvgPoolingOp<GPUDevice, float>);
REGISTER_KERNEL_BUILDER(
    Name("AvgPool").Device(DEVICE_GPU).TypeConstraint<double>("T"),
    AvgPoolingOp<GPUDevice, double>);
#endif  // GOOGLE_CUDA

// The operation to compute AvgPool gradients.
// It takes two inputs:
//   - The original input tensor shape
//   - Backprop tensor for output
// It produces one output: backprop tensor for input.
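//
// A rough sketch of the math implemented by the CPU shard loop below: each
// out_backprop element is scaled by 1 / (rsize * csize), the effective
// window area after clipping at the borders, and accumulated into every
// input cell that its pooling window covered. For instance, with a 2x2
// window, stride 2 and VALID padding, each covered input cell receives one
// quarter of the gradient of the single output cell that pooled it:
//   input_backprop[2*i]     += out_backprop[i] / 4;  (per row/col pair)
//   input_backprop[2*i + 1] += out_backprop[i] / 4;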
template <typename Device, class T>
class AvgPoolingGradOp : public OpKernel {
 public:
  explicit AvgPoolingGradOp(OpKernelConstruction* context)
      : OpKernel(context) {
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    OP_REQUIRES(
        context, data_format_ == FORMAT_NHWC,
        errors::InvalidArgument("Default AvgPoolingGradOp only supports NHWC ",
                                "on device type ",
                                DeviceTypeString(context->device_type())));
    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
    OP_REQUIRES(context, ksize_.size() == 4,
                errors::InvalidArgument("Sliding window ksize field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
    OP_REQUIRES(context, stride_.size() == 4,
                errors::InvalidArgument("Sliding window strides field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
    OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
                errors::Unimplemented(
                    "Pooling is not yet supported on the batch dimension."));
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& tensor_in_shape = context->input(0);
    const Tensor& out_backprop = context->input(1);
    // For avgpooling, tensor_in_shape should have 1 dimension, and 4 elements.
    OP_REQUIRES(
        context,
        tensor_in_shape.dims() == 1 && tensor_in_shape.NumElements() == 4,
        errors::InvalidArgument("orig_input_shape must be 1-dimensional and 4 "
                                "elements"));
    // For avgpooling, out_backprop should have 4 dimensions.
    OP_REQUIRES(context, out_backprop.dims() == 4,
                errors::InvalidArgument("out_backprop must be 4-dimensional"));
    const int64 out_backprop_batch = out_backprop.dim_size(0);
    const int64 out_backprop_rows = out_backprop.dim_size(1);
    const int64 out_backprop_cols = out_backprop.dim_size(2);
    const int64 out_backprop_depth = out_backprop.dim_size(3);

    TensorShape output_shape;
    auto shape_vec = tensor_in_shape.vec<int32>();
    for (int64 i = 0; i < tensor_in_shape.NumElements(); ++i) {
      output_shape.AddDim(shape_vec(i));
    }
    const int64 in_rows = output_shape.dim_size(1);
    const int64 in_cols = output_shape.dim_size(2);

    Tensor* output = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(0, output_shape, &output));
    output->flat<T>().setZero();

    const int window_rows = ksize_[1];
    const int window_cols = ksize_[2];
    const int depth_window = ksize_[3];

    const int row_stride = stride_[1];
    const int col_stride = stride_[2];

    // We (will) use different code for spatial pooling and
    // non-spatial pooling.
    //
    // Spatial pooling is when depth_window = 1
    OP_REQUIRES(context, depth_window == 1,
                errors::Unimplemented("Non-spatial pooling is not "
                                      "yet supported. Volunteers? :)"));

    int64 out_height, out_width, pad_rows, pad_cols;
    OP_REQUIRES_OK(context,
                   GetWindowedOutputSize(in_rows, window_rows, row_stride,
                                         padding_, &out_height, &pad_rows));
    OP_REQUIRES_OK(context,
                   GetWindowedOutputSize(in_cols, window_cols, col_stride,
                                         padding_, &out_width, &pad_cols));

    const T* out_backprop_ptr = out_backprop.flat<T>().data();
    T* input_backprop_ptr = output->flat<T>().data();

    auto shard = [context, out_backprop_ptr, input_backprop_ptr,
                  out_backprop_rows, out_backprop_cols, out_backprop_depth,
                  in_rows, in_cols, window_rows, window_cols, row_stride,
                  col_stride, pad_rows, pad_cols](int64 start, int64 limit) {
      for (int64 b = start; b < limit; ++b) {
        for (int64 r = 0; r < out_backprop_rows; ++r) {
          // Calculates row broadcast size. For SAME padding, current
          // index could be in the padding area, and r*row_stride +
          // window_rows could be beyond the input tensor's boundary. In
          // such cases, change the starting index and reduce the
          // broadcast size.
          int rindex, rsize;
          OP_REQUIRES_OK(context,
                         GetBroadcastSize(r, in_rows, window_rows, row_stride,
                                          pad_rows, &rindex, &rsize));
          for (int64 c = 0; c < out_backprop_cols; ++c) {
            // Calculates col broadcast size. For SAME padding, current
            // index could be in the padding area, and c*col_stride +
            // window_cols could be beyond the input tensor's boundary. In
            // such cases, change the starting index and reduce the
            // broadcast size.
            int cindex, csize;
            OP_REQUIRES_OK(context,
                           GetBroadcastSize(c, in_cols, window_cols, col_stride,
                                            pad_cols, &cindex, &csize));

            T divide_coeff(1.0 / (rsize * csize));
            int64 output_index =
                (b * out_backprop_rows + r) * out_backprop_cols + c;
            for (int64 r_dst = rindex; r_dst < rindex + rsize; ++r_dst) {
              for (int64 c_dst = cindex; c_dst < cindex + csize; ++c_dst) {
                int64 input_index = (b * in_rows + r_dst) * in_cols + c_dst;
                const T* output_offset =
                    out_backprop_ptr + output_index * out_backprop_depth;
                T* input_offset =
                    input_backprop_ptr + input_index * out_backprop_depth;
                for (int64 d = 0; d < out_backprop_depth; ++d) {
                  *input_offset += *output_offset * divide_coeff;
                  ++output_offset;
                  ++input_offset;
                }
              }
            }
          }
        }
      }
    };

    const DeviceBase::CpuWorkerThreads& worker_threads =
        *(context->device()->tensorflow_cpu_worker_threads());
    const int64 shard_cost =
        window_rows * window_cols * depth_window * in_rows * in_rows * in_cols;
    Shard(worker_threads.num_threads, worker_threads.workers,
          out_backprop_batch, shard_cost, shard);
  }

 private:
  std::vector<int32> ksize_;
  std::vector<int32> stride_;
  Padding padding_;
  TensorFormat data_format_;
};

#define REGISTER_CPU_KERNEL(T)                                 \
  REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")                  \
                              .Device(DEVICE_CPU)              \
                              .TypeConstraint<T>("T")          \
                              .HostMemory("orig_input_shape"), \
                          AvgPoolingGradOp<CPUDevice, T>);

TF_CALL_float(REGISTER_CPU_KERNEL);
TF_CALL_double(REGISTER_CPU_KERNEL);
TF_CALL_half(REGISTER_CPU_KERNEL);

#if GOOGLE_CUDA

// A CUDNN based AvgPoolingGrad implementation. It includes the padding as the
// candidates for the pooling operation.
template <class T>
class AvgPoolingGradOp<GPUDevice, T> : public OpKernel {
 public:
  typedef GPUDevice Device;

  explicit AvgPoolingGradOp(OpKernelConstruction* context)
      : OpKernel(context) {
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
    OP_REQUIRES(context, ksize_.size() == 4,
                errors::InvalidArgument("Sliding window ksize field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
    OP_REQUIRES(context, stride_.size() == 4,
                errors::InvalidArgument("Sliding window strides field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
    const int32 ksize_n = GetTensorDim(ksize_, data_format_, 'N');
    const int32 stride_n = GetTensorDim(stride_, data_format_, 'N');
    OP_REQUIRES(context, ksize_n == 1 && stride_n == 1,
                errors::Unimplemented(
                    "Pooling is not yet supported on the batch dimension."));
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& tensor_in_shape = context->input(0);
    const Tensor& out_backprop = context->input(1);
    // For avgpooling, tensor_in_shape should have 1 dimension, and 4 elements.
    OP_REQUIRES(
        context,
        tensor_in_shape.dims() == 1 && tensor_in_shape.NumElements() == 4,
        errors::InvalidArgument("orig_input_shape must be 1-dimensional and 4 "
                                "elements"));
    // For avgpooling, out_backprop should have 4 dimensions.
    OP_REQUIRES(context, out_backprop.dims() == 4,
                errors::InvalidArgument("out_backprop must be 4-dimensional"));

    TensorShape output_shape;
    auto shape_vec = tensor_in_shape.vec<int32>();
    for (int64 i = 0; i < tensor_in_shape.NumElements(); ++i) {
      output_shape.AddDim(shape_vec(i));
    }

    DnnPoolingGradOp<T>::Compute(context, se::dnn::PoolingMode::kAverage,
                                 ksize_, stride_, padding_, data_format_,
                                 nullptr, nullptr, out_backprop, output_shape,
                                 /*propagate_nans=*/false);
  }

 private:
  std::vector<int32> ksize_;
  std::vector<int32> stride_;
  Padding padding_;
  TensorFormat data_format_;
};

REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<double>("T")
                            .HostMemory("orig_input_shape")
                            .Label("cudnn"),
                        AvgPoolingGradOp<GPUDevice, double>);
REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<float>("T")
                            .HostMemory("orig_input_shape")
                            .Label("cudnn"),
                        AvgPoolingGradOp<GPUDevice, float>);
REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<Eigen::half>("T")
                            .HostMemory("orig_input_shape")
                            .Label("cudnn"),
                        AvgPoolingGradOp<GPUDevice, Eigen::half>);

// A custom GPU kernel based AvgPoolingGrad implementation. It includes the
// padding as the candidates for the pooling operation.
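//
// For NHWC inputs it launches the hand-written CUDA kernel
// RunAvePoolBackwardNHWC; for any other format (e.g. NCHW) it falls back to
// the cuDNN path via DnnPoolingGradOp, mirroring the specialization above.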
template <class T>
class AvgPoolingGradOpCustomGPUKernel : public OpKernel {
 public:
  typedef GPUDevice Device;

  explicit AvgPoolingGradOpCustomGPUKernel(OpKernelConstruction* context)
      : OpKernel(context) {
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
    OP_REQUIRES(context, ksize_.size() == 4,
                errors::InvalidArgument("Sliding window ksize field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
    OP_REQUIRES(context, stride_.size() == 4,
                errors::InvalidArgument("Sliding window strides field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
    const int32 ksize_n = GetTensorDim(ksize_, data_format_, 'N');
    const int32 stride_n = GetTensorDim(stride_, data_format_, 'N');
    OP_REQUIRES(context, ksize_n == 1 && stride_n == 1,
                errors::Unimplemented(
                    "Pooling is not yet supported on the batch dimension."));
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& tensor_in_shape = context->input(0);
    const Tensor& out_backprop = context->input(1);
    // For avgpooling, tensor_in_shape should have 1 dimension, and 4 elements.
    OP_REQUIRES(
        context,
        tensor_in_shape.dims() == 1 && tensor_in_shape.NumElements() == 4,
        errors::InvalidArgument("orig_input_shape must be 1-dimensional and 4 "
                                "elements"));
    // For avgpooling, out_backprop should have 4 dimensions.
    OP_REQUIRES(context, out_backprop.dims() == 4,
                errors::InvalidArgument("out_backprop must be 4-dimensional"));
    TensorShape output_shape;
    auto shape_vec = tensor_in_shape.vec<int32>();
    for (int64 i = 0; i < tensor_in_shape.NumElements(); ++i) {
      output_shape.AddDim(shape_vec(i));
    }

    if (data_format_ == FORMAT_NHWC) {
      const int64 out_backprop_batch = out_backprop.dim_size(0);
      const int64 out_backprop_rows = out_backprop.dim_size(1);
      const int64 out_backprop_cols = out_backprop.dim_size(2);
      const int64 out_backprop_depth = out_backprop.dim_size(3);

      const int64 in_rows = output_shape.dim_size(1);
      const int64 in_cols = output_shape.dim_size(2);
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context,
                     context->allocate_output(0, output_shape, &output));

      const int window_rows = ksize_[1];
      const int window_cols = ksize_[2];
      const int depth_window = ksize_[3];

      const int row_stride = stride_[1];
      const int col_stride = stride_[2];

      // We (will) use different code for spatial pooling and
      // non-spatial pooling.
      //
      // Spatial pooling is when depth_window = 1
      OP_REQUIRES(context, depth_window == 1,
                  errors::Unimplemented("Non-spatial pooling is not "
                                        "yet supported. Volunteers? :)"));

      int64 out_height, out_width, pad_rows, pad_cols;
      OP_REQUIRES_OK(context,
                     GetWindowedOutputSize(in_rows, window_rows, row_stride,
                                           padding_, &out_height, &pad_rows));
      OP_REQUIRES_OK(context,
                     GetWindowedOutputSize(in_cols, window_cols, col_stride,
                                           padding_, &out_width, &pad_cols));

      RunAvePoolBackwardNHWC<T>(out_backprop.flat<T>().data(),  // top_diff
                                out_backprop_batch,             // num
                                in_rows,                        // height
                                in_cols,                        // width
                                out_backprop_depth,             // channels
                                out_backprop_rows,              // pooled_height
                                out_backprop_cols,              // pooled_width
                                window_rows,                    // kernel_h
                                window_cols,                    // kernel_w
                                row_stride,                     // stride_h
                                col_stride,                     // stride_w
                                pad_rows,                       // pad_t
                                pad_cols,                       // pad_l
                                output->flat<T>().data(),       // bottom_diff
                                context->eigen_gpu_device());   // d
    } else {
      DnnPoolingGradOp<T>::Compute(context, se::dnn::PoolingMode::kAverage,
                                   ksize_, stride_, padding_, data_format_,
                                   nullptr, nullptr, out_backprop, output_shape,
                                   /*propagate_nans=*/false);
    }
  }

 private:
  std::vector<int32> ksize_;
  std::vector<int32> stride_;
  Padding padding_;
  TensorFormat data_format_;
};

REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<float>("T")
                            .HostMemory("orig_input_shape"),
                        AvgPoolingGradOpCustomGPUKernel<float>);
REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<double>("T")
                            .HostMemory("orig_input_shape"),
                        AvgPoolingGradOpCustomGPUKernel<double>);
REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<Eigen::half>("T")
                            .HostMemory("orig_input_shape"),
                        AvgPoolingGradOpCustomGPUKernel<Eigen::half>);

#endif  // GOOGLE_CUDA

}  // namespace tensorflow