/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/pooling_ops_3d.h"

#include <array>

#include "third_party/eigen3/Eigen/Core"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/kernel_shape_util.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_slice.h"
#include "tensorflow/core/kernels/eigen_pooling.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/util/tensor_format.h"
#include "tensorflow/core/util/work_sharder.h"

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#include "tensorflow/core/kernels/cudnn_pooling_gpu.h"
#include "tensorflow/core/kernels/pooling_ops_3d_gpu.h"
#endif

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

Pool3dParameters::Pool3dParameters(OpKernelContext* context,
                                   const std::vector<int32>& ksize,
                                   const std::vector<int32>& stride,
                                   Padding padding, TensorFormat data_format,
                                   const TensorShape& tensor_in_shape) {
  // For 3D pooling, tensor_in should have 5 dimensions.
  OP_REQUIRES(context, tensor_in_shape.dims() == 5,
              errors::InvalidArgument("tensor_in must be 5-dimensional"));

  this->data_format = data_format;
  depth = GetTensorDim(tensor_in_shape, data_format, 'C');
  tensor_in_planes = GetTensorDim(tensor_in_shape, data_format, '0');
  tensor_in_rows = GetTensorDim(tensor_in_shape, data_format, '1');
  tensor_in_cols = GetTensorDim(tensor_in_shape, data_format, '2');
  tensor_in_batch = GetTensorDim(tensor_in_shape, data_format, 'N');
  window_planes = GetTensorDim(ksize, data_format, '0');
  window_rows = GetTensorDim(ksize, data_format, '1');
  window_cols = GetTensorDim(ksize, data_format, '2');
  depth_window = GetTensorDim(ksize, data_format, 'C');
  plane_stride = GetTensorDim(stride, data_format, '0');
  row_stride = GetTensorDim(stride, data_format, '1');
  col_stride = GetTensorDim(stride, data_format, '2');
  depth_stride = GetTensorDim(stride, data_format, 'C');

  // We only support 3D pooling across plane/width/height. Depthwise
  // pooling is not supported.
  OP_REQUIRES(
      context, depth_window == 1 && depth_stride == 1,
      errors::Unimplemented(
          "Pooling3d only supports pooling across plane/width/height."));

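  // Note: for each spatial dimension, GetWindowedOutputSize computes the
  // output extent and the before-padding. Roughly, for VALID padding
  //   out = ceil((in - window + 1) / stride), pad = 0,
  // and for SAME padding
  //   out = ceil(in / stride), pad = max((out - 1) * stride + window - in, 0) / 2.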
  OP_REQUIRES_OK(context, GetWindowedOutputSize(tensor_in_planes, window_planes,
                                                plane_stride, padding,
                                                &out_plane, &pad_planes));
  OP_REQUIRES_OK(context,
                 GetWindowedOutputSize(tensor_in_rows, window_rows, row_stride,
                                       padding, &out_height, &pad_rows));
  OP_REQUIRES_OK(context,
                 GetWindowedOutputSize(tensor_in_cols, window_cols, col_stride,
                                       padding, &out_width, &pad_cols));
}

TensorShape Pool3dParameters::forward_output_shape() {
  return ShapeFromFormat(data_format, tensor_in_batch,
                         {{out_plane, out_height, out_width}}, depth);
}

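// LaunchPoolingOp dispatches the forward 3D pooling computation: the CPU
// specializations below evaluate Eigen's CuboidAvgPooling / CuboidMaxPooling
// expressions on the thread-pool device, while the GPU specializations
// (further down, under GOOGLE_CUDA || TENSORFLOW_USE_ROCM) delegate to
// DnnPooling3dOp.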
template <typename T>
struct LaunchPoolingOp<CPUDevice, T, AVG> {
  static void launch(OpKernelContext* context, const Tensor& tensor_in,
                     const std::array<int64, 3>& window,
                     const std::array<int64, 3>& stride,
                     const std::array<int64, 3>& padding,
                     TensorFormat data_format, Padding padding_type,
                     Tensor* output) {
    output->tensor<T, 5>().device(context->eigen_device<CPUDevice>()) =
        Eigen::CuboidAvgPooling(tensor_in.tensor<T, 5>(), window[0], window[1],
                                window[2], stride[0], stride[1], stride[2],
                                BrainPadding2EigenPadding(padding_type));
  }
};

template <typename T>
struct LaunchPoolingOp<CPUDevice, T, MAX> {
  static void launch(OpKernelContext* context, const Tensor& tensor_in,
                     const std::array<int64, 3>& window,
                     const std::array<int64, 3>& stride,
                     const std::array<int64, 3>& padding,
                     TensorFormat data_format, Padding padding_type,
                     Tensor* output) {
    output->tensor<T, 5>().device(context->eigen_device<CPUDevice>()) =
        Eigen::CuboidMaxPooling(tensor_in.tensor<T, 5>(), window[0], window[1],
                                window[2], stride[0], stride[1], stride[2],
                                BrainPadding2EigenPadding(padding_type));
  }
};

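// Pooling3DOp implements the forward MaxPool3D and AvgPool3D kernels. The
// PoolingType template parameter selects which LaunchPoolingOp specialization
// performs the actual computation.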
template <typename Device, typename T, PoolingType Type>
class Pooling3DOp : public UnaryOp<T> {
 public:
  explicit Pooling3DOp(OpKernelConstruction* context) : UnaryOp<T>(context) {
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    if (context->device_type() == DEVICE_CPU) {
      OP_REQUIRES(
          context, data_format_ == FORMAT_NHWC,
          errors::InvalidArgument("Default Pooling3DOp only supports NDHWC ",
                                  "on device type ",
                                  DeviceTypeString(context->device_type())));
    }
    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
    OP_REQUIRES(context, ksize_.size() == 5,
                errors::InvalidArgument("Sliding window ksize field must "
                                        "specify 5 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
    OP_REQUIRES(context, stride_.size() == 5,
                errors::InvalidArgument("Sliding window stride field must "
                                        "specify 5 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
    OP_REQUIRES(context,
                (GetTensorDim(ksize_, data_format_, 'N') == 1 &&
                 GetTensorDim(stride_, data_format_, 'N') == 1),
                errors::Unimplemented(
                    "Pooling is not yet supported on the batch dimension."));
    OP_REQUIRES(context,
                (GetTensorDim(ksize_, data_format_, 'C') == 1 &&
                 GetTensorDim(stride_, data_format_, 'C') == 1),
                errors::Unimplemented(
                    "Pooling is not yet supported on the depth dimension."));
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& tensor_in = context->input(0);

    OP_REQUIRES(context, tensor_in.dims() == 5,
                errors::InvalidArgument("tensor_in must be 5-dimensional"));
    const int64 depth = GetTensorDim(tensor_in, data_format_, 'C');
    const int64 in_batch = GetTensorDim(tensor_in, data_format_, 'N');

    // Dimension order for these arrays is: x, y, z.
    std::array<int64, 3> input_size{
        {GetTensorDim(tensor_in, data_format_, '2'),
         GetTensorDim(tensor_in, data_format_, '1'),
         GetTensorDim(tensor_in, data_format_, '0')}};
    std::array<int64, 3> window{{GetTensorDim(ksize_, data_format_, '2'),
                                 GetTensorDim(ksize_, data_format_, '1'),
                                 GetTensorDim(ksize_, data_format_, '0')}};
    std::array<int64, 3> stride{{GetTensorDim(stride_, data_format_, '2'),
                                 GetTensorDim(stride_, data_format_, '1'),
                                 GetTensorDim(stride_, data_format_, '0')}};
    std::array<int64, 3> padding, out;

    OP_REQUIRES_OK(context, Get3dOutputSize(input_size, window, stride,
                                            padding_, &out, &padding));

    TensorShape out_shape = ShapeFromFormat(data_format_, in_batch,
                                            {{out[2], out[1], out[0]}}, depth);
    Tensor* output;
    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
    if (out_shape.num_elements() == 0) return;
    LaunchPoolingOp<Device, T, Type>::launch(context, tensor_in, window, stride,
                                             padding, data_format_, padding_,
                                             output);
  }

 private:
  std::vector<int32> ksize_;
  std::vector<int32> stride_;
  Padding padding_;
  TensorFormat data_format_;
};

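// CPU MaxPool3D gradient. For every output position, take the input slice
// that produced it, broadcast the pooled maximum over that slice, and route
// the incoming gradient to the input element(s) matching the maximum.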
template <typename T>
struct LaunchMaxPooling3dGradOp<CPUDevice, T> {
  static void launch(OpKernelContext* context, const Tensor& tensor_in,
                     const Tensor& tensor_out, const Tensor& out_backprop,
                     const std::array<int64, 3>& window,
                     const std::array<int64, 3>& stride,
                     const std::array<int64, 3>& out,
                     const std::array<int64, 3>& padding,
                     TensorFormat data_format, Tensor* output) {
    output->flat<T>().setZero();
    for (int64 p = 0; p < out_backprop.dim_size(3); ++p) {
      // Calculate broadcast size for planes/rows/cols. For SAME padding,
      // current index could be in the padding area, and
      //   p * stride_planes + window_planes
      // could be beyond the input tensor's boundary. In such cases, change
      // the starting index and reduce the broadcast size.
      //
      // The same procedure is repeated for every spatial dimension in the
      // nested loops below.
      int pindex, psize;
      std::array<int64, 3> input_size{{tensor_in.dim_size(3),
                                       tensor_in.dim_size(2),
                                       tensor_in.dim_size(1)}};
      OP_REQUIRES_OK(context,
                     GetBroadcastSize(p, input_size[0], window[0], stride[0],
                                      padding[0], &pindex, &psize));
      for (int64 r = 0; r < out_backprop.dim_size(2); ++r) {
        int rindex, rsize;
        OP_REQUIRES_OK(context,
                       GetBroadcastSize(r, input_size[1], window[1], stride[1],
                                        padding[1], &rindex, &rsize));
        for (int64 c = 0; c < out_backprop.dim_size(1); ++c) {
          int cindex, csize;
          OP_REQUIRES_OK(
              context, GetBroadcastSize(c, input_size[2], window[2], stride[2],
                                        padding[2], &cindex, &csize));
          TensorSlice src{{0, -1}, {c, 1}, {r, 1}, {p, 1}, {0, -1}};
          TensorSlice dst{{0, -1},
                          {cindex, csize},
                          {rindex, rsize},
                          {pindex, psize},
                          {0, -1}};
          Eigen::DSizes<Eigen::DenseIndex, 5> src_indices;
          Eigen::DSizes<Eigen::DenseIndex, 5> src_sizes;
          Eigen::DSizes<Eigen::DenseIndex, 5> dst_indices;
          Eigen::DSizes<Eigen::DenseIndex, 5> dst_sizes;
          src.FillIndicesAndSizes<5>(out_backprop.shape(), &src_indices,
                                     &src_sizes);
          dst.FillIndicesAndSizes<5>(tensor_in.shape(), &dst_indices,
                                     &dst_sizes);

#if !defined(EIGEN_HAS_INDEX_LIST)
          Eigen::array<int, 5> bcast = {1, csize, rsize, psize, 1};
#else
          Eigen::IndexList<Eigen::type2index<1>, int, int, int,
                           Eigen::type2index<1>>
              bcast;
          bcast.set(1, csize);
          bcast.set(2, rsize);
          bcast.set(3, psize);
#endif

          // Slice from tensor_in.
          Eigen::Tensor<T, 5, Eigen::RowMajor> tensor_in_slice(dst_sizes);
          tensor_in_slice.device(context->eigen_cpu_device()) =
              tensor_in.tensor<T, 5>().slice(dst_indices, dst_sizes);

          // Slice from tensor_out.
          Eigen::Tensor<T, 5, Eigen::RowMajor> tensor_out_slice(src_sizes);
          tensor_out_slice.device(context->eigen_cpu_device()) =
              tensor_out.tensor<T, 5>().slice(src_indices, src_sizes);

          // Backprop slice.
          Eigen::Tensor<T, 5, Eigen::RowMajor> out_backprop_slice(src_sizes);
          out_backprop_slice.device(context->eigen_cpu_device()) =
              out_backprop.tensor<T, 5>().slice(src_indices, src_sizes);

          // The true backprop slice: if an element is the max, choose
          // the backprop slice; otherwise set to 0.
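          // Note: "is the max" is decided with an absolute tolerance of 1e-5,
          // so if several elements of the window tie with the pooled maximum,
          // each of them receives the broadcast gradient.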
          Eigen::Tensor<T, 5, Eigen::RowMajor> select_slice(dst_sizes);
          Eigen::Tensor<T, 5, Eigen::RowMajor> mat0(dst_sizes);
          mat0.setZero();
          select_slice =
              ((tensor_in_slice - tensor_out_slice.broadcast(bcast)).abs() <
               tensor_in_slice.constant(1e-5))
                  .select(out_backprop_slice.broadcast(bcast), mat0);

          output->tensor<T, 5>()
              .slice(dst_indices, dst_sizes)
              .device(context->eigen_cpu_device()) += select_slice;
        }
      }
    }
  }
};

template <class Device, class T>
class MaxPooling3dGradOp : public OpKernel {
 public:
  explicit MaxPooling3dGradOp(OpKernelConstruction* context)
      : OpKernel(context) {
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    if (context->device_type() == DEVICE_CPU) {
      OP_REQUIRES(
          context, data_format_ == FORMAT_NHWC,
          errors::InvalidArgument(
              "Default MaxPooling3dGradOp only supports NDHWC ",
              "on device type ", DeviceTypeString(context->device_type())));
    }
    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
    OP_REQUIRES(context, ksize_.size() == 5,
                errors::InvalidArgument("Sliding window ksize field must "
                                        "specify 5 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
    OP_REQUIRES(context, stride_.size() == 5,
                errors::InvalidArgument("Sliding window stride field must "
                                        "specify 5 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
    OP_REQUIRES(context,
                (GetTensorDim(ksize_, data_format_, 'N') == 1 &&
                 GetTensorDim(stride_, data_format_, 'N') == 1),
                errors::Unimplemented(
                    "Pooling is not yet supported on the batch dimension."));
    OP_REQUIRES(context,
                (GetTensorDim(ksize_, data_format_, 'C') == 1 &&
                 GetTensorDim(stride_, data_format_, 'C') == 1),
                errors::Unimplemented(
                    "Pooling is not yet supported on the depth dimension."));
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& tensor_in = context->input(0);
    const Tensor& tensor_out = context->input(1);
    const Tensor& out_backprop = context->input(2);
    OP_REQUIRES(context, tensor_in.dims() == 5,
                errors::InvalidArgument("tensor_in must be 5-dimensional"));
    OP_REQUIRES(context, tensor_out.dims() == 5,
                errors::InvalidArgument("tensor_out must be 5-dimensional"));
    OP_REQUIRES(context, out_backprop.dims() == 5,
                errors::InvalidArgument("out_backprop must be 5-dimensional"));

    const TensorShape& output_shape = tensor_in.shape();
    Tensor* input_backprop;
    OP_REQUIRES_OK(context,
                   context->allocate_output(0, output_shape, &input_backprop));
    std::array<int64, 3> input_size{
        {GetTensorDim(output_shape, data_format_, '2'),
         GetTensorDim(output_shape, data_format_, '1'),
         GetTensorDim(output_shape, data_format_, '0')}};
    std::array<int64, 3> window{{GetTensorDim(ksize_, data_format_, '2'),
                                 GetTensorDim(ksize_, data_format_, '1'),
                                 GetTensorDim(ksize_, data_format_, '0')}};
    std::array<int64, 3> stride{{GetTensorDim(stride_, data_format_, '2'),
                                 GetTensorDim(stride_, data_format_, '1'),
                                 GetTensorDim(stride_, data_format_, '0')}};
    std::array<int64, 3> out, padding;

    OP_REQUIRES_OK(context, Get3dOutputSize(input_size, window, stride,
                                            padding_, &out, &padding));
    LaunchMaxPooling3dGradOp<Device, T>::launch(
        context, tensor_in, tensor_out, out_backprop, window, stride, out,
        padding, data_format_, input_backprop);
  }

 private:
  std::vector<int32> ksize_;
  std::vector<int32> stride_;
  Padding padding_;
  TensorFormat data_format_;
};

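// CPU AvgPool3D gradient. Each incoming gradient value is divided by the
// volume of the (possibly clipped) pooling patch it came from and then
// broadcast uniformly to every input position inside that patch.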
template <typename T>
struct LaunchAvgPooling3dGradOp<CPUDevice, T> {
  static void launch(OpKernelContext* context,
                     const TensorShape& tensor_in_shape,
                     const Tensor& out_backprop,
                     const std::array<int64, 3>& window,
                     const std::array<int64, 3>& stride,
                     const std::array<int64, 3>& output_shape,
                     const std::array<int64, 3>& padding,
                     TensorFormat data_format, Tensor* output) {
    output->flat<T>().setZero();
    std::array<int64, 3> input_size = {{tensor_in_shape.dim_size(3),
                                        tensor_in_shape.dim_size(2),
                                        tensor_in_shape.dim_size(1)}};
    for (int64 p = 0; p < out_backprop.dim_size(3); ++p) {
      // Calculate broadcast size for planes/rows/cols. For SAME padding,
      // current index could be in the padding area, and
      //   p * stride_planes + window_planes
      // could be beyond the input tensor's boundary. In such cases, change
      // the starting index and reduce the broadcast size.
      //
      // The same procedure is repeated for every spatial dimension in the
      // nested loops below.
      int pindex, psize;
      OP_REQUIRES_OK(context,
                     GetBroadcastSize(p, input_size[0], window[0], stride[0],
                                      padding[0], &pindex, &psize));
      for (int64 r = 0; r < out_backprop.dim_size(2); ++r) {
        int rindex, rsize;
        OP_REQUIRES_OK(context,
                       GetBroadcastSize(r, input_size[1], window[1], stride[1],
                                        padding[1], &rindex, &rsize));
        for (int64 c = 0; c < out_backprop.dim_size(1); ++c) {
          int cindex, csize;
          OP_REQUIRES_OK(
              context, GetBroadcastSize(c, input_size[2], window[2], stride[2],
                                        padding[2], &cindex, &csize));
          TensorSlice src{{0, -1}, {c, 1}, {r, 1}, {p, 1}, {0, -1}};
          TensorSlice dst{{0, -1},
                          {cindex, csize},
                          {rindex, rsize},
                          {pindex, psize},
                          {0, -1}};
          Eigen::DSizes<Eigen::DenseIndex, 5> src_indices;
          Eigen::DSizes<Eigen::DenseIndex, 5> src_sizes;
          Eigen::DSizes<Eigen::DenseIndex, 5> dst_indices;
          Eigen::DSizes<Eigen::DenseIndex, 5> dst_sizes;
          src.FillIndicesAndSizes<5>(out_backprop.shape(), &src_indices,
                                     &src_sizes);
          dst.FillIndicesAndSizes<5>(tensor_in_shape, &dst_indices, &dst_sizes);
#if !defined(EIGEN_HAS_INDEX_LIST)
          Eigen::array<int, 5> bcast = {1, csize, rsize, psize, 1};
#else
          Eigen::IndexList<Eigen::type2index<1>, int, int, int,
                           Eigen::type2index<1>>
              bcast;
          bcast.set(1, csize);
          bcast.set(2, rsize);
          bcast.set(3, psize);
#endif
          Eigen::Tensor<T, 5, Eigen::RowMajor> slices(src_sizes);
          slices.device(context->eigen_cpu_device()) =
              out_backprop.tensor<T, 5>().slice(src_indices, src_sizes);
          // Divide by the size of the actual patch (psize * rsize * csize).
          float divide_size = rsize * csize * psize * 1.0f;
          slices *= slices.constant(1.0f / divide_size);

          output->tensor<T, 5>()
              .slice(dst_indices, dst_sizes)
              .device(context->eigen_cpu_device()) += slices.broadcast(bcast);
        }
      }
    }
  }
};

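// AvgPooling3dGradOp computes the gradient of AvgPool3D. Unlike the max-pool
// gradient it does not need the original input values, only the input shape,
// so the op's first input ("orig_input_shape") is a 1-D shape tensor kept in
// host memory (see the kernel registration below).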
template <class Device, class T>
class AvgPooling3dGradOp : public OpKernel {
 public:
  explicit AvgPooling3dGradOp(OpKernelConstruction* context)
      : OpKernel(context) {
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    if (context->device_type() == DEVICE_CPU) {
      OP_REQUIRES(
          context, data_format_ == FORMAT_NHWC,
          errors::InvalidArgument(
              "Default AvgPooling3dGradOp only supports NDHWC ",
              "on device type ", DeviceTypeString(context->device_type())));
    }
    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
    OP_REQUIRES(context, ksize_.size() == 5,
                errors::InvalidArgument("Sliding window ksize field must "
                                        "specify 5 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
    OP_REQUIRES(context, stride_.size() == 5,
                errors::InvalidArgument("Sliding window stride field must "
                                        "specify 5 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
    OP_REQUIRES(context,
                (GetTensorDim(ksize_, data_format_, 'N') == 1 &&
                 GetTensorDim(stride_, data_format_, 'N') == 1),
                errors::Unimplemented(
                    "Pooling is not yet supported on the batch dimension."));
    OP_REQUIRES(context,
                (GetTensorDim(ksize_, data_format_, 'C') == 1 &&
                 GetTensorDim(stride_, data_format_, 'C') == 1),
                errors::Unimplemented(
                    "Pooling is not yet supported on the depth dimension."));
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& tensor_in_shape = context->input(0);
    const Tensor& out_backprop = context->input(1);
    OP_REQUIRES(
        context,
        tensor_in_shape.dims() == 1 && tensor_in_shape.NumElements() == 5,
        errors::InvalidArgument("tensor_in must be 1-dimensional and 5 "
                                "elements"));
    OP_REQUIRES(context, out_backprop.dims() == 5,
                errors::InvalidArgument("out_backprop must be 5-dimensional"));

    TensorShape output_shape;
    auto shape_vec = tensor_in_shape.vec<int32>();
    for (int64 i = 0; i < tensor_in_shape.NumElements(); ++i) {
      output_shape.AddDim(shape_vec(i));
    }

    Tensor* output;
    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));

    // Dimension order for these arrays is x, y, z.
    std::array<int64, 3> input_size{
        {GetTensorDim(output_shape, data_format_, '2'),
         GetTensorDim(output_shape, data_format_, '1'),
         GetTensorDim(output_shape, data_format_, '0')}};
    std::array<int64, 3> window{{GetTensorDim(ksize_, data_format_, '2'),
                                 GetTensorDim(ksize_, data_format_, '1'),
                                 GetTensorDim(ksize_, data_format_, '0')}};
    std::array<int64, 3> stride{{GetTensorDim(stride_, data_format_, '2'),
                                 GetTensorDim(stride_, data_format_, '1'),
                                 GetTensorDim(stride_, data_format_, '0')}};
    std::array<int64, 3> padding, out;

    OP_REQUIRES_OK(context, Get3dOutputSize(input_size, window, stride,
                                            padding_, &out, &padding));

    LaunchAvgPooling3dGradOp<Device, T>::launch(
        context, output_shape, out_backprop, window, stride, out, padding,
        data_format_, output);
  }

 private:
  std::vector<int32> ksize_;
  std::vector<int32> stride_;
  Padding padding_;
  TensorFormat data_format_;
};

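// CPU second-order gradient for MaxPool3D (MaxPool3DGradGrad). For each
// pooled output location, scan its input window for the first element equal
// to the pooled maximum and forward the corresponding element of
// tensor_top_diff; the work is sharded over the batch dimension.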
template <typename T>
struct LaunchMaxPooling3dGradGradOp<CPUDevice, T> {
  static void launch(OpKernelContext* context, const Pool3dParameters& params,
                     const Tensor& tensor_in, const Tensor& tensor_out,
                     const Tensor& tensor_top_diff,
                     Tensor* tensor_bottom_diff) {
    OP_REQUIRES(
        context, params.data_format == FORMAT_NHWC,
        errors::InvalidArgument("Default MaxPooling3dGradGradOp only supports ",
                                "NDHWC on CPU device type"));

    typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        ConstEigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        EigenMatrixMap;

    ConstEigenMatrixMap in_mat(tensor_in.flat<T>().data(), params.depth,
                               params.tensor_in_planes * params.tensor_in_cols *
                                   params.tensor_in_rows *
                                   params.tensor_in_batch);
    ConstEigenMatrixMap out_mat(tensor_out.flat<T>().data(), params.depth,
                                params.out_plane * params.out_width *
                                    params.out_height * params.tensor_in_batch);
    ConstEigenMatrixMap top_diff_mat(
        tensor_top_diff.flat<T>().data(), params.depth,
        params.tensor_in_planes * params.tensor_in_cols *
            params.tensor_in_rows * params.tensor_in_batch);
    EigenMatrixMap bottom_diff_mat(
        tensor_bottom_diff->flat<T>().data(), params.depth,
        params.out_plane * params.out_width * params.out_height *
            params.tensor_in_batch);

    const DeviceBase::CpuWorkerThreads& worker_threads =
        *(context->device()->tensorflow_cpu_worker_threads());

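    // The shard below processes a contiguous range [start, limit) of batch
    // entries; Shard() splits the batch across the CPU worker threads using
    // the per-batch cost estimate computed further down (shard_cost).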
    auto shard = [&params, &in_mat, &out_mat, &top_diff_mat, &bottom_diff_mat](
                     int64 start, int64 limit) {
      const int32 depth = params.depth;
      const int32 in_planes = params.tensor_in_planes;
      const int32 in_rows = params.tensor_in_rows;
      const int32 in_cols = params.tensor_in_cols;
      const int32 pad_planes = params.pad_planes;
      const int32 pad_rows = params.pad_rows;
      const int32 pad_cols = params.pad_cols;
      const int32 window_planes = params.window_planes;
      const int32 window_rows = params.window_rows;
      const int32 window_cols = params.window_cols;
      const int32 plane_stride = params.plane_stride;
      const int32 row_stride = params.row_stride;
      const int32 col_stride = params.col_stride;
      const int32 out_plane = params.out_plane;
      const int32 out_height = params.out_height;
      const int32 out_width = params.out_width;

      {
        // Initializes the output grad backprop tensor with 0.
        const int32 output_image_size =
            out_plane * out_height * out_width * params.depth;
        EigenMatrixMap bottom_diff_shard(
            bottom_diff_mat.data() + start * output_image_size, 1,
            (limit - start) * output_image_size);
        bottom_diff_shard.setZero();
      }

      for (int b = start; b < limit; ++b) {
        for (int pp = 0; pp < out_plane; ++pp) {
          for (int ph = 0; ph < out_height; ++ph) {
            for (int pw = 0; pw < out_width; ++pw) {
              // (p_start, p_end) * (h_start, h_end) * (w_start, w_end) is the
              // range that the input vector projects to.
              int p_start = pp * plane_stride - pad_planes;
              const int p_end = std::min(p_start + window_planes, in_planes);
              int h_start = ph * row_stride - pad_rows;
              const int h_end = std::min(h_start + window_rows, in_rows);
              int w_start = pw * col_stride - pad_cols;
              const int w_end = std::min(w_start + window_cols, in_cols);
              p_start = std::max(p_start, 0);
              h_start = std::max(h_start, 0);
              w_start = std::max(w_start, 0);
              const int out_index =
                  ((b * out_plane + pp) * out_height + ph) * out_width + pw;
              // Find value corresponding to the input maximum in top_diff.
              for (int d = 0; d < depth; ++d) {
                const T& output_ref = out_mat.coeffRef(d, out_index);
                bool should_stop = false;
                for (int p = p_start; p < p_end && !should_stop; ++p) {
                  for (int h = h_start; h < h_end && !should_stop; ++h) {
                    for (int w = w_start; w < w_end && !should_stop; ++w) {
                      const int in_index =
                          ((b * in_planes + p) * in_rows + h) * in_cols + w;
                      const T& input_ref = in_mat.coeffRef(d, in_index);
                      if (output_ref == input_ref) {
                        T& bottom_diff_ref =
                            bottom_diff_mat.coeffRef(d, out_index);
                        bottom_diff_ref = top_diff_mat.coeffRef(d, in_index);
                        should_stop = true;
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    };
    const int64 shard_cost =
        params.out_plane * params.out_height * params.out_width * params.depth *
        params.window_planes * params.window_rows * params.window_cols;
    Shard(worker_threads.num_threads, worker_threads.workers,
          params.tensor_in_batch, shard_cost, shard);
  }
};

template <class Device, class T>
class MaxPooling3dGradGradOp : public OpKernel {
 public:
  explicit MaxPooling3dGradGradOp(OpKernelConstruction* context)
      : OpKernel(context) {
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
    OP_REQUIRES(context, ksize_.size() == 5,
                errors::InvalidArgument("Sliding window ksize field must "
                                        "specify 5 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
    OP_REQUIRES(context, stride_.size() == 5,
                errors::InvalidArgument("Sliding window strides field must "
                                        "specify 5 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
    OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
                errors::Unimplemented(
                    "Pooling is not yet supported on the batch dimension."));
    const int32 ksize_c = GetTensorDim(ksize_, data_format_, 'C');
    const int32 stride_c = GetTensorDim(stride_, data_format_, 'C');
    OP_REQUIRES(context, ksize_c == 1 && stride_c == 1,
                errors::Unimplemented("MaxPooling3dGradGrad is not yet "
                                      "supported on the depth dimension."));
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& tensor_in = context->input(0);
    const Tensor& tensor_out = context->input(1);
    const Tensor& out_grad_backprop = context->input(2);

    // For maxpooling3d, tensor_in should have 5 dimensions.
    OP_REQUIRES(context, tensor_in.dims() == 5,
                errors::InvalidArgument("tensor_in must be 5-dimensional"));
    OP_REQUIRES(context, tensor_out.dims() == 5,
                errors::InvalidArgument("tensor_out must be 5-dimensional"));
    // For maxpooling3d, out_grad_backprop should have 5 dimensions.
    OP_REQUIRES(
        context, out_grad_backprop.dims() == 5,
        errors::InvalidArgument("out_grad_backprop must be 5-dimensional"));

    Pool3dParameters params{context, ksize_, stride_,
                            padding_, data_format_, tensor_in.shape()};

    Tensor* output = nullptr;
    OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
                                {2}, 0, tensor_out.shape(), &output));

    LaunchMaxPooling3dGradGradOp<Device, T>::launch(
        context, params, tensor_in, tensor_out, out_grad_backprop, output);
  }

 private:
  std::vector<int32> ksize_;
  std::vector<int32> stride_;
  Padding padding_;
  TensorFormat data_format_;
};

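// REGISTER_KERNELS(D, T) registers the forward and backward 3D pooling
// kernels (MaxPool3D, MaxPool3DGrad, MaxPool3DGradGrad, AvgPool3D,
// AvgPool3DGrad) for device D and element type T. AvgPool3DGrad pins its
// orig_input_shape input to host memory because the shape is read on the CPU.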
#define REGISTER_KERNELS(D, T)                                             \
  REGISTER_KERNEL_BUILDER(                                                 \
      Name("MaxPool3D").Device(DEVICE_##D).TypeConstraint<T>("T"),         \
      Pooling3DOp<D##Device, T, MAX>);                                     \
  REGISTER_KERNEL_BUILDER(Name("MaxPool3DGrad")                            \
                              .Device(DEVICE_##D)                          \
                              .TypeConstraint<T>("T")                      \
                              .TypeConstraint<T>("TInput"),                \
                          MaxPooling3dGradOp<D##Device, T>);               \
  REGISTER_KERNEL_BUILDER(                                                 \
      Name("MaxPool3DGradGrad").Device(DEVICE_##D).TypeConstraint<T>("T"), \
      MaxPooling3dGradGradOp<D##Device, T>);                               \
  REGISTER_KERNEL_BUILDER(                                                 \
      Name("AvgPool3D").Device(DEVICE_##D).TypeConstraint<T>("T"),         \
      Pooling3DOp<D##Device, T, AVG>);                                     \
  REGISTER_KERNEL_BUILDER(Name("AvgPool3DGrad")                            \
                              .Device(DEVICE_##D)                          \
                              .TypeConstraint<T>("T")                      \
                              .HostMemory("orig_input_shape"),             \
                          AvgPooling3dGradOp<D##Device, T>);

#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T)
TF_CALL_float(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS

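// On GPU the forward and backward kernels delegate to the StreamExecutor DNN
// pooling wrappers (DnnPooling3dOp / DnnPooling3dGradOp), backed by cuDNN on
// CUDA builds and the equivalent MIOpen path on ROCm builds.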
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

template <typename T>
struct LaunchPoolingOp<GPUDevice, T, AVG> {
  static void launch(OpKernelContext* context, const Tensor& tensor_in,
                     const std::array<int64, 3>& window,
                     const std::array<int64, 3>& stride,
                     const std::array<int64, 3>& padding,
                     TensorFormat data_format, Padding padding_type,
                     Tensor* output) {
    DnnPooling3dOp<T>::Compute(context, se::dnn::PoolingMode::kAverage, window,
                               stride, padding, data_format, tensor_in, output);
  }
};

template <typename T>
struct LaunchPoolingOp<GPUDevice, T, MAX> {
  static void launch(OpKernelContext* context, const Tensor& tensor_in,
                     const std::array<int64, 3>& window,
                     const std::array<int64, 3>& stride,
                     const std::array<int64, 3>& padding,
                     TensorFormat data_format, Padding padding_type,
                     Tensor* output) {
    DnnPooling3dOp<T>::Compute(context, se::dnn::PoolingMode::kMaximum, window,
                               stride, padding, data_format, tensor_in, output);
  }
};

template <typename T>
struct LaunchMaxPooling3dGradOp<GPUDevice, T> {
  static void launch(OpKernelContext* context, const Tensor& tensor_in,
                     const Tensor& tensor_out, const Tensor& out_backprop,
                     const std::array<int64, 3>& window,
                     const std::array<int64, 3>& stride,
                     const std::array<int64, 3>& out,
                     const std::array<int64, 3>& padding,
                     TensorFormat data_format, Tensor* input_backprop) {
    const TensorShape output_shape = tensor_in.shape();
    DnnPooling3dGradOp<T>::Compute(context, se::dnn::PoolingMode::kMaximum,
                                   window, stride, padding, out, data_format,
                                   out_backprop, output_shape, &tensor_in,
                                   &tensor_out, input_backprop);
  }
};

template <typename T>
struct LaunchAvgPooling3dGradOp<GPUDevice, T> {
  static void launch(OpKernelContext* context,
                     const TensorShape& tensor_in_shape,
                     const Tensor& out_backprop,
                     const std::array<int64, 3>& window,
                     const std::array<int64, 3>& stride,
                     const std::array<int64, 3>& out,
                     const std::array<int64, 3>& padding,
                     TensorFormat data_format, Tensor* output) {
    DnnPooling3dGradOp<T>::Compute(
        context, se::dnn::PoolingMode::kAverage, window, stride, padding, out,
        data_format, out_backprop, tensor_in_shape, nullptr, nullptr, output);
  }
};

template <typename T>
struct LaunchMaxPooling3dGradGradOp<GPUDevice, T> {
  static void launch(OpKernelContext* context, const Pool3dParameters& params,
                     const Tensor& tensor_in, const Tensor& tensor_out,
                     const Tensor& tensor_top_diff,
                     Tensor* tensor_bottom_diff) {
    bool status = functor::MaxPool3dGradBackward<T>()(
        params.data_format, tensor_in.flat<T>().data(),
        tensor_out.flat<T>().data(), params.tensor_in_batch, params.out_plane,
        params.out_height, params.out_width, params.depth,
        params.tensor_in_planes, params.tensor_in_rows, params.tensor_in_cols,
        params.window_planes, params.window_rows, params.window_cols,
        params.plane_stride, params.row_stride, params.col_stride,
        params.pad_planes, params.pad_rows, params.pad_cols,
        tensor_top_diff.flat<T>().data(), tensor_bottom_diff->flat<T>().data(),
        context->eigen_gpu_device());
    if (!status) {
      context->SetStatus(
          errors::Internal("Failed launching MaxPool3dGradBackward"));
    }
  }
};

#define REGISTER_GPU_KERNELS(T) REGISTER_KERNELS(GPU, T)
TF_CALL_float(REGISTER_GPU_KERNELS) TF_CALL_half(REGISTER_GPU_KERNELS)
#undef REGISTER_GPU_KERNELS

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#undef REGISTER_KERNELS

}  // namespace tensorflow