/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define USE_EIGEN_TENSOR
#define EIGEN_USE_THREADS

#include <array>

#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/kernels/conv_2d.h"
#include "tensorflow/core/kernels/conv_3d.h"
#include "tensorflow/core/kernels/conv_ops_gpu.h"
#include "tensorflow/core/kernels/cudnn_pooling_gpu.h"

typedef Eigen::GpuDevice GPUDevice;

namespace tensorflow {

#if GOOGLE_CUDA

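// Runs the cudnn 3d pooling forward pass on `tensor_in` and writes the result
// to `output`. NHWC inputs are transposed into the channels-first (NCHW)
// layout used by the cudnn descriptors below.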
33 template <typename T>
Compute(OpKernelContext * context,se::dnn::PoolingMode pooling_mode,const std::array<int64,3> & window,const std::array<int64,3> & stride,const std::array<int64,3> & padding,TensorFormat data_format,const Tensor & tensor_in,Tensor * output)34 void DnnPooling3dOp<T>::Compute(OpKernelContext* context,
35 se::dnn::PoolingMode pooling_mode,
36 const std::array<int64, 3>& window,
37 const std::array<int64, 3>& stride,
38 const std::array<int64, 3>& padding,
39 TensorFormat data_format,
40 const Tensor& tensor_in, Tensor* output) {
41 const auto in_shape = tensor_in.shape();
42 const auto out_shape = output->shape();
43
44 const int64 in_batch = GetTensorDim(tensor_in, data_format, 'N');
45 const int64 in_features = GetTensorDim(tensor_in, data_format, 'C');
46
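  // The batch descriptors below use the channels-first (kBatchDepthYX) layout,
  // so NHWC inputs are first transposed into a temporary NCHW tensor.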
  Tensor transformed_input;
  if (data_format == FORMAT_NHWC) {
    OP_REQUIRES_OK(context, context->allocate_temp(
                                DataTypeToEnum<T>::value,
                                ShapeFromFormat(FORMAT_NCHW, tensor_in.shape(),
                                                data_format),
                                &transformed_input));
    functor::NHWCToNCHW<GPUDevice, T, 5>()(context->eigen_device<GPUDevice>(),
                                           tensor_in.tensor<T, 5>(),
                                           transformed_input.tensor<T, 5>());
  } else {
    transformed_input = tensor_in;
  }
  Tensor transformed_output;
  if (data_format == FORMAT_NHWC) {
    OP_REQUIRES_OK(context,
                   context->allocate_temp(
                       DataTypeToEnum<T>::value,
                       ShapeFromFormat(FORMAT_NCHW, out_shape, data_format),
                       &transformed_output));
  } else {
    transformed_output = *output;
  }

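  // Describe the pooling operation and the input/output tensors through
  // StreamExecutor's dnn descriptors.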
  se::dnn::PoolingDescriptor pooling_desc(3);
  pooling_desc.set_pooling_mode(pooling_mode);
  se::dnn::BatchDescriptor input_desc(3);
  input_desc.set_count(in_batch)
      .set_feature_map_count(in_features)
      .set_layout(se::dnn::DataLayout::kBatchDepthYX);
  se::dnn::BatchDescriptor output_desc(3);
  output_desc.set_count(in_batch)
      .set_feature_map_count(in_features)
      .set_layout(se::dnn::DataLayout::kBatchDepthYX);
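  // Record the window, stride, and padding for each spatial dimension, along
  // with the matching input and output spatial sizes.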
  for (size_t i = 0; i < window.size(); ++i) {
    const auto dim_i = static_cast<se::dnn::DimIndex>(i);
    pooling_desc.set_window(dim_i, window[i]);
    pooling_desc.set_stride(dim_i, stride[i]);
    pooling_desc.set_padding(dim_i, padding[i]);
    input_desc.set_spatial_dim(dim_i,
                               GetTensorDim(tensor_in, data_format, '2' - i));
    output_desc.set_spatial_dim(dim_i,
                                GetTensorDim(out_shape, data_format, '2' - i));
  }

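  // Wrap the (possibly transposed) buffers as StreamExecutor device memory.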
  auto input_data = AsDeviceMemory(transformed_input.template flat<T>().data(),
                                   transformed_input.template flat<T>().size());
  auto output_data =
      AsDeviceMemory(transformed_output.template flat<T>().data(),
                     transformed_output.template flat<T>().size());

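  // Enqueue the pooling forward pass on the op's GPU stream.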
  auto* stream = context->op_device_context()->stream();
  OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));

  bool status = stream
                    ->ThenPoolForward(pooling_desc, input_desc, input_data,
                                      output_desc, &output_data)
                    .ok();
  OP_REQUIRES(context, status,
              errors::Internal("cudnn PoolForward launch failed"));

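  // If the caller used NHWC, transpose the NCHW result back into `output`.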
  if (data_format == FORMAT_NHWC) {
    auto toConstTensor = [](const Tensor& x) -> const Tensor { return x; };
    functor::NCHWToNHWC<GPUDevice, T, 5>()(
        context->eigen_device<GPUDevice>(),
        toConstTensor(transformed_output).template tensor<T, 5>(),
        output->tensor<T, 5>());
  }
}

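// Runs the cudnn 3d pooling backward pass and writes the gradient with respect
// to the input into `input_backprop`. For max pooling, cudnn also needs the
// original input and output, so `tensor_in` and `tensor_out` must be provided
// in that case.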
template <typename T>
void DnnPooling3dGradOp<T>::Compute(
    OpKernelContext* context, se::dnn::PoolingMode pooling_mode,
    const std::array<int64, 3>& window, const std::array<int64, 3>& stride,
    const std::array<int64, 3>& padding,
    const std::array<int64, 3>& output_size, TensorFormat data_format,
    const Tensor& out_backprop, const TensorShape& tensor_in_shape,
    const Tensor* tensor_in, const Tensor* tensor_out,
    Tensor* input_backprop) {
  CHECK((pooling_mode != se::dnn::PoolingMode::kMaximum) ||
        (tensor_in && tensor_out))
      << "For MaxPoolGrad, both tensor_in and tensor_out need to be "
         "specified";

  const int64 in_batch = GetTensorDim(tensor_in_shape, data_format, 'N');
  const int64 in_features = GetTensorDim(tensor_in_shape, data_format, 'C');

  Tensor transformed_input;
  TensorShape transformed_input_shape;
  if (data_format == FORMAT_NHWC || tensor_in == nullptr) {
    transformed_input_shape =
        ShapeFromFormat(FORMAT_NCHW, tensor_in_shape, data_format);
    OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::value,
                                                   transformed_input_shape,
                                                   &transformed_input));
  } else {
    transformed_input = *tensor_in;
  }
  Tensor transformed_output;
  TensorShape transformed_output_shape;
  if (data_format == FORMAT_NHWC || tensor_out == nullptr) {
    transformed_output_shape =
        ShapeFromFormat(FORMAT_NCHW, out_backprop.shape(), data_format);
    OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::value,
                                                   transformed_output_shape,
                                                   &transformed_output));
  } else {
    transformed_output = *tensor_out;
  }
  Tensor transformed_input_backprop;
  if (data_format == FORMAT_NHWC) {
    OP_REQUIRES_OK(context,
                   context->allocate_temp(DataTypeToEnum<T>::value,
                                          transformed_input_shape,
                                          &transformed_input_backprop));
  } else {
    transformed_input_backprop = *input_backprop;
  }
  Tensor transformed_output_backprop;
  if (data_format == FORMAT_NHWC) {
    OP_REQUIRES_OK(context,
                   context->allocate_temp(DataTypeToEnum<T>::value,
                                          transformed_output_shape,
                                          &transformed_output_backprop));
  } else {
    transformed_output_backprop = out_backprop;
  }
  if (data_format == FORMAT_NHWC) {
    if (tensor_in != nullptr) {
      functor::NHWCToNCHW<GPUDevice, T, 5>()(
          context->eigen_device<GPUDevice>(), tensor_in->tensor<T, 5>(),
          transformed_input.tensor<T, 5>());
    }
    if (tensor_out != nullptr) {
      functor::NHWCToNCHW<GPUDevice, T, 5>()(
          context->eigen_device<GPUDevice>(), tensor_out->tensor<T, 5>(),
          transformed_output.tensor<T, 5>());
    }
    functor::NHWCToNCHW<GPUDevice, T, 5>()(
        context->eigen_device<GPUDevice>(), out_backprop.tensor<T, 5>(),
        transformed_output_backprop.tensor<T, 5>());
  }

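  // Describe the pooling operation and the original input/output tensors
  // through StreamExecutor's dnn descriptors.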
  se::dnn::PoolingDescriptor pooling_desc(3);
  pooling_desc.set_pooling_mode(pooling_mode);

  se::dnn::BatchDescriptor orig_output_desc(3);
  orig_output_desc.set_count(in_batch)
      .set_feature_map_count(in_features)
      .set_layout(se::dnn::DataLayout::kBatchDepthYX);

  se::dnn::BatchDescriptor orig_input_desc(3);
  orig_input_desc.set_count(in_batch)
      .set_feature_map_count(in_features)
      .set_layout(se::dnn::DataLayout::kBatchDepthYX);

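  // Record the window, stride, and padding for each spatial dimension, along
  // with the original input and output spatial sizes.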
  for (size_t i = 0; i < window.size(); ++i) {
    const auto dim_i = static_cast<se::dnn::DimIndex>(i);
    pooling_desc.set_window(dim_i, window[i]);
    pooling_desc.set_stride(dim_i, stride[i]);
    pooling_desc.set_padding(dim_i, padding[i]);
    orig_input_desc.set_spatial_dim(
        dim_i, GetTensorDim(tensor_in_shape, data_format, '2' - i));
    orig_output_desc.set_spatial_dim(dim_i, output_size[i]);
  }

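  // Wrap the buffers as StreamExecutor device memory.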
  auto orig_output_data =
      AsDeviceMemory(transformed_output.template flat<T>().data(),
                     transformed_output.template flat<T>().size());
  auto orig_input_data =
      AsDeviceMemory(transformed_input.template flat<T>().data(),
                     transformed_input.template flat<T>().size());
  auto output_backprop_data =
      AsDeviceMemory(transformed_output_backprop.template flat<T>().data(),
                     transformed_output_backprop.template flat<T>().size());
  auto input_backprop_data =
      AsDeviceMemory(transformed_input_backprop.template flat<T>().data(),
                     transformed_input_backprop.template flat<T>().size());

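  // Enqueue the pooling backward pass on the op's GPU stream.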
  auto* stream = context->op_device_context()->stream();
  OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));

  bool status =
      stream
          ->ThenPoolBackward(pooling_desc, orig_input_desc, orig_input_data,
                             orig_output_desc, orig_output_data,
                             output_backprop_data, &input_backprop_data)
          .ok();
  OP_REQUIRES(context, status,
              errors::Internal("cudnn PoolBackward launch failed"));

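  // If the caller used NHWC, transpose the NCHW gradient back into
  // `input_backprop`.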
  if (data_format == FORMAT_NHWC) {
    auto toConstTensor = [](const Tensor& x) -> const Tensor { return x; };
    functor::NCHWToNHWC<GPUDevice, T, 5>()(
        context->eigen_device<GPUDevice>(),
        toConstTensor(transformed_input_backprop).template tensor<T, 5>(),
        input_backprop->tensor<T, 5>());
  }
}

#define DEFINE_DNN_OPS(T)           \
  template class DnnPooling3dOp<T>; \
  template class DnnPooling3dGradOp<T>;
TF_CALL_float(DEFINE_DNN_OPS) TF_CALL_half(DEFINE_DNN_OPS)
#undef DEFINE_DNN_OPS

#endif  // GOOGLE_CUDA

}  // namespace tensorflow