/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#define EIGEN_USE_THREADS

#include <algorithm>
#include <cmath>
#include <random>
#include <vector>

#include "tensorflow/core/kernels/fractional_pool_common.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/util/guarded_philox_random.h"

namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;

template <typename T>
class FractionalAvgPoolOp : public OpKernel {
 public:
  explicit FractionalAvgPoolOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("pooling_ratio", &pooling_ratio_));
    OP_REQUIRES_OK(context, context->GetAttr("pseudo_random", &pseudo_random_));
    OP_REQUIRES_OK(context, context->GetAttr("overlapping", &overlapping_));
    OP_REQUIRES(context, pooling_ratio_.size() == 4,
                errors::InvalidArgument(
                    "pooling_ratio field must specify 4 dimensions"));
    // Pooling is only supported along the row and col dimensions, so both the
    // batch and channel pooling ratios must be 1.
    OP_REQUIRES(
        context, pooling_ratio_[0] == 1 && pooling_ratio_[3] == 1,
        errors::Unimplemented("Fractional average pooling is not yet "
                              "supported on the batch nor channel dimension."));
    OP_REQUIRES_OK(context, context->GetAttr("deterministic", &deterministic_));
    OP_REQUIRES_OK(context, context->GetAttr("seed", &seed_));
    OP_REQUIRES_OK(context, context->GetAttr("seed2", &seed2_));
    if (deterministic_) {
      // If both seeds are not set when deterministic_ is true, force set
      // seeds.
      if ((seed_ == 0) && (seed2_ == 0)) {
        seed_ = random::New64();
        seed2_ = random::New64();
      }
    } else {
      OP_REQUIRES(
          context, (seed_ == 0) && (seed2_ == 0),
          errors::InvalidArgument(
              "Both seed and seed2 should be 0 if deterministic is false."));
    }
  }

  void Compute(OpKernelContext* context) override {
    typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        ConstEigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        EigenMatrixMap;

    constexpr int tensor_in_and_out_dims = 4;

    const Tensor& tensor_in = context->input(0);
    OP_REQUIRES(context, tensor_in.dims() == tensor_in_and_out_dims,
                errors::InvalidArgument("tensor_in must be 4-dimensional"));

    std::vector<int> input_size(tensor_in_and_out_dims);
    std::vector<int> output_size(tensor_in_and_out_dims);
    for (int i = 0; i < tensor_in_and_out_dims; ++i) {
      input_size[i] = tensor_in.dim_size(i);
      OP_REQUIRES(
          context, pooling_ratio_[i] <= input_size[i],
          errors::InvalidArgument(
              "Pooling ratio cannot be bigger than input tensor dim size."));
    }
    // Output size.
    for (int i = 0; i < tensor_in_and_out_dims; ++i) {
      output_size[i] =
          static_cast<int>(std::floor(input_size[i] / pooling_ratio_[i]));
      DCHECK_GT(output_size[i], 0);
    }

    // Generate pooling sequence.
    std::vector<int64> row_cum_seq;
    std::vector<int64> col_cum_seq;
    GuardedPhiloxRandom generator;
    generator.Init(seed_, seed2_);
    row_cum_seq = GeneratePoolingSequence(input_size[1], output_size[1],
                                          &generator, pseudo_random_);
    col_cum_seq = GeneratePoolingSequence(input_size[2], output_size[2],
                                          &generator, pseudo_random_);

    // Prepare output.
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(
                                0,
                                TensorShape({output_size[0], output_size[1],
                                             output_size[2], output_size[3]}),
                                &output_tensor));
    Tensor* output_row_seq_tensor = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(
                       1, TensorShape({static_cast<int64>(row_cum_seq.size())}),
                       &output_row_seq_tensor));
    Tensor* output_col_seq_tensor = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(
                       2, TensorShape({static_cast<int64>(col_cum_seq.size())}),
                       &output_col_seq_tensor));

    ConstEigenMatrixMap in_mat(tensor_in.flat<T>().data(), input_size[3],
                               input_size[2] * input_size[1] * input_size[0]);

    EigenMatrixMap out_mat(output_tensor->flat<T>().data(), output_size[3],
                           output_size[2] * output_size[1] * output_size[0]);
    // out_count corresponds to the number of elements in each pooling cell.
    Eigen::Matrix<T, Eigen::Dynamic, 1> out_count(out_mat.cols());

    // Initializes the output tensor and out_count with 0.
    out_mat.setZero();
    out_count.setZero();

    auto output_row_seq_flat = output_row_seq_tensor->flat<int64>();
    auto output_col_seq_flat = output_col_seq_tensor->flat<int64>();

    // Set output tensors.
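    // The cumulative pooling sequences are exported as the second and third
    // outputs so that FractionalAvgPoolGrad can reconstruct the exact pooling
    // regions used in this forward pass.
    //
    // Rough illustration (the actual boundaries come from
    // GeneratePoolingSequence in fractional_pool_common.h and depend on
    // pseudo_random_ and the seeds): pooling 10 input rows down to 4 output
    // rows might yield row_cum_seq = {0, 3, 5, 8, 10}. Output row i then
    // covers input rows [row_cum_seq[i], row_cum_seq[i + 1] - 1], extended to
    // include row_cum_seq[i + 1] when overlapping_ is true (and clamped to
    // the last valid row in the loop below).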
    for (int i = 0; i < row_cum_seq.size(); ++i) {
      output_row_seq_flat(i) = row_cum_seq[i];
    }

    for (int i = 0; i < col_cum_seq.size(); ++i) {
      output_col_seq_flat(i) = col_cum_seq[i];
    }

    // For both input and output,
    // 0: batch
    // 1: row
    // 2: col
    // 3: depth / channel
    const int64_t row_max = input_size[1] - 1;
    const int64_t col_max = input_size[2] - 1;
    for (int64_t b = 0; b < input_size[0]; ++b) {
      // row sequence.
      for (int64_t hs = 0; hs < row_cum_seq.size() - 1; ++hs) {
        // row start and end.
        const int64_t row_start = row_cum_seq[hs];
        int64_t row_end =
            overlapping_ ? row_cum_seq[hs + 1] : row_cum_seq[hs + 1] - 1;
        row_end = std::min(row_end, row_max);

        // col sequence.
        for (int64_t ws = 0; ws < col_cum_seq.size() - 1; ++ws) {
          const int64_t out_offset =
              (b * output_size[1] + hs) * output_size[2] + ws;
          // col start and end.
          const int64_t col_start = col_cum_seq[ws];
          int64_t col_end =
              overlapping_ ? col_cum_seq[ws + 1] : col_cum_seq[ws + 1] - 1;
          col_end = std::min(col_end, col_max);
          for (int64_t h = row_start; h <= row_end; ++h) {
            for (int64_t w = col_start; w <= col_end; ++w) {
              const int64_t in_offset =
                  (b * input_size[1] + h) * input_size[2] + w;
              out_mat.col(out_offset) += in_mat.col(in_offset);
              out_count(out_offset)++;
            }
          }
        }
      }
    }
    DCHECK_GT(out_count.minCoeff(), 0);
    out_mat.array().rowwise() /= out_count.transpose().array();
  }

 private:
  bool deterministic_;
  int64 seed_;
  int64 seed2_;
  std::vector<float> pooling_ratio_;
  bool pseudo_random_;
  bool overlapping_;
};

#define REGISTER_FRACTIONALAVGPOOL(type)                                      \
  REGISTER_KERNEL_BUILDER(                                                    \
      Name("FractionalAvgPool").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      FractionalAvgPoolOp<type>)

REGISTER_FRACTIONALAVGPOOL(int32);
REGISTER_FRACTIONALAVGPOOL(int64);
REGISTER_FRACTIONALAVGPOOL(float);
REGISTER_FRACTIONALAVGPOOL(double);

#undef REGISTER_FRACTIONALAVGPOOL

template <class T>
class FractionalAvgPoolGradOp : public OpKernel {
 public:
  explicit FractionalAvgPoolGradOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("overlapping", &overlapping_));
  }

  void Compute(OpKernelContext* context) override {
    // Here's the basic idea:
    // The batch and depth dimensions are independent of the row and col
    // dimensions, and because FractionalAvgPool currently only supports
    // pooling along the row and col dimensions, we can think of this 4D
    // tensor backpropagation as a series of operations on 2D planes.
    //
    // For each element of a 'slice' (2D plane) of output_backprop, we need to
    // figure out its contributors in the forward FractionalAvgPool operation.
    // This can be done based on row_pooling_sequence, col_pooling_sequence
    // and overlapping.
    // Once we figure out the original contributors, we just need to evenly
    // divide the value of this element among these contributors.
    //
    // Internally, we accumulate the divided out_backprop values in a
    // temporary tensor of double type, then cast the result back to type T.
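    // A small worked example of the division step: if output cell (b, r, c)
    // covered a 2 x 3 region of the input in the forward pass, that region
    // holds 6 elements, so each of those 6 input positions receives
    // out_backprop(b, r, c, d) / 6 for every channel d. With
    // overlapping = true, adjacent regions share their boundary row/col, so a
    // single input position can accumulate contributions from up to four
    // output cells.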
    typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        ConstEigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>>
        EigenDoubleMatrixMap;

    // Grab the inputs.
    const Tensor& orig_input_tensor_shape = context->input(0);
    OP_REQUIRES(context,
                orig_input_tensor_shape.dims() == 1 &&
                    orig_input_tensor_shape.NumElements() == 4,
                errors::InvalidArgument("original input tensor shape must be "
                                        "1-dimensional and 4 elements"));
    const Tensor& out_backprop = context->input(1);
    const Tensor& row_seq_tensor = context->input(2);
    const Tensor& col_seq_tensor = context->input(3);

    const int64_t out_batch = out_backprop.dim_size(0);
    const int64_t out_rows = out_backprop.dim_size(1);
    const int64_t out_cols = out_backprop.dim_size(2);
    const int64_t out_depth = out_backprop.dim_size(3);

    OP_REQUIRES(context, row_seq_tensor.NumElements() > out_rows,
                errors::InvalidArgument("Given out_backprop shape ",
                                        out_backprop.shape().DebugString(),
                                        ", row_seq_tensor must have at least ",
                                        out_rows + 1, " elements, but got ",
                                        row_seq_tensor.NumElements()));
    OP_REQUIRES(context, col_seq_tensor.NumElements() > out_cols,
                errors::InvalidArgument("Given out_backprop shape ",
                                        out_backprop.shape().DebugString(),
                                        ", col_seq_tensor must have at least ",
                                        out_cols + 1, " elements, but got ",
                                        col_seq_tensor.NumElements()));

    auto row_seq_tensor_flat = row_seq_tensor.flat<int64>();
    auto col_seq_tensor_flat = col_seq_tensor.flat<int64>();
    auto orig_input_tensor_shape_flat = orig_input_tensor_shape.flat<int64>();

    const int64_t in_batch = orig_input_tensor_shape_flat(0);
    const int64_t in_rows = orig_input_tensor_shape_flat(1);
    const int64_t in_cols = orig_input_tensor_shape_flat(2);
    const int64_t in_depth = orig_input_tensor_shape_flat(3);
    OP_REQUIRES(
        context, in_batch != 0,
        errors::InvalidArgument("Batch dimension of input must not be 0"));
    OP_REQUIRES(
        context, in_rows != 0,
        errors::InvalidArgument("Rows dimension of input must not be 0"));
    OP_REQUIRES(
        context, in_cols != 0,
        errors::InvalidArgument("Columns dimension of input must not be 0"));
    OP_REQUIRES(
        context, in_depth != 0,
        errors::InvalidArgument("Depth dimension of input must not be 0"));

    constexpr int tensor_in_and_out_dims = 4;
    // Transform orig_input_tensor_shape into TensorShape.
    TensorShape in_shape;
    for (auto i = 0; i < tensor_in_and_out_dims; ++i) {
      in_shape.AddDim(orig_input_tensor_shape_flat(i));
    }

    // Create intermediate in_backprop.
    Tensor in_backprop_tensor_temp;
    OP_REQUIRES_OK(context, context->forward_input_or_allocate_temp(
                                {0}, DataTypeToEnum<double>::v(), in_shape,
                                &in_backprop_tensor_temp));
    in_backprop_tensor_temp.flat<double>().setZero();
    // Transform 4D tensor to 2D matrix.
    EigenDoubleMatrixMap in_backprop_tensor_temp_mat(
        in_backprop_tensor_temp.flat<double>().data(), in_depth,
        in_cols * in_rows * in_batch);
    ConstEigenMatrixMap out_backprop_mat(out_backprop.flat<T>().data(),
                                         out_depth,
                                         out_cols * out_rows * out_batch);
    // Loop through each element of out_backprop and evenly distribute the
    // element to the corresponding pooling cell.
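    // For output element (b, r, c), the contributing input region spans rows
    // [row_seq(r), row_seq(r + 1) - 1] and cols [col_seq(c), col_seq(c + 1) - 1];
    // when overlapping_ is true the end indices extend to row_seq(r + 1) and
    // col_seq(c + 1), and both are clamped to the last valid input index.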
    const int64_t in_max_row_index = in_rows - 1;
    const int64_t in_max_col_index = in_cols - 1;
    for (int64_t b = 0; b < out_batch; ++b) {
      for (int64_t r = 0; r < out_rows; ++r) {
        const int64_t in_row_start = row_seq_tensor_flat(r);
        int64_t in_row_end = overlapping_ ? row_seq_tensor_flat(r + 1)
                                          : row_seq_tensor_flat(r + 1) - 1;
        in_row_end = std::min(in_row_end, in_max_row_index);
        for (int64_t c = 0; c < out_cols; ++c) {
          const int64_t in_col_start = col_seq_tensor_flat(c);
          int64_t in_col_end = overlapping_ ? col_seq_tensor_flat(c + 1)
                                            : col_seq_tensor_flat(c + 1) - 1;
          in_col_end = std::min(in_col_end, in_max_col_index);

          const int64_t num_elements_in_pooling_cell =
              (in_row_end - in_row_start + 1) * (in_col_end - in_col_start + 1);
          const int64_t out_index = (b * out_rows + r) * out_cols + c;
          // Now we can evenly distribute out_backprop(b, h, w, *) to
          // in_backprop(b, hs:he, ws:we, *).
          for (int64_t in_r = in_row_start; in_r <= in_row_end; ++in_r) {
            for (int64_t in_c = in_col_start; in_c <= in_col_end; ++in_c) {
              const int64_t in_index = (b * in_rows + in_r) * in_cols + in_c;
              // Walk through each channel (depth).
              for (int64_t d = 0; d < out_depth; ++d) {
                const double out_backprop_element = static_cast<double>(
                    out_backprop_mat.coeffRef(d, out_index));
                double& in_backprop_ref =
                    in_backprop_tensor_temp_mat.coeffRef(d, in_index);
                in_backprop_ref +=
                    out_backprop_element / num_elements_in_pooling_cell;
              }
            }
          }
        }
      }
    }

    // Depending on the type, cast double to type T.
    Tensor* in_backprop_tensor = nullptr;
    OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
                                {0}, 0, in_shape, &in_backprop_tensor));
    auto in_backprop_tensor_flat = in_backprop_tensor->flat<T>();
    auto in_backprop_tensor_temp_flat = in_backprop_tensor_temp.flat<double>();
    for (int64_t i = 0; i < in_backprop_tensor_flat.size(); ++i) {
      in_backprop_tensor_flat(i) =
          static_cast<T>(in_backprop_tensor_temp_flat(i));
    }
  }

 private:
  bool overlapping_;
};

#define REGISTER_FRACTIONALAVGPOOLGRAD(type)              \
  REGISTER_KERNEL_BUILDER(Name("FractionalAvgPoolGrad")   \
                              .Device(DEVICE_CPU)         \
                              .TypeConstraint<type>("T"), \
                          FractionalAvgPoolGradOp<type>)

REGISTER_FRACTIONALAVGPOOLGRAD(int32);
REGISTER_FRACTIONALAVGPOOLGRAD(int64);
REGISTER_FRACTIONALAVGPOOLGRAD(float);
REGISTER_FRACTIONALAVGPOOLGRAD(double);

#undef REGISTER_FRACTIONALAVGPOOLGRAD
}  // namespace tensorflow