/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// This is an internal header file intended to only be included as the
// front-matter in the implementation files of various reduction ops.  It
// is a header file because we split the various reduction ops into their
// own compilation units to get more parallelism in compilation.

#ifndef TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_
#define TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_

#define EIGEN_USE_THREADS

#include "third_party/eigen3/Eigen/Core"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/reduction_ops.h"
#include "tensorflow/core/kernels/transpose_functor.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
#include "tensorflow/core/platform/logging.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
#ifdef TENSORFLOW_USE_SYCL
typedef Eigen::SyclDevice SYCLDevice;
#endif  // TENSORFLOW_USE_SYCL

template <typename Device>
struct Constants {
  // Derive Index type. int (32-bit) or long (64-bit) depending on the
  // compile-time configuration. "float" here is not relevant.
  // TODO(zhifengc): Move the definition to TTypes.
  typedef TTypes<float>::Tensor::Index Index;
  Eigen::array<Index, 1> kZero;
  Eigen::array<Index, 1> kOne;
  Eigen::array<Index, 2> kZeroTwo;

  Constants() {
    kZero[0] = 0;
    kOne[0] = 1;
    kZeroTwo[0] = 0;
    kZeroTwo[1] = 2;
  }
};

#if defined(EIGEN_HAS_INDEX_LIST)
struct ConstantsBase {
  const Eigen::IndexList<Eigen::type2index<0>> kZero;
  const Eigen::IndexList<Eigen::type2index<1>> kOne;
  const Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<2>> kZeroTwo;
};
template <>
struct Constants<CPUDevice> : ConstantsBase {};
#ifdef TENSORFLOW_USE_SYCL
template <>
struct Constants<SYCLDevice> : ConstantsBase {};
#endif  // TENSORFLOW_USE_SYCL
#endif  // EIGEN_HAS_INDEX_LIST

class ReductionHelper {
 public:
  ReductionHelper() : reduce_first_axis_(false) {}

  Status Simplify(const Tensor& data, const Tensor& axis,
                  const bool keep_dims);

  // We need to do roughly:
  //   tmp_out = allocate(out_reshape())
  //   tmp_out.reshape(out_reshape) = data.reshape(data_reshape).reduce(axes)
  //   out = tmp_out.reshape(out_shape)
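  //
  // For example (an illustrative walkthrough, assuming Simplify collapses
  // adjacent runs of reduced/unreduced dimensions): reducing a [2, 3, 5, 7]
  // tensor over axes {1, 2} with keep_dims=false yields
  // data_reshape() = [2, 15, 7], out_reshape() = [2, 7], and
  // out_shape() = [2, 7]; with keep_dims=true, out_shape() becomes
  // [2, 1, 1, 7] while out_reshape() stays [2, 7].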

  // The reduction result must be allocated with this shape.
  TensorShape out_reshape() const;

  // The final output shape must be allocated with this shape.
  TensorShape out_shape() const;

  // The reduction is on a reshaped tensor of this rank.
  int ndims() const { return data_reshape_.size(); }

  // True if we need to reduce the 0-th dimension.
  bool reduce_first_axis() const { return reduce_first_axis_; }

  // The output is reshaped.
  template <typename T, int N>
  typename TTypes<T, N>::Tensor out(Tensor* out) {
    return out->shaped<T, N>(out_reshape_);
  }

  // The input is reshaped.
  template <typename T, int N>
  typename TTypes<T, N>::ConstTensor in(const Tensor& data) {
    return data.shaped<T, N>(data_reshape_);
  }

  // Shape of the shuffled input.
  TensorShape data_reshape() const {
    TensorShape shape;
    for (auto s : data_reshape_) shape.AddDim(s);
    return shape;
  }

  // Shape with all reduction dimensions at the end.
  TensorShape shuffled_shape();

  // Permutation of the reshaped dimensions that moves all reduction
  // dimensions to the end.
  gtl::InlinedVector<int32, 8> permutation();

 private:
  bool reduce_first_axis_;  // True if we need to reduce the 0-th dimension.
  gtl::InlinedVector<int64, 4> data_reshape_;  // Reshape data before reduction.
  gtl::InlinedVector<int64, 4> out_shape_;     // The final output shape.
  gtl::InlinedVector<int64, 4> out_reshape_;   // Reshape output for reduction.
};

// For operations where the output is a reduction function along some
// dimensions of the input.
template <typename Device, class T, typename Tperm, typename Reducer>
class ReductionOp : public OpKernel {
 public:
  explicit ReductionOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
    const DataType dt = DataTypeToEnum<T>::v();
    const DataType pt = DataTypeToEnum<Tperm>::v();
    OP_REQUIRES_OK(ctx, ctx->MatchSignature({dt, pt}, {dt}));

    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext* ctx) override {
    const Tensor& data = ctx->input(0);
    const Tensor& axes = ctx->input(1);
    VLOG(1) << "data shape: " << data.shape().DebugString();
    VLOG(1) << "axes      : " << axes.SummarizeValue(10);

    ReductionHelper helper;
    OP_REQUIRES_OK(ctx, helper.Simplify(data, axes, keep_dims_));
    CHECK_GE(helper.ndims(), 0);

    if (helper.ndims() == 0 ||
        (helper.ndims() == 1 && !helper.reduce_first_axis())) {
      // Special case. Reduces nothing.  It is unclear why this is
      // necessary, but tests fail without it.  Look into why this
      // case occurs.
      Tensor out;
      if (!out.CopyFrom(data, helper.out_shape())) {
        ctx->SetStatus(errors::Internal("Error during reduction copy."));
      }
      ctx->set_output(0, out);
      return;
    }

    // We must allocate temp tensors using the same alloc attr as
    // output(0) because it is returned as output(0) in the end.
    const AllocatorAttributes alloc_attr = ctx->output_alloc_attr(0);

    // A temporary tensor whose size matches the size of the reduced
    // output.
    Tensor tmp_out;
    OP_REQUIRES_OK(
        ctx, ctx->allocate_temp(ctx->expected_output_dtype(0),
                                helper.out_reshape(), &tmp_out, alloc_attr));

    typedef functor::ReduceFunctor<Device, Reducer> Functor;
    Constants<Device> constants;
    const Device& d = ctx->eigen_device<Device>();
    Reducer reducer;

    if (tmp_out.NumElements() == 0) {
      // Nothing to do, fall through to final reshaping.
    } else if (data.NumElements() == 0) {
      // Degenerate reduction where the input is empty but the output is
      // nonempty (thus tmp_out.NumElements() > 0), and we must fill the
      // output with identity elements.  Example:
      // tf.reduce_sum(tf.zeros((0, 3)), [0]).  Eigen sometimes crashes in
      // this case, so we do it manually.
      Functor::FillIdentity(d, tmp_out.flat<T>(), reducer);
    } else if ((helper.ndims() == 1) && helper.reduce_first_axis()) {
      // Reduce to a scalar.
      Functor::Reduce(ctx, helper.out<T, 0>(&tmp_out), helper.in<T, 1>(data),
                      constants.kZero, reducer);
    } else if ((helper.ndims() == 2) && helper.reduce_first_axis()) {
      // Can be viewed as a reduction of a matrix along 1st dimension.
      Functor::Reduce(ctx, helper.out<T, 1>(&tmp_out), helper.in<T, 2>(data),
                      constants.kZero, reducer);
    } else if ((helper.ndims() == 2) && !helper.reduce_first_axis()) {
      // Can be viewed as a reduction of a matrix along 2nd dimension.
      Functor::Reduce(ctx, helper.out<T, 1>(&tmp_out), helper.in<T, 2>(data),
                      constants.kOne, reducer);
    } else if ((helper.ndims() == 3) && helper.reduce_first_axis()) {
      // Can be viewed as a reduction of a 3D tensor along 1st and 3rd
      // dimensions.
      Functor::Reduce(ctx, helper.out<T, 1>(&tmp_out), helper.in<T, 3>(data),
                      constants.kZeroTwo, reducer);
    } else if ((helper.ndims() == 3) && !helper.reduce_first_axis()) {
      // Can be viewed as a reduction of a 3D tensor along 2nd dimension.
      Functor::Reduce(ctx, helper.out<T, 2>(&tmp_out), helper.in<T, 3>(data),
                      constants.kOne, reducer);
    } else {
      // If we don't hit one of the cases above, transpose the data so that
      // all reduced dimensions are last and reuse the 2-D -> 1-D case.
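      // (Illustrative example, assuming the behavior of Simplify sketched
      // above: with data_reshape() = [2, 3, 4, 5] and dimensions 0 and 2
      // reduced, permutation() is {1, 3, 0, 2}, shuffled_shape() is
      // [3, 5, 2, 4], and the reduction below views the shuffled data as a
      // [15, 8] matrix reduced along its second axis.)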
      Tensor data_reshaped;
      CHECK(data_reshaped.CopyFrom(data, helper.data_reshape()));
      Tensor shuffled;
      OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                             helper.shuffled_shape(),
                                             &shuffled, alloc_attr));
      OP_REQUIRES_OK(
          ctx, DoTranspose(d, data_reshaped, helper.permutation(), &shuffled));
      const int64 unreduced = tmp_out.NumElements();
      const int64 reduced = shuffled.NumElements() / unreduced;
      const Tensor& const_shuffled = shuffled;
      Functor::Reduce(ctx, tmp_out.flat<T>(),
                      const_shuffled.shaped<T, 2>({unreduced, reduced}),
                      constants.kOne, reducer);
    }

    // Set the real output using the contents of the reduction but the
    // real expected output shape.  The number of elements should
    // match between the two shapes.
    Tensor out;
    if (!out.CopyFrom(tmp_out, helper.out_shape())) {
      ctx->SetStatus(errors::Internal("Error during reduction copy."));
    }
    ctx->set_output(0, out);
  }

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};
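
// A minimal usage sketch (illustrative only; the actual registrations live
// in the per-op implementation files that include this header, such as
// reduction_ops_sum.cc, and the attr names below follow the "Sum" op):
//
//   REGISTER_KERNEL_BUILDER(
//       Name("Sum")
//           .Device(DEVICE_CPU)
//           .TypeConstraint<float>("T")
//           .TypeConstraint<int32>("Tidx"),
//       ReductionOp<CPUDevice, float, int32,
//                   Eigen::internal::SumReducer<float>>);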

namespace functor {

template <typename Device, typename Reducer>
struct ReduceFunctorBase {
  template <typename OUT_T, typename IN_T, typename ReductionAxes>
  static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in,
                     const ReductionAxes& reduction_axes,
                     const Reducer& reducer) {
    const Device& d = ctx->eigen_device<Device>();
    ReduceEigenImpl<Device, OUT_T, IN_T, ReductionAxes, Reducer> reducer_impl;
    reducer_impl(d, out, in, reduction_axes, reducer);
  }

  template <typename OUT_T>
  static void FillIdentity(const Device& d, OUT_T out,
                           const Reducer& reducer) {
    FillIdentityEigenImpl(d, out, reducer);
  }
};

template <typename Reducer>
struct ReduceFunctor<CPUDevice, Reducer>
    : ReduceFunctorBase<CPUDevice, Reducer> {};
#ifdef TENSORFLOW_USE_SYCL
template <typename Reducer>
struct ReduceFunctor<SYCLDevice, Reducer>
    : ReduceFunctorBase<SYCLDevice, Reducer> {};
#endif  // TENSORFLOW_USE_SYCL

}  // namespace functor
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_