/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// This is an internal header file intended to be included only as the
// front matter in the implementation files of various reduction ops. It
// is a header file because we split the various reduction ops into their
// own compilation units to get more parallelism in compilation.

#ifndef TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_
#define TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_

#define EIGEN_USE_THREADS

#include "third_party/eigen3/Eigen/Core"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/reduction_ops.h"
#include "tensorflow/core/kernels/transpose_functor.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
#include "tensorflow/core/platform/logging.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

template <typename Device>
struct Constants {
  // Derive the Index type: int (32-bit) or long (64-bit), depending on the
  // compile-time configuration. "float" here is not relevant.
  // TODO(zhifengc): Move the definition to TTypes.
  typedef TTypes<float>::Tensor::Index Index;
  Eigen::array<Index, 1> kZero;
  Eigen::array<Index, 1> kOne;
  Eigen::array<Index, 2> kZeroTwo;

  Constants() {
    kZero[0] = 0;
    kOne[0] = 1;
    kZeroTwo[0] = 0;
    kZeroTwo[1] = 2;
  }
};

#if defined(EIGEN_HAS_INDEX_LIST)
struct ConstantsBase {
  const Eigen::IndexList<Eigen::type2index<0>> kZero;
  const Eigen::IndexList<Eigen::type2index<1>> kOne;
  const Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<2>> kZeroTwo;
};
template <>
struct Constants<CPUDevice> : ConstantsBase {};
#endif  // EIGEN_HAS_INDEX_LIST

class ReductionHelper {
 public:
  ReductionHelper() : reduce_first_axis_(false) {}

  Status Simplify(const Tensor& data, const Tensor& axis,
                  const bool keep_dims);

  // We need to do roughly:
  //   tmp_out = allocate(out_reshape())
  //   tmp_out.reshape(out_reshape) = data.reshape(data_reshape).reduce(axes)
  //   out = tmp_out.reshape(out_shape)

  // The reduction result must be allocated with this shape.
  TensorShape out_reshape() const;

  // The final output shape must be allocated with this shape.
  TensorShape out_shape() const;

  // The reduction is on a reshaped tensor of this rank.
  int ndims() const { return data_reshape_.size(); }
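
  // Illustrative example (added for exposition; this assumes Simplify
  // collapses adjacent dimensions that share the same reduced/kept status):
  // reducing a [2, 3, 5, 7] input along axes {1, 2} glues the two adjacent
  // reduced dimensions together, so the reduction runs on a reshaped
  // [2, 15, 7] tensor (ndims() == 3) with only the middle dimension
  // reduced. out_shape() is then [2, 7], or [2, 1, 1, 7] with keep_dims.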

  // True if the 0-th dimension needs to be reduced.
  bool reduce_first_axis() const { return reduce_first_axis_; }

  // The output is reshaped.
  template <typename T, int N>
  typename TTypes<T, N>::Tensor out(Tensor* out) {
    return out->shaped<T, N>(out_reshape_);
  }

  // The input is reshaped.
  template <typename T, int N>
  typename TTypes<T, N>::ConstTensor in(const Tensor& data) {
    return data.shaped<T, N>(data_reshape_);
  }

  // Shape of the reshaped (pre-shuffle) input.
  TensorShape data_reshape() const {
    TensorShape shape;
    for (auto s : data_reshape_) shape.AddDim(s);
    return shape;
  }

  // Shape with all reduction dimensions at the end.
  TensorShape shuffled_shape();

  // Permutation of the reshaped dimensions needed to move all reduction
  // dimensions to the end.
  gtl::InlinedVector<int32, 8> permutation();

 private:
  bool reduce_first_axis_;  // True if the 0-th dimension must be reduced.
  gtl::InlinedVector<int64, 4> data_reshape_;  // Reshape data before reduction.
  gtl::InlinedVector<int64, 4> out_shape_;     // The final output shape.
  gtl::InlinedVector<int64, 4> out_reshape_;   // Reshape output for reduction.
};

// For operations where the output is a reduction function along some
// dimensions of the input.
template <typename Device, class T, typename Tperm, typename Reducer>
class ReductionOp : public OpKernel {
 public:
  explicit ReductionOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
    const DataType dt = DataTypeToEnum<T>::v();
    const DataType pt = DataTypeToEnum<Tperm>::v();
    OP_REQUIRES_OK(ctx, ctx->MatchSignature({dt, pt}, {dt}));

    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext* ctx) override {
    const Tensor& data = ctx->input(0);
    const Tensor& axes = ctx->input(1);
    VLOG(1) << "data shape: " << data.shape().DebugString();
    VLOG(1) << "axes      : " << axes.SummarizeValue(10);

    ReductionHelper helper;
    OP_REQUIRES_OK(ctx, helper.Simplify(data, axes, keep_dims_));
    CHECK_GE(helper.ndims(), 0);

    bool is_scalar_identity = functor::ReducerTraits<Reducer>::IsScalarIdentity;
    bool is_trivial = helper.ndims() == 0 ||
                      (helper.ndims() == 1 && !helper.reduce_first_axis());
    if (is_scalar_identity && is_trivial) {
      Tensor out;
      // Special case. Reduces nothing and does not alter the input values.
      if (!out.CopyFrom(data, helper.out_shape())) {
        ctx->SetStatus(errors::Internal("Error during reduction copy."));
      }
      ctx->set_output(0, out);
      return;
    }

    // We must allocate temp tensors using the same alloc attr as
    // output(0) because it is returned as output(0) in the end.
    const AllocatorAttributes alloc_attr = ctx->output_alloc_attr(0);

    Tensor tmp_out;
    typedef functor::ReduceFunctor<Device, Reducer> Functor;
    Constants<Device> constants;
    const Device& d = ctx->eigen_device<Device>();
    Reducer reducer;

    if (data.NumElements() > 0 && is_trivial && !is_scalar_identity) {
      // The reduction is trivial but the reducer is not the identity on
      // scalars: view the input as a {1, N} matrix and reduce along the
      // first axis, applying the reducer to each element individually.
      OP_REQUIRES_OK(ctx, ctx->allocate_temp(ctx->expected_output_dtype(0),
                                             TensorShape({data.NumElements()}),
                                             &tmp_out, alloc_attr));
      Functor::Reduce(ctx, tmp_out.flat<T>(),
                      data.shaped<T, 2>({1, data.NumElements()}),
                      constants.kZero, reducer);
    } else {
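      // Summary (added for exposition): the general case dispatches on the
      // simplified rank and on whether the first axis is reduced. Each case
      // below maps directly onto a rank-1/2/3 Eigen reduction; anything
      // deeper falls through to a transpose that moves all reduced
      // dimensions to the end and then reuses the 2-D -> 1-D case.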
      // A temporary tensor whose size matches the size of the reduced
      // output.
      OP_REQUIRES_OK(
          ctx, ctx->allocate_temp(ctx->expected_output_dtype(0),
                                  helper.out_reshape(), &tmp_out, alloc_attr));

      if (tmp_out.NumElements() == 0) {
        // Nothing to do, fall through to final reshaping.
      } else if (data.NumElements() == 0) {
        // Degenerate reduction where the input is empty but the output is
        // nonempty (thus tmp_out.NumElements() > 0), and we must fill the
        // output with identity elements. Example: tf.reduce_sum(tf.zeros((0,
        // 3)), [0]). Eigen sometimes crashes in this case, so we do it
        // manually.
        Functor::FillIdentity(d, tmp_out.flat<T>(), reducer);
      } else if ((helper.ndims() == 1) && helper.reduce_first_axis()) {
        // Reduce to a scalar.
        Functor::Reduce(ctx, helper.out<T, 0>(&tmp_out), helper.in<T, 1>(data),
                        constants.kZero, reducer);
      } else if ((helper.ndims() == 2) && helper.reduce_first_axis()) {
        // Can be viewed as a reduction of a matrix along 1st dimension.
        Functor::Reduce(ctx, helper.out<T, 1>(&tmp_out), helper.in<T, 2>(data),
                        constants.kZero, reducer);
      } else if ((helper.ndims() == 2) && !helper.reduce_first_axis()) {
        // Can be viewed as a reduction of a matrix along 2nd dimension.
        Functor::Reduce(ctx, helper.out<T, 1>(&tmp_out), helper.in<T, 2>(data),
                        constants.kOne, reducer);
      } else if ((helper.ndims() == 3) && helper.reduce_first_axis()) {
        // Can be viewed as a reduction of a 3D tensor along 1st and 3rd
        // dimensions.
        Functor::Reduce(ctx, helper.out<T, 1>(&tmp_out), helper.in<T, 3>(data),
                        constants.kZeroTwo, reducer);
      } else if ((helper.ndims() == 3) && !helper.reduce_first_axis()) {
        // Can be viewed as a reduction of a 3D tensor along 2nd dimension.
        Functor::Reduce(ctx, helper.out<T, 2>(&tmp_out), helper.in<T, 3>(data),
                        constants.kOne, reducer);
      } else {
        // If we don't hit one of the cases above, transpose the data so that
        // all reduced dimensions are last and reuse the 2-D -> 1-D case.
        Tensor data_reshaped;
        OP_REQUIRES(ctx, data_reshaped.CopyFrom(data, helper.data_reshape()),
                    errors::Internal("Error during reduction copy."));
        Tensor shuffled;
        OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                               helper.shuffled_shape(),
                                               &shuffled, alloc_attr));
        OP_REQUIRES_OK(
            ctx, DoTranspose(d, data_reshaped, helper.permutation(),
                             &shuffled));
        const int64 unreduced = tmp_out.NumElements();
        const int64 reduced = shuffled.NumElements() / unreduced;
        const Tensor& const_shuffled = shuffled;
        Functor::Reduce(ctx, tmp_out.flat<T>(),
                        const_shuffled.shaped<T, 2>({unreduced, reduced}),
                        constants.kOne, reducer);
      }
    }

    // Set the final output from the contents of the reduction, reshaped to
    // the expected output shape; the two shapes have the same number of
    // elements.
    Tensor out;
    OP_REQUIRES(ctx, out.CopyFrom(tmp_out, helper.out_shape()),
                errors::Internal("Error during reduction copy."));
    ctx->set_output(0, out);
  }

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};
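
// Illustrative registration (a sketch for exposition; the actual
// registrations live in the per-op implementation files such as
// reduction_ops_sum.cc, typically behind per-type macros):
//
//   REGISTER_KERNEL_BUILDER(
//       Name("Sum")
//           .Device(DEVICE_CPU)
//           .TypeConstraint<float>("T")
//           .TypeConstraint<int32>("Tidx"),
//       ReductionOp<CPUDevice, float, int32,
//                   Eigen::internal::SumReducer<float>>);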

namespace functor {

template <typename Device, typename Reducer>
struct ReduceFunctorBase {
  template <typename OUT_T, typename IN_T, typename ReductionAxes>
  static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in,
                     const ReductionAxes& reduction_axes,
                     const Reducer& reducer) {
    const Device& d = ctx->eigen_device<Device>();
    ReduceEigenImpl<Device, OUT_T, IN_T, ReductionAxes, Reducer> reducer_impl;
    reducer_impl(d, out, in, reduction_axes, reducer);
  }

  template <typename OUT_T>
  static void FillIdentity(const Device& d, OUT_T out,
                           const Reducer& reducer) {
    FillIdentityEigenImpl(d, out, reducer);
  }
};

template <typename Reducer>
struct ReduceFunctor<CPUDevice, Reducer>
    : ReduceFunctorBase<CPUDevice, Reducer> {};

}  // namespace functor
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_H_