1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_MULTITHREADED_CONV_H_
17 #define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_MULTITHREADED_CONV_H_
18
19 #include <assert.h>
20 #include <stdint.h>
21 #include <sys/types.h>
22 #include <algorithm>
23 #include <cmath>
24 #include <limits>
25 #include <memory>
26 #include <tuple>
27 #include <type_traits>
28
29 #include "tensorflow/lite/c/builtin_op_data.h"
30 #include "tensorflow/lite/kernels/internal/common.h"
31 #include "tensorflow/lite/kernels/internal/optimized/eigen_spatial_convolutions.h"
32 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
33 #include "tensorflow/lite/kernels/internal/types.h"
34
35 namespace tflite {
36 namespace multithreaded_ops {
37
38 // Shorthands for the types we need when interfacing with the EigenTensor
39 // library.
40 typedef Eigen::TensorMap<
41 Eigen::Tensor<float, 2, Eigen::RowMajor, Eigen::DenseIndex>, Eigen::Aligned>
42 EigenMatrix;
43 typedef Eigen::TensorMap<
44 Eigen::Tensor<const float, 2, Eigen::RowMajor, Eigen::DenseIndex>,
45 Eigen::Aligned>
46 ConstEigenMatrix;
47
48 typedef Eigen::TensorMap<
49 Eigen::Tensor<float, 4, Eigen::RowMajor, Eigen::DenseIndex>, Eigen::Aligned>
50 EigenTensor;
51 typedef Eigen::TensorMap<
52 Eigen::Tensor<const float, 4, Eigen::RowMajor, Eigen::DenseIndex>,
53 Eigen::Aligned>
54 ConstEigenTensor;
55
56 // Utility functions we need for the EigenTensor API.
57 template <typename Device, typename T>
58 struct MatMulConvFunctor {
59 // Computes on device "d": out = in0 * in1, where * is matrix
60 // multiplication.
operatorMatMulConvFunctor61 void operator()(
62 const Device& d, EigenMatrix out, ConstEigenMatrix in0,
63 ConstEigenMatrix in1,
64 const Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1>& dim_pair) {
65 out.device(d) = in0.contract(in1, dim_pair);
66 }
67 };
68
69 template <class T>
70 class EigenTensorConvFunctor {
71 private:
RuntimePadding2EigenPadding(PaddingType padding)72 Eigen::PaddingType RuntimePadding2EigenPadding(PaddingType padding) {
73 switch (padding) {
74 case PaddingType::kValid:
75 return Eigen::PADDING_VALID;
76 case PaddingType::kSame:
77 return Eigen::PADDING_SAME;
78 case PaddingType::kNone:
79 assert(false); // should never get here.
80 return Eigen::PADDING_VALID;
81 }
82 return Eigen::PADDING_SAME; // Prevent compiler warning about missing
83 // return
84 }
85
86 public:
operator()87 void operator()(const Eigen::ThreadPoolDevice& device, const T* input_data,
88 int input_batches, int input_height, int input_width,
89 int input_depth, const T* filter_data, int filter_height,
90 int filter_width, int filter_count, int stride_rows,
91 int stride_cols, int pad_width, int pad_height,
92 PaddingType padding, T* output_data, int output_height,
93 int output_width) {
94 const bool is_1x1_kernel = (filter_height == 1 && filter_width == 1 &&
95 stride_rows == 1 && stride_cols == 1);
96 if (is_1x1_kernel) {
97 // For 1x1 kernel, the 2D convolution is reduced to matrix
98 // multiplication.
99 const int conv_width = output_height * output_width;
100 Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
101 dim_pair[0] = Eigen::IndexPair<Eigen::DenseIndex>(1, 0);
102 EigenMatrix output(output_data, input_batches * conv_width, filter_count);
103 ConstEigenMatrix input(input_data, input_batches * conv_width,
104 input_depth);
105 ConstEigenMatrix filter(filter_data, input_depth, filter_count);
106 MatMulConvFunctor<Eigen::ThreadPoolDevice, T>()(device, output, input,
107 filter, dim_pair);
108 } else if (filter_height == input_height && filter_width == input_width &&
109 pad_width == 0 && pad_height == 0) {
110 // If the input data and filter have the same height/width,
111 // the 2D convolution is reduced to matrix multiplication.
112 const int k = // Length of reduction dimension.
113 filter_width * filter_height * input_depth;
114 Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
115 dim_pair[0] = Eigen::IndexPair<Eigen::DenseIndex>(1, 0);
116 EigenMatrix output(output_data, input_batches, filter_count);
117 ConstEigenMatrix input(input_data, input_batches, k);
118 ConstEigenMatrix filter(filter_data, k, filter_count);
119 MatMulConvFunctor<Eigen::ThreadPoolDevice, T>()(device, output, input,
120 filter, dim_pair);
121 } else {
122 EigenTensor output(output_data, input_batches, output_height,
123 output_width, filter_count);
124 ConstEigenTensor input(input_data, input_batches, input_height,
125 input_width, input_depth);
126 ConstEigenTensor filter(filter_data, filter_height, filter_width,
127 input_depth, filter_count);
128 output.device(device) =
129 Eigen::SpatialConvolution(input, filter, stride_cols, stride_rows,
130 RuntimePadding2EigenPadding(padding));
131 }
132 }
133 };
134
Conv(const Eigen::ThreadPoolDevice & device,const ConvParams & params,const RuntimeShape & input_shape,const float * input_data,const RuntimeShape & filter_shape,const float * filter_data,const RuntimeShape & bias_shape,const float * bias_data,const RuntimeShape & output_shape,float * output_data,const RuntimeShape & im2col_shape,float * im2col_data)135 inline void Conv(const Eigen::ThreadPoolDevice& device,
136 const ConvParams& params, const RuntimeShape& input_shape,
137 const float* input_data, const RuntimeShape& filter_shape,
138 const float* filter_data, const RuntimeShape& bias_shape,
139 const float* bias_data, const RuntimeShape& output_shape,
140 float* output_data, const RuntimeShape& im2col_shape,
141 float* im2col_data) {
142 // Nest profiling under "Conv", to aggregate with other kernels.
143 ruy::profiler::ScopeLabel label("Conv");
144 ruy::profiler::ScopeLabel inner_label("Multithreaded EigenTensor");
145
146 // im2col data should not be generated for the multi-thread supporting case.
147 TFLITE_DCHECK(!im2col_data);
148 (void)im2col_shape;
149 const int stride_width = params.stride_width;
150 const int stride_height = params.stride_height;
151 const PaddingType padding = params.padding_type;
152 const int pad_width = params.padding_values.width;
153 const int pad_height = params.padding_values.height;
154 const float output_activation_min = params.float_activation_min;
155 const float output_activation_max = params.float_activation_max;
156 TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
157 TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
158 TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
159
160 const int batches = MatchingDim(input_shape, 0, output_shape, 0);
161 const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
162 const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
163 const int input_height = input_shape.Dims(1);
164 const int input_width = input_shape.Dims(2);
165 const int filter_height = filter_shape.Dims(1);
166 const int filter_width = filter_shape.Dims(2);
167 const int output_height = output_shape.Dims(1);
168 const int output_width = output_shape.Dims(2);
169 EigenTensorConvFunctor<float> conv_functor;
170 conv_functor(device, input_data, batches, input_height, input_width,
171 input_depth, filter_data, filter_height, filter_width,
172 output_depth, stride_height, stride_width, pad_height, pad_width,
173 padding, output_data, output_height, output_width);
174
175 optimized_ops::AddBiasAndEvalActivationFunction(
176 output_activation_min, output_activation_max, bias_shape, bias_data,
177 output_shape, output_data);
178 }
179
180 } // namespace multithreaded_ops
181 } // namespace tflite
182
183 #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_MULTITHREADED_CONV_H_
184