1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
16 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
17
18 #include "tensorflow/lite/kernels/internal/common.h"
19
20 namespace tflite {
21 namespace reference_integer_ops {
22
23 // Fixed-point per-channel-quantization transpose convolution reference kernel.
TransposeConv(const ConvParams & params,const int32_t * output_multiplier,const int32_t * output_shift,const RuntimeShape & input_shape,const int8_t * input_data,const RuntimeShape & filter_shape,const int8_t * filter_data,const RuntimeShape & bias_shape,const int32_t * bias_data,const RuntimeShape & output_shape,int8_t * output_data,const RuntimeShape & im2col_shape,int8_t * im2col_data,int32_t * scratch_buffer)24 inline void TransposeConv(
25 const ConvParams& params, const int32_t* output_multiplier,
26 const int32_t* output_shift, const RuntimeShape& input_shape,
27 const int8_t* input_data, const RuntimeShape& filter_shape,
28 const int8_t* filter_data, const RuntimeShape& bias_shape,
29 const int32_t* bias_data, const RuntimeShape& output_shape,
30 int8_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
31 int32_t* scratch_buffer) {
32 const int stride_width = params.stride_width;
33 const int stride_height = params.stride_height;
34 const int pad_width = params.padding_values.width;
35 const int pad_height = params.padding_values.height;
36 TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
37 TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
38 TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
39 (void)im2col_data; // only used in optimized code.
40 (void)im2col_shape; // only used in optimized code.
41
42 const int batches = MatchingDim(input_shape, 0, output_shape, 0);
43 const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
44 const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
45 if (bias_data) {
46 TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
47 }
48 const int input_height = input_shape.Dims(1);
49 const int input_width = input_shape.Dims(2);
50 const int filter_height = filter_shape.Dims(1);
51 const int filter_width = filter_shape.Dims(2);
52 const int output_height = output_shape.Dims(1);
53 const int output_width = output_shape.Dims(2);
54 const int32_t input_offset = params.input_offset;
55 const int32_t output_offset = params.output_offset;
56 const int32_t output_activation_min = std::numeric_limits<int8_t>::min();
57 const int32_t output_activation_max = std::numeric_limits<int8_t>::max();
58 TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
59
60 const int num_elements = output_shape.FlatSize();
61 // We need to initialize scratch_buffer to all 0s, as we apply the same
62 // 'scatter' based trick as in float version.
63 memset(scratch_buffer, 0, num_elements * sizeof(int32_t));
64
65 // Loop through input elements one at a time.
66 for (int batch = 0; batch < batches; ++batch) {
67 for (int in_y = 0; in_y < input_height; ++in_y) {
68 for (int in_x = 0; in_x < input_width; ++in_x) {
69 for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
70 // Loop through the output elements it will influence.
71 const int out_x_origin = (in_x * stride_width) - pad_width;
72 const int out_y_origin = (in_y * stride_height) - pad_height;
73 for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
74 for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
75 for (int out_channel = 0; out_channel < output_depth;
76 ++out_channel) {
77 // Compute output element location.
78 const int out_x = out_x_origin + filter_x;
79 const int out_y = out_y_origin + filter_y;
80 // We cannot accumulate out of bounds.
81 if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
82 (out_y < output_height)) {
83 const int8_t input_value = input_data[Offset(
84 input_shape, batch, in_y, in_x, in_channel)];
85 const int8_t filter_value =
86 filter_data[Offset(filter_shape, out_channel, filter_y,
87 filter_x, in_channel)];
88 scratch_buffer[Offset(output_shape, batch, out_y, out_x,
89 out_channel)] +=
90 (input_value + input_offset) * filter_value;
91 }
92 }
93 }
94 }
95 }
96 }
97 }
98 }
99
100 for (int batch = 0; batch < batches; ++batch) {
101 for (int out_y = 0; out_y < output_height; ++out_y) {
102 for (int out_x = 0; out_x < output_width; ++out_x) {
103 for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
104 int32_t acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
105 out_channel)];
106 if (bias_data) {
107 acc += bias_data[out_channel];
108 }
109 acc = MultiplyByQuantizedMultiplier(
110 acc, output_multiplier[out_channel], output_shift[out_channel]);
111 acc += output_offset;
112 acc = std::max(acc, output_activation_min);
113 acc = std::min(acc, output_activation_max);
114 output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
115 static_cast<int8_t>(acc);
116 }
117 }
118 }
119 }
120 }
121
122 // int16_t input (zero_point=0), int8_t filter, int64 accumulator
TransposeConv(const ConvParams & params,const int32_t * output_multiplier,const int32_t * output_shift,const RuntimeShape & input_shape,const int16_t * input_data,const RuntimeShape & filter_shape,const int8_t * filter_data,const RuntimeShape & bias_shape,const std::int64_t * bias_data,const RuntimeShape & output_shape,int16_t * output_data,const RuntimeShape & im2col_shape,int8_t * im2col_data,std::int64_t * scratch_buffer)123 inline void TransposeConv(
124 const ConvParams& params, const int32_t* output_multiplier,
125 const int32_t* output_shift, const RuntimeShape& input_shape,
126 const int16_t* input_data, const RuntimeShape& filter_shape,
127 const int8_t* filter_data, const RuntimeShape& bias_shape,
128 const std::int64_t* bias_data, const RuntimeShape& output_shape,
129 int16_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
130 std::int64_t* scratch_buffer) {
131 const int stride_width = params.stride_width;
132 const int stride_height = params.stride_height;
133 const int pad_width = params.padding_values.width;
134 const int pad_height = params.padding_values.height;
135 TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
136 TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
137 TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
138 (void)im2col_data; // only used in optimized code.
139 (void)im2col_shape; // only used in optimized code.
140
141 const int batches = MatchingDim(input_shape, 0, output_shape, 0);
142 const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
143 const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
144 if (bias_data) {
145 TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
146 }
147 const int input_height = input_shape.Dims(1);
148 const int input_width = input_shape.Dims(2);
149 const int filter_height = filter_shape.Dims(1);
150 const int filter_width = filter_shape.Dims(2);
151 const int output_height = output_shape.Dims(1);
152 const int output_width = output_shape.Dims(2);
153 const int32_t output_activation_min = std::numeric_limits<int16_t>::min();
154 const int32_t output_activation_max = std::numeric_limits<int16_t>::max();
155 TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
156
157 const int num_elements = output_shape.FlatSize();
158 // We need to initialize scratch_buffer to all 0s, as we apply the same
159 // 'scatter' based trick as in float version.
160 memset(scratch_buffer, 0, num_elements * sizeof(std::int64_t));
161
162 // Loop through input elements one at a time.
163 for (int batch = 0; batch < batches; ++batch) {
164 for (int in_y = 0; in_y < input_height; ++in_y) {
165 for (int in_x = 0; in_x < input_width; ++in_x) {
166 for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
167 // Loop through the output elements it will influence.
168 const int out_x_origin = (in_x * stride_width) - pad_width;
169 const int out_y_origin = (in_y * stride_height) - pad_height;
170 for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
171 for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
172 for (int out_channel = 0; out_channel < output_depth;
173 ++out_channel) {
174 // Compute output element location.
175 const int out_x = out_x_origin + filter_x;
176 const int out_y = out_y_origin + filter_y;
177 // We cannot accumulate out of bounds.
178 if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
179 (out_y < output_height)) {
180 const int32_t input_value = input_data[Offset(
181 input_shape, batch, in_y, in_x, in_channel)];
182 const int32_t filter_value =
183 filter_data[Offset(filter_shape, out_channel, filter_y,
184 filter_x, in_channel)];
185 scratch_buffer[Offset(output_shape, batch, out_y, out_x,
186 out_channel)] +=
187 input_value * filter_value;
188 }
189 }
190 }
191 }
192 }
193 }
194 }
195 }
196
197 for (int batch = 0; batch < batches; ++batch) {
198 for (int out_y = 0; out_y < output_height; ++out_y) {
199 for (int out_x = 0; out_x < output_width; ++out_x) {
200 for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
201 std::int64_t acc = scratch_buffer[Offset(output_shape, batch, out_y,
202 out_x, out_channel)];
203 if (bias_data) {
204 acc += bias_data[out_channel];
205 }
206 int32_t scaled_acc = MultiplyByQuantizedMultiplier(
207 acc, output_multiplier[out_channel], output_shift[out_channel]);
208 scaled_acc = std::max(scaled_acc, output_activation_min);
209 scaled_acc = std::min(scaled_acc, output_activation_max);
210 output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
211 static_cast<int16_t>(scaled_acc);
212 }
213 }
214 }
215 }
216 }
217
218 } // namespace reference_integer_ops
219 } // namespace tflite
220
221 #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
222