• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
16 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
17 
18 #include "tensorflow/lite/kernels/internal/common.h"
19 
20 namespace tflite {
21 namespace reference_integer_ops {
22 
23 // Fixed-point per-channel-quantization transpose convolution reference kernel.
TransposeConv(const ConvParams & params,const int32_t * output_multiplier,const int32_t * output_shift,const RuntimeShape & input_shape,const int8_t * input_data,const RuntimeShape & filter_shape,const int8_t * filter_data,const RuntimeShape & bias_shape,const int32_t * bias_data,const RuntimeShape & output_shape,int8_t * output_data,const RuntimeShape & im2col_shape,int8_t * im2col_data,int32_t * scratch_buffer)24 inline void TransposeConv(
25     const ConvParams& params, const int32_t* output_multiplier,
26     const int32_t* output_shift, const RuntimeShape& input_shape,
27     const int8_t* input_data, const RuntimeShape& filter_shape,
28     const int8_t* filter_data, const RuntimeShape& bias_shape,
29     const int32_t* bias_data, const RuntimeShape& output_shape,
30     int8_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
31     int32_t* scratch_buffer) {
32   const int stride_width = params.stride_width;
33   const int stride_height = params.stride_height;
34   const int pad_width = params.padding_values.width;
35   const int pad_height = params.padding_values.height;
36   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
37   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
38   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
39   (void)im2col_data;   // only used in optimized code.
40   (void)im2col_shape;  // only used in optimized code.
41 
42   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
43   const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
44   const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
45   if (bias_data) {
46     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
47   }
48   const int input_height = input_shape.Dims(1);
49   const int input_width = input_shape.Dims(2);
50   const int filter_height = filter_shape.Dims(1);
51   const int filter_width = filter_shape.Dims(2);
52   const int output_height = output_shape.Dims(1);
53   const int output_width = output_shape.Dims(2);
54   const int32_t input_offset = params.input_offset;
55   const int32_t output_offset = params.output_offset;
56   const int32_t output_activation_min = std::numeric_limits<int8_t>::min();
57   const int32_t output_activation_max = std::numeric_limits<int8_t>::max();
58   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
59 
60   const int num_elements = output_shape.FlatSize();
61   // We need to initialize scratch_buffer to all 0s, as we apply the same
62   // 'scatter' based trick as in float version.
63   memset(scratch_buffer, 0, num_elements * sizeof(int32_t));
64 
65   // Loop through input elements one at a time.
66   for (int batch = 0; batch < batches; ++batch) {
67     for (int in_y = 0; in_y < input_height; ++in_y) {
68       for (int in_x = 0; in_x < input_width; ++in_x) {
69         for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
70           // Loop through the output elements it will influence.
71           const int out_x_origin = (in_x * stride_width) - pad_width;
72           const int out_y_origin = (in_y * stride_height) - pad_height;
73           for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
74             for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
75               for (int out_channel = 0; out_channel < output_depth;
76                    ++out_channel) {
77                 // Compute output element location.
78                 const int out_x = out_x_origin + filter_x;
79                 const int out_y = out_y_origin + filter_y;
80                 // We cannot accumulate out of bounds.
81                 if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
82                     (out_y < output_height)) {
83                   const int8_t input_value = input_data[Offset(
84                       input_shape, batch, in_y, in_x, in_channel)];
85                   const int8_t filter_value =
86                       filter_data[Offset(filter_shape, out_channel, filter_y,
87                                          filter_x, in_channel)];
88                   scratch_buffer[Offset(output_shape, batch, out_y, out_x,
89                                         out_channel)] +=
90                       (input_value + input_offset) * filter_value;
91                 }
92               }
93             }
94           }
95         }
96       }
97     }
98   }
99 
100   for (int batch = 0; batch < batches; ++batch) {
101     for (int out_y = 0; out_y < output_height; ++out_y) {
102       for (int out_x = 0; out_x < output_width; ++out_x) {
103         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
104           int32_t acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
105                                               out_channel)];
106           if (bias_data) {
107             acc += bias_data[out_channel];
108           }
109           acc = MultiplyByQuantizedMultiplier(
110               acc, output_multiplier[out_channel], output_shift[out_channel]);
111           acc += output_offset;
112           acc = std::max(acc, output_activation_min);
113           acc = std::min(acc, output_activation_max);
114           output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
115               static_cast<int8_t>(acc);
116         }
117       }
118     }
119   }
120 }
121 
122 // int16_t input (zero_point=0), int8_t filter, int64 accumulator
TransposeConv(const ConvParams & params,const int32_t * output_multiplier,const int32_t * output_shift,const RuntimeShape & input_shape,const int16_t * input_data,const RuntimeShape & filter_shape,const int8_t * filter_data,const RuntimeShape & bias_shape,const std::int64_t * bias_data,const RuntimeShape & output_shape,int16_t * output_data,const RuntimeShape & im2col_shape,int8_t * im2col_data,std::int64_t * scratch_buffer)123 inline void TransposeConv(
124     const ConvParams& params, const int32_t* output_multiplier,
125     const int32_t* output_shift, const RuntimeShape& input_shape,
126     const int16_t* input_data, const RuntimeShape& filter_shape,
127     const int8_t* filter_data, const RuntimeShape& bias_shape,
128     const std::int64_t* bias_data, const RuntimeShape& output_shape,
129     int16_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
130     std::int64_t* scratch_buffer) {
131   const int stride_width = params.stride_width;
132   const int stride_height = params.stride_height;
133   const int pad_width = params.padding_values.width;
134   const int pad_height = params.padding_values.height;
135   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
136   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
137   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
138   (void)im2col_data;   // only used in optimized code.
139   (void)im2col_shape;  // only used in optimized code.
140 
141   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
142   const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
143   const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
144   if (bias_data) {
145     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
146   }
147   const int input_height = input_shape.Dims(1);
148   const int input_width = input_shape.Dims(2);
149   const int filter_height = filter_shape.Dims(1);
150   const int filter_width = filter_shape.Dims(2);
151   const int output_height = output_shape.Dims(1);
152   const int output_width = output_shape.Dims(2);
153   const int32_t output_activation_min = std::numeric_limits<int16_t>::min();
154   const int32_t output_activation_max = std::numeric_limits<int16_t>::max();
155   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
156 
157   const int num_elements = output_shape.FlatSize();
158   // We need to initialize scratch_buffer to all 0s, as we apply the same
159   // 'scatter' based trick as in float version.
160   memset(scratch_buffer, 0, num_elements * sizeof(std::int64_t));
161 
162   // Loop through input elements one at a time.
163   for (int batch = 0; batch < batches; ++batch) {
164     for (int in_y = 0; in_y < input_height; ++in_y) {
165       for (int in_x = 0; in_x < input_width; ++in_x) {
166         for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
167           // Loop through the output elements it will influence.
168           const int out_x_origin = (in_x * stride_width) - pad_width;
169           const int out_y_origin = (in_y * stride_height) - pad_height;
170           for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
171             for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
172               for (int out_channel = 0; out_channel < output_depth;
173                    ++out_channel) {
174                 // Compute output element location.
175                 const int out_x = out_x_origin + filter_x;
176                 const int out_y = out_y_origin + filter_y;
177                 // We cannot accumulate out of bounds.
178                 if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
179                     (out_y < output_height)) {
180                   const int32_t input_value = input_data[Offset(
181                       input_shape, batch, in_y, in_x, in_channel)];
182                   const int32_t filter_value =
183                       filter_data[Offset(filter_shape, out_channel, filter_y,
184                                          filter_x, in_channel)];
185                   scratch_buffer[Offset(output_shape, batch, out_y, out_x,
186                                         out_channel)] +=
187                       input_value * filter_value;
188                 }
189               }
190             }
191           }
192         }
193       }
194     }
195   }
196 
197   for (int batch = 0; batch < batches; ++batch) {
198     for (int out_y = 0; out_y < output_height; ++out_y) {
199       for (int out_x = 0; out_x < output_width; ++out_x) {
200         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
201           std::int64_t acc = scratch_buffer[Offset(output_shape, batch, out_y,
202                                                    out_x, out_channel)];
203           if (bias_data) {
204             acc += bias_data[out_channel];
205           }
206           int32_t scaled_acc = MultiplyByQuantizedMultiplier(
207               acc, output_multiplier[out_channel], output_shift[out_channel]);
208           scaled_acc = std::max(scaled_acc, output_activation_min);
209           scaled_acc = std::min(scaled_acc, output_activation_max);
210           output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
211               static_cast<int16_t>(scaled_acc);
212         }
213       }
214     }
215   }
216 }
217 
218 }  // namespace reference_integer_ops
219 }  // namespace tflite
220 
221 #endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
222