/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_COMMON_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_COMMON_H_

#include <algorithm>
#include <cstdint>

#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"

#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif

namespace tflite {

namespace tensor_utils {

// Checks if all entries of vector are zero for float.
bool IsZeroVector(const float* vector, int v_size);

// Checks if all entries of vector are zero for int8.
bool IsZeroVector(const int8_t* vector, int v_size);

// Quantizes a buffer of floating point values using a symmetric quantization
// (i.e. linear quantization without an offset) to 8-bit signed integers.
// It also outputs the range (min, max) of the floating point buffer, and the
// scaling factor used to quantize the values.
void SymmetricQuantizeFloats(const float* values, const int size,
                             int8_t* quantized_values, float* min_value,
                             float* max_value, float* scaling_factor);

// Quantizes a buffer of floating point values using a symmetric quantization
// (i.e. linear quantization without an offset) to 8-bit signed integers.
// It uses the range (min, max) provided to the function to calculate the
// appropriate scaling factor to quantize the values.
void SymmetricQuantizeFloats(const float* values, const int size,
                             int8_t* quantized_values, float min_value,
                             float max_value, float* scaling_factor);
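
// An illustrative sketch (not the exact kernel implementation) of how the
// symmetric scheme typically derives the quantized values from the buffer's
// (min, max) range:
//   const float range = std::max(std::abs(min_value), std::abs(max_value));
//   const float scaling_factor = range / 127;
//   quantized_values[i] = static_cast<int8_t>(std::min(
//       127.0f, std::max(-127.0f, std::round(values[i] / scaling_factor))));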

// Quantizes a buffer of floating point values using asymmetric quantization
// to 8-bit signed integers, producing one scaling factor and one zero-point
// offset for the buffer.
void AsymmetricQuantizeFloats(const float* values, const int size,
                              int8_t* quantized_values, float* scaling_factor,
                              int32_t* offset);

// Helper function to quantize floats.
// float_data_ptr     input float vectors
// n_batch            number of input vectors
// n_data             size of a single input vector
// quantized_data_ptr (out) vector with quantized data
// scaling_factors    (out) scaling factors (one per vector)
// zero_points        (out) zero points (one per vector)
// do_asymmetric      controls if the quantization should be asymmetric.
inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch,
                                int n_data, int8_t* quantized_data_ptr,
                                float* scaling_factors, int32_t* zero_points,
                                bool do_asymmetric) {
  for (int b = 0; b < n_batch; ++b) {
    const int offset = b * n_data;
    if (do_asymmetric) {
      tensor_utils::AsymmetricQuantizeFloats(
          float_data_ptr + offset, n_data, quantized_data_ptr + offset,
          &scaling_factors[b], &zero_points[b]);
    } else {
      float unused_min, unused_max;
      tensor_utils::SymmetricQuantizeFloats(
          float_data_ptr + offset, n_data, quantized_data_ptr + offset,
          &unused_min, &unused_max, &scaling_factors[b]);
    }
  }
}
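
// Example usage (illustrative only; sizes and contents are arbitrary):
//   constexpr int kNumBatches = 2;
//   constexpr int kVectorSize = 4;
//   float input[kNumBatches * kVectorSize] = {/* ... */};
//   int8_t quantized[kNumBatches * kVectorSize];
//   float scaling_factors[kNumBatches];
//   int32_t zero_points[kNumBatches];
//   BatchQuantizeFloats(input, kNumBatches, kVectorSize, quantized,
//                       scaling_factors, zero_points, /*do_asymmetric=*/false);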

// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch
// dimension composed of input vectors independent from each other). The
// result of the multiplication is accumulated into the passed result buffer.
// More specifically, for a matrix M of shape [n, i] and a batched-vector
// of shape [i, batch] it will first compute the product of shape [n, batch].
// This product is accumulated into the result buffer.
void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
                                         int m_cols, const float* vector,
                                         int n_batch, float* result);
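
// Reference semantics, as a naive sketch of what the optimized kernels
// compute (not the actual implementation):
//   for (int b = 0; b < n_batch; ++b)
//     for (int r = 0; r < m_rows; ++r)
//       for (int c = 0; c < m_cols; ++c)
//         result[b * m_rows + r] +=
//             matrix[r * m_cols + c] * vector[b * m_cols + c];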

// Same as the function above, but the matrix is a sparse tensor with block
// pattern 1x4.
// This function assumes that m_cols is a multiple of the block size (4 in this
// case) so that there's no incomplete block.
void SparseMatrixBatchVectorMultiplyAccumulate1x4(
    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
    const int32_t* __restrict__ indices, int m_rows, int m_cols,
    const float* __restrict__ vector, int n_batch, float* __restrict__ result);

// Same as the function above, but the matrix is stored in block compressed
// sparse row format with block pattern 1x16 which consists of two arrays:
//   1. A matrix array stores non-zero blocks of the matrix in row major.
//   2. A ledger array stores nrows groups, one group per row. Each group starts
//      with an integer representing the number of non-zero blocks for the
//      corresponding row and follows with column indexes of the first element
//      of each non-zero block.
// This function assumes that
//   1. m_cols is a multiple of 16 so that all blocks are full blocks.
//   2. m_cols < 254 * 16 so that block index can be represented by uint8.
void SparseMatrixBatchVectorMultiplyAccumulate(
    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
    float* __restrict__ result);
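
// Illustrative ledger example (an inference from the constraints above, not a
// normative description): a row whose non-zero 1x16 blocks cover columns
// [0, 16) and [32, 48) contributes the group {2, 0, 2} to the ledger: the
// block count followed by one entry per block, each entry indexing a 16-wide
// block (which is why m_cols < 254 * 16 keeps the entries within uint8). The
// corresponding 32 non-zero values are stored contiguously in the matrix
// array.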

// Same as the function above, but for values quantized using symmetric
// quantization (e.g. by calling SymmetricQuantizeFloats).
// The passed scaling_factors is a buffer of the quantization scaling factors
// that will be used to dequantize the products into the final result buffer.
// These scaling factors are the multiplication of the matrix scaling factor
// by the vector's scaling factor, one per batch (i.e. this allows quantizing
// each batch in the batch-vector matrix independently).
void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
    const int8_t* __restrict__ vectors,
    const float* __restrict__ scaling_factors, int n_batch,
    float* __restrict__ result);
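
// Per-batch dequantization sketch (illustrative, not the actual kernel code):
//   int32_t acc = 0;
//   for (int c = 0; c < m_cols; ++c)
//     acc += matrix[r * m_cols + c] * vectors[b * m_cols + c];
//   result[b * m_rows + r] += scaling_factors[b] * acc;
// where scaling_factors[b] is the product of the matrix scale and the scale
// of batch b's input vector.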

// Same as the function above except that vector values
// are quantized with asymmetric quantization per-batch and the matrix
// is quantized per row.
void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
    const int8_t* __restrict__ vectors,
    const float* __restrict__ scaling_factors, int n_batch,
    float* __restrict__ result, const float* __restrict__ per_channel_scale,
    const int32_t* __restrict__ input_offset);

// Same as the function above, but the matrix is stored in block compressed
// sparse row format with block pattern 1x16 which consists of two arrays:
//   1. A matrix array stores non-zero blocks of the matrix in row major.
//   2. A ledger array stores nrows groups, one group per row. Each group starts
//      with an integer representing the number of non-zero blocks for the
//      corresponding row followed by column index of the first element of
//      each non-zero block.
// This function assumes that
//   1. m_cols is a multiple of 16 so that all blocks are full blocks.
//   2. m_cols < 254 * 16 so that block index can be represented by uint8.
void SparseMatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const uint8_t* __restrict__ ledger,
    const int m_rows, const int m_cols, const int8_t* __restrict__ vectors,
    const float* __restrict__ scaling_factors, int n_batch,
    float* __restrict__ result);

// Same as the above 8, 8, 8 integer matmul except that it takes a zero point
// into account and does not accumulate into the output.
// TODO(b/148688698): remove this function by folding zero point calculation in
// prepare() function.
void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
                               const int8_t* input_to_gate_weights,
                               int32_t input_to_gate_effective_scale_a,
                               int32_t input_to_gate_effective_scale_b,
                               int32_t n_batch, int32_t n_input, int32_t n_cell,
                               int8_t* gate_output, int8_t gate_output_zp);

// Same as above but has 16 bit and 8 bit input and 8 bit output.
// Used in projection when hidden is 16bit.
void MatrixBatchVectorMultiply(const int16_t* hidden,
                               const int8_t* hidden_to_output_weights,
                               int32_t proj_effective_scale_a,
                               int32_t proj_effective_scale_b,
                               const int32_t* gate_bias, int32_t n_batch,
                               int32_t n_hidden, int32_t n_output,
                               int32_t output_zp, int8_t* proj_output);

// Apply Layer Normalization (https://arxiv.org/abs/1607.06450) to a quantized
// vector.
// Parameters:
//     - input: batch vector of size n_batch * n_input; 16 bit.
//     - layer_norm_weights: the quantized layer normalization weights.
//     - bias: the bias for the layer normalization.
//     - layer_norm_scale_a: multiplier for scale factor.
//     - layer_norm_scale_b: shift for scale factor.
//     - variance_limit: the guard to make sure the inverse does not overflow.
//     - n_batch: the number of batches.
//     - n_input: the size for input and output.
//     - output: the 16 bit output.
void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
                    const int32_t* bias, int32_t layer_norm_scale_a,
                    int32_t layer_norm_scale_b, int32_t variance_limit,
                    int n_batch, int n_input, int16_t* output);
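
// In floating point terms the operation corresponds to (an illustrative
// sketch; the kernel performs the equivalent computation in fixed point):
//   mean = sum(input[b]) / n_input
//   var  = sum((input[b][i] - mean)^2) / n_input
//   output[b][i] = (input[b][i] - mean) / sqrt(var) * weights[i] + bias[i]
// with layer_norm_scale_a and layer_norm_scale_b acting as the multiplier and
// shift that bring the weighted result back to 16 bits.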

// Same as above but the internal calculation is done in float.
void ApplyLayerNormFloat(const int16_t* input,
                         const int16_t* layer_norm_weights,
                         int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
                         const int32_t* bias, int n_batch, int n_input,
                         int16_t* output);

// Apply Sigmoid to a quantized vector.
// Parameters:
//     - input: batch vector of size n_batch * n_input; 16 bit.
//     - n_batch: the number of batches.
//     - n_input: the size for input and output.
//     - output: the 16 bit output.
// The input is in Q3.12 format and the output is in Q0.15 format.
void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
                  int16_t* output);

// Same as above but the internal calculation is done in float.
void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
                       int16_t* output);

// Apply Tanh to a quantized vector.
// Parameters:
//     - integer_bits: the integer bits of the input.
//                     Currently supports 0, 1, 2, 3, 4, 5, 6.
//     - input: batch vector of size n_batch * n_input; 16 bit.
//     - n_batch: the number of batches.
//     - n_input: the size for input and output.
//     - output: the 16 bit output.
// The input is in Qm.(15-m) format, where m = integer_bits, and the output is
// in Q0.15 format.
void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
               int32_t n_input, int16_t* output);
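
// Fixed-point format example (illustrative): with integer_bits = 3 the input
// is in Q3.12, so a raw value of 4096 represents 1.0 (one unit is 2^-12); the
// Q0.15 output uses units of 2^-15, so 32767 is the largest representable
// value, just below 1.0.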

// Apply Tanh to a quantized vector. The internal calculation is in float.
//    - Input has 2^(integer_bits) as scale.
//    - Output has Q0.15 as scale.
void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
                    int32_t integer_bits, int16_t* output);

// Element-wise multiplication of two quantized vectors.
// Parameters:
//     - input_1: batch vector of size n_batch * n_input; 16 bit.
//     - input_2: batch vector of size n_batch * n_input; 16 bit.
//     - n_batch: the number of batches.
//     - n_input: the size for input and output.
//     - shift:   the shift needed to produce the output.
//     - output:  the 16 bit output of size n_batch * n_input.
// Output does not need to be initialized.
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
              int n_input, int shift, int16_t* output);

// Element-wise multiplication of two quantized vectors.
// Parameters:
//     - input_1: batch vector of size n_batch * n_input; 16 bit.
//     - input_2: batch vector of size n_batch * n_input; 16 bit.
//     - n_batch: the number of batches.
//     - n_input: the size for input and output.
//     - shift:   the shift needed to produce the output.
//     - output:  the 8 bit output of size n_batch * n_input.
// Output does not need to be initialized.
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
              int n_input, int shift, int8_t* output);

// Element-wise multiplication of two quantized vectors with rescaling.
// Parameters:
//     - input_1:    batch vector of size n_batch * n_input; 16 bit.
//     - input_2:    batch vector of size n_batch * n_input; 16 bit.
//     - multiplier: the multiplier part of scale.
//     - shift:      the shift part of scale.
//     - n_batch:    the number of batches.
//     - n_input:    the size for input and output.
//     - output:     the 8 bit output of size n_batch * n_input.
//     - output_zp:  the zero point of output.
// Output does not need to be initialized.
// Multiplier ("m") and shift ("s") encode the scale as scale = m * 2^(s - 31).
void CwiseMul(const int16_t* input_1, const int16_t* input_2,
              int32_t multiplier, int32_t shift, int32_t n_batch,
              int32_t n_input, int32_t output_zp, int8_t* output);
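
// Example (illustrative): multiplier m = 1 << 30 with shift s = 0 encodes
// scale = 2^30 * 2^(0 - 31) = 0.5, while the same m with s = 1 encodes 1.0.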

// Element-wise saturating addition of two quantized vectors without rescaling.
// Parameters:
//     - input_1:    batch vector of size n_batch * n_input; 16 bit.
//     - input_2:    batch vector of size n_batch * n_input; 16 bit.
//     - n_batch:    the number of batches.
//     - n_input:    the size for input and output.
//     - output:     the 16 bit output of size n_batch * n_input.
// Output does not need to be initialized.
void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
              int n_input, int16_t* output);

// Element-wise in-place clipping of a vector. Overloaded for float, int16_t,
// int8_t. Parameters:
//     - vector:         vector of size v_size.
//     - v_size:         the size of the vector.
//     - clipping_value: the value used for clipping.
void CwiseClipping(float* vector, const int v_size, const float clipping_value);
void CwiseClipping(int16_t* vector, const int v_size,
                   const int16_t clipping_value);
void CwiseClipping(int8_t* vector, const int v_size,
                   const int8_t clipping_value);

// Dot product of two vectors.
float VectorVectorDotProduct(const float* vector1, const float* vector2,
                             int v_size);

// Dot product of two batch vectors of size n_batch * v_size:
// vector1 = [x_1_1, x_1_2, ..., x_1_vsize,
//            x_2_1, x_2_2, ..., x_2_vsize,
//            ...
//            x_nbatch_1,..., x_nbatch_vsize]
// vector2 = [y_1_1, y_1_2, ..., y_1_vsize,
//            y_2_1, y_2_2, ..., y_2_vsize,
//            ...
//            y_nbatch_1,..., y_nbatch_vsize]
// Then result will be a vector of n_batch size starting from 'result':
// [x_1_1 * y_1_1 + x_1_2 * y_1_2 + ... + x_1_vsize * y_1_vsize,
//  x_2_1 * y_2_1 + x_2_2 * y_2_2 + ... + x_2_vsize * y_2_vsize,
//  ...
//  x_nbatch_1 * y_nbatch_1 + ... + x_nbatch_vsize * y_nbatch_vsize]
template <typename T>
inline void BatchVectorBatchVectorDotProduct(const T* vector1, const T* vector2,
                                             int v_size, int n_batch,
                                             T* result) {
  for (int b = 0; b < n_batch; b++) {
    result[b] = VectorVectorDotProduct(vector1, vector2, v_size);
    vector1 += v_size;
    vector2 += v_size;
  }
}

// Same as above but input is 16bit and output is 32bit.
void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
                                      const int16_t* vector2, int v_size,
                                      int n_batch, int32_t* result);

// Element-wise product of a vector with each vector of a batch-vector,
// accumulated into the result. Inputs are 16-bit integers, the products are
// rescaled with the given multiplier and shift, and the output is 16-bit
// integer.
void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
                                             const int16_t* batch_vector,
                                             int n_batch, int32_t multiplier,
                                             int shift, int16_t* result);

// Compute "1.0f - elements of vector" (used in CIFG).
void Sub1Vector(const float* vector, int v_size, float* result);

// Compute "1.0f - elements of vector" (used in CIFG) for int16 input.
// "vector" has range [0, 32767] because it is the output of sigmoid function.
void Sub1Vector(const int16_t* vector, int v_size, int16_t* result);

// Multiply all elements of vector with a scalar.
void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                          float* result);

// Reduce-sum on a float input vector:
// input_vector: float pointer to input vector.
// output_vector: float pointer to the output vector.
// output_size: output vector size.
// reduction_size: number of consecutive elements from input vector which are
// added to get one element of output.
void ReductionSumVector(const float* input_vector, float* output_vector,
                        int output_size, int reduction_size);

// Same as above but input/output is 32 bit integer.
void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
                        int output_size, int reduction_size);

// Same as above but input is 8 bit integer.
void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
                        int output_size, int reduction_size);
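
// Example (illustrative): with output_size = 2 and reduction_size = 3,
//   input_vector  = {1, 2, 3, 4, 5, 6}
//   output_vector = {1 + 2 + 3, 4 + 5 + 6} = {6, 15}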

// Layer norm for each batch.
void MeanStddevNormalization(const float* __restrict__ input_vector,
                             float* __restrict__ output_vector, int v_size,
                             int n_batch);
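
// In other words, for every batch b (an illustrative description):
//   output[b][i] = (input[b][i] - mean_b) / stddev_b
// where mean_b and stddev_b are computed over the v_size elements of that
// batch, with a small epsilon typically added to the variance for numerical
// stability.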

// Saturating add with rescaling on both inputs.
void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
                          const int8_t* recurrent, int8_t recurrent_zp,
                          int32_t input_effective_scale_a,
                          int32_t input_effective_scale_b,
                          int32_t recurrent_effective_scale_a,
                          int32_t recurrent_effective_scale_b, int32_t n_batch,
                          int32_t n_cell, int16_t* output);

}  // namespace tensor_utils
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_COMMON_H_