/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_

#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"

#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif

namespace tflite {
namespace tensor_utils {

// Check if all entries of a vector are zero for float.
bool IsZeroVector(const float* vector, int v_size) {
  return PortableIsZeroVector(vector, v_size);
}

// Check if all entries of a vector are zero for int8_t.
bool IsZeroVector(const int8_t* vector, int v_size) {
  return PortableIsZeroVector(vector, v_size);
}

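// Symmetric quantization of float values to int8. The first overload also
// computes and returns the min/max of `values`; the second uses a precomputed
// [min_value, max_value] range. Both write the scaling factor used.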
void SymmetricQuantizeFloats(const float* values, const int size,
                             int8_t* quantized_values, float* min, float* max,
                             float* scaling_factor) {
  PortableSymmetricQuantizeFloats(values, size, quantized_values, min, max,
                                  scaling_factor);
}

void SymmetricQuantizeFloats(const float* values, const int size,
                             int8_t* quantized_values, float min_value,
                             float max_value, float* scaling_factor) {
  PortableSymmetricQuantizeFloats(values, size, quantized_values, min_value,
                                  max_value, scaling_factor);
}

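// Asymmetric quantization of float values to int8, writing the scaling factor
// and zero-point offset.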
void AsymmetricQuantizeFloats(const float* values, const int size,
                              int8_t* quantized_values, float* scaling_factor,
                              int32_t* offset) {
  PortableAsymmetricQuantizeFloats(values, size, quantized_values,
                                   scaling_factor, offset);
}

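// result += matrix * vector for each of the n_batch vectors (float path).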
void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
                                         int m_cols, const float* vector,
                                         int n_batch, float* result) {
  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
                                              n_batch, result);
}

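// Hybrid path: int8 matrix and batched int8 vectors with per-batch scaling
// factors, accumulated into a float result.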
void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
                                         const int m_rows, const int m_cols,
                                         const int8_t* __restrict__ vector,
                                         const float* scaling_factors,
                                         int n_batch,
                                         float* __restrict__ result) {
  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
                                              scaling_factors, n_batch, result);
}

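// Hybrid path that additionally handles per-channel scales and a non-zero
// input offset; matrix row sums may be cached via row_sums / compute_row_sums.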
void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
    const int8_t* __restrict__ vectors, const float* scaling_factors,
    int n_batch, float* __restrict__ result, const float* per_channel_scale,
    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
    bool* compute_row_sums, CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(
      matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
      per_channel_scale, input_offset, scratch, row_sums, compute_row_sums,
      context);
}

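// Hybrid path taking a scratch buffer and CPU backend context; the portable
// reference implementation does not use either of them.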
void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
                                         const int m_rows, const int m_cols,
                                         const int8_t* __restrict__ vector,
                                         const float* scaling_factors,
                                         int n_batch, int32_t* scratch,
                                         float* __restrict__ result,
                                         CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
                                              scaling_factors, n_batch, result);
}

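// Sparse float matrix (1x4 blocks described by segments/indices) times batched
// vectors, accumulated into result.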
void SparseMatrixBatchVectorMultiplyAccumulate1x4(
    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
    const int32_t* __restrict__ indices, int m_rows, int m_cols,
    const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
  PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
      matrix, segments, indices, m_rows, m_cols, vector, n_batch, result);
}

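// Ledger-encoded sparse float matrix times batched float vectors.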
void SparseMatrixBatchVectorMultiplyAccumulate(
    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
    float* __restrict__ result) {
  PortableSparseMatrixBatchVectorMultiplyAccumulate(
      matrix, ledger, m_rows, m_cols, vector, n_batch, result);
}

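// Ledger-encoded sparse int8 matrix times batched int8 vectors with per-batch
// scaling factors (hybrid path).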
void SparseMatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
    const int m_cols, const int8_t* __restrict__ vectors,
    const float* scaling_factors, int n_batch, float* __restrict__ result) {
  PortableSparseMatrixBatchVectorMultiplyAccumulate(
      matrix, ledger, m_rows, m_cols, vectors, scaling_factors, n_batch,
      result);
}

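// Fully quantized matrix * batched vector plus bias, rescaled by
// (multiplier, shift); writes saturated int16 output.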
void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* input, const int32_t* bias,
    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
    int32_t* scratch, int16_t* output, CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(
      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
      n_output, output_zp, scratch, output, context);
}

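// Same as above, but writes int8 output using the output_zp zero point.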
void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* input, const int32_t* bias,
    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
    int32_t* scratch, int8_t* output, CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(
      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
      n_output, output_zp, scratch, output, context);
}

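// output += matrix * scalar, element-wise.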
void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
                                    int32_t n_row, int32_t n_col,
                                    int32_t* output) {
  PortableMatrixScalarMultiplyAccumulate(matrix, scalar, n_row, n_col, output);
}

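// Quantized matrix * batched int8 vector (with input zero point removed),
// rescaled by the effective scale and written as int8 gate output with
// gate_output_zp.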
void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
                               const int8_t* input_to_gate_weights,
                               int32_t input_to_gate_effective_scale_a,
                               int32_t input_to_gate_effective_scale_b,
                               int32_t n_batch, int32_t n_input, int32_t n_cell,
                               int8_t* gate_output, int8_t gate_output_zp) {
  PortableMatrixBatchVectorMultiply(
      input, input_zeropoint, input_to_gate_weights,
      input_to_gate_effective_scale_a, input_to_gate_effective_scale_b, n_batch,
      n_input, n_cell, gate_output, gate_output_zp);
}

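// Projection variant: int16 hidden state times int8 weights plus bias,
// rescaled and written as int8 output with zero point output_zp.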
void MatrixBatchVectorMultiply(const int16_t* hidden,
                               const int8_t* hidden_to_output_weights,
                               int32_t proj_effective_scale_a,
                               int32_t proj_effective_scale_b,
                               const int32_t* gate_bias, int32_t n_batch,
                               int32_t n_hidden, int32_t n_output,
                               int32_t output_zp, int8_t* proj_output) {
  PortableMatrixBatchVectorMultiply(hidden, hidden_to_output_weights,
                                    proj_effective_scale_a,
                                    proj_effective_scale_b, gate_bias, n_batch,
                                    n_hidden, n_output, output_zp, proj_output);
}

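// Quantized layer normalization of each batch of int16 input using the given
// weights, bias and rescaling parameters.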
void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
                    const int32_t* bias, int32_t layer_norm_scale_a,
                    int32_t layer_norm_scale_b, int32_t variance_limit,
                    int n_batch, int n_input, int16_t* output) {
  PortableApplyLayerNorm(input, layer_norm_weights, bias, layer_norm_scale_a,
                         layer_norm_scale_b, variance_limit, n_batch, n_input,
                         output);
}

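// Layer normalization computed with float arithmetic internally, with int16
// input and output.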
void ApplyLayerNormFloat(const int16_t* input,
                         const int16_t* layer_norm_weights,
                         int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
                         const int32_t* bias, int n_batch, int n_input,
                         int16_t* output) {
  PortableApplyLayerNormFloat(input, layer_norm_weights, layer_norm_scale_a,
                              layer_norm_scale_b, bias, n_batch, n_input,
                              output);
}

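// Element-wise sigmoid on int16 input; the Float variant evaluates the
// activation in float internally.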
void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
                  int16_t* output) {
  PortableApplySigmoid(input, n_batch, n_input, output);
}

void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
                       int16_t* output) {
  PortableApplySigmoidFloat(input, n_batch, n_input, output);
}

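// Element-wise tanh on int16 input; integer_bits selects the input Q format.
// The Float variant evaluates the activation in float internally.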
void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
               int32_t n_input, int16_t* output) {
  PortableApplyTanh(integer_bits, input, n_batch, n_input, output);
}

void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
                    int32_t integer_bits, int16_t* output) {
  PortableApplyTanhFloat(input, n_batch, n_input, integer_bits, output);
}

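// Element-wise multiply of two batched int16 vectors. The first overload
// right-shifts the product by `shift`; the second requantizes to int8 using
// (multiplier, shift) and output_zp.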
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
              int n_input, int shift, int16_t* output) {
  PortableCwiseMul(input_1, input_2, n_batch, n_input, shift, output);
}

void CwiseMul(const int16_t* input_1, const int16_t* input_2,
              int32_t multiplier, int32_t shift, int32_t n_batch,
              int32_t n_input, int32_t output_zp, int8_t* output) {
  PortableCwiseMul(input_1, input_2, multiplier, shift, n_batch, n_input,
                   output_zp, output);
}

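// Element-wise saturating add of two batched int16 vectors.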
void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
              int n_input, int16_t* output) {
  PortableCwiseAdd(input_1, input_2, n_batch, n_input, output);
}

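// Clamp each element of vector to [-clipping_value, clipping_value], in place
// (float, int16_t and int8_t overloads).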
void CwiseClipping(float* vector, const int v_size,
                   const float clipping_value) {
  PortableCwiseClipping(vector, v_size, clipping_value);
}

void CwiseClipping(int16_t* vector, const int v_size,
                   const int16_t clipping_value) {
  PortableCwiseClipping(vector, v_size, clipping_value);
}

void CwiseClipping(int8_t* vector, const int v_size,
                   const int8_t clipping_value) {
  PortableCwiseClipping(vector, v_size, clipping_value);
}

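// Element-wise product of vector with each batch of batch_vector, rescaled by
// (multiplier, shift) and accumulated into result.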
void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
                                             const int16_t* batch_vector,
                                             int n_batch, int32_t multiplier,
                                             int shift, int16_t* result) {
  PortableVectorBatchVectorCwiseProductAccumulate(
      vector, v_size, batch_vector, n_batch, multiplier, shift, result);
}

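// Dot product of two float vectors.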
float VectorVectorDotProduct(const float* vector1, const float* vector2,
                             int v_size) {
  return PortableVectorVectorDotProduct(vector1, vector2, v_size);
}

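// Per-batch dot products of two batched int16 vectors, written as int32.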
void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
                                      const int16_t* vector2, int v_size,
                                      int n_batch, int32_t* result) {
  PortableBatchVectorBatchVectorDotProduct(vector1, vector2, v_size, n_batch,
                                           result);
}

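// Computes (1 - x) element-wise; the int16_t overload operates on fixed-point
// values.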
void Sub1Vector(const float* vector, int v_size, float* result) {
  PortableSub1Vector(vector, v_size, result);
}

void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) {
  PortableSub1Vector(vector, v_size, result);
}

// Multiply all elements of vector with a scalar.
void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                          float* result) {
  PortableVectorScalarMultiply(vector, v_size, scale, result);
}

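// Sums each group of reduction_size consecutive input elements into one output
// element (float, int32_t and int8_t-to-int32_t overloads).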
void ReductionSumVector(const float* input_vector, float* output_vector,
                        int output_size, int reduction_size) {
  PortableReductionSumVector(input_vector, output_vector, output_size,
                             reduction_size);
}

void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
                        int output_size, int reduction_size) {
  PortableReductionSumVector(input_vector, output_vector, output_size,
                             reduction_size);
}

void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
                        int output_size, int reduction_size) {
  PortableReductionSumVector(input_vector, output_vector, output_size,
                             reduction_size);
}

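// Normalizes each batch row of input_vector to zero mean and unit standard
// deviation.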
void MeanStddevNormalization(const float* __restrict__ input_vector,
                             float* __restrict__ output_vector, int v_size,
                             int n_batch) {
  PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch);
}

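// Rescales the int8 input and recurrent contributions by their effective
// scales and adds them with saturation into int16 output.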
void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
                          const int8_t* recurrent, int8_t recurrent_zp,
                          int32_t input_effective_scale_a,
                          int32_t input_effective_scale_b,
                          int32_t recurrent_effective_scale_a,
                          int32_t recurrent_effective_scale_b, int32_t n_batch,
                          int32_t n_cell, int16_t* output) {
  PortableTwoGateSaturatingAdd(
      input, input_zp, recurrent, recurrent_zp, input_effective_scale_a,
      input_effective_scale_b, recurrent_effective_scale_a,
      recurrent_effective_scale_b, n_batch, n_cell, output);
}

}  // namespace tensor_utils
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_