/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_

#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"

#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif

namespace tflite {
namespace tensor_utils {

// Checks if all entries of a float vector are zero.
bool IsZeroVector(const float* vector, int v_size) {
  return PortableIsZeroVector(vector, v_size);
}

// Checks if all entries of an int8_t vector are zero.
bool IsZeroVector(const int8_t* vector, int v_size) {
  return PortableIsZeroVector(vector, v_size);
}

void SymmetricQuantizeFloats(const float* values, const int size,
                             int8_t* quantized_values, float* min, float* max,
                             float* scaling_factor) {
  PortableSymmetricQuantizeFloats(values, size, quantized_values, min, max,
                                  scaling_factor);
}

void SymmetricQuantizeFloats(const float* values, const int size,
                             int8_t* quantized_values, float min_value,
                             float max_value, float* scaling_factor) {
  PortableSymmetricQuantizeFloats(values, size, quantized_values, min_value,
                                  max_value, scaling_factor);
}

void AsymmetricQuantizeFloats(const float* values, const int size,
                              int8_t* quantized_values, float* scaling_factor,
                              int32_t* offset) {
  PortableAsymmetricQuantizeFloats(values, size, quantized_values,
                                   scaling_factor, offset);
}
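
// Illustrative sketch of the quantization helpers above (buffer names and
// sizes are hypothetical): SymmetricQuantizeFloats maps floats to int8 with a
// single per-buffer scale, so values can be approximately reconstructed as
// quantized * scaling_factor; the asymmetric variant additionally returns an
// offset (zero point), giving roughly scaling_factor * (quantized - offset).
//
//   float values[8] = {...};
//   int8_t quantized[8];
//   float min, max, scaling_factor;
//   tflite::tensor_utils::SymmetricQuantizeFloats(values, 8, quantized, &min,
//                                                 &max, &scaling_factor);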

void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
                                         int m_cols, const float* vector,
                                         int n_batch, float* result) {
  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
                                              n_batch, result);
}
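
// Shape convention for the MatrixBatchVectorMultiplyAccumulate overloads (as
// in the portable reference implementation): `matrix` is row-major
// [m_rows x m_cols], `vector` holds n_batch contiguous vectors of m_cols
// entries, and `result` holds n_batch contiguous vectors of m_rows entries
// that are accumulated into, i.e. roughly:
//
//   for (int b = 0; b < n_batch; ++b)
//     for (int r = 0; r < m_rows; ++r)
//       for (int c = 0; c < m_cols; ++c)
//         result[b * m_rows + r] +=
//             matrix[r * m_cols + c] * vector[b * m_cols + c];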

void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
                                         const int m_rows, const int m_cols,
                                         const int8_t* __restrict__ vector,
                                         const float* scaling_factors,
                                         int n_batch,
                                         float* __restrict__ result) {
  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
                                              scaling_factors, n_batch, result);
}

void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
    const int8_t* __restrict__ vectors, const float* scaling_factors,
    int n_batch, float* __restrict__ result, const float* per_channel_scale,
    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
    bool* compute_row_sums, CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(
      matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
      per_channel_scale, input_offset, scratch, row_sums, compute_row_sums,
      context);
}

void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
                                         const int m_rows, const int m_cols,
                                         const int8_t* __restrict__ vector,
                                         const float* scaling_factors,
                                         int n_batch, int32_t* scratch,
                                         float* __restrict__ result,
                                         CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
                                              scaling_factors, n_batch, result);
}
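
// Note: in this portable reference path the `scratch` and `context` arguments
// of the overload above are unused; the call falls through to the same kernel
// as the plain scaling-factor variant. A rough sketch of the hybrid semantics
// (a per-batch scale converts the integer dot product back to float):
//
//   for (int b = 0; b < n_batch; ++b)
//     for (int r = 0; r < m_rows; ++r) {
//       int32_t acc = 0;
//       for (int c = 0; c < m_cols; ++c)
//         acc += matrix[r * m_cols + c] * vector[b * m_cols + c];
//       result[b * m_rows + r] += scaling_factors[b] * acc;
//     }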

void SparseMatrixBatchVectorMultiplyAccumulate1x4(
    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
    const int32_t* __restrict__ indices, int m_rows, int m_cols,
    const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
  PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
      matrix, segments, indices, m_rows, m_cols, vector, n_batch, result);
}

void SparseMatrixBatchVectorMultiplyAccumulate(
    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
    float* __restrict__ result) {
  PortableSparseMatrixBatchVectorMultiplyAccumulate(
      matrix, ledger, m_rows, m_cols, vector, n_batch, result);
}

void SparseMatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
    const int m_cols, const int8_t* __restrict__ vectors,
    const float* scaling_factors, int n_batch, float* __restrict__ result) {
  PortableSparseMatrixBatchVectorMultiplyAccumulate(
      matrix, ledger, m_rows, m_cols, vectors, scaling_factors, n_batch,
      result);
}

void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* input, const int32_t* bias,
    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
    int32_t* scratch, int16_t* output, CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(
      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
      n_output, output_zp, scratch, output, context);
}

void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* input, const int32_t* bias,
    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
    int32_t* scratch, int8_t* output, CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(
      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
      n_output, output_zp, scratch, output, context);
}

void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
                                    int32_t n_row, int32_t n_col,
                                    int32_t* output) {
  PortableMatrixScalarMultiplyAccumulate(matrix, scalar, n_row, n_col, output);
}

void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
                               const int8_t* input_to_gate_weights,
                               int32_t input_to_gate_effective_scale_a,
                               int32_t input_to_gate_effective_scale_b,
                               int32_t n_batch, int32_t n_input, int32_t n_cell,
                               int8_t* gate_output, int8_t gate_output_zp) {
  PortableMatrixBatchVectorMultiply(
      input, input_zeropoint, input_to_gate_weights,
      input_to_gate_effective_scale_a, input_to_gate_effective_scale_b, n_batch,
      n_input, n_cell, gate_output, gate_output_zp);
}

void MatrixBatchVectorMultiply(const int16_t* hidden,
                               const int8_t* hidden_to_output_weights,
                               int32_t proj_effective_scale_a,
                               int32_t proj_effective_scale_b,
                               const int32_t* gate_bias, int32_t n_batch,
                               int32_t n_hidden, int32_t n_output,
                               int32_t output_zp, int8_t* proj_output) {
  PortableMatrixBatchVectorMultiply(hidden, hidden_to_output_weights,
                                    proj_effective_scale_a,
                                    proj_effective_scale_b, gate_bias, n_batch,
                                    n_hidden, n_output, output_zp, proj_output);
}

void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
                    const int32_t* bias, int32_t layer_norm_scale_a,
                    int32_t layer_norm_scale_b, int32_t variance_limit,
                    int n_batch, int n_input, int16_t* output) {
  PortableApplyLayerNorm(input, layer_norm_weights, bias, layer_norm_scale_a,
                         layer_norm_scale_b, variance_limit, n_batch, n_input,
                         output);
}

void ApplyLayerNormFloat(const int16_t* input,
                         const int16_t* layer_norm_weights,
                         int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
                         const int32_t* bias, int n_batch, int n_input,
                         int16_t* output) {
  PortableApplyLayerNormFloat(input, layer_norm_weights, layer_norm_scale_a,
                              layer_norm_scale_b, bias, n_batch, n_input,
                              output);
}

void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
                  int16_t* output) {
  PortableApplySigmoid(input, n_batch, n_input, output);
}

void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
                       int16_t* output) {
  PortableApplySigmoidFloat(input, n_batch, n_input, output);
}

void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
               int32_t n_input, int16_t* output) {
  PortableApplyTanh(integer_bits, input, n_batch, n_input, output);
}
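
// Fixed-point convention for ApplyTanh above (as used by the portable
// reference kernels): `integer_bits` is the number of integer bits of the
// Q-format input, i.e. the input is interpreted as
// Q<integer_bits>.<15 - integer_bits>, and the tanh output is written in
// Q0.15.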

void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
                    int32_t integer_bits, int16_t* output) {
  PortableApplyTanhFloat(input, n_batch, n_input, integer_bits, output);
}

void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
              int n_input, int shift, int16_t* output) {
  PortableCwiseMul(input_1, input_2, n_batch, n_input, shift, output);
}

void CwiseMul(const int16_t* input_1, const int16_t* input_2,
              int32_t multiplier, int32_t shift, int32_t n_batch,
              int32_t n_input, int32_t output_zp, int8_t* output) {
  PortableCwiseMul(input_1, input_2, multiplier, shift, n_batch, n_input,
                   output_zp, output);
}

void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
              int n_input, int16_t* output) {
  PortableCwiseAdd(input_1, input_2, n_batch, n_input, output);
}

void CwiseClipping(float* vector, const int v_size,
                   const float clipping_value) {
  PortableCwiseClipping(vector, v_size, clipping_value);
}

void CwiseClipping(int16_t* vector, const int v_size,
                   const int16_t clipping_value) {
  PortableCwiseClipping(vector, v_size, clipping_value);
}

void CwiseClipping(int8_t* vector, const int v_size,
                   const int8_t clipping_value) {
  PortableCwiseClipping(vector, v_size, clipping_value);
}
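
// The CwiseClipping overloads clamp each element in place to the symmetric
// range [-clipping_value, clipping_value]. Illustrative sketch (hypothetical
// values):
//
//   float v[3] = {-5.0f, 0.5f, 5.0f};
//   tflite::tensor_utils::CwiseClipping(v, 3, 2.0f);
//   // v is now {-2.0f, 0.5f, 2.0f}.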

void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
                                             const int16_t* batch_vector,
                                             int n_batch, int32_t multiplier,
                                             int shift, int16_t* result) {
  PortableVectorBatchVectorCwiseProductAccumulate(
      vector, v_size, batch_vector, n_batch, multiplier, shift, result);
}

float VectorVectorDotProduct(const float* vector1, const float* vector2,
                             int v_size) {
  return PortableVectorVectorDotProduct(vector1, vector2, v_size);
}

void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
                                      const int16_t* vector2, int v_size,
                                      int n_batch, int32_t* result) {
  PortableBatchVectorBatchVectorDotProduct(vector1, vector2, v_size, n_batch,
                                           result);
}

void Sub1Vector(const float* vector, int v_size, float* result) {
  PortableSub1Vector(vector, v_size, result);
}

void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) {
  PortableSub1Vector(vector, v_size, result);
}
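
// Sub1Vector computes an element-wise (1 - x): result[i] = 1.0f - vector[i]
// for the float overload; the int16_t overload works in the Q0.15 convention
// of the portable reference kernel, where "1" is represented as 32767.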

// Multiplies each element of the int8_t vector by a float scale, producing
// float results.
void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                          float* result) {
  PortableVectorScalarMultiply(vector, v_size, scale, result);
}

void ReductionSumVector(const float* input_vector, float* output_vector,
                        int output_size, int reduction_size) {
  PortableReductionSumVector(input_vector, output_vector, output_size,
                             reduction_size);
}

void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
                        int output_size, int reduction_size) {
  PortableReductionSumVector(input_vector, output_vector, output_size,
                             reduction_size);
}

void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
                        int output_size, int reduction_size) {
  PortableReductionSumVector(input_vector, output_vector, output_size,
                             reduction_size);
}

void MeanStddevNormalization(const float* __restrict__ input_vector,
                             float* __restrict__ output_vector, int v_size,
                             int n_batch) {
  PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch);
}
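
// MeanStddevNormalization normalizes each of the n_batch rows of v_size
// elements to zero mean and unit standard deviation. Illustrative sketch
// (hypothetical values):
//
//   float in[4] = {1.0f, 3.0f, 1.0f, 3.0f};  // one batch, v_size = 4
//   float out[4];
//   tflite::tensor_utils::MeanStddevNormalization(in, out, 4, 1);
//   // out is approximately {-1.0f, 1.0f, -1.0f, 1.0f}.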

void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
                          const int8_t* recurrent, int8_t recurrent_zp,
                          int32_t input_effective_scale_a,
                          int32_t input_effective_scale_b,
                          int32_t recurrent_effective_scale_a,
                          int32_t recurrent_effective_scale_b, int32_t n_batch,
                          int32_t n_cell, int16_t* output) {
  PortableTwoGateSaturatingAdd(
      input, input_zp, recurrent, recurrent_zp, input_effective_scale_a,
      input_effective_scale_b, recurrent_effective_scale_a,
      recurrent_effective_scale_b, n_batch, n_cell, output);
}

}  // namespace tensor_utils
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_