1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include <stddef.h>
16
17 #include <algorithm>
18 #include <cmath>
19 #include <cstdint>
20 #include <functional>
21 #include <limits>
22
23 #include "tensorflow/lite/c/builtin_op_data.h"
24 #include "tensorflow/lite/c/common.h"
25 #include "tensorflow/lite/kernels/cpu_backend_context.h"
26 #include "tensorflow/lite/kernels/internal/common.h"
27 #include "tensorflow/lite/kernels/internal/compatibility.h"
28 #include "tensorflow/lite/kernels/internal/cppmath.h"
29 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
30 #include "tensorflow/lite/kernels/internal/quantization_util.h"
31 #include "tensorflow/lite/kernels/internal/reference/binary_function.h"
32 #include "tensorflow/lite/kernels/internal/reference/integer_ops/log_softmax.h"
33 #include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
34 #include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
35 #include "tensorflow/lite/kernels/internal/reference/logistic.h"
36 #include "tensorflow/lite/kernels/internal/reference/prelu.h"
37 #include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
38 #include "tensorflow/lite/kernels/internal/reference/softmax.h"
39 #include "tensorflow/lite/kernels/internal/reference/tanh.h"
40 #include "tensorflow/lite/kernels/internal/tensor.h"
41 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
42 #include "tensorflow/lite/kernels/internal/types.h"
43 #include "tensorflow/lite/kernels/kernel_util.h"
44
45 #if __aarch64__ && __clang__
46 #include <arm_neon.h>
47 #endif
48
49 namespace tflite {
50 namespace ops {
51 namespace builtin {
52 namespace activations {
53
54 // TODO(b/142762739): We should figure out a multi-threading plan for most of
55 // the activation ops below.
56
57 enum KernelType {
58 kReference,
59 kGenericOptimized,
60 kFixedPointOptimized,
61 };
62
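// Per-node quantization state shared by the simple activations: fixed-point
// input rescaling parameters used by the 16-bit kernels and a 256-entry
// lookup table used by the 8-bit table-driven kernels.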
63 struct OpData {
64 int32_t input_multiplier = 0;
65 int input_left_shift = 0;
66 int32_t input_range_radius = 0;
67 int diff_min = 0;
68 uint8_t table[256] = {0};
69 };
70
71 struct SoftmaxOpData {
72 struct SoftmaxParams params = {};
73 float table[256];
74 #ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
75 uint8_t uint8_table1[256];
76 uint8_t uint8_table2[256];
77 #endif
78 static constexpr int kInt16LUTArraySize = lut_size<int16_t>();
79 int16_t exp_lut[kInt16LUTArraySize];  // int16 LUT for exp(x), where x is
80 // uniformly distributed in [-10.0, 0.0].
81 int16_t one_over_one_plus_x_lut[kInt16LUTArraySize];  // int16 LUT for
82 // 1 / (1 + x), where x is
83 // uniformly distributed in
84 // [0.0, 1.0].
85 };
86
87 struct LogSoftmaxOpData : public OpData {
88 int32_t reverse_scaling_divisor = 0;
89 int32_t reverse_scaling_right_shift = 0;
90 struct SoftmaxParams params = {};
91 float f_table[256];
92 };
93
94 struct LeakyReluOpData : public OpData {
95 int32_t output_multiplier_alpha = 0;
96 int32_t output_shift_alpha = 0;
97 int32_t output_multiplier_identity = 0;
98 int32_t output_shift_identity = 0;
99 };
100
101 struct PreluOpData : public OpData {
102 int32_t output_multiplier_1 = 0;
103 int32_t output_shift_1 = 0;
104 int32_t output_multiplier_2 = 0;
105 int32_t output_shift_2 = 0;
106 bool requires_broadcast;
107 };
108
109 struct HardSwishData {
110 HardSwishParams params;
111 };
112
113 struct ReluOpData : public OpData {
114 int32_t output_multiplier = 0;
115 int output_shift = 0;
116 };
117
118 namespace {
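// Builds a 256-entry lookup table that maps every possible 8-bit quantized
// input value to the quantized result of `transform`, so Eval can apply the
// activation with a single table lookup per element.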
119 template <typename T>
120 void PopulateLookupTable(struct OpData* data, const TfLiteTensor* input,
121 TfLiteTensor* output,
122 const std::function<float(float)>& transform) {
123 static_assert(sizeof(T) == 1, "Lookup table valid only for 8bit");
124 const float inverse_scale = 1 / output->params.scale;
125 int32_t maxval = std::numeric_limits<T>::max();
126 int32_t minval = std::numeric_limits<T>::min();
127 for (int32_t val = minval; val <= maxval; ++val) {
128 const float dequantized =
129 input->params.scale * (val - input->params.zero_point);
130 const float transformed = transform(dequantized);
131 const float rescaled = std::round(transformed * inverse_scale);
132 const int32_t quantized =
133 static_cast<int32_t>(rescaled + output->params.zero_point);
134 data->table[static_cast<uint8_t>(static_cast<T>(val))] =
135 static_cast<uint8_t>(
136 static_cast<T>(std::max(std::min(maxval, quantized), minval)));
137 }
138 }
139
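// Applies the 8-bit lookup table built by PopulateLookupTable to every
// element of `input`, using a NEON-vectorized table lookup on AArch64/clang
// and a scalar loop elsewhere.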
140 // TODO(b/143696793): move this to optimized_ops.
141 void EvalUsingLookupTable(struct OpData* data, const TfLiteTensor* input,
142 TfLiteTensor* output) {
143 const int size =
144 MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
145 uint8_t* output_data = GetTensorData<uint8_t>(output);
146 const uint8_t* input_data = GetTensorData<uint8_t>(input);
147 int i = 0;
148 #if __aarch64__ && __clang__
149 // This code uses ARM64-only instructions.
150 // TODO(b/143709993): Port to ARMv7
151
152 // Load the tables into registers. (4*4 128-bit registers)
153 uint8x16x4_t table[4];
154 table[0] = vld1q_u8_x4(data->table + 16 * 4 * 0);
155 table[1] = vld1q_u8_x4(data->table + 16 * 4 * 1);
156 table[2] = vld1q_u8_x4(data->table + 16 * 4 * 2);
157 table[3] = vld1q_u8_x4(data->table + 16 * 4 * 3);
158
159 // Vectorized loop; process uint8x16_t (16 elements) at a time.
160 constexpr int vectorized_16_loop_step = 16;
161 const int vectorized_16_loop_end =
162 size / vectorized_16_loop_step * vectorized_16_loop_step;
163 for (; i < vectorized_16_loop_end; i += vectorized_16_loop_step) {
164 uint8x16_t input = vld1q_u8(input_data + i);
165 uint8x16_t output = optimized_ops::aarch64_lookup_vector(table, input);
166 vst1q_u8(output_data + i, output);
167 }
168 // Postamble and non-ARM64 code: simple for loop.
169 #endif
170 for (; i < size; ++i) {
171 output_data[i] = data->table[input_data[i]];
172 }
173 }
174
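// Quantized ReLU-family helper: converts the float activation bounds
// [act_min, act_max] into quantized clamp limits for type T and dispatches to
// optimized_ops::ReluX. An infinite act_max clamps only at the type's maximum.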
175 template <typename T>
176 void QuantizedReluX(float act_min, float act_max, const TfLiteTensor* input,
177 TfLiteTensor* output, const ReluOpData* data) {
178 ReluParams params;
179 params.quantized_activation_min =
180 std::max(static_cast<int32_t>(std::numeric_limits<T>::min()),
181 output->params.zero_point +
182 static_cast<int32>(roundf(act_min / output->params.scale)));
183 params.quantized_activation_max =
184 act_max == std::numeric_limits<float>::infinity()
185 ? static_cast<int32_t>(std::numeric_limits<T>::max())
186 : std::min(
187 static_cast<int32_t>(std::numeric_limits<T>::max()),
188 output->params.zero_point +
189 static_cast<int32>(roundf(act_max / output->params.scale)));
190 params.input_offset = input->params.zero_point;
191 params.output_offset = output->params.zero_point;
192 params.output_multiplier = data->output_multiplier;
193 params.output_shift = data->output_shift;
194 optimized_ops::ReluX(params, GetTensorShape(input), GetTensorData<T>(input),
195 GetTensorShape(output), GetTensorData<T>(output));
196 }
197
198 } // namespace
199
200 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
201 // This is a builtin op, so we don't use the contents in 'buffer', if any.
202 // Instead, we allocate a new object to carry information from Prepare() to
203 // Eval().
204 return new OpData;
205 }
206
207 void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
208 return new SoftmaxOpData;
209 }
210
211 void SoftmaxFree(TfLiteContext* context, void* buffer) {
212 delete reinterpret_cast<SoftmaxOpData*>(buffer);
213 }
214
215 void* LogSoftmaxInit(TfLiteContext* context, const char* buffer,
216 size_t length) {
217 return new LogSoftmaxOpData;
218 }
219
220 void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) {
221 return new PreluOpData;
222 }
223
224 void Free(TfLiteContext* context, void* buffer) {
225 delete reinterpret_cast<OpData*>(buffer);
226 }
227
228 void LogSoftmaxFree(TfLiteContext* context, void* buffer) {
229 delete reinterpret_cast<LogSoftmaxOpData*>(buffer);
230 }
231
232 void PreluFree(TfLiteContext* context, void* buffer) {
233 delete reinterpret_cast<PreluOpData*>(buffer);
234 }
235
236 void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
237 return new HardSwishData;
238 }
239
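// Shared Prepare for element-wise activations: checks for a single input and
// a single output of the same type and resizes the output to the input shape.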
240 TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
241 TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
242 TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
243 const TfLiteTensor* input;
244 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
245 TfLiteTensor* output;
246 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
247 TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
248
249 return context->ResizeTensor(context, output,
250 TfLiteIntArrayCopy(input->dims));
251 }
252
253 void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
254 return new ReluOpData;
255 }
256
257 void ReluFree(TfLiteContext* context, void* buffer) {
258 delete reinterpret_cast<ReluOpData*>(buffer);
259 }
260
261 TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
262 ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
263 TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
264 TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
265 const TfLiteTensor* input;
266 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
267 TfLiteTensor* output;
268 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
269 TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
270
271 if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 ||
272 input->type == kTfLiteInt16) {
273 double real_multiplier = input->params.scale / output->params.scale;
274 QuantizeMultiplier(real_multiplier, &data->output_multiplier,
275 &data->output_shift);
276 }
277
278 if (input->type == kTfLiteInt16) {
279 TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
280 TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
281 }
282
283 return context->ResizeTensor(context, output,
284 TfLiteIntArrayCopy(input->dims));
285 }
286
287 void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) {
288 return new LeakyReluOpData;
289 }
290
291 void LeakyReluFree(TfLiteContext* context, void* buffer) {
292 delete reinterpret_cast<LeakyReluOpData*>(buffer);
293 }
294
295 void HardSwishFree(TfLiteContext* context, void* buffer) {
296 delete static_cast<HardSwishData*>(buffer);
297 }
298
299 TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
300 TF_LITE_ENSURE_STATUS(GenericPrepare(context, node));
301 TfLiteTensor* output;
302 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
303
304 if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
305 HardSwishData* data = static_cast<HardSwishData*>(node->user_data);
306 HardSwishParams* params = &data->params;
307 const TfLiteTensor* input;
308 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
309 params->input_zero_point = input->params.zero_point;
310 params->output_zero_point = output->params.zero_point;
311 const float input_scale = input->params.scale;
312 const float hires_input_scale = (1.0f / 128.0f) * input_scale;
313 const float reluish_scale = 3.0f / 32768.0f;
314 const float output_scale = output->params.scale;
315
316 const float output_multiplier = hires_input_scale / output_scale;
317
318 int32_t output_multiplier_fixedpoint_int32;
319 QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
320 &params->output_multiplier_exponent);
321 DownScaleInt32ToInt16Multiplier(
322 output_multiplier_fixedpoint_int32,
323 &params->output_multiplier_fixedpoint_int16);
324 TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
325
326 const float reluish_multiplier = hires_input_scale / reluish_scale;
327 int32_t reluish_multiplier_fixedpoint_int32;
328 QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
329 &params->reluish_multiplier_exponent);
330 DownScaleInt32ToInt16Multiplier(
331 reluish_multiplier_fixedpoint_int32,
332 &params->reluish_multiplier_fixedpoint_int16);
333 }
334 return kTfLiteOk;
335 }
336
337 TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) {
338 TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
339 TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
340 const TfLiteTensor* input;
341 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
342 TfLiteTensor* output;
343 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
344 TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
345
346 LeakyReluOpData* data = reinterpret_cast<LeakyReluOpData*>(node->user_data);
347
348 if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
349 output->type == kTfLiteInt16) {
350 const auto* params =
351 reinterpret_cast<TfLiteLeakyReluParams*>(node->builtin_data);
352
353 double alpha_multiplier =
354 input->params.scale * params->alpha / output->params.scale;
355 QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha,
356 &data->output_shift_alpha);
357 double identity_multiplier = input->params.scale / output->params.scale;
358 QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity,
359 &data->output_shift_identity);
360 }
361
362 if (input->type == kTfLiteInt16 && output->type == kTfLiteInt16) {
363 TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
364 TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
365 }
366
367 return context->ResizeTensor(context, output,
368 TfLiteIntArrayCopy(input->dims));
369 }
370
371 template <KernelType kernel_type>
372 TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
373 OpData* data = reinterpret_cast<OpData*>(node->user_data);
374
375 TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
376 TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
377 const TfLiteTensor* input;
378 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
379 TfLiteTensor* output;
380 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
381 TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
382
383 if (kernel_type == kFixedPointOptimized) {
384 if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
385 static constexpr int kInputIntegerBits = 4;
386
387 const double input_real_multiplier =
388 input->params.scale *
389 static_cast<double>(1 << (15 - kInputIntegerBits));
390
391 const double q =
392 std::frexp(input_real_multiplier, &data->input_left_shift);
393 auto q_fixed = static_cast<int32_t>(TfLiteRound(q * (1ll << 15)));
394 data->input_multiplier = static_cast<int16_t>(q_fixed);
395
396 int16_t input_range_radius =
397 CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 15);
398 data->input_range_radius = input_range_radius;
399 }
400 }
401
402 if (kernel_type == kGenericOptimized || kernel_type == kReference) {
403 if (input->type == kTfLiteUInt8) {
404 PopulateLookupTable<uint8_t>(
405 data, input, output, [](float value) { return std::tanh(value); });
406 } else if (input->type == kTfLiteInt8) {
407 PopulateLookupTable<int8_t>(data, input, output,
408 [](float value) { return std::tanh(value); });
409 }
410 }
411
412 if (input->type == kTfLiteInt16) {
413 static constexpr int kInputIntegerBits = 3;
414 static constexpr int kOutputFractionalBits = 15;
415
416 // These operators are implemented in fixed-point arithmetic,
417 // which intrinsically wants symmetric ranges (zero_point==0)
418 // and power-of-two scales (power-of-two is abbreviated below as POT).
419 // While more general support would be possible by means of rescaling,
420 // that would add some overhead and some loss of accuracy and wouldn't
421 // be used at the moment as current quantized LSTM applications are
422 // happy with symmetric, power-of-two-scales quantization. So we just
423 // implement that narrow case only for now.
424
425 TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
426 TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
427
428 int input_scale_log2_rounded;
429 bool param_scale_pot =
430 CheckedLog2(input->params.scale, &input_scale_log2_rounded);
431
432 data->input_left_shift =
433 (15 - kInputIntegerBits) + input_scale_log2_rounded;
434 param_scale_pot &=
435 (data->input_left_shift == 0 || data->input_left_shift == 1);
436
437 if (!param_scale_pot) {
438 // Calculate multiplier to change input scale to 1/(3*4096)
439 // as required by the table lookup.
440 // The number 3.0 in the multiplier comes from here,
441 // because the interval is [-10.7, 10.7] instead of [-8, 8].
442 // So, in this scaling +/-2^17 represents +/-10.7.
443
444 double multiplier = input->params.scale * 4096.0 * 3.0;
445 data->input_left_shift = 0;
446
447 while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
448 data->input_left_shift++;
449 multiplier = multiplier * 2.0;
450 }
451
452 data->input_multiplier = static_cast<int32_t>(multiplier);
453 }
454
455 int output_scale_log2_rounded;
456 TF_LITE_ENSURE(
457 context, CheckedLog2(output->params.scale, &output_scale_log2_rounded));
458 TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded,
459 -kOutputFractionalBits);
460 }
461
462 return context->ResizeTensor(context, output,
463 TfLiteIntArrayCopy(input->dims));
464 }
465
466 template <KernelType kernel_type>
467 TfLiteStatus SigmoidPrepare(TfLiteContext* context, TfLiteNode* node) {
468 OpData* data = reinterpret_cast<OpData*>(node->user_data);
469
470 TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
471 TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
472 const TfLiteTensor* input;
473 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
474 TfLiteTensor* output;
475 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
476 TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
477
478 if (kernel_type == kFixedPointOptimized) {
479 if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
480 if (input->type == kTfLiteUInt8) {
481 TF_LITE_ENSURE_EQ(context, output->params.zero_point,
482 std::numeric_limits<uint8_t>::min());
483 }
484 if (input->type == kTfLiteInt8) {
485 TF_LITE_ENSURE_EQ(context, output->params.zero_point,
486 std::numeric_limits<int8_t>::min());
487 }
488 TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
489
490 static constexpr int kInputIntegerBits = 4;
491
492 const double input_real_multiplier =
493 input->params.scale *
494 static_cast<double>(1 << (15 - kInputIntegerBits));
495
496 const double q =
497 std::frexp(input_real_multiplier, &data->input_left_shift);
498 auto q_fixed = static_cast<int32_t>(TfLiteRound(q * (1ll << 15)));
499 data->input_multiplier = static_cast<int16_t>(q_fixed);
500
501 int16_t input_range_radius =
502 CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 15);
503 data->input_range_radius = input_range_radius;
504 }
505 }
506
507 if (kernel_type == kGenericOptimized || kernel_type == kReference) {
508 if (input->type == kTfLiteUInt8) {
509 TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
510 PopulateLookupTable<uint8_t>(data, input, output, [](float value) {
511 return 1.0f / (1.0f + std::exp(-value));
512 });
513 } else if (input->type == kTfLiteInt8) {
514 TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
515 PopulateLookupTable<int8_t>(data, input, output, [](float value) {
516 return 1.0f / (1.0f + std::exp(-value));
517 });
518 } else if (input->type == kTfLiteInt16) {
519 TF_LITE_ENSURE(context, output->params.scale == 1. / 32768);
520 TF_LITE_ENSURE(context, output->params.zero_point == 0);
521 }
522 }
523
524 if (input->type == kTfLiteInt16) {
525 static constexpr int kInputIntegerBits = 3;
526 static constexpr int kOutputFractionalBits = 15;
527
528 // See comments in TanhPrepare about requiring zero_point==0
529 // and a power-of-two ("POT") scale.
530
531 TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
532 TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
533
534 int input_scale_log2_rounded;
535 bool param_scale_pot =
536 CheckedLog2(input->params.scale, &input_scale_log2_rounded);
537
538 data->input_left_shift =
539 (15 - kInputIntegerBits) + input_scale_log2_rounded;
540 param_scale_pot &= (data->input_left_shift == 0);
541
542 if (!param_scale_pot) {
543 // Calculate multiplier to change input scale to 1/(3*4096)
544 // as required by the table lookup.
545 // In this scaling +/-2^17 represents +/-10.7
546 double multiplier = input->params.scale * 4096.0 * 3.0;
547
548 data->input_left_shift = 0;
549
550 while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
551 data->input_left_shift++;
552 multiplier = multiplier * 2.0;
553 }
554
555 data->input_multiplier = static_cast<int32_t>(multiplier);
556 }
557
558 int output_scale_log2_rounded;
559 TF_LITE_ENSURE(
560 context, CheckedLog2(output->params.scale, &output_scale_log2_rounded));
561 TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded,
562 -kOutputFractionalBits);
563 }
564
565 return context->ResizeTensor(context, output,
566 TfLiteIntArrayCopy(input->dims));
567 }
568
569 template <KernelType kernel_type>
570 TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
571 auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
572 SoftmaxOpData* data = reinterpret_cast<SoftmaxOpData*>(node->user_data);
573
574 TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
575 TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
576 const TfLiteTensor* input;
577 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
578 TfLiteTensor* output;
579 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
580
581 TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
582
583 if (input->type == kTfLiteInt8 && output->type == kTfLiteInt8) {
584 TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
585 TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 256,
586 (0.001f * 1.f / 256));
587 } else if (input->type == kTfLiteInt16 && output->type == kTfLiteInt16) {
588 TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
589 TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768,
590 (0.001f * 1.f / 32768));
591 }
592
593 if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
594 if (kernel_type == kReference) {
595 const int kScaledDiffIntegerBits = 5;
596 int input_left_shift;
597 tflite::PreprocessSoftmaxScaling(
598 static_cast<double>(params->beta),
599 static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
600 &data->params.input_multiplier, &input_left_shift);
601 data->params.input_left_shift = input_left_shift;
602 data->params.diff_min =
603 -1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
604 input_left_shift);
605 } else {
606 switch (output->type) {
607 case kTfLiteUInt8:
608 case kTfLiteInt8:
609 #ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
610 // Only applies when both input and output are uint8/int8 and built with
611 // clang on aarch64.
612 // TODO(b/143709993): Port to ARMv7 and other platforms.
613 data->params.uint8_table1 = data->uint8_table1;
614 data->params.uint8_table2 = data->uint8_table2;
615 optimized_ops::PopulateSoftmaxUInt8LookupTable(
616 &data->params, input->params.scale, params->beta);
617 break;
618 #endif
619 case kTfLiteInt16:
620 default:
621 data->params.table = data->table;
622 optimized_ops::PopulateSoftmaxLookupTable(
623 &data->params, input->params.scale, params->beta);
624 }
625
626 data->params.zero_point = output->params.zero_point;
627 data->params.scale = output->params.scale;
628 }
629 } else if (input->type == kTfLiteInt16) {
630 TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
631 TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
632
633 data->params.exp_lut = data->exp_lut;
634 // The exp LUT is only used on negative values;
635 // we consider exp(-10.0) insignificant to the accumulation.
636 gen_lut<double, int16_t, int16_t>(
637 [](double value) { return std::exp(value); }, -10.0, 0.0, -1.0, 1.0,
638 data->params.exp_lut);
639 data->params.one_over_one_plus_x_lut = data->one_over_one_plus_x_lut;
640 gen_lut<double, int16_t, int16_t>(
641 [](double value) { return 1.0 / (1.0 + value); }, 0.0, 1.0, -1.0, 1.0,
642 data->params.one_over_one_plus_x_lut);
643 data->params.zero_point = output->params.zero_point;
644 data->params.scale = output->params.scale;
645
646 double input_scale_beta_rescale =
647 input->params.scale * params->beta /
648 (10.0 / 65535.0); // scale the input_diff such that [-65535, 0]
649 // correspond to [-10.0, 0.0]
650 QuantizeMultiplier(input_scale_beta_rescale, &data->params.input_multiplier,
651 &data->params.input_left_shift);
652 }
653
654 return context->ResizeTensor(context, output,
655 TfLiteIntArrayCopy(input->dims));
656 }
657
658 template <KernelType kernel_type>
659 TfLiteStatus LogSoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
660 LogSoftmaxOpData* data = reinterpret_cast<LogSoftmaxOpData*>(node->user_data);
661
662 TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
663 TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
664 const TfLiteTensor* input;
665 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
666 TfLiteTensor* output;
667 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
668 TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
669
670 if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
671 TF_LITE_ENSURE_EQ(context, output->params.scale, 16.0 / 256);
672 static const double kBeta = 1.0;
673 if (input->type == kTfLiteUInt8) {
674 TF_LITE_ENSURE_EQ(context, output->params.zero_point, 255);
675 }
676 if (input->type == kTfLiteInt8) {
677 TF_LITE_ENSURE_EQ(context, output->params.zero_point, 127);
678 }
679
680 if (kernel_type == kReference) {
681 const int kScaledDiffIntegerBits = 5;
682 int input_left_shift;
683 int reverse_scaling_right_shift;
684 tflite::PreprocessLogSoftmaxScalingExp(
685 kBeta, static_cast<double>(input->params.scale),
686 kScaledDiffIntegerBits, &data->params.input_multiplier,
687 &input_left_shift, &data->params.reverse_scaling_divisor,
688 &reverse_scaling_right_shift);
689 reverse_scaling_right_shift *= -1;
690 data->params.input_left_shift = input_left_shift;
691 data->params.reverse_scaling_right_shift = reverse_scaling_right_shift;
692 data->params.diff_min = -tflite::CalculateInputRadius(
693 kScaledDiffIntegerBits, input_left_shift);
694 } else {
695 data->params.table = data->f_table;
696 optimized_ops::PopulateSoftmaxLookupTable(&data->params,
697 input->params.scale, kBeta);
698 data->params.zero_point = output->params.zero_point;
699 data->params.scale = output->params.scale;
700 }
701 }
702
703 return context->ResizeTensor(context, output,
704 TfLiteIntArrayCopy(input->dims));
705 }
706
707 TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
708 TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
709 TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
710 const TfLiteTensor* input;
711 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
712 TfLiteTensor* output;
713 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
714 const TfLiteTensor* alpha;
715 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &alpha));
716 PreluOpData* data = reinterpret_cast<PreluOpData*>(node->user_data);
717
718 TF_LITE_ENSURE_TYPES_EQ(context, input->type, alpha->type);
719
720 output->type = input->type;
721
722 if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
723 // prelu(x) = x if x >= 0 else x * alpha.
724 // So if we translate that for quantized computation:
725 //
726 // input_float = (input_q - input_zp) * input_scale
727 // output_float = (output_q - output_zp) * output_scale
728 // alpha_float = (alpha_q - alpha_zp) * alpha_scale
729 //
730 // When input_q - input_zp >= 0:
731 //   output_q = (input_q - input_zp) * input_scale / output_scale + output_zp
732 // else:
733 //   output_q = (input_q - input_zp) * (alpha_q - alpha_zp) * input_scale
734 //       * alpha_scale / output_scale + output_zp
735 //
736 // So for input_q - input_zp >= 0:
737 // output real multiplier 1 is input_scale / output_scale;
738 // for input_q - input_zp < 0:
739 //   output real multiplier 2 is input_scale * alpha_scale / output_scale.
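// For example, with input_scale = 0.5, alpha_scale = 0.25 and
// output_scale = 1.0, real_multiplier_1 = 0.5 and real_multiplier_2 = 0.125;
// QuantizeMultiplier then splits each of these into a fixed-point multiplier
// and a left shift.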
740 double real_multiplier_1 = input->params.scale / output->params.scale;
741 double real_multiplier_2 =
742 input->params.scale * alpha->params.scale / output->params.scale;
743 QuantizeMultiplier(real_multiplier_1, &data->output_multiplier_1,
744 &data->output_shift_1);
745 QuantizeMultiplier(real_multiplier_2, &data->output_multiplier_2,
746 &data->output_shift_2);
747 }
748
749 data->requires_broadcast = !HaveSameShapes(input, alpha);
750 // PRelu (parametric ReLU) shares the same alpha value on the "shared axis".
751 // This means it's always required to "broadcast" alpha values in PRelu.
752 TfLiteIntArray* output_size = nullptr;
753 TF_LITE_ENSURE_OK(
754 context, CalculateShapeForBroadcast(context, input, alpha, &output_size));
755
756 TF_LITE_ENSURE_OK(context,
757 context->ResizeTensor(context, output, output_size));
758 // After broadcasting, the output shape should always be the same as the
759 // input shape.
760 TF_LITE_ENSURE(context, HaveSameShapes(input, output));
761
762 return kTfLiteOk;
763 }
764
765 TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
766 const TfLiteTensor* input;
767 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
768 TfLiteTensor* output;
769 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
770 const ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
771 switch (input->type) {
772 case kTfLiteFloat32: {
773 optimized_ops::Relu(GetTensorShape(input), GetTensorData<float>(input),
774 GetTensorShape(output), GetTensorData<float>(output));
775 } break;
776 // TODO(renjieliu): We may revisit the quantization calculation logic,
777 // the unbounded upper limit is actually hard to quantize.
778 case kTfLiteUInt8: {
779 QuantizedReluX<uint8_t>(0.0f, std::numeric_limits<float>::infinity(),
780 input, output, data);
781 } break;
782 case kTfLiteInt8: {
783 QuantizedReluX<int8_t>(0.0f, std::numeric_limits<float>::infinity(),
784 input, output, data);
785 } break;
786 case kTfLiteInt16: {
787 QuantizedReluX<int16_t>(0.0f, std::numeric_limits<float>::infinity(),
788 input, output, data);
789 } break;
790 default:
791 TF_LITE_KERNEL_LOG(context,
792 "Only float32, uint8, int8 and int16 are supported "
793 "currently, got %s.",
794 TfLiteTypeGetName(input->type));
795 return kTfLiteError;
796 }
797 return kTfLiteOk;
798 }
799
800 TfLiteStatus Relu1Eval(TfLiteContext* context, TfLiteNode* node) {
801 const TfLiteTensor* input;
802 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
803 TfLiteTensor* output;
804 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
805 const ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
806 switch (input->type) {
807 case kTfLiteFloat32: {
808 optimized_ops::Relu1(GetTensorShape(input), GetTensorData<float>(input),
809 GetTensorShape(output),
810 GetTensorData<float>(output));
811 return kTfLiteOk;
812 }
813 case kTfLiteUInt8: {
814 QuantizedReluX<uint8_t>(-1.0f, 1.0f, input, output, data);
815 return kTfLiteOk;
816 }
817 case kTfLiteInt8: {
818 QuantizedReluX<int8_t>(-1, 1, input, output, data);
819 return kTfLiteOk;
820 }
821 default:
822 TF_LITE_KERNEL_LOG(context,
823 "Only float32, uint8, int8 supported "
824 "currently, got %s.",
825 TfLiteTypeGetName(input->type));
826 return kTfLiteError;
827 }
828 }
829
830 template <KernelType kernel_type>
831 TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
832 HardSwishData* data = static_cast<HardSwishData*>(node->user_data);
833
834 const TfLiteTensor* input;
835 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
836 TfLiteTensor* output;
837 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
838 switch (input->type) {
839 case kTfLiteFloat32: {
840 if (kernel_type == kReference) {
841 reference_ops::HardSwish(
842 GetTensorShape(input), GetTensorData<float>(input),
843 GetTensorShape(output), GetTensorData<float>(output));
844 } else {
845 optimized_ops::HardSwish(
846 GetTensorShape(input), GetTensorData<float>(input),
847 GetTensorShape(output), GetTensorData<float>(output));
848 }
849 return kTfLiteOk;
850 } break;
851 case kTfLiteUInt8: {
852 HardSwishParams& params = data->params;
853 if (kernel_type == kReference) {
854 reference_ops::HardSwish(
855 params, GetTensorShape(input), GetTensorData<uint8_t>(input),
856 GetTensorShape(output), GetTensorData<uint8_t>(output));
857 } else {
858 optimized_ops::HardSwish(
859 params, GetTensorShape(input), GetTensorData<uint8_t>(input),
860 GetTensorShape(output), GetTensorData<uint8_t>(output));
861 }
862 return kTfLiteOk;
863 } break;
864 case kTfLiteInt8: {
865 HardSwishParams& params = data->params;
866 if (kernel_type == kReference) {
867 reference_ops::HardSwish(
868 params, GetTensorShape(input), GetTensorData<int8_t>(input),
869 GetTensorShape(output), GetTensorData<int8_t>(output));
870 } else {
871 optimized_ops::HardSwish(
872 params, GetTensorShape(input), GetTensorData<int8_t>(input),
873 GetTensorShape(output), GetTensorData<int8_t>(output));
874 }
875 return kTfLiteOk;
876 } break;
877 default:
878 TF_LITE_KERNEL_LOG(
879 context,
880 "Only float32, uint8 and int8 are supported currently, got %s.",
881 TfLiteTypeGetName(input->type));
882 return kTfLiteError;
883 }
884 }
885
886 TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
887 const TfLiteTensor* input;
888 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
889 TfLiteTensor* output;
890 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
891 ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
892 switch (input->type) {
893 case kTfLiteFloat32: {
894 size_t elements = input->bytes / sizeof(float);
895 const float* in = GetTensorData<float>(input);
896 const float* in_end = in + elements;
897 float* out = GetTensorData<float>(output);
898 for (; in < in_end; in++, out++) *out = std::min(std::max(0.f, *in), 6.f);
899 return kTfLiteOk;
900 }
901 case kTfLiteUInt8:
902 QuantizedReluX<uint8_t>(0.0f, 6.0f, input, output, data);
903 return kTfLiteOk;
904 case kTfLiteInt8: {
905 QuantizedReluX<int8_t>(0.0f, 6.0f, input, output, data);
906 return kTfLiteOk;
907 }
908 case kTfLiteInt16: {
909 QuantizedReluX<int16_t>(0.0f, 6.0f, input, output, data);
910 return kTfLiteOk;
911 }
912 default:
913 TF_LITE_KERNEL_LOG(context,
914 "Only float32, uint8, int8 and int16 are supported "
915 "currently, got %s.",
916 TfLiteTypeGetName(input->type));
917 return kTfLiteError;
918 }
919 }
920
921 template <KernelType kernel_type>
922 TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
923 OpData* data = reinterpret_cast<OpData*>(node->user_data);
924 const TfLiteTensor* input;
925 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
926 TfLiteTensor* output;
927 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
928 switch (input->type) {
929 case kTfLiteFloat32: {
930 if (kernel_type == kReference) {
931 reference_ops::Tanh(GetTensorShape(input), GetTensorData<float>(input),
932 GetTensorShape(output),
933 GetTensorData<float>(output));
934 } else {
935 optimized_ops::Tanh(GetTensorShape(input), GetTensorData<float>(input),
936 GetTensorShape(output),
937 GetTensorData<float>(output));
938 }
939 return kTfLiteOk;
940 } break;
941 case kTfLiteInt16: {
942 TanhParams params;
943 params.input_left_shift = data->input_left_shift;
944 if (kernel_type == kReference || (data->input_multiplier > 0)) {
945 reference_integer_ops::Tanh(
946 data->input_multiplier, data->input_left_shift,
947 GetTensorShape(input), GetTensorData<int16_t>(input),
948 GetTensorShape(output), GetTensorData<int16_t>(output));
949 } else {
950 optimized_ops::Tanh(
951 params, GetTensorShape(input), GetTensorData<int16_t>(input),
952 GetTensorShape(output), GetTensorData<int16_t>(output));
953 }
954 return kTfLiteOk;
955 } break;
956 case kTfLiteUInt8: {
957 if (kernel_type == kFixedPointOptimized) {
958 TanhParams params;
959 params.input_zero_point = input->params.zero_point;
960 params.input_range_radius = data->input_range_radius;
961 params.input_multiplier = data->input_multiplier;
962 params.input_left_shift = data->input_left_shift;
963 optimized_ops::Tanh16bitPrecision(
964 params, GetTensorShape(input), GetTensorData<uint8_t>(input),
965 GetTensorShape(output), GetTensorData<uint8_t>(output));
966 } else {
967 EvalUsingLookupTable(data, input, output);
968 }
969 return kTfLiteOk;
970 } break;
971 case kTfLiteInt8: {
972 if (kernel_type == kFixedPointOptimized) {
973 TanhParams params;
974 params.input_zero_point = input->params.zero_point;
975 params.input_range_radius = data->input_range_radius;
976 params.input_multiplier = data->input_multiplier;
977 params.input_left_shift = data->input_left_shift;
978 optimized_ops::Tanh16bitPrecision(
979 params, GetTensorShape(input), GetTensorData<int8_t>(input),
980 GetTensorShape(output), GetTensorData<int8_t>(output));
981 } else {
982 EvalUsingLookupTable(data, input, output);
983 }
984 return kTfLiteOk;
985 } break;
986 default:
987 TF_LITE_KERNEL_LOG(context,
988 "Only float32, uint8, int16 and int8 are supported "
989 "currently, got %s.",
990 TfLiteTypeGetName(input->type));
991 return kTfLiteError;
992 }
993 }
994
995 // Sigmoid is also known as "Logistic".
996 template <KernelType kernel_type>
997 TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
998 OpData* data = reinterpret_cast<OpData*>(node->user_data);
999
1000 const TfLiteTensor* input;
1001 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
1002 TfLiteTensor* output;
1003 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
1004 switch (input->type) {
1005 case kTfLiteFloat32: {
1006 if (kernel_type == kReference) {
1007 reference_ops::Logistic(
1008 GetTensorShape(input), GetTensorData<float>(input),
1009 GetTensorShape(output), GetTensorData<float>(output));
1010 } else {
1011 optimized_ops::Logistic(
1012 GetTensorShape(input), GetTensorData<float>(input),
1013 GetTensorShape(output), GetTensorData<float>(output));
1014 }
1015 break;
1016 }
1017 case kTfLiteInt16: {
1018 LogisticParams params;
1019 if (kernel_type == kReference || (data->input_multiplier > 0)) {
1020 const int size =
1021 MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
1022
1023 reference_integer_ops::Logistic(
1024 data->input_multiplier, data->input_left_shift, size,
1025 GetTensorData<int16_t>(input), GetTensorData<int16_t>(output));
1026 } else {
1027 optimized_ops::Logistic(
1028 params, GetTensorShape(input), GetTensorData<int16_t>(input),
1029 GetTensorShape(output), GetTensorData<int16_t>(output));
1030 }
1031 break;
1032 }
1033 case kTfLiteUInt8: {
1034 if (kernel_type == kFixedPointOptimized) {
1035 LogisticParams params;
1036 params.input_zero_point = input->params.zero_point;
1037 params.input_range_radius = data->input_range_radius;
1038 params.input_multiplier = data->input_multiplier;
1039 params.input_left_shift = data->input_left_shift;
1040 optimized_ops::Logistic16bitPrecision(
1041 params, GetTensorShape(input), GetTensorData<uint8_t>(input),
1042 GetTensorShape(output), GetTensorData<uint8_t>(output));
1043 } else {
1044 EvalUsingLookupTable(data, input, output);
1045 }
1046 break;
1047 }
1048 case kTfLiteInt8: {
1049 if (kernel_type == kFixedPointOptimized) {
1050 LogisticParams params;
1051 params.input_zero_point = input->params.zero_point;
1052 params.input_range_radius = data->input_range_radius;
1053 params.input_multiplier = data->input_multiplier;
1054 params.input_left_shift = data->input_left_shift;
1055 optimized_ops::Logistic16bitPrecision(
1056 params, GetTensorShape(input), GetTensorData<int8_t>(input),
1057 GetTensorShape(output), GetTensorData<int8_t>(output));
1058 } else {
1059 EvalUsingLookupTable(data, input, output);
1060 }
1061 break;
1062 }
1063 default:
1064 TF_LITE_KERNEL_LOG(context,
1065 "Only float32, uint8, int16 and int8 are supported "
1066 "currently, got %s.",
1067 TfLiteTypeGetName(input->type));
1068 return kTfLiteError;
1069 }
1070 return kTfLiteOk;
1071 }
1072
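// Float softmax simply forwards to the reference or optimized kernel; the
// optimized path is handed the CPU backend context so it can multi-thread.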
1073 TfLiteStatus SoftmaxFloat(TfLiteContext* context, const TfLiteTensor* input,
1074 TfLiteTensor* output, TfLiteSoftmaxParams* params,
1075 KernelType kernel_type = kGenericOptimized) {
1076 SoftmaxParams op_params;
1077 op_params.beta = params->beta;
1078 if (kernel_type == kReference) {
1079 reference_ops::Softmax(op_params, GetTensorShape(input),
1080 GetTensorData<float>(input), GetTensorShape(output),
1081 GetTensorData<float>(output));
1082 } else {
1083 optimized_ops::Softmax(op_params, GetTensorShape(input),
1084 GetTensorData<float>(input), GetTensorShape(output),
1085 GetTensorData<float>(output),
1086 CpuBackendContext::GetFromContext(context));
1087 }
1088 return kTfLiteOk;
1089 }
1090
1091 template <typename In, typename Out>
1092 TfLiteStatus SoftmaxQuantized(TfLiteContext* context, const TfLiteTensor* input,
1093 TfLiteTensor* output, SoftmaxOpData* data,
1094 KernelType kernel_type = kGenericOptimized) {
1095 if (kernel_type == kReference) {
1096 reference_ops::Softmax(data->params, GetTensorShape(input),
1097 GetTensorData<In>(input), GetTensorShape(output),
1098 GetTensorData<Out>(output));
1099 } else {
1100 optimized_ops::Softmax(data->params, GetTensorShape(input),
1101 GetTensorData<In>(input), GetTensorShape(output),
1102 GetTensorData<Out>(output));
1103 }
1104 return kTfLiteOk;
1105 }
1106
1107 template <>
1108 TfLiteStatus SoftmaxQuantized<int8_t, int8_t>(TfLiteContext* context,
1109 const TfLiteTensor* input,
1110 TfLiteTensor* output,
1111 SoftmaxOpData* data,
1112 KernelType kernel_type) {
1113 if (kernel_type == kReference) {
1114 reference_ops::Softmax(data->params, GetTensorShape(input),
1115 GetTensorData<int8_t>(input), GetTensorShape(output),
1116 GetTensorData<int8_t>(output));
1117 } else {
1118 #ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
1119 optimized_ops::SoftmaxInt8LUT(
1120 data->params, GetTensorShape(input), GetTensorData<int8_t>(input),
1121 GetTensorShape(output), GetTensorData<int8_t>(output));
1122 #else
1123 optimized_ops::Softmax(data->params, GetTensorShape(input),
1124 GetTensorData<int8_t>(input), GetTensorShape(output),
1125 GetTensorData<int8_t>(output));
1126 #endif
1127 }
1128 return kTfLiteOk;
1129 }
1130
1131 template <>
1132 TfLiteStatus SoftmaxQuantized<uint8_t, uint8_t>(TfLiteContext* context,
1133 const TfLiteTensor* input,
1134 TfLiteTensor* output,
1135 SoftmaxOpData* data,
1136 KernelType kernel_type) {
1137 if (kernel_type == kReference) {
1138 reference_ops::Softmax(
1139 data->params, GetTensorShape(input), GetTensorData<uint8_t>(input),
1140 GetTensorShape(output), GetTensorData<uint8_t>(output));
1141 } else {
1142 #ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
1143 optimized_ops::SoftmaxInt8LUT(
1144 data->params, GetTensorShape(input), GetTensorData<uint8_t>(input),
1145 GetTensorShape(output), GetTensorData<uint8_t>(output));
1146 #else
1147 optimized_ops::Softmax(data->params, GetTensorShape(input),
1148 GetTensorData<uint8_t>(input), GetTensorShape(output),
1149 GetTensorData<uint8_t>(output));
1150 #endif
1151 }
1152 return kTfLiteOk;
1153 }
1154
1155 template <>
1156 TfLiteStatus SoftmaxQuantized<int16_t, int16_t>(TfLiteContext* context,
1157 const TfLiteTensor* input,
1158 TfLiteTensor* output,
1159 SoftmaxOpData* data,
1160 KernelType kernel_type) {
1161 if (NumDimensions(input) >= 1 && NumDimensions(input) <= 4) {
1162 reference_ops::SoftmaxInt16(
1163 data->params, GetTensorShape(input), GetTensorData<int16_t>(input),
1164 GetTensorShape(output), GetTensorData<int16_t>(output));
1165 return kTfLiteOk;
1166 } else {
1167 TF_LITE_KERNEL_LOG(context,
1168 "Only 1D, 2D, 3D and 4D tensors supported for int16 "
1169 "input with int16 output, got %dD.",
1170 NumDimensions(input));
1171 return kTfLiteError;
1172 }
1173 }
1174
1175 template <KernelType kernel_type>
1176 TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
1177 auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
1178 SoftmaxOpData* data = reinterpret_cast<SoftmaxOpData*>(node->user_data);
1179
1180 const TfLiteTensor* input;
1181 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
1182 TfLiteTensor* output;
1183 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
1184
1185 switch (input->type) {
1186 case kTfLiteFloat32: {
1187 return SoftmaxFloat(context, input, output, params, kernel_type);
1188 }
1189 case kTfLiteUInt8: {
1190 switch (output->type) {
1191 case kTfLiteUInt8:
1192 return SoftmaxQuantized<uint8_t, uint8_t>(context, input, output,
1193 data, kernel_type);
1194 case kTfLiteInt16:
1195 return SoftmaxQuantized<uint8_t, int16_t>(context, input, output,
1196 data, kernel_type);
1197 default:
1198 TF_LITE_KERNEL_LOG(context,
1199 "Only uint8_t and int16_t outputs are supported "
1200 "with uint8_t inputs currently, got %s.",
1201 TfLiteTypeGetName(output->type));
1202 return kTfLiteError;
1203 }
1204 }
1205 case kTfLiteInt8: {
1206 switch (output->type) {
1207 case kTfLiteInt8:
1208 return SoftmaxQuantized<int8_t, int8_t>(context, input, output, data,
1209 kernel_type);
1210 case kTfLiteInt16:
1211 return SoftmaxQuantized<int8_t, int16_t>(context, input, output, data,
1212 kernel_type);
1213 default:
1214 TF_LITE_KERNEL_LOG(context,
1215 "Only int8_t and int16_t outputs are supported "
1216 "with int8_t inputs currently, got %s.",
1217 TfLiteTypeGetName(output->type));
1218 return kTfLiteError;
1219 }
1220 }
1221 case kTfLiteInt16: {
1222 return SoftmaxQuantized<int16_t, int16_t>(context, input, output, data,
1223 kernel_type);
1224 }
1225
1226 default:
1227 TF_LITE_KERNEL_LOG(context,
1228 "Only float32, uint8_t, Int8_t, Int16_t are supported "
1229 "currently, got %s.",
1230 TfLiteTypeGetName(input->type));
1231 return kTfLiteError;
1232 }
1233 }
1234
1235 template <KernelType kernel_type>
1236 TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
1237 const LogSoftmaxOpData* data =
1238 reinterpret_cast<LogSoftmaxOpData*>(node->user_data);
1239 const TfLiteTensor* input;
1240 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
1241 TfLiteTensor* output;
1242 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
1243 switch (input->type) {
1244 case kTfLiteFloat32: {
1245 SoftmaxParams op_params;
1246 if (kernel_type == kGenericOptimized) {
1247 optimized_ops::LogSoftmax(
1248 op_params, GetTensorShape(input), GetTensorData<float>(input),
1249 GetTensorShape(output), GetTensorData<float>(output));
1250 } else {
1251 reference_ops::LogSoftmax(
1252 op_params, GetTensorShape(input), GetTensorData<float>(input),
1253 GetTensorShape(output), GetTensorData<float>(output));
1254 }
1255 return kTfLiteOk;
1256 }
1257 case kTfLiteUInt8: {
1258 const SoftmaxParams& op_params = data->params;
1259 if (kernel_type == kGenericOptimized) {
1260 optimized_ops::LogSoftmax(
1261 op_params, input->params.scale, GetTensorShape(input),
1262 GetTensorData<uint8_t>(input), GetTensorShape(output),
1263 GetTensorData<uint8_t>(output));
1264 } else {
1265 reference_ops::LogSoftmax(
1266 op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
1267 GetTensorShape(output), GetTensorData<uint8_t>(output));
1268 }
1269 return kTfLiteOk;
1270 }
1271 case kTfLiteInt8: {
1272 const SoftmaxParams& op_params = data->params;
1273 if (kernel_type == kGenericOptimized) {
1274 optimized_ops::LogSoftmax(
1275 op_params, input->params.scale, GetTensorShape(input),
1276 GetTensorData<int8_t>(input), GetTensorShape(output),
1277 GetTensorData<int8_t>(output));
1278 } else {
1279 const auto input_shape = GetTensorShape(input);
1280 const auto output_shape = GetTensorShape(output);
1281 const int trailing_dim = input_shape.DimensionsCount() - 1;
1282 const int outer_size =
1283 MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
1284 const int depth =
1285 MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
1286 reference_integer_ops::LogSoftmax(
1287 op_params.input_multiplier, op_params.input_left_shift,
1288 op_params.reverse_scaling_divisor,
1289 op_params.reverse_scaling_right_shift, op_params.diff_min,
1290 outer_size, depth, GetTensorData<int8_t>(input),
1291 GetTensorData<int8_t>(output));
1292 }
1293 return kTfLiteOk;
1294 }
1295 default:
1296 TF_LITE_KERNEL_LOG(
1297 context,
1298 "Only float32, uint8 and int8 are supported currently, got %s.",
1299 TfLiteTypeGetName(input->type));
1300 return kTfLiteError;
1301 }
1302 }
1303
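// Scalar PReLU used as the per-element functor for the float kernels:
// identity for non-negative inputs, input * alpha otherwise.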
1304 template <typename T>
1305 T ApplyPrelu(T input, T alpha) {
1306 return input >= 0.0 ? input : input * alpha;
1307 }
1308
1309 template <KernelType kernel_type>
1310 TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
1311 const TfLiteTensor* input;
1312 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
1313 const TfLiteTensor* alpha;
1314 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &alpha));
1315 TfLiteTensor* output;
1316 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
1317 const PreluOpData* data = reinterpret_cast<PreluOpData*>(node->user_data);
1318 switch (input->type) {
1319 case kTfLiteFloat32: {
1320 if (kernel_type == kGenericOptimized) {
1321 tflite::ArithmeticParams op_params;
1322 bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
1323 GetTensorShape(input), GetTensorShape(alpha), &op_params);
1324 if (need_broadcast) {
1325 optimized_ops::BroadcastPReluDispatch(
1326 op_params, GetTensorShape(input), GetTensorData<float>(input),
1327 GetTensorShape(alpha), GetTensorData<float>(alpha),
1328 GetTensorShape(output), GetTensorData<float>(output),
1329 ApplyPrelu<float>);
1330 } else {
1331 const int flat_size =
1332 MatchingElementsSize(GetTensorShape(input), GetTensorShape(alpha),
1333 GetTensorShape(output));
1334 optimized_ops::PReluElementWise(
1335 flat_size, op_params, GetTensorData<float>(alpha),
1336 GetTensorData<float>(input), GetTensorData<float>(output));
1337 }
1338 } else {
1339 if (data->requires_broadcast) {
1340 reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
1341 GetTensorShape(input), GetTensorData<float>(input),
1342 GetTensorShape(alpha), GetTensorData<float>(alpha),
1343 GetTensorShape(output), GetTensorData<float>(output),
1344 ApplyPrelu<float>);
1345 } else {
1346 reference_ops::BinaryFunction<float, float, float>(
1347 GetTensorShape(input), GetTensorData<float>(input),
1348 GetTensorShape(alpha), GetTensorData<float>(alpha),
1349 GetTensorShape(output), GetTensorData<float>(output),
1350 ApplyPrelu<float>);
1351 }
1352 }
1353 return kTfLiteOk;
1354 }
1355 case kTfLiteUInt8: {
1356 PreluParams op_params;
1357 op_params.input_offset = -input->params.zero_point;
1358 op_params.alpha_offset = -alpha->params.zero_point;
1359 op_params.output_offset = output->params.zero_point;
1360 op_params.output_multiplier_1 = data->output_multiplier_1;
1361 op_params.output_shift_1 = data->output_shift_1;
1362 op_params.output_multiplier_2 = data->output_multiplier_2;
1363 op_params.output_shift_2 = data->output_shift_2;
1364 if (data->requires_broadcast) {
1365 reference_ops::BroadcastPrelu4DSlow(
1366 op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
1367 GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
1368 GetTensorShape(output), GetTensorData<uint8_t>(output));
1369 } else {
1370 reference_ops::Prelu(
1371 op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
1372 GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
1373 GetTensorShape(output), GetTensorData<uint8_t>(output));
1374 }
1375 return kTfLiteOk;
1376 }
1377 case kTfLiteInt8: {
1378 PreluParams op_params;
1379 op_params.input_offset = -input->params.zero_point;
1380 op_params.alpha_offset = -alpha->params.zero_point;
1381 op_params.output_offset = output->params.zero_point;
1382 op_params.output_multiplier_1 = data->output_multiplier_1;
1383 op_params.output_shift_1 = data->output_shift_1;
1384 op_params.output_multiplier_2 = data->output_multiplier_2;
1385 op_params.output_shift_2 = data->output_shift_2;
1386 if (data->requires_broadcast) {
1387 reference_ops::BroadcastPrelu4DSlow(
1388 op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
1389 GetTensorShape(alpha), GetTensorData<int8_t>(alpha),
1390 GetTensorShape(output), GetTensorData<int8_t>(output));
1391 } else {
1392 reference_ops::Prelu(
1393 op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
1394 GetTensorShape(alpha), GetTensorData<int8_t>(alpha),
1395 GetTensorShape(output), GetTensorData<int8_t>(output));
1396 }
1397 return kTfLiteOk;
1398 }
1399 default:
1400 TF_LITE_KERNEL_LOG(
1401 context,
1402 "Only float32 and uint8 and int8 are supported currently, got %d.",
1403 TfLiteTypeGetName(input->type));
1404 return kTfLiteError;
1405 }
1406 }
1407
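// Quantized LeakyRelu helper: packs the precomputed multipliers and shifts for
// the alpha and identity paths into LeakyReluParams and runs the reference
// kernel for type T.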
1408 template <typename T>
1409 void QuantizeLeakyRelu(const TfLiteTensor* input, TfLiteTensor* output,
1410 const LeakyReluOpData* data) {
1411 LeakyReluParams op_params;
1412
1413 op_params.input_offset = input->params.zero_point;
1414 op_params.output_offset = output->params.zero_point;
1415 op_params.output_multiplier_alpha = data->output_multiplier_alpha;
1416 op_params.output_shift_alpha = data->output_shift_alpha;
1417 op_params.output_multiplier_identity = data->output_multiplier_identity;
1418 op_params.output_shift_identity = data->output_shift_identity;
1419 reference_ops::QuantizeLeakyRelu(
1420 op_params, GetTensorShape(input), GetTensorData<T>(input),
1421 GetTensorShape(output), GetTensorData<T>(output));
1422 }
1423
1424 TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
1425 const TfLiteTensor* input;
1426 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
1427 TfLiteTensor* output;
1428 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
1429 const auto* params =
1430 reinterpret_cast<TfLiteLeakyReluParams*>(node->builtin_data);
1431 const LeakyReluOpData* data =
1432 reinterpret_cast<LeakyReluOpData*>(node->user_data);
1433
1434 LeakyReluParams op_params;
1435 switch (input->type) {
1436 case kTfLiteFloat32: {
1437 op_params.alpha = params->alpha;
1438 optimized_ops::LeakyRelu(
1439 op_params, GetTensorShape(input), GetTensorData<float>(input),
1440 GetTensorShape(output), GetTensorData<float>(output));
1441 return kTfLiteOk;
1442 }
1443 case kTfLiteUInt8: {
1444 QuantizeLeakyRelu<uint8_t>(input, output, data);
1445 return kTfLiteOk;
1446 }
1447 case kTfLiteInt8: {
1448 QuantizeLeakyRelu<int8_t>(input, output, data);
1449 return kTfLiteOk;
1450 }
1451 case kTfLiteInt16: {
1452 QuantizeLeakyRelu<int16_t>(input, output, data);
1453 return kTfLiteOk;
1454 }
1455 default:
1456 TF_LITE_KERNEL_LOG(
1457 context,
1458 "Only float32, int8, int16 and uint8 is supported currently, got %s.",
1459 TfLiteTypeGetName(input->type));
1460 return kTfLiteError;
1461 }
1462 }
1463
1464 TfLiteStatus EluPrepare(TfLiteContext* context, TfLiteNode* node) {
1465 const TfLiteTensor* input;
1466 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
1467 TfLiteTensor* output;
1468 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
1469 OpData* data = reinterpret_cast<OpData*>(node->user_data);
1470
1471 // Use LUT to handle quantized elu path.
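// The table is built once at Prepare time: PopulateLookupTable walks all 256
// possible int8 input values, dequantizes each one, applies the lambda below
// (ELU), and requantizes the result into OpData::table, so EluEval reduces to
// a per-element table lookup.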
1472 if (input->type == kTfLiteInt8) {
1473 PopulateLookupTable<int8_t>(data, input, output, [](float value) {
1474 return value < 0.0 ? std::exp(value) - 1.0f : value;
1475 });
1476 }
1477 return GenericPrepare(context, node);
1478 }
1479
1480 TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
1481 const TfLiteTensor* input;
1482 TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
1483 TfLiteTensor* output;
1484 TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
1485 switch (input->type) {
1486 case kTfLiteFloat32: {
1487 optimized_ops::Elu(GetTensorShape(input), GetTensorData<float>(input),
1488 GetTensorShape(output), GetTensorData<float>(output));
1489 return kTfLiteOk;
1490 }
1491 case kTfLiteInt8: {
1492 OpData* data = reinterpret_cast<OpData*>(node->user_data);
1493 EvalUsingLookupTable(data, input, output);
1494 return kTfLiteOk;
1495 }
1496 default:
1497 TF_LITE_KERNEL_LOG(
1498 context, "Only float32 and int8 is supported currently, got %s.",
1499 TfLiteTypeGetName(input->type));
1500 return kTfLiteError;
1501 }
1502 }
1503
1504 } // namespace activations
1505
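// Each Register_* entry point below returns a statically allocated
// TfLiteRegistration whose leading fields are, in order, {init, free,
// prepare, invoke}. A resolver can pick a specific variant explicitly; a
// minimal sketch using the public MutableOpResolver API (illustrative setup,
// not part of this file):
//
//   tflite::MutableOpResolver resolver;
//   resolver.AddBuiltin(tflite::BuiltinOperator_SOFTMAX,
//                       tflite::ops::builtin::Register_SOFTMAX_REF());
//
// The stock BuiltinOpResolver typically maps each builtin to the un-suffixed
// variant (e.g. Register_SOFTMAX, Register_TANH).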
1506 TfLiteRegistration* Register_ELU() {
1507 static TfLiteRegistration r = {activations::Init, activations::Free,
1508 activations::EluPrepare, activations::EluEval};
1509 return &r;
1510 }
1511
1512 TfLiteRegistration* Register_RELU() {
1513 static TfLiteRegistration r = {activations::ReluInit, activations::ReluFree,
1514 activations::ReluPrepare,
1515 activations::ReluEval};
1516 return &r;
1517 }
1518
1519 TfLiteRegistration* Register_RELU_N1_TO_1() {
1520 static TfLiteRegistration r = {activations::ReluInit, activations::ReluFree,
1521 activations::ReluPrepare,
1522 activations::Relu1Eval};
1523 return &r;
1524 }
1525
1526 TfLiteRegistration* Register_RELU6() {
1527 static TfLiteRegistration r = {activations::ReluInit, activations::ReluFree,
1528 activations::ReluPrepare,
1529 activations::Relu6Eval};
1530 return &r;
1531 }
1532
1533 TfLiteRegistration* Register_TANH_REF() {
1534 static TfLiteRegistration r = {
1535 activations::Init, activations::Free,
1536 activations::TanhPrepare<activations::kReference>,
1537 activations::TanhEval<activations::kReference>};
1538 return &r;
1539 }
1540
1541 TfLiteRegistration* Register_TANH_GENERIC_OPT() {
1542 static TfLiteRegistration r = {
1543 activations::Init, activations::Free,
1544 activations::TanhPrepare<activations::kGenericOptimized>,
1545 activations::TanhEval<activations::kGenericOptimized>};
1546 return &r;
1547 }
1548
1549 TfLiteRegistration* Register_TANH_FIXED_POINT_OPT() {
1550 static TfLiteRegistration r = {
1551 activations::Init, activations::Free,
1552 activations::TanhPrepare<activations::kFixedPointOptimized>,
1553 activations::TanhEval<activations::kFixedPointOptimized>};
1554 return &r;
1555 }
1556
1557 TfLiteRegistration* Register_TANH() {
1558 // TODO(b/134622898): Switch over from the LUT optimized method to the fixed
1559 // point optimized method when typical Android hardware performs better on
1560 // the latter one.
1561 return Register_TANH_GENERIC_OPT();
1562 }
1563
1564 TfLiteRegistration* Register_LOGISTIC_REF() {
1565 static TfLiteRegistration r = {
1566 activations::Init, activations::Free,
1567 activations::SigmoidPrepare<activations::kReference>,
1568 activations::SigmoidEval<activations::kReference>};
1569 return &r;
1570 }
1571
1572 TfLiteRegistration* Register_LOGISTIC_GENERIC_OPT() {
1573 static TfLiteRegistration r = {
1574 activations::Init, activations::Free,
1575 activations::SigmoidPrepare<activations::kGenericOptimized>,
1576 activations::SigmoidEval<activations::kGenericOptimized>};
1577 return &r;
1578 }
1579
1580 TfLiteRegistration* Register_LOGISTIC_FIXED_POINT_OPT() {
1581 static TfLiteRegistration r = {
1582 activations::Init, activations::Free,
1583 activations::SigmoidPrepare<activations::kFixedPointOptimized>,
1584 activations::SigmoidEval<activations::kFixedPointOptimized>};
1585 return &r;
1586 }
1587
1588 TfLiteRegistration* Register_LOGISTIC() {
1589 // TODO(b/134622898): Switch over from the LUT optimized method to the fixed
1590 // point optimized method when typical Android hardware performs better on
1591 // the latter one.
1592 return Register_LOGISTIC_GENERIC_OPT();
1593 }
1594
1595 TfLiteRegistration* Register_SOFTMAX_REF() {
1596 static TfLiteRegistration r = {
1597 activations::SoftmaxInit, activations::SoftmaxFree,
1598 activations::SoftmaxPrepare<activations::kReference>,
1599 activations::SoftmaxEval<activations::kReference>};
1600 return &r;
1601 }
1602
1603 TfLiteRegistration* Register_SOFTMAX() {
1604 static TfLiteRegistration r = {
1605 activations::SoftmaxInit, activations::SoftmaxFree,
1606 activations::SoftmaxPrepare<activations::kGenericOptimized>,
1607 activations::SoftmaxEval<activations::kGenericOptimized>};
1608 return &r;
1609 }
1610
1611 TfLiteRegistration* Register_LOG_SOFTMAX_REF() {
1612 static TfLiteRegistration r = {
1613 activations::LogSoftmaxInit, activations::LogSoftmaxFree,
1614 activations::LogSoftmaxPrepare<activations::kReference>,
1615 activations::LogSoftmaxEval<activations::kReference>};
1616 return &r;
1617 }
1618
1619 TfLiteRegistration* Register_LOG_SOFTMAX() {
1620 static TfLiteRegistration r = {
1621 activations::LogSoftmaxInit, activations::LogSoftmaxFree,
1622 activations::LogSoftmaxPrepare<activations::kGenericOptimized>,
1623 activations::LogSoftmaxEval<activations::kGenericOptimized>};
1624 return &r;
1625 }
1626
1627 TfLiteRegistration* Register_PRELU_REF() {
1628 static TfLiteRegistration r = {
1629 activations::PreluInit, activations::PreluFree, activations::PreluPrepare,
1630 activations::PreluEval<activations::kReference>};
1631 return &r;
1632 }
1633
1634 TfLiteRegistration* Register_PRELU() {
1635 static TfLiteRegistration r = {
1636 activations::PreluInit, activations::PreluFree, activations::PreluPrepare,
1637 activations::PreluEval<activations::kGenericOptimized>};
1638 return &r;
1639 }
1640
1641 TfLiteRegistration* Register_LEAKY_RELU() {
1642 static TfLiteRegistration r = {
1643 activations::LeakyReluInit, activations::LeakyReluFree,
1644 activations::LeakyReluPrepare, activations::LeakyReluEval};
1645 return &r;
1646 }
1647
1648 TfLiteRegistration* Register_HARD_SWISH() {
1649 static TfLiteRegistration r = {
1650 activations::HardSwishInit, activations::HardSwishFree,
1651 activations::HardSwishPrepare,
1652 activations::HardSwishEval<activations::kGenericOptimized>};
1653 return &r;
1654 }
1655
1656 TfLiteRegistration* Register_HARD_SWISH_REF() {
1657 static TfLiteRegistration r = {
1658 activations::HardSwishInit, activations::HardSwishFree,
1659 activations::HardSwishPrepare,
1660 activations::HardSwishEval<activations::kReference>};
1661 return &r;
1662 }
1663
1664 } // namespace builtin
1665 } // namespace ops
1666 } // namespace tflite
1667