/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <functional>
#include <limits>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/log_softmax.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
#include "tensorflow/lite/kernels/internal/reference/prelu.h"
#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
#include "tensorflow/lite/kernels/internal/reference/softmax.h"
#include "tensorflow/lite/kernels/internal/reference/tanh.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"

#if __aarch64__ && __clang__
#include <arm_neon.h>
#endif

namespace tflite {
namespace ops {
namespace builtin {
namespace activations {

// TODO(b/142762739): We should figure out a multi-threading plan for most of
// the activation ops below.

enum KernelType {
  kReference,
  kGenericOptimized,
  kFixedPointOptimized,
};

struct OpData {
  int32_t input_multiplier = 0;
  int input_left_shift = 0;
  int32_t input_range_radius = 0;
  int diff_min = 0;
  uint8_t table[256] = {0};
};

struct SoftmaxOpData {
  struct SoftmaxParams params = {};
  float table[256];
#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
  uint8_t uint8_table1[256];
  uint8_t uint8_table2[256];
#endif
  static constexpr int kInt16LUTArraySize = lut_size<int16_t>();
  int16_t exp_lut[kInt16LUTArraySize];  // int16 LUT for exp(x), where x is
                                        // uniformly distributed in
                                        // [-10.0, 0.0].
  int16_t one_over_one_plus_x_lut[kInt16LUTArraySize];  // int16 LUT for
                                                        // 1 / (1 + x), where x
                                                        // is uniformly
                                                        // distributed in
                                                        // [0.0, 1.0].
};

struct LogSoftmaxOpData : public OpData {
  int32_t reverse_scaling_divisor = 0;
  int32_t reverse_scaling_right_shift = 0;
  struct SoftmaxParams params = {};
  float f_table[256];
};

struct LeakyReluOpData : public OpData {
  int32_t output_multiplier_alpha = 0;
  int32_t output_shift_alpha = 0;
  int32_t output_multiplier_identity = 0;
  int32_t output_shift_identity = 0;
};

struct PreluOpData : public OpData {
  int32_t output_multiplier_1 = 0;
  int32_t output_shift_1 = 0;
  int32_t output_multiplier_2 = 0;
  int32_t output_shift_2 = 0;
  bool requires_broadcast;
};

struct HardSwishData {
  HardSwishParams params;
};

struct ReluOpData : public OpData {
  int32_t output_multiplier = 0;
  int output_shift = 0;
};

namespace {
template <typename T>
void PopulateLookupTable(struct OpData* data, const TfLiteTensor* input,
                         TfLiteTensor* output,
                         const std::function<float(float)>& transform) {
  static_assert(sizeof(T) == 1, "Lookup table valid only for 8-bit");
  const float inverse_scale = 1 / output->params.scale;
  int32_t maxval = std::numeric_limits<T>::max();
  int32_t minval = std::numeric_limits<T>::min();
  for (int32_t val = minval; val <= maxval; ++val) {
    const float dequantized =
        input->params.scale * (val - input->params.zero_point);
    const float transformed = transform(dequantized);
    const float rescaled = std::round(transformed * inverse_scale);
    const int32_t quantized =
        static_cast<int32_t>(rescaled + output->params.zero_point);
    data->table[static_cast<uint8_t>(static_cast<T>(val))] =
        static_cast<uint8_t>(
            static_cast<T>(std::max(std::min(maxval, quantized), minval)));
  }
}
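
// Worked example (illustrative values, not taken from any real model): for
// int8 tanh with input scale 0.1, input zero point 0, output scale 1/256,
// and output zero point -128, the entry for val = 20 is
//   dequantized = 0.1 * (20 - 0)       = 2.0
//   transformed = tanh(2.0)            ~= 0.9640
//   rescaled    = round(0.9640 * 256)  = 247
//   quantized   = 247 + (-128)         = 119  (already within [-128, 127])
// and 119 is stored at table index static_cast<uint8_t>(int8_t{20}) == 20.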

// TODO(b/143696793): move this to optimized_ops.
void EvalUsingLookupTable(struct OpData* data, const TfLiteTensor* input,
                          TfLiteTensor* output) {
  const int size =
      MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
  uint8_t* output_data = GetTensorData<uint8_t>(output);
  const uint8_t* input_data = GetTensorData<uint8_t>(input);
  int i = 0;
#if __aarch64__ && __clang__
  // This code uses ARM64-only instructions.
  // TODO(b/143709993): Port to ARMv7

  // Load the tables into registers. (4*4 128-bit registers)
  uint8x16x4_t table[4];
  table[0] = vld1q_u8_x4(data->table + 16 * 4 * 0);
  table[1] = vld1q_u8_x4(data->table + 16 * 4 * 1);
  table[2] = vld1q_u8_x4(data->table + 16 * 4 * 2);
  table[3] = vld1q_u8_x4(data->table + 16 * 4 * 3);

  // Vectorized loop; process uint8x16_t (16 elements) at a time.
  constexpr int vectorized_16_loop_step = 16;
  const int vectorized_16_loop_end =
      size / vectorized_16_loop_step * vectorized_16_loop_step;
  for (; i < vectorized_16_loop_end; i += vectorized_16_loop_step) {
    uint8x16_t input = vld1q_u8(input_data + i);
    uint8x16_t output = optimized_ops::aarch64_lookup_vector(table, input);
    vst1q_u8(output_data + i, output);
  }
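  // Per lane j, each vector iteration computes the same gather as the scalar
  // loop below, output_data[i + j] = data->table[input_data[i + j]], using
  // TBL table-lookup instructions over the four 64-byte quarters loaded
  // above.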
  // Postamble and non-ARM64 code: simple for loop.
#endif
  for (; i < size; ++i) {
    output_data[i] = data->table[input_data[i]];
  }
}

template <typename T>
void QuantizedReluX(float act_min, float act_max, const TfLiteTensor* input,
                    TfLiteTensor* output, const ReluOpData* data) {
  ReluParams params;
  params.quantized_activation_min =
      std::max(static_cast<int32_t>(std::numeric_limits<T>::min()),
               output->params.zero_point +
                   static_cast<int32_t>(roundf(act_min / output->params.scale)));
  params.quantized_activation_max =
      act_max == std::numeric_limits<float>::infinity()
          ? static_cast<int32_t>(std::numeric_limits<T>::max())
          : std::min(
                static_cast<int32_t>(std::numeric_limits<T>::max()),
                output->params.zero_point +
                    static_cast<int32_t>(roundf(act_max / output->params.scale)));
  params.input_offset = input->params.zero_point;
  params.output_offset = output->params.zero_point;
  params.output_multiplier = data->output_multiplier;
  params.output_shift = data->output_shift;
  optimized_ops::ReluX(params, GetTensorShape(input), GetTensorData<T>(input),
                       GetTensorShape(output), GetTensorData<T>(output));
}
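
// Worked example (illustrative numbers): for Relu6 on uint8 with output
// scale 0.05 and output zero point 0, act_min = 0.0 gives
//   quantized_activation_min = max(0, 0 + round(0.0 / 0.05)) = 0
// and act_max = 6.0 gives
//   quantized_activation_max = min(255, 0 + round(6.0 / 0.05)) = 120.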

}  // namespace

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // This is a builtin op, so we don't use the contents in 'buffer', if any.
  // Instead, we allocate a new object to carry information from Prepare() to
  // Eval().
  return new OpData;
}

void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
  return new SoftmaxOpData;
}

void SoftmaxFree(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<SoftmaxOpData*>(buffer);
}

void* LogSoftmaxInit(TfLiteContext* context, const char* buffer,
                     size_t length) {
  return new LogSoftmaxOpData;
}

void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) {
  return new PreluOpData;
}

void Free(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<OpData*>(buffer);
}

void LogSoftmaxFree(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<LogSoftmaxOpData*>(buffer);
}

void PreluFree(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<PreluOpData*>(buffer);
}

void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
  return new HardSwishData;
}

TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  return context->ResizeTensor(context, output,
                               TfLiteIntArrayCopy(input->dims));
}

void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
  return new ReluOpData;
}

void ReluFree(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<ReluOpData*>(buffer);
}

TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
  ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 ||
      input->type == kTfLiteInt16) {
    double real_multiplier = input->params.scale / output->params.scale;
    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
                       &data->output_shift);
  }

  if (input->type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
  }

  return context->ResizeTensor(context, output,
                               TfLiteIntArrayCopy(input->dims));
}

void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) {
  return new LeakyReluOpData;
}

void LeakyReluFree(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<LeakyReluOpData*>(buffer);
}

void HardSwishFree(TfLiteContext* context, void* buffer) {
  delete static_cast<HardSwishData*>(buffer);
}

TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_STATUS(GenericPrepare(context, node));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));

  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
    HardSwishData* data = static_cast<HardSwishData*>(node->user_data);
    HardSwishParams* params = &data->params;
    const TfLiteTensor* input;
    TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
    params->input_zero_point = input->params.zero_point;
    params->output_zero_point = output->params.zero_point;
    const float input_scale = input->params.scale;
    const float hires_input_scale = (1.0f / 128.0f) * input_scale;
    const float reluish_scale = 3.0f / 32768.0f;
    const float output_scale = output->params.scale;

    const float output_multiplier = hires_input_scale / output_scale;

    int32_t output_multiplier_fixedpoint_int32;
    QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
                       &params->output_multiplier_exponent);
    DownScaleInt32ToInt16Multiplier(
        output_multiplier_fixedpoint_int32,
        &params->output_multiplier_fixedpoint_int16);
    TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);

    const float reluish_multiplier = hires_input_scale / reluish_scale;
    int32_t reluish_multiplier_fixedpoint_int32;
    QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
                       &params->reluish_multiplier_exponent);
    DownScaleInt32ToInt16Multiplier(
        reluish_multiplier_fixedpoint_int32,
        &params->reluish_multiplier_fixedpoint_int16);
  }
  return kTfLiteOk;
}
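
// Worked example (illustrative numbers): with input scale 0.1 and output
// scale 0.1, hires_input_scale = 0.1 / 128 = 0.00078125, so
//   output_multiplier  = 0.00078125 / 0.1         = 0.0078125 (= 1/128)
//   reluish_multiplier = 0.00078125 / (3 / 32768) ~= 8.5333
// each of which QuantizeMultiplier then splits into a fixed-point mantissa
// and a power-of-two exponent.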

TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  LeakyReluOpData* data = reinterpret_cast<LeakyReluOpData*>(node->user_data);

  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
      output->type == kTfLiteInt16) {
    const auto* params =
        reinterpret_cast<TfLiteLeakyReluParams*>(node->builtin_data);

    double alpha_multiplier =
        input->params.scale * params->alpha / output->params.scale;
    QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha,
                       &data->output_shift_alpha);
    double identity_multiplier = input->params.scale / output->params.scale;
    QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity,
                       &data->output_shift_identity);
  }

  if (input->type == kTfLiteInt16 && output->type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
  }

  return context->ResizeTensor(context, output,
                               TfLiteIntArrayCopy(input->dims));
}
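
// Worked example (illustrative numbers): with alpha = 0.2, input scale 0.1,
// and output scale 0.05,
//   alpha_multiplier    = 0.1 * 0.2 / 0.05 = 0.4  (negative inputs)
//   identity_multiplier = 0.1 / 0.05       = 2.0  (non-negative inputs)
// so the quantized kernel applies one of two fixed-point multipliers
// depending on the sign of the input.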

template <KernelType kernel_type>
TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  if (kernel_type == kFixedPointOptimized) {
    if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
      static constexpr int kInputIntegerBits = 4;

      const double input_real_multiplier =
          input->params.scale *
          static_cast<double>(1 << (15 - kInputIntegerBits));

      const double q =
          std::frexp(input_real_multiplier, &data->input_left_shift);
      auto q_fixed = static_cast<int32_t>(TfLiteRound(q * (1ll << 15)));
      data->input_multiplier = static_cast<int16_t>(q_fixed);

      int16_t input_range_radius =
          CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 15);
      data->input_range_radius = input_range_radius;
    }
  }

  if (kernel_type == kGenericOptimized || kernel_type == kReference) {
    if (input->type == kTfLiteUInt8) {
      PopulateLookupTable<uint8_t>(
          data, input, output, [](float value) { return std::tanh(value); });
    } else if (input->type == kTfLiteInt8) {
      PopulateLookupTable<int8_t>(data, input, output,
                                  [](float value) { return std::tanh(value); });
    }
  }

  if (input->type == kTfLiteInt16) {
    static constexpr int kInputIntegerBits = 3;
    static constexpr int kOutputFractionalBits = 15;

    // These operators are implemented in fixed-point arithmetic,
    // which intrinsically wants symmetric ranges (zero_point==0)
    // and power-of-two scales (power-of-two is abbreviated below as POT).
    // While more general support would be possible by means of rescaling,
    // that would add some overhead and some loss of accuracy, and it isn't
    // needed at the moment as current quantized LSTM applications are
    // happy with symmetric, power-of-two-scales quantization. So we just
    // implement that narrow case for now.

    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);

    int input_scale_log2_rounded;
    bool param_scale_pot =
        CheckedLog2(input->params.scale, &input_scale_log2_rounded);

    data->input_left_shift =
        (15 - kInputIntegerBits) + input_scale_log2_rounded;
    param_scale_pot &=
        (data->input_left_shift == 0 || data->input_left_shift == 1);

    if (!param_scale_pot) {
      // Calculate a multiplier to change the input scale to 1/(3*4096),
      // as required by the table lookup. The factor 3.0 in the multiplier
      // comes from the interval being [-10.7, 10.7] instead of [-8, 8],
      // so in this scaling +/-2^17 represents +/-10.7.

      double multiplier = input->params.scale * 4096.0 * 3.0;
      data->input_left_shift = 0;

      while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
        data->input_left_shift++;
        multiplier = multiplier * 2.0;
      }

      data->input_multiplier = static_cast<int32_t>(multiplier);
    }
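    // Worked example (illustrative numbers): with input scale 0.001 the
    // starting multiplier is 0.001 * 4096 * 3 = 12.288; the loop doubles it
    // until it exceeds 32767 / 2, giving input_left_shift = 11 and
    // multiplier = 12.288 * 2^11 ~= 25165.8, so input_multiplier = 25165.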

    int output_scale_log2_rounded;
    TF_LITE_ENSURE(
        context, CheckedLog2(output->params.scale, &output_scale_log2_rounded));
    TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded,
                      -kOutputFractionalBits);
  }

  return context->ResizeTensor(context, output,
                               TfLiteIntArrayCopy(input->dims));
}

template <KernelType kernel_type>
TfLiteStatus SigmoidPrepare(TfLiteContext* context, TfLiteNode* node) {
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  if (kernel_type == kFixedPointOptimized) {
    if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
      if (input->type == kTfLiteUInt8) {
        TF_LITE_ENSURE_EQ(context, output->params.zero_point,
                          std::numeric_limits<uint8_t>::min());
      }
      if (input->type == kTfLiteInt8) {
        TF_LITE_ENSURE_EQ(context, output->params.zero_point,
                          std::numeric_limits<int8_t>::min());
      }
      TF_LITE_ENSURE(context, output->params.scale == 1. / 256);

      static constexpr int kInputIntegerBits = 4;

      const double input_real_multiplier =
          input->params.scale *
          static_cast<double>(1 << (15 - kInputIntegerBits));

      const double q =
          std::frexp(input_real_multiplier, &data->input_left_shift);
      auto q_fixed = static_cast<int32_t>(TfLiteRound(q * (1ll << 15)));
      data->input_multiplier = static_cast<int16_t>(q_fixed);

      int16_t input_range_radius =
          CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 15);
      data->input_range_radius = input_range_radius;
    }
  }

  if (kernel_type == kGenericOptimized || kernel_type == kReference) {
    if (input->type == kTfLiteUInt8) {
      TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
      PopulateLookupTable<uint8_t>(data, input, output, [](float value) {
        return 1.0f / (1.0f + std::exp(-value));
      });
    } else if (input->type == kTfLiteInt8) {
      TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
      PopulateLookupTable<int8_t>(data, input, output, [](float value) {
        return 1.0f / (1.0f + std::exp(-value));
      });
    } else if (input->type == kTfLiteInt16) {
      TF_LITE_ENSURE(context, output->params.scale == 1. / 32768);
      TF_LITE_ENSURE(context, output->params.zero_point == 0);
    }
  }

  if (input->type == kTfLiteInt16) {
    static constexpr int kInputIntegerBits = 3;
    static constexpr int kOutputFractionalBits = 15;

    // See comments in TanhPrepare about requiring zero_point==0
    // and a power-of-two ("POT") scale.

    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);

    int input_scale_log2_rounded;
    bool param_scale_pot =
        CheckedLog2(input->params.scale, &input_scale_log2_rounded);

    data->input_left_shift =
        (15 - kInputIntegerBits) + input_scale_log2_rounded;
    param_scale_pot &= (data->input_left_shift == 0);

    if (!param_scale_pot) {
      // Calculate a multiplier to change the input scale to 1/(3*4096),
      // as required by the table lookup. In this scaling +/-2^17
      // represents +/-10.7.
      double multiplier = input->params.scale * 4096.0 * 3.0;

      data->input_left_shift = 0;

      while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
        data->input_left_shift++;
        multiplier = multiplier * 2.0;
      }

      data->input_multiplier = static_cast<int32_t>(multiplier);
    }

    int output_scale_log2_rounded;
    TF_LITE_ENSURE(
        context, CheckedLog2(output->params.scale, &output_scale_log2_rounded));
    TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded,
                      -kOutputFractionalBits);
  }

  return context->ResizeTensor(context, output,
                               TfLiteIntArrayCopy(input->dims));
}

template <KernelType kernel_type>
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
  SoftmaxOpData* data = reinterpret_cast<SoftmaxOpData*>(node->user_data);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));

  TF_LITE_ENSURE(context, NumDimensions(input) >= 1);

  if (input->type == kTfLiteInt8 && output->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
    TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 256,
                        (0.001f * 1.f / 256));
  } else if (input->type == kTfLiteInt16 && output->type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
    TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768,
                        (0.001f * 1.f / 32768));
  }

  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
    if (kernel_type == kReference) {
      const int kScaledDiffIntegerBits = 5;
      int input_left_shift;
      tflite::PreprocessSoftmaxScaling(
          static_cast<double>(params->beta),
          static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
          &data->params.input_multiplier, &input_left_shift);
      data->params.input_left_shift = input_left_shift;
      data->params.diff_min =
          -1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
                                              input_left_shift);
    } else {
      switch (output->type) {
        case kTfLiteUInt8:
        case kTfLiteInt8:
#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
          // Only used when both input and output are uint8/int8 and the
          // build uses clang on aarch64.
          // TODO(b/143709993): Port to ARMv7 and other platforms.
          data->params.uint8_table1 = data->uint8_table1;
          data->params.uint8_table2 = data->uint8_table2;
          optimized_ops::PopulateSoftmaxUInt8LookupTable(
              &data->params, input->params.scale, params->beta);
          break;
#endif
        case kTfLiteInt16:
        default:
          data->params.table = data->table;
          optimized_ops::PopulateSoftmaxLookupTable(
              &data->params, input->params.scale, params->beta);
      }

      data->params.zero_point = output->params.zero_point;
      data->params.scale = output->params.scale;
    }
  } else if (input->type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);

    data->params.exp_lut = data->exp_lut;
    // The exp LUT is only used on negative values; we consider exp(-10.0)
    // insignificant to the accumulation.
    gen_lut<double, int16_t, int16_t>(
        [](double value) { return std::exp(value); }, -10.0, 0.0, -1.0, 1.0,
        data->params.exp_lut);
    data->params.one_over_one_plus_x_lut = data->one_over_one_plus_x_lut;
    gen_lut<double, int16_t, int16_t>(
        [](double value) { return 1.0 / (1.0 + value); }, 0.0, 1.0, -1.0, 1.0,
        data->params.one_over_one_plus_x_lut);
    data->params.zero_point = output->params.zero_point;
    data->params.scale = output->params.scale;

    double input_scale_beta_rescale =
        input->params.scale * params->beta /
        (10.0 / 65535.0);  // Scale the input_diff so that [-65535, 0]
                           // corresponds to [-10.0, 0.0].
    QuantizeMultiplier(input_scale_beta_rescale, &data->params.input_multiplier,
                       &data->params.input_left_shift);
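    // Worked example (illustrative numbers): with input scale 0.05 and
    // beta = 1.0, input_scale_beta_rescale = 0.05 / (10.0 / 65535.0)
    // ~= 327.675, so a quantized input difference of -200 (i.e. -10.0 in
    // real terms) maps to roughly -65535, the bottom of the LUT's domain.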
  }

  return context->ResizeTensor(context, output,
                               TfLiteIntArrayCopy(input->dims));
}

template <KernelType kernel_type>
TfLiteStatus LogSoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
  LogSoftmaxOpData* data = reinterpret_cast<LogSoftmaxOpData*>(node->user_data);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, output->params.scale, 16.0 / 256);
    static const double kBeta = 1.0;
    if (input->type == kTfLiteUInt8) {
      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 255);
    }
    if (input->type == kTfLiteInt8) {
      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 127);
    }
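    // With scale 16/256 = 0.0625 and the zero point at the type's maximum
    // (255 for uint8, 127 for int8), the output covers [-15.9375, 0.0],
    // matching log-softmax's non-positive range.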

    if (kernel_type == kReference) {
      const int kScaledDiffIntegerBits = 5;
      int input_left_shift;
      int reverse_scaling_right_shift;
      tflite::PreprocessLogSoftmaxScalingExp(
          kBeta, static_cast<double>(input->params.scale),
          kScaledDiffIntegerBits, &data->params.input_multiplier,
          &input_left_shift, &data->params.reverse_scaling_divisor,
          &reverse_scaling_right_shift);
      reverse_scaling_right_shift *= -1;
      data->params.input_left_shift = input_left_shift;
      data->params.reverse_scaling_right_shift = reverse_scaling_right_shift;
      data->params.diff_min = -tflite::CalculateInputRadius(
          kScaledDiffIntegerBits, input_left_shift);
    } else {
      data->params.table = data->f_table;
      optimized_ops::PopulateSoftmaxLookupTable(&data->params,
                                                input->params.scale, kBeta);
      data->params.zero_point = output->params.zero_point;
      data->params.scale = output->params.scale;
    }
  }

  return context->ResizeTensor(context, output,
                               TfLiteIntArrayCopy(input->dims));
}

TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  const TfLiteTensor* alpha;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &alpha));
  PreluOpData* data = reinterpret_cast<PreluOpData*>(node->user_data);

  TF_LITE_ENSURE_TYPES_EQ(context, input->type, alpha->type);

  output->type = input->type;

  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
    // prelu(x) = x if x >= 0 else x * alpha.
    // Translating that into quantized computation:
    //
    // input_float = (input_q - input_zp) * input_scale
    // output_float = (output_q - output_zp) * output_scale
    // alpha_float = (alpha_q - alpha_zp) * alpha_scale
    //
    // When input_q - input_zp >= 0:
    // output_q = (input_q - input_zp) * input_scale / output_scale + output_zp
    // else:
    // output_q = (input_q - input_zp) * (alpha_q - alpha_zp) * input_scale
    //            * alpha_scale / output_scale + output_zp
    //
    // So for input_q - input_zp >= 0:
    // output real multiplier 1 is input_scale / output_scale;
    // for input_q - input_zp < 0:
    // output real multiplier 2 is input_scale * alpha_scale / output_scale.
    double real_multiplier_1 = input->params.scale / output->params.scale;
    double real_multiplier_2 =
        input->params.scale * alpha->params.scale / output->params.scale;
    QuantizeMultiplier(real_multiplier_1, &data->output_multiplier_1,
                       &data->output_shift_1);
    QuantizeMultiplier(real_multiplier_2, &data->output_multiplier_2,
                       &data->output_shift_2);
  }
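
  // Worked example (illustrative numbers): with input scale 0.1, alpha
  // scale 0.02, and output scale 0.1,
  //   real_multiplier_1 = 0.1 / 0.1        = 1.0  (x >= 0 branch)
  //   real_multiplier_2 = 0.1 * 0.02 / 0.1 = 0.02 (x < 0 branch)
  // and each is stored as a fixed-point multiplier plus shift.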

  data->requires_broadcast = !HaveSameShapes(input, alpha);
  // PRelu (parametric ReLU) shares the same alpha value along the "shared
  // axis", so alpha values always need to be "broadcast" in PRelu.
  TfLiteIntArray* output_size = nullptr;
  TF_LITE_ENSURE_OK(
      context, CalculateShapeForBroadcast(context, input, alpha, &output_size));

  TF_LITE_ENSURE_OK(context,
                    context->ResizeTensor(context, output, output_size));
  // After broadcasting, the output shape should always be the same as the
  // input shape.
  TF_LITE_ENSURE(context, HaveSameShapes(input, output));

  return kTfLiteOk;
}

TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  const ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
  switch (input->type) {
    case kTfLiteFloat32: {
      optimized_ops::Relu(GetTensorShape(input), GetTensorData<float>(input),
                          GetTensorShape(output), GetTensorData<float>(output));
    } break;
    // TODO(renjieliu): We may revisit the quantization calculation logic,
    // the unbounded upper limit is actually hard to quantize.
    case kTfLiteUInt8: {
      QuantizedReluX<uint8_t>(0.0f, std::numeric_limits<float>::infinity(),
                              input, output, data);
    } break;
    case kTfLiteInt8: {
      QuantizedReluX<int8_t>(0.0f, std::numeric_limits<float>::infinity(),
                             input, output, data);
    } break;
    case kTfLiteInt16: {
      QuantizedReluX<int16_t>(0.0f, std::numeric_limits<float>::infinity(),
                              input, output, data);
    } break;
    default:
      TF_LITE_KERNEL_LOG(context,
                         "Only float32, uint8, int8 and int16 are supported "
                         "currently, got %s.",
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
  return kTfLiteOk;
}

TfLiteStatus Relu1Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  const ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
  switch (input->type) {
    case kTfLiteFloat32: {
      optimized_ops::Relu1(GetTensorShape(input), GetTensorData<float>(input),
                           GetTensorShape(output),
                           GetTensorData<float>(output));
      return kTfLiteOk;
    }
    case kTfLiteUInt8: {
      QuantizedReluX<uint8_t>(-1.0f, 1.0f, input, output, data);
      return kTfLiteOk;
    }
    case kTfLiteInt8: {
      QuantizedReluX<int8_t>(-1.0f, 1.0f, input, output, data);
      return kTfLiteOk;
    }
    default:
      TF_LITE_KERNEL_LOG(context,
                         "Only float32, uint8 and int8 are supported "
                         "currently, got %s.",
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
}

template <KernelType kernel_type>
TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
  HardSwishData* data = static_cast<HardSwishData*>(node->user_data);

  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  switch (input->type) {
    case kTfLiteFloat32: {
      if (kernel_type == kReference) {
        reference_ops::HardSwish(
            GetTensorShape(input), GetTensorData<float>(input),
            GetTensorShape(output), GetTensorData<float>(output));
      } else {
        optimized_ops::HardSwish(
            GetTensorShape(input), GetTensorData<float>(input),
            GetTensorShape(output), GetTensorData<float>(output));
      }
      return kTfLiteOk;
    } break;
    case kTfLiteUInt8: {
      HardSwishParams& params = data->params;
      if (kernel_type == kReference) {
        reference_ops::HardSwish(
            params, GetTensorShape(input), GetTensorData<uint8_t>(input),
            GetTensorShape(output), GetTensorData<uint8_t>(output));
      } else {
        optimized_ops::HardSwish(
            params, GetTensorShape(input), GetTensorData<uint8_t>(input),
            GetTensorShape(output), GetTensorData<uint8_t>(output));
      }
      return kTfLiteOk;
    } break;
    case kTfLiteInt8: {
      HardSwishParams& params = data->params;
      if (kernel_type == kReference) {
        reference_ops::HardSwish(
            params, GetTensorShape(input), GetTensorData<int8_t>(input),
            GetTensorShape(output), GetTensorData<int8_t>(output));
      } else {
        optimized_ops::HardSwish(
            params, GetTensorShape(input), GetTensorData<int8_t>(input),
            GetTensorShape(output), GetTensorData<int8_t>(output));
      }
      return kTfLiteOk;
    } break;
    default:
      TF_LITE_KERNEL_LOG(
          context,
          "Only float32, uint8 and int8 are supported currently, got %s.",
          TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
}

TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
  switch (input->type) {
    case kTfLiteFloat32: {
      size_t elements = input->bytes / sizeof(float);
      const float* in = GetTensorData<float>(input);
      const float* in_end = in + elements;
      float* out = GetTensorData<float>(output);
      for (; in < in_end; in++, out++) *out = std::min(std::max(0.f, *in), 6.f);
      return kTfLiteOk;
    }
    case kTfLiteUInt8:
      QuantizedReluX<uint8_t>(0.0f, 6.0f, input, output, data);
      return kTfLiteOk;
    case kTfLiteInt8: {
      QuantizedReluX<int8_t>(0.0f, 6.0f, input, output, data);
      return kTfLiteOk;
    }
    case kTfLiteInt16: {
      QuantizedReluX<int16_t>(0.0f, 6.0f, input, output, data);
      return kTfLiteOk;
    }
    default:
      TF_LITE_KERNEL_LOG(context,
                         "Only float32, uint8, int8 and int16 are supported "
                         "currently, got %s.",
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
}

template <KernelType kernel_type>
TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
  OpData* data = reinterpret_cast<OpData*>(node->user_data);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  switch (input->type) {
    case kTfLiteFloat32: {
      if (kernel_type == kReference) {
        reference_ops::Tanh(GetTensorShape(input), GetTensorData<float>(input),
                            GetTensorShape(output),
                            GetTensorData<float>(output));
      } else {
        optimized_ops::Tanh(GetTensorShape(input), GetTensorData<float>(input),
                            GetTensorShape(output),
                            GetTensorData<float>(output));
      }
      return kTfLiteOk;
    } break;
    case kTfLiteInt16: {
      TanhParams params;
      params.input_left_shift = data->input_left_shift;
      if (kernel_type == kReference || (data->input_multiplier > 0)) {
        reference_integer_ops::Tanh(
            data->input_multiplier, data->input_left_shift,
            GetTensorShape(input), GetTensorData<int16_t>(input),
            GetTensorShape(output), GetTensorData<int16_t>(output));
      } else {
        optimized_ops::Tanh(
            params, GetTensorShape(input), GetTensorData<int16_t>(input),
            GetTensorShape(output), GetTensorData<int16_t>(output));
      }
      return kTfLiteOk;
    } break;
    case kTfLiteUInt8: {
      if (kernel_type == kFixedPointOptimized) {
        TanhParams params;
        params.input_zero_point = input->params.zero_point;
        params.input_range_radius = data->input_range_radius;
        params.input_multiplier = data->input_multiplier;
        params.input_left_shift = data->input_left_shift;
        optimized_ops::Tanh16bitPrecision(
            params, GetTensorShape(input), GetTensorData<uint8_t>(input),
            GetTensorShape(output), GetTensorData<uint8_t>(output));
      } else {
        EvalUsingLookupTable(data, input, output);
      }
      return kTfLiteOk;
    } break;
    case kTfLiteInt8: {
      if (kernel_type == kFixedPointOptimized) {
        TanhParams params;
        params.input_zero_point = input->params.zero_point;
        params.input_range_radius = data->input_range_radius;
        params.input_multiplier = data->input_multiplier;
        params.input_left_shift = data->input_left_shift;
        optimized_ops::Tanh16bitPrecision(
            params, GetTensorShape(input), GetTensorData<int8_t>(input),
            GetTensorShape(output), GetTensorData<int8_t>(output));
      } else {
        EvalUsingLookupTable(data, input, output);
      }
      return kTfLiteOk;
    } break;
    default:
      TF_LITE_KERNEL_LOG(context,
                         "Only float32, uint8, int16 and int8 are supported "
                         "currently, got %s.",
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
}

// Sigmoid is also known as "Logistic".
template <KernelType kernel_type>
TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  switch (input->type) {
    case kTfLiteFloat32: {
      if (kernel_type == kReference) {
        reference_ops::Logistic(
            GetTensorShape(input), GetTensorData<float>(input),
            GetTensorShape(output), GetTensorData<float>(output));
      } else {
        optimized_ops::Logistic(
            GetTensorShape(input), GetTensorData<float>(input),
            GetTensorShape(output), GetTensorData<float>(output));
      }
      break;
    }
    case kTfLiteInt16: {
      LogisticParams params;
      if (kernel_type == kReference || (data->input_multiplier > 0)) {
        const int size =
            MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));

        reference_integer_ops::Logistic(
            data->input_multiplier, data->input_left_shift, size,
            GetTensorData<int16_t>(input), GetTensorData<int16_t>(output));
      } else {
        optimized_ops::Logistic(
            params, GetTensorShape(input), GetTensorData<int16_t>(input),
            GetTensorShape(output), GetTensorData<int16_t>(output));
      }
      break;
    }
    case kTfLiteUInt8: {
      if (kernel_type == kFixedPointOptimized) {
        LogisticParams params;
        params.input_zero_point = input->params.zero_point;
        params.input_range_radius = data->input_range_radius;
        params.input_multiplier = data->input_multiplier;
        params.input_left_shift = data->input_left_shift;
        optimized_ops::Logistic16bitPrecision(
            params, GetTensorShape(input), GetTensorData<uint8_t>(input),
            GetTensorShape(output), GetTensorData<uint8_t>(output));
      } else {
        EvalUsingLookupTable(data, input, output);
      }
      break;
    }
    case kTfLiteInt8: {
      if (kernel_type == kFixedPointOptimized) {
        LogisticParams params;
        params.input_zero_point = input->params.zero_point;
        params.input_range_radius = data->input_range_radius;
        params.input_multiplier = data->input_multiplier;
        params.input_left_shift = data->input_left_shift;
        optimized_ops::Logistic16bitPrecision(
            params, GetTensorShape(input), GetTensorData<int8_t>(input),
            GetTensorShape(output), GetTensorData<int8_t>(output));
      } else {
        EvalUsingLookupTable(data, input, output);
      }
      break;
    }
    default:
      TF_LITE_KERNEL_LOG(context,
                         "Only float32, uint8, int16 and int8 are supported "
                         "currently, got %s.",
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
  return kTfLiteOk;
}

TfLiteStatus SoftmaxFloat(TfLiteContext* context, const TfLiteTensor* input,
                          TfLiteTensor* output, TfLiteSoftmaxParams* params,
                          KernelType kernel_type = kGenericOptimized) {
  SoftmaxParams op_params;
  op_params.beta = params->beta;
  if (kernel_type == kReference) {
    reference_ops::Softmax(op_params, GetTensorShape(input),
                           GetTensorData<float>(input), GetTensorShape(output),
                           GetTensorData<float>(output));
  } else {
    optimized_ops::Softmax(op_params, GetTensorShape(input),
                           GetTensorData<float>(input), GetTensorShape(output),
                           GetTensorData<float>(output),
                           CpuBackendContext::GetFromContext(context));
  }
  return kTfLiteOk;
}

template <typename In, typename Out>
TfLiteStatus SoftmaxQuantized(TfLiteContext* context, const TfLiteTensor* input,
                              TfLiteTensor* output, SoftmaxOpData* data,
                              KernelType kernel_type = kGenericOptimized) {
  if (kernel_type == kReference) {
    reference_ops::Softmax(data->params, GetTensorShape(input),
                           GetTensorData<In>(input), GetTensorShape(output),
                           GetTensorData<Out>(output));
  } else {
    optimized_ops::Softmax(data->params, GetTensorShape(input),
                           GetTensorData<In>(input), GetTensorShape(output),
                           GetTensorData<Out>(output));
  }
  return kTfLiteOk;
}

template <>
TfLiteStatus SoftmaxQuantized<int8_t, int8_t>(TfLiteContext* context,
                                              const TfLiteTensor* input,
                                              TfLiteTensor* output,
                                              SoftmaxOpData* data,
                                              KernelType kernel_type) {
  if (kernel_type == kReference) {
    reference_ops::Softmax(data->params, GetTensorShape(input),
                           GetTensorData<int8_t>(input), GetTensorShape(output),
                           GetTensorData<int8_t>(output));
  } else {
#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
    optimized_ops::SoftmaxInt8LUT(
        data->params, GetTensorShape(input), GetTensorData<int8_t>(input),
        GetTensorShape(output), GetTensorData<int8_t>(output));
#else
    optimized_ops::Softmax(data->params, GetTensorShape(input),
                           GetTensorData<int8_t>(input), GetTensorShape(output),
                           GetTensorData<int8_t>(output));
#endif
  }
  return kTfLiteOk;
}

template <>
TfLiteStatus SoftmaxQuantized<uint8_t, uint8_t>(TfLiteContext* context,
                                                const TfLiteTensor* input,
                                                TfLiteTensor* output,
                                                SoftmaxOpData* data,
                                                KernelType kernel_type) {
  if (kernel_type == kReference) {
    reference_ops::Softmax(
        data->params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(output), GetTensorData<uint8_t>(output));
  } else {
#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
    optimized_ops::SoftmaxInt8LUT(
        data->params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(output), GetTensorData<uint8_t>(output));
#else
    optimized_ops::Softmax(data->params, GetTensorShape(input),
                           GetTensorData<uint8_t>(input), GetTensorShape(output),
                           GetTensorData<uint8_t>(output));
#endif
  }
  return kTfLiteOk;
}

template <>
TfLiteStatus SoftmaxQuantized<int16_t, int16_t>(TfLiteContext* context,
                                                const TfLiteTensor* input,
                                                TfLiteTensor* output,
                                                SoftmaxOpData* data,
                                                KernelType kernel_type) {
  if (NumDimensions(input) >= 1 && NumDimensions(input) <= 4) {
    reference_ops::SoftmaxInt16(
        data->params, GetTensorShape(input), GetTensorData<int16_t>(input),
        GetTensorShape(output), GetTensorData<int16_t>(output));
    return kTfLiteOk;
  } else {
    TF_LITE_KERNEL_LOG(context,
                       "Only 1D, 2D, 3D and 4D tensors supported for int16 "
                       "input with int16 output, got %dD.",
                       NumDimensions(input));
    return kTfLiteError;
  }
}
1174 
template <KernelType kernel_type>
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
  SoftmaxOpData* data = reinterpret_cast<SoftmaxOpData*>(node->user_data);

  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));

  switch (input->type) {
    case kTfLiteFloat32: {
      return SoftmaxFloat(context, input, output, params, kernel_type);
    }
    case kTfLiteUInt8: {
      switch (output->type) {
        case kTfLiteUInt8:
          return SoftmaxQuantized<uint8_t, uint8_t>(context, input, output,
                                                    data, kernel_type);
        case kTfLiteInt16:
          return SoftmaxQuantized<uint8_t, int16_t>(context, input, output,
                                                    data, kernel_type);
        default:
          TF_LITE_KERNEL_LOG(context,
                             "Only uint8_t and int16_t outputs are supported "
                             "with uint8_t inputs currently, got %s.",
                             TfLiteTypeGetName(output->type));
          return kTfLiteError;
      }
    }
    case kTfLiteInt8: {
      switch (output->type) {
        case kTfLiteInt8:
          return SoftmaxQuantized<int8_t, int8_t>(context, input, output, data,
                                                  kernel_type);
        case kTfLiteInt16:
          return SoftmaxQuantized<int8_t, int16_t>(context, input, output, data,
                                                   kernel_type);
        default:
          TF_LITE_KERNEL_LOG(context,
                             "Only int8_t and int16_t outputs are supported "
                             "with int8_t inputs currently, got %s.",
                             TfLiteTypeGetName(output->type));
          return kTfLiteError;
      }
    }
    case kTfLiteInt16: {
      return SoftmaxQuantized<int16_t, int16_t>(context, input, output, data,
                                                kernel_type);
    }

    default:
      TF_LITE_KERNEL_LOG(context,
                         "Only float32, uint8_t, int8_t and int16_t are "
                         "supported currently, got %s.",
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
}

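// Evaluates log-softmax, choosing between the optimized and reference
// implementations according to the kernel_type template parameter.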
template <KernelType kernel_type>
TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
  const LogSoftmaxOpData* data =
      reinterpret_cast<LogSoftmaxOpData*>(node->user_data);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  switch (input->type) {
    case kTfLiteFloat32: {
      SoftmaxParams op_params;
      if (kernel_type == kGenericOptimized) {
        optimized_ops::LogSoftmax(
            op_params, GetTensorShape(input), GetTensorData<float>(input),
            GetTensorShape(output), GetTensorData<float>(output));
      } else {
        reference_ops::LogSoftmax(
            op_params, GetTensorShape(input), GetTensorData<float>(input),
            GetTensorShape(output), GetTensorData<float>(output));
      }
      return kTfLiteOk;
    }
    case kTfLiteUInt8: {
      const SoftmaxParams& op_params = data->params;
      if (kernel_type == kGenericOptimized) {
        optimized_ops::LogSoftmax(
            op_params, input->params.scale, GetTensorShape(input),
            GetTensorData<uint8_t>(input), GetTensorShape(output),
            GetTensorData<uint8_t>(output));
      } else {
        reference_ops::LogSoftmax(
            op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
            GetTensorShape(output), GetTensorData<uint8_t>(output));
      }
      return kTfLiteOk;
    }
    case kTfLiteInt8: {
      const SoftmaxParams& op_params = data->params;
      if (kernel_type == kGenericOptimized) {
        optimized_ops::LogSoftmax(
            op_params, input->params.scale, GetTensorShape(input),
            GetTensorData<int8_t>(input), GetTensorShape(output),
            GetTensorData<int8_t>(output));
      } else {
        const auto input_shape = GetTensorShape(input);
        const auto output_shape = GetTensorShape(output);
        const int trailing_dim = input_shape.DimensionsCount() - 1;
        const int outer_size =
            MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
        const int depth =
            MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
        reference_integer_ops::LogSoftmax(
            op_params.input_multiplier, op_params.input_left_shift,
            op_params.reverse_scaling_divisor,
            op_params.reverse_scaling_right_shift, op_params.diff_min,
            outer_size, depth, GetTensorData<int8_t>(input),
            GetTensorData<int8_t>(output));
      }
      return kTfLiteOk;
    }
    default:
      TF_LITE_KERNEL_LOG(
          context,
          "Only float32, uint8 and int8 are supported currently, got %s.",
          TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
}

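// PReLU (parametric ReLU): f(x) = x for x >= 0, alpha * x otherwise, with
// alpha taken from the second input tensor.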
template <typename T>
T ApplyPrelu(T input, T alpha) {
  return input >= 0.0 ? input : input * alpha;
}

template <KernelType kernel_type>
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  const TfLiteTensor* alpha;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &alpha));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  const PreluOpData* data = reinterpret_cast<PreluOpData*>(node->user_data);
  switch (input->type) {
    case kTfLiteFloat32: {
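      // The optimized path detects at runtime whether alpha must be broadcast
      // across the input; the reference path relies on the requires_broadcast
      // flag computed during Prepare.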
      if (kernel_type == kGenericOptimized) {
        tflite::ArithmeticParams op_params;
        bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
            GetTensorShape(input), GetTensorShape(alpha), &op_params);
        if (need_broadcast) {
          optimized_ops::BroadcastPReluDispatch(
              op_params, GetTensorShape(input), GetTensorData<float>(input),
              GetTensorShape(alpha), GetTensorData<float>(alpha),
              GetTensorShape(output), GetTensorData<float>(output),
              ApplyPrelu<float>);
        } else {
          const int flat_size =
              MatchingElementsSize(GetTensorShape(input), GetTensorShape(alpha),
                                   GetTensorShape(output));
          optimized_ops::PReluElementWise(
              flat_size, op_params, GetTensorData<float>(alpha),
              GetTensorData<float>(input), GetTensorData<float>(output));
        }
      } else {
        if (data->requires_broadcast) {
          reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
              GetTensorShape(input), GetTensorData<float>(input),
              GetTensorShape(alpha), GetTensorData<float>(alpha),
              GetTensorShape(output), GetTensorData<float>(output),
              ApplyPrelu<float>);
        } else {
          reference_ops::BinaryFunction<float, float, float>(
              GetTensorShape(input), GetTensorData<float>(input),
              GetTensorShape(alpha), GetTensorData<float>(alpha),
              GetTensorShape(output), GetTensorData<float>(output),
              ApplyPrelu<float>);
        }
      }
      return kTfLiteOk;
    }
    case kTfLiteUInt8: {
      PreluParams op_params;
      op_params.input_offset = -input->params.zero_point;
      op_params.alpha_offset = -alpha->params.zero_point;
      op_params.output_offset = output->params.zero_point;
      op_params.output_multiplier_1 = data->output_multiplier_1;
      op_params.output_shift_1 = data->output_shift_1;
      op_params.output_multiplier_2 = data->output_multiplier_2;
      op_params.output_shift_2 = data->output_shift_2;
      if (data->requires_broadcast) {
        reference_ops::BroadcastPrelu4DSlow(
            op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
            GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
            GetTensorShape(output), GetTensorData<uint8_t>(output));
      } else {
        reference_ops::Prelu(
            op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
            GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
            GetTensorShape(output), GetTensorData<uint8_t>(output));
      }
      return kTfLiteOk;
    }
    case kTfLiteInt8: {
      PreluParams op_params;
      op_params.input_offset = -input->params.zero_point;
      op_params.alpha_offset = -alpha->params.zero_point;
      op_params.output_offset = output->params.zero_point;
      op_params.output_multiplier_1 = data->output_multiplier_1;
      op_params.output_shift_1 = data->output_shift_1;
      op_params.output_multiplier_2 = data->output_multiplier_2;
      op_params.output_shift_2 = data->output_shift_2;
      if (data->requires_broadcast) {
        reference_ops::BroadcastPrelu4DSlow(
            op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
            GetTensorShape(alpha), GetTensorData<int8_t>(alpha),
            GetTensorShape(output), GetTensorData<int8_t>(output));
      } else {
        reference_ops::Prelu(
            op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
            GetTensorShape(alpha), GetTensorData<int8_t>(alpha),
            GetTensorShape(output), GetTensorData<int8_t>(output));
      }
      return kTfLiteOk;
    }
    default:
      TF_LITE_KERNEL_LOG(
          context,
          "Only float32, uint8 and int8 are supported currently, got %s.",
          TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
}

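// Applies leaky ReLU on quantized data using the zero points from the tensor
// params and the multipliers/shifts precomputed during Prepare.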
template <typename T>
void QuantizeLeakyRelu(const TfLiteTensor* input, TfLiteTensor* output,
                       const LeakyReluOpData* data) {
  LeakyReluParams op_params;

  op_params.input_offset = input->params.zero_point;
  op_params.output_offset = output->params.zero_point;
  op_params.output_multiplier_alpha = data->output_multiplier_alpha;
  op_params.output_shift_alpha = data->output_shift_alpha;
  op_params.output_multiplier_identity = data->output_multiplier_identity;
  op_params.output_shift_identity = data->output_shift_identity;
  reference_ops::QuantizeLeakyRelu(
      op_params, GetTensorShape(input), GetTensorData<T>(input),
      GetTensorShape(output), GetTensorData<T>(output));
}

TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  const auto* params =
      reinterpret_cast<TfLiteLeakyReluParams*>(node->builtin_data);
  const LeakyReluOpData* data =
      reinterpret_cast<LeakyReluOpData*>(node->user_data);

  LeakyReluParams op_params;
  switch (input->type) {
    case kTfLiteFloat32: {
      op_params.alpha = params->alpha;
      optimized_ops::LeakyRelu(
          op_params, GetTensorShape(input), GetTensorData<float>(input),
          GetTensorShape(output), GetTensorData<float>(output));
      return kTfLiteOk;
    }
    case kTfLiteUInt8: {
      QuantizeLeakyRelu<uint8_t>(input, output, data);
      return kTfLiteOk;
    }
    case kTfLiteInt8: {
      QuantizeLeakyRelu<int8_t>(input, output, data);
      return kTfLiteOk;
    }
    case kTfLiteInt16: {
      QuantizeLeakyRelu<int16_t>(input, output, data);
      return kTfLiteOk;
    }
    default:
      TF_LITE_KERNEL_LOG(
          context,
          "Only float32, int8, int16 and uint8 are supported currently, "
          "got %s.",
          TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
}

TfLiteStatus EluPrepare(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  // Use LUT to handle quantized elu path.
  if (input->type == kTfLiteInt8) {
    PopulateLookupTable<int8_t>(data, input, output, [](float value) {
      return value < 0.0 ? std::exp(value) - 1.0f : value;
    });
  }
  return GenericPrepare(context, node);
}

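// ELU: the float path calls the optimized op directly; the int8 path reads
// back the lookup table built in EluPrepare.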
TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  switch (input->type) {
    case kTfLiteFloat32: {
      optimized_ops::Elu(GetTensorShape(input), GetTensorData<float>(input),
                         GetTensorShape(output), GetTensorData<float>(output));
      return kTfLiteOk;
    }
    case kTfLiteInt8: {
      OpData* data = reinterpret_cast<OpData*>(node->user_data);
      EvalUsingLookupTable(data, input, output);
      return kTfLiteOk;
    }
    default:
      TF_LITE_KERNEL_LOG(
          context, "Only float32 and int8 are supported currently, got %s.",
          TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
}

}  // namespace activations

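// The registration entries below list the TfLiteRegistration members in
// order: init, free, prepare, invoke.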
TfLiteRegistration* Register_ELU() {
  static TfLiteRegistration r = {activations::Init, activations::Free,
                                 activations::EluPrepare, activations::EluEval};
  return &r;
}

TfLiteRegistration* Register_RELU() {
  static TfLiteRegistration r = {activations::ReluInit, activations::ReluFree,
                                 activations::ReluPrepare,
                                 activations::ReluEval};
  return &r;
}

TfLiteRegistration* Register_RELU_N1_TO_1() {
  static TfLiteRegistration r = {activations::ReluInit, activations::ReluFree,
                                 activations::ReluPrepare,
                                 activations::Relu1Eval};
  return &r;
}

TfLiteRegistration* Register_RELU6() {
  static TfLiteRegistration r = {activations::ReluInit, activations::ReluFree,
                                 activations::ReluPrepare,
                                 activations::Relu6Eval};
  return &r;
}

TfLiteRegistration* Register_TANH_REF() {
  static TfLiteRegistration r = {
      activations::Init, activations::Free,
      activations::TanhPrepare<activations::kReference>,
      activations::TanhEval<activations::kReference>};
  return &r;
}

TfLiteRegistration* Register_TANH_GENERIC_OPT() {
  static TfLiteRegistration r = {
      activations::Init, activations::Free,
      activations::TanhPrepare<activations::kGenericOptimized>,
      activations::TanhEval<activations::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_TANH_FIXED_POINT_OPT() {
  static TfLiteRegistration r = {
      activations::Init, activations::Free,
      activations::TanhPrepare<activations::kFixedPointOptimized>,
      activations::TanhEval<activations::kFixedPointOptimized>};
  return &r;
}

TfLiteRegistration* Register_TANH() {
  // TODO(b/134622898): Switch over from the LUT optimized method to the fixed
  // point optimized method when typical Android hardware performs better on
  // the latter one.
  return Register_TANH_GENERIC_OPT();
}

TfLiteRegistration* Register_LOGISTIC_REF() {
  static TfLiteRegistration r = {
      activations::Init, activations::Free,
      activations::SigmoidPrepare<activations::kReference>,
      activations::SigmoidEval<activations::kReference>};
  return &r;
}

TfLiteRegistration* Register_LOGISTIC_GENERIC_OPT() {
  static TfLiteRegistration r = {
      activations::Init, activations::Free,
      activations::SigmoidPrepare<activations::kGenericOptimized>,
      activations::SigmoidEval<activations::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_LOGISTIC_FIXED_POINT_OPT() {
  static TfLiteRegistration r = {
      activations::Init, activations::Free,
      activations::SigmoidPrepare<activations::kFixedPointOptimized>,
      activations::SigmoidEval<activations::kFixedPointOptimized>};
  return &r;
}

TfLiteRegistration* Register_LOGISTIC() {
  // TODO(b/134622898): Switch over from the LUT optimized method to the fixed
  // point optimized method when typical Android hardware performs better on
  // the latter one.
  return Register_LOGISTIC_GENERIC_OPT();
}

TfLiteRegistration* Register_SOFTMAX_REF() {
  static TfLiteRegistration r = {
      activations::SoftmaxInit, activations::SoftmaxFree,
      activations::SoftmaxPrepare<activations::kReference>,
      activations::SoftmaxEval<activations::kReference>};
  return &r;
}

TfLiteRegistration* Register_SOFTMAX() {
  static TfLiteRegistration r = {
      activations::SoftmaxInit, activations::SoftmaxFree,
      activations::SoftmaxPrepare<activations::kGenericOptimized>,
      activations::SoftmaxEval<activations::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_LOG_SOFTMAX_REF() {
  static TfLiteRegistration r = {
      activations::LogSoftmaxInit, activations::LogSoftmaxFree,
      activations::LogSoftmaxPrepare<activations::kReference>,
      activations::LogSoftmaxEval<activations::kReference>};
  return &r;
}

TfLiteRegistration* Register_LOG_SOFTMAX() {
  static TfLiteRegistration r = {
      activations::LogSoftmaxInit, activations::LogSoftmaxFree,
      activations::LogSoftmaxPrepare<activations::kGenericOptimized>,
      activations::LogSoftmaxEval<activations::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_PRELU_REF() {
  static TfLiteRegistration r = {
      activations::PreluInit, activations::PreluFree, activations::PreluPrepare,
      activations::PreluEval<activations::kReference>};
  return &r;
}

TfLiteRegistration* Register_PRELU() {
  static TfLiteRegistration r = {
      activations::PreluInit, activations::PreluFree, activations::PreluPrepare,
      activations::PreluEval<activations::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_LEAKY_RELU() {
  static TfLiteRegistration r = {
      activations::LeakyReluInit, activations::LeakyReluFree,
      activations::LeakyReluPrepare, activations::LeakyReluEval};
  return &r;
}

TfLiteRegistration* Register_HARD_SWISH() {
  static TfLiteRegistration r = {
      activations::HardSwishInit, activations::HardSwishFree,
      activations::HardSwishPrepare,
      activations::HardSwishEval<activations::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_HARD_SWISH_REF() {
  static TfLiteRegistration r = {
      activations::HardSwishInit, activations::HardSwishFree,
      activations::HardSwishPrepare,
      activations::HardSwishEval<activations::kReference>};
  return &r;
}

}  // namespace builtin
}  // namespace ops
}  // namespace tflite