• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
#include "tensorflow/lite/micro/kernels/softmax.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/softmax.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/xtensa/xtensa.h"
28 
29 namespace tflite {
30 namespace {
31 
#if defined(HIFIMINI)
// Per-op state for the HiFiMini path.
struct OpData {
  // Persistent lookup table of precomputed e^(-x) values over the int8_t
  // input range, stored as Q0.16 fixed point (filled in PrepareHifimini).
  uint16_t* exp_lut;
};
#elif defined(FUSION_F1)
// Per-op state for the Fusion F1 (HiFi4 NNLib) path.
struct OpData {
  // Quantized softmax parameters produced by the reference SoftmaxPrepare.
  SoftmaxParams params;
  // Handle of the scratch buffer requested for xa_nn_vec_softmax_asym8s_16.
  int scratch_tensor_index;
};
#endif
42 
#if defined(HIFIMINI)
// Number of unique int8_t and int16_t values.  Used in exponent lookup table
// computation.
constexpr int kInt8Range =
    std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min() + 1;
constexpr int kInt16Range = std::numeric_limits<int16_t>::max() -
                            std::numeric_limits<int16_t>::min() + 1;
// Each 16-bit precalculated exponent is expressed as a Q0.16 fixedpoint
// value. We special-case e^0 since 1.0 requires 1 integer bit to
// express.
constexpr int kExpFractionalBits = 16;
// e^0 expressed as Q1.15 exceeds the int16_t range, so it must be handled
// specially.  kMaxExponentValue == 2^16 is the fixed-point encoding of 1.0.
constexpr int kMaxExponentValue = (1 << kExpFractionalBits);
57 
58 // Quantized softmax with int8_t input and int16_t output.
59 // Passing OpData by value does not have much savings in this op, but following
60 // that as a best practice, at least for the xtensa kernels. See b/155656675 for
61 // more details.
SoftmaxHifimini(OpData op_data,const RuntimeShape & input_shape,const int8_t * input_data,const RuntimeShape & output_shape,int16_t * output_data)62 TfLiteStatus SoftmaxHifimini(OpData op_data, const RuntimeShape& input_shape,
63                              const int8_t* input_data,
64                              const RuntimeShape& output_shape,
65                              int16_t* output_data) {
66   // The last dimension is depth.  Outer size is the total input size
67   // divided by depth.
68   const int trailing_dim = input_shape.DimensionsCount() - 1;
69   const int outer_size =
70       MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
71   const int depth =
72       MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
73 
74   for (int i = 0; i < outer_size; ++i) {
75     int8_t max_in_row = std::numeric_limits<int8_t>::min();
76     for (int c = 0; c < depth; ++c) {
77       max_in_row = std::max(max_in_row, input_data[i * depth + c]);
78     }
79 
80     uint32_t sum_of_exps = 0;
81     for (int c = 0; c < depth; ++c) {
82       TFLITE_DCHECK(max_in_row >= input_data[i * depth + c]);
83       uint8_t input_diff = max_in_row - input_data[i * depth + c];
84 
85       sum_of_exps +=
86           input_diff == 0 ? kMaxExponentValue : op_data.exp_lut[input_diff];
87     }
88 
89     // Ensure we cannot overflow the full_range_output value.  We need to
90     // guarantee that kInt16Range * max(input_data) / sum_of_exps < kInt16Range.
91     TFLITE_DCHECK(sum_of_exps >= kMaxExponentValue);
92 
93     for (int c = 0; c < depth; ++c) {
94       uint8_t input_diff = max_in_row - input_data[i * depth + c];
95       // Special case for diff == 0
96       uint32_t unscaled_output =
97           input_diff == 0 ? kMaxExponentValue : op_data.exp_lut[input_diff];
98       int64_t scaled_output = static_cast<int64_t>(unscaled_output) *
99                               static_cast<int64_t>(kInt16Range);
100       int32_t full_range_output =
101           scaled_output / sum_of_exps + std::numeric_limits<int16_t>::min();
102       // Round up if remainder exceeds half of the divider value.
103       uint32_t remainder = scaled_output % sum_of_exps;
104       if (remainder * 2 >= sum_of_exps) {
105         full_range_output++;
106       }
107       output_data[i * depth + c] = static_cast<int16_t>(std::max(
108           std::min(full_range_output,
109                    static_cast<int32_t>(std::numeric_limits<int16_t>::max())),
110           static_cast<int32_t>(std::numeric_limits<int16_t>::min())));
111     }
112   }
113   return kTfLiteOk;
114 }
115 
CalculateSoftmaxOpDataHifimini(TfLiteContext * context,const TfLiteTensor * input,TfLiteTensor * output,const TfLiteSoftmaxParams * params,OpData * op_data)116 TfLiteStatus CalculateSoftmaxOpDataHifimini(TfLiteContext* context,
117                                             const TfLiteTensor* input,
118                                             TfLiteTensor* output,
119                                             const TfLiteSoftmaxParams* params,
120                                             OpData* op_data) {
121   if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
122     if (input->type == kTfLiteUInt8) {
123       TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
124     } else {
125       if (output->type == kTfLiteInt16) {
126         TF_LITE_ENSURE_EQ(context, output->params.zero_point,
127                           std::numeric_limits<int16_t>::min());
128         // NOTE: Current int16_t softmax output does not require symmetric
129         // scaling
130         // - so no need to verify scale here.
131       } else {
132         TF_LITE_ENSURE_EQ(context, output->params.zero_point,
133                           std::numeric_limits<int8_t>::min());
134         TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
135       }
136     }
137 
138     // Precompute e^(-x * input_scale * beta) for every possible int8_t input.
139     // This computation is used for every iteration of Softmax.  We must compute
140     // using pre-scaled inputs to avoid introducing additional error, while
141     // restricting our input range to the int8_t range. This is valid since beta
142     // and input scale are constant for a given op in the graph. Skip index 0
143     // since that is a special case which requires 1 integer bit instead of 0.
144     for (int i = 1; i <= kInt8Range; i++) {
145       float scaled_input = i * input->params.scale;
146       float exp_value =
147           std::exp((-scaled_input) * static_cast<float>(params->beta));
148 
149       float exponent_scaled =
150           std::round(exp_value * static_cast<float>(1 << kExpFractionalBits));
151       op_data->exp_lut[i] = static_cast<uint16_t>(exponent_scaled);
152     }
153   }
154   return kTfLiteOk;
155 }
156 
PrepareHifimini(TfLiteContext * context,TfLiteNode * node)157 TfLiteStatus PrepareHifimini(TfLiteContext* context, TfLiteNode* node) {
158   auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
159 
160   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
161   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
162   const TfLiteTensor* input = GetInput(context, node, 0);
163   TfLiteTensor* output = GetOutput(context, node, 0);
164   TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
165 
166   TFLITE_DCHECK(node->user_data != nullptr);
167   OpData* op_data = static_cast<OpData*>(node->user_data);
168 
169   // Allocate an array to precompute exponents over all int8_t inputs, applying
170   // the scale and beta before calculating exp. It is mandatory to apply beta
171   // and scale here, since each softmax op may have different beta and scale
172   // values. Beta and scale will remain constant for a given softmax op.
173   op_data->exp_lut = static_cast<uint16_t*>(context->AllocatePersistentBuffer(
174       context, (kInt8Range + 1) * sizeof(uint16_t)));
175   TF_LITE_ENSURE(context, op_data->exp_lut != nullptr);
176 
177   TF_LITE_ENSURE_STATUS(
178       CalculateSoftmaxOpDataHifimini(context, input, output, params, op_data));
179 
180   return kTfLiteOk;
181 }
182 #endif  // defined(HIFIMINI)
183 
184 #if defined(FUSION_F1)
PrepareHifi4(TfLiteContext * context,TfLiteNode * node)185 TfLiteStatus PrepareHifi4(TfLiteContext* context, TfLiteNode* node) {
186   TF_LITE_ENSURE_OK(context, SoftmaxPrepare(context, node));
187 
188   // Calculate scratch memory requirements and request scratch buffer
189   const TfLiteTensor* input = GetInput(context, node, 0);
190   const TfLiteTensor* output = GetOutput(context, node, 0);
191 
192   const RuntimeShape& input_shape = GetTensorShape(input);
193   const RuntimeShape& output_shape = GetTensorShape(output);
194   const int trailing_dim = input_shape.DimensionsCount() - 1;
195   const int depth =
196       MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
197 
198   if (input->type == kTfLiteInt8) {
199     int required_scratch =
200         get_softmax_scratch_size(PREC_ASYM8S, PREC_ASYM8S, depth);
201     TF_LITE_ENSURE(context, required_scratch > 0);
202 
203     auto* data = static_cast<OpData*>(node->user_data);
204     TF_LITE_ENSURE_OK(
205         context, context->RequestScratchBufferInArena(
206                      context, required_scratch, &(data->scratch_tensor_index)));
207   }
208 
209   return kTfLiteOk;
210 }
211 
EvalHifi4(const OpData * op_data,const TfLiteEvalTensor * input,TfLiteEvalTensor * output,TfLiteContext * context)212 TfLiteStatus EvalHifi4(const OpData* op_data, const TfLiteEvalTensor* input,
213                        TfLiteEvalTensor* output, TfLiteContext* context) {
214   const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
215   const int8_t* input_data = tflite::micro::GetTensorData<int8_t>(input);
216   const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
217   int16_t* output_data = tflite::micro::GetTensorData<int16_t>(output);
218   const int trailing_dim = input_shape.DimensionsCount() - 1;
219   const int outer_size =
220       MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
221   const int depth =
222       MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
223 
224   void* p_scratch = static_cast<void*>(
225       context->GetScratchBuffer(context, op_data->scratch_tensor_index));
226 
227   for (int i = 0; i < outer_size; ++i) {
228     int err = xa_nn_vec_softmax_asym8s_16(
229         &output_data[i * depth], &input_data[i * depth],
230         op_data->params.diff_min, op_data->params.input_left_shift,
231         op_data->params.input_multiplier, depth, p_scratch);
232     TF_LITE_ENSURE(context, err == 0);
233   }
234   return kTfLiteOk;
235 }
236 
237 #endif  // defined(FUSION_F1)
238 
// Kernel init: allocates the per-op state that Prepare/Eval will use.  On the
// HIFIMINI and FUSION_F1 targets a raw OpData is placed in persistent arena
// memory; otherwise this defers to the portable SoftmaxInit.
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
#if defined(HIFIMINI) || defined(FUSION_F1)
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
#else
  return SoftmaxInit(context, buffer, length);
#endif
}
247 
// Kernel prepare: dispatches to the target-specific prepare routine selected
// at compile time; the portable SoftmaxPrepare is the fallback.
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(HIFIMINI)
  return PrepareHifimini(context, node);
#elif defined(FUSION_F1)
  return PrepareHifi4(context, node);
#else
  return SoftmaxPrepare(context, node);
#endif
}
257 
// Kernel invoke: dispatches on input/output tensor types.  HIFIMINI supports
// only int8 -> int16; other targets fall through to the reference softmax,
// with FUSION_F1 intercepting the int8 -> int16 case for the NNLib kernel.
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
  TFLITE_DCHECK(node->user_data != nullptr);

#if defined(HIFIMINI)
  auto* op_data = static_cast<OpData*>(node->user_data);

  // HIFIMINI path: only the int8 input / int16 output combination is
  // implemented (see SoftmaxHifimini).
  if (input->type == kTfLiteInt8 && output->type == kTfLiteInt16) {
    return SoftmaxHifimini(*op_data, tflite::micro::GetTensorShape(input),
                           tflite::micro::GetTensorData<int8_t>(input),
                           tflite::micro::GetTensorShape(output),
                           tflite::micro::GetTensorData<int16_t>(output));
  } else {
    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
    return kTfLiteError;
  }
#else  // !defined(HIFIMINI)
  switch (input->type) {
    case kTfLiteFloat32: {
      // Float path: node->user_data holds SoftmaxParams (set by the
      // reference SoftmaxPrepare), not this file's OpData.
      SoftmaxParams op_data = *static_cast<SoftmaxParams*>(node->user_data);
      tflite::reference_ops::Softmax(
          op_data, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<float>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<float>(output));
      return kTfLiteOk;
    }
    case kTfLiteInt8: {
      if (output->type == kTfLiteInt16) {
#if defined(FUSION_F1)
        // FUSION_F1 accelerates int8 -> int16 via the NNLib kernel.
        return EvalHifi4(static_cast<OpData*>(node->user_data), input, output,
                         context);
#else
        SoftmaxParams op_data = *static_cast<SoftmaxParams*>(node->user_data);
        tflite::reference_ops::Softmax(
            op_data, tflite::micro::GetTensorShape(input),
            tflite::micro::GetTensorData<int8_t>(input),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int16_t>(output));
#endif
      } else {
        // int8 -> int8 always uses the reference implementation.
        SoftmaxParams op_data = *static_cast<SoftmaxParams*>(node->user_data);
        tflite::reference_ops::Softmax(
            op_data, tflite::micro::GetTensorShape(input),
            tflite::micro::GetTensorData<int8_t>(input),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output));
      }
      return kTfLiteOk;
    }
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
  }
#endif  // !defined(HIFIMINI)
}
317 
318 }  // namespace
319 
Register_SOFTMAX()320 TfLiteRegistration Register_SOFTMAX() {
321   return {/*init=*/Init,
322           /*free=*/nullptr,
323           /*prepare=*/Prepare,
324           /*invoke=*/Eval,
325           /*profiling_string=*/nullptr,
326           /*builtin_code=*/0,
327           /*custom_name=*/nullptr,
328           /*version=*/0};
329 }
330 
331 }  // namespace tflite
332