/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_

#include <algorithm>
#include <cstdint>
#include <functional>
#include <limits>
#include <type_traits>

#include "ruy/profiler/instrumentation.h"  // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/max.h"
#include "tensorflow/lite/kernels/internal/min.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
// Check if the reduction at index is the first one along the dimensions given
// in axis.
inline bool IsFirstReduction(const int* index, const int num_axis,
                             const int* axis) {
  if (num_axis == 0) {
    return true;
  }

  TFLITE_DCHECK(index != nullptr);
  TFLITE_DCHECK(axis != nullptr);
  for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
    if (index[axis[axis_idx]] != 0) {
      return false;
    }
  }

  return true;
}
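
// For example, with axis = {1} on a rank-2 index, {0, 0} and {1, 0} are
// first reductions (their coordinate along dimension 1 is 0), while {0, 2}
// is not.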

namespace tflite {

namespace reference_ops {

// A generic reduce method that can be used for reduce_sum, reduce_mean, etc.
// This method iterates through input data and reduces elements along the
// dimensions given in axis.
template <typename In, typename Out>
inline bool Reduce(const In* input_data, const int* input_dims,
                   const int* output_dims, const int input_num_dims,
                   const int output_num_dims, const int* axis,
                   const int num_axis, int* input_iter,
                   Out reducer(Out current, const In in), Out* output_data) {
  // Reset input iterator.
  for (int idx = 0; idx < input_num_dims; ++idx) {
    input_iter[idx] = 0;
  }
  // Iterate through input_data.
  do {
    size_t input_offset =
        ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
    size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
                                               input_iter, num_axis, axis);
    output_data[output_offset] =
        reducer(output_data[output_offset], input_data[input_offset]);
  } while (NextIndex(input_num_dims, input_dims, input_iter));
  return true;
}
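
// A minimal usage sketch (hypothetical values): summing a {2, 3} tensor over
// axis 1 into a {2} output. The output must be pre-initialized, since the
// reducer folds into it.
//
//   const float input[] = {1, 2, 3, 4, 5, 6};  // shape {2, 3}
//   const int input_dims[] = {2, 3};
//   const int output_dims[] = {2};
//   const int axis[] = {1};
//   int iter[2];
//   float output[] = {0.f, 0.f};
//   auto sum = [](float current, const float in) -> float {
//     return current + in;
//   };
//   Reduce<float, float>(input, input_dims, output_dims, /*input_num_dims=*/2,
//                        /*output_num_dims=*/1, axis, /*num_axis=*/1, iter,
//                        sum, output);
//   // output == {6.f, 15.f}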

// Similar to the above Reduce function but takes two reducer functions.
// The 'reducer_first' is called with the first value of the reduction;
// 'reducer_next' is then called for all the others.
template <typename In, typename Out>
inline bool Reduce(const In* input_data, const int* input_dims,
                   const int* output_dims, const int input_num_dims,
                   const int output_num_dims, const int* axis,
                   const int num_axis, int* input_iter,
                   const std::function<Out(In in)>& reducer_first,
                   const std::function<Out(Out current, In in)>& reducer_next,
                   Out* output_data) {
  // Reset input iterator.
  for (int idx = 0; idx < input_num_dims; ++idx) {
    input_iter[idx] = 0;
  }
  // Iterate through input_data.
  do {
    size_t input_offset =
        ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
    size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
                                               input_iter, num_axis, axis);
    if (IsFirstReduction(input_iter, num_axis, axis)) {
      output_data[output_offset] = reducer_first(input_data[input_offset]);
    } else {
      output_data[output_offset] =
          reducer_next(output_data[output_offset], input_data[input_offset]);
    }
  } while (NextIndex(input_num_dims, input_dims, input_iter));
  return true;
}
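
// A minimal usage sketch (hypothetical values, reusing the buffers from the
// sketch above): a product reduction over axis 1 of the same {2, 3} tensor.
// Because 'reducer_first' seeds each output slot, the output needs no
// pre-initialization.
//
//   auto first = [](float in) -> float { return in; };
//   auto next = [](float current, float in) -> float { return current * in; };
//   Reduce<float, float>(input, input_dims, output_dims, 2, 1, axis, 1, iter,
//                        first, next, output);
//   // output == {6.f, 120.f}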

// This method parses the input 'axis' to remove duplicates and handle negative
// values, and returns a valid 'out_axis'.
inline bool ResolveAxis(const int num_dims, const int* axis,
                        const int64_t num_axis, int* out_axis,
                        int* out_num_axis) {
  *out_num_axis = 0;  // Just in case.
  // Short-circuit axis resolution for scalars; the axis will go unused.
  if (num_dims == 0) {
    return true;
  }
  // O(n^2) is fine since out_num_axis should be really small, mostly <= 4
  for (int64_t idx = 0; idx < num_axis; ++idx) {
    // Handle negative index. A positive index 'p_idx' can be represented as a
    // negative index 'n_idx' as: n_idx = p_idx - num_dims
    // e.g. for num_dims = 3, [0, 1, 2] is the same as [-3, -2, -1].
    int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
    TFLITE_DCHECK(current >= 0 && current < num_dims);
    if (current < 0 || current >= num_dims) {
      return false;
    }
    bool is_dup = false;
    for (int j = 0; j < *out_num_axis; ++j) {
      if (out_axis[j] == current) {
        is_dup = true;
        break;
      }
    }
    if (!is_dup) {
      out_axis[*out_num_axis] = current;
      *out_num_axis += 1;
    }
  }
  return true;
}
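
// For example (hypothetical values), with num_dims = 3:
//
//   const int axis[] = {1, -2};  // -2 resolves to -2 + 3 = 1, a duplicate.
//   int out_axis[2];
//   int out_num_axis = 0;
//   ResolveAxis(3, axis, 2, out_axis, &out_num_axis);
//   // out_axis == {1}, out_num_axis == 1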

// This method expects that output_data has been initialized.
template <typename In, typename Out>
inline bool ReduceSumImpl(const In* input_data, const int* input_dims,
                          const int* output_dims, const int input_num_dims,
                          const int output_num_dims, const int* axis,
                          const int num_axis, int* input_iter,
                          Out* output_data) {
  auto reducer = [](const Out current, const In in) -> Out {
    const Out actual_in = static_cast<Out>(in);
    return current + actual_in;
  };
  return Reduce<In, Out>(input_data, input_dims, output_dims, input_num_dims,
                         output_num_dims, axis, num_axis, input_iter, reducer,
                         output_data);
}
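
// A minimal sketch (hypothetical values): accumulating int8_t inputs into a
// wider int32_t output to avoid overflow. The output must be zero-initialized.
//
//   const int8_t input[] = {100, 100, 100};  // shape {3}
//   const int input_dims[] = {3};
//   const int output_dims[] = {1};
//   const int axis[] = {0};
//   int iter[1];
//   int32_t output[] = {0};
//   ReduceSumImpl<int8_t, int32_t>(input, input_dims, output_dims, 1, 1, axis,
//                                  1, iter, output);
//   // output[0] == 300, which would have overflowed int8_t.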

template <typename T>
inline bool InitTensorDataForReduce(const int* dims, const int num_dims,
                                    const T init_value, T* data) {
  size_t num_elements = 1;
  for (int idx = 0; idx < num_dims; ++idx) {
    size_t current = static_cast<size_t>(dims[idx]);
    // Overflow prevention.
    if (current > 0 &&
        num_elements > std::numeric_limits<size_t>::max() / current) {
      return false;
    }
    num_elements *= current;
  }
  for (size_t idx = 0; idx < num_elements; ++idx) {
    data[idx] = init_value;
  }
  return true;
}
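
// For example (hypothetical values), filling a {2, 3} float buffer with the
// identity value for a max reduction:
//
//   const int dims[] = {2, 3};
//   float data[6];
//   InitTensorDataForReduce(dims, 2, std::numeric_limits<float>::lowest(),
//                           data);
//   // All six elements now hold the lowest finite float.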

// Computes the generic value (i.e., sum/max/min/prod) of elements across
// dimensions given in axis. The caller must pass in init_value and a reducer.
template <typename T>
inline bool ReduceGeneric(const T* input_data, const int* input_dims,
                          const int input_num_dims, T* output_data,
                          const int* output_dims, const int output_num_dims,
                          const int* axis, const int64_t num_axis_dimensions,
                          bool keep_dims, int* temp_index, int* resolved_axis,
                          T init_value,
                          T reducer(const T current, const T in)) {
  // Reset output data.
  if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
                               output_data)) {
    return false;
  }

  // Return early when the input shape has a zero dim. This is done after
  // initializing data for the output tensor because there are cases where the
  // input tensor is empty but the output tensor is not. In that case, the
  // output tensor should be filled with init_value.
  for (int i = 0; i < input_num_dims; ++i) {
    if (input_dims[i] == 0) return true;
  }

  // Resolve axis.
  int num_resolved_axis = 0;
  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
                   &num_resolved_axis)) {
    return false;
  }

  return Reduce<T, T>(input_data, input_dims, output_dims, input_num_dims,
                      output_num_dims, resolved_axis, num_resolved_axis,
                      temp_index, reducer, output_data);
}
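
// A minimal usage sketch (hypothetical values): a max reduction over axis 0
// of a {2, 3} float tensor, seeded with the lowest float as init_value.
//
//   const float input[] = {1, 5, 2, 4, 3, 6};  // shape {2, 3}
//   const int input_dims[] = {2, 3};
//   const int output_dims[] = {3};
//   const int axis[] = {0};
//   float output[3];
//   int temp_index[2];
//   int resolved_axis[1];
//   auto max_fn = [](const float current, const float in) -> float {
//     return in > current ? in : current;
//   };
//   ReduceGeneric<float>(input, input_dims, 2, output, output_dims, 1, axis,
//                        1, /*keep_dims=*/false, temp_index, resolved_axis,
//                        std::numeric_limits<float>::lowest(), max_fn);
//   // output == {4.f, 5.f, 6.f}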

// Computes the mean of elements across dimensions given in axis.
// It does so in two stages: first it calculates the sum of elements along the
// axis, then it divides the sum by the number of elements in the axis.
template <typename T, typename U>
inline bool Mean(const T* input_data, const int* input_dims,
                 const int input_num_dims, T* output_data,
                 const int* output_dims, const int output_num_dims,
                 const int* axis, const int num_axis_dimensions, bool keep_dims,
                 int* temp_index, int* resolved_axis, U* temp_sum) {
  ruy::profiler::ScopeLabel label("Mean");
  // Reset output data.
  size_t num_outputs = 1;
  for (int idx = 0; idx < output_num_dims; ++idx) {
    size_t current = static_cast<size_t>(output_dims[idx]);
    // Overflow prevention.
    if (current > 0 &&
        num_outputs > std::numeric_limits<size_t>::max() / current) {
      return false;
    }
    num_outputs *= current;
  }
  for (size_t idx = 0; idx < num_outputs; ++idx) {
    output_data[idx] = T();
    temp_sum[idx] = U();
  }

  // Resolve axis.
  int num_resolved_axis = 0;
  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
                   &num_resolved_axis)) {
    return false;
  }

  if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
                           output_num_dims, resolved_axis, num_resolved_axis,
                           temp_index, temp_sum)) {
    return false;
  }

  // Calculate the mean by dividing output_data by the number of aggregated
  // elements.
  size_t num_elements_in_axis = 1;
  for (int idx = 0; idx < num_resolved_axis; ++idx) {
    size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
    // Overflow prevention.
    if (current > (std::numeric_limits<size_t>::max() / num_elements_in_axis)) {
      return false;
    }
    num_elements_in_axis *= current;
  }

  if (num_elements_in_axis > 0) {
    for (size_t idx = 0; idx < num_outputs; ++idx) {
      output_data[idx] =
          static_cast<T>(temp_sum[idx] / static_cast<U>(num_elements_in_axis));
    }
  }
  return true;
}
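
// A minimal usage sketch (hypothetical values): the mean over axis 1 of a
// {2, 3} float tensor, accumulated in float.
//
//   const float input[] = {1, 2, 3, 4, 5, 6};  // shape {2, 3}
//   const int input_dims[] = {2, 3};
//   const int output_dims[] = {2};
//   const int axis[] = {1};
//   float output[2];
//   float temp_sum[2];
//   int temp_index[2];
//   int resolved_axis[1];
//   Mean<float, float>(input, input_dims, 2, output, output_dims, 1, axis, 1,
//                      /*keep_dims=*/false, temp_index, resolved_axis,
//                      temp_sum);
//   // output == {2.f, 5.f}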

template <typename T>
inline void Mean(const tflite::MeanParams& op_params,
                 const RuntimeShape& unextended_input_shape,
                 const T* input_data,
                 const RuntimeShape& unextended_output_shape, T* output_data) {
  ruy::profiler::ScopeLabel label("Mean4D");

  // The current implementation only supports 4-D tensors with simultaneous
  // reduction over width and height.
  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  const int output_batch = output_shape.Dims(0);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int output_depth = output_shape.Dims(3);

  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);

  TFLITE_CHECK_EQ(op_params.axis_count, 2);
  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
  TFLITE_CHECK_EQ(output_height, 1);
  TFLITE_CHECK_EQ(output_width, 1);

  for (int out_b = 0; out_b < output_batch; ++out_b) {
    for (int out_d = 0; out_d < output_depth; ++out_d) {
      float value = 0;
      for (int in_h = 0; in_h < input_height; ++in_h) {
        for (int in_w = 0; in_w < input_width; ++in_w) {
          value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
        }
      }
      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
          value / (input_width * input_height);
    }
  }
}
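
// A minimal usage sketch (hypothetical shapes): a global average pool over a
// {1, 4, 4, 8} NHWC tensor, producing a {1, 1, 1, 8} output.
//
//   tflite::MeanParams params;
//   params.axis_count = 2;
//   params.axis[0] = 1;  // height
//   params.axis[1] = 2;  // width
//   Mean(params, RuntimeShape({1, 4, 4, 8}), input_data,
//        RuntimeShape({1, 1, 1, 8}), output_data);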

inline void Mean(const tflite::MeanParams& op_params,
                 const RuntimeShape& unextended_input_shape,
                 const uint8_t* input_data, int32_t input_zero_point,
                 float input_scale, const RuntimeShape& unextended_output_shape,
                 uint8_t* output_data, int32_t output_zero_point,
                 float output_scale) {
  ruy::profiler::ScopeLabel label("Mean4D/Uint8");

  // The current implementation only supports 4-D tensors with simultaneous
  // reduction over width and height.
  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);
  const int output_batch = output_shape.Dims(0);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int output_depth = output_shape.Dims(3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const float num_elements_in_axis = input_width * input_height;

  TFLITE_CHECK_EQ(op_params.axis_count, 2);
  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
  TFLITE_CHECK_EQ(output_height, 1);
  TFLITE_CHECK_EQ(output_width, 1);

  constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
  constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();

  float temp = input_zero_point * input_scale / output_scale;
  temp = temp > 0 ? temp + 0.5f : temp - 0.5f;
  int32_t bias = output_zero_point - static_cast<int32_t>(temp);
  double real_scale =
      static_cast<double>(input_scale / (num_elements_in_axis * output_scale));

  int32_t multiplier;
  int shift;
  QuantizeMultiplier(real_scale, &multiplier, &shift);
  for (int out_b = 0; out_b < output_batch; ++out_b) {
    for (int out_d = 0; out_d < output_depth; ++out_d) {
      int32_t acc = 0;
      for (int in_h = 0; in_h < input_height; ++in_h) {
        for (int in_w = 0; in_w < input_width; ++in_w) {
          acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
        }
      }
      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
      acc += bias;
      acc = std::min(std::max(acc, kMinValue), kMaxValue);
      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
          static_cast<uint8_t>(acc);
    }
  }
}
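
// The fixed-point rescaling above follows from the quantization scheme
// real_value = scale * (q - zero_point). Writing N for the number of averaged
// elements, the real mean is (input_scale / N) * (sum_q - N * input_zp), so
// the output quantized value is
//
//   q_out = round(sum_q * input_scale / (N * output_scale))
//           - round(input_zp * input_scale / output_scale) + output_zp,
//
// where the first term is computed with the quantized multiplier and the
// last two terms form the precomputed 'bias'.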

// Computes the mean or sum of elements across dimensions given in axis.
// It does so in two stages: first it calculates the sum of elements along the
// axis, then (for the mean) it divides the sum by the number of elements in
// the axis, all in quantized arithmetic.
template <typename T, typename U>
inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
                               float input_scale, const int* input_dims,
                               const int input_num_dims, T* output_data,
                               int32_t output_zero_point, float output_scale,
                               const int* output_dims,
                               const int output_num_dims, const int* axis,
                               const int num_axis_dimensions, bool keep_dims,
                               int* temp_index, int* resolved_axis, U* temp_sum,
                               bool compute_sum) {
  const bool uint8_case = std::is_same<T, uint8_t>::value;
  const bool int16_case = std::is_same<T, int16_t>::value;
  if (uint8_case) {
    ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8");
  } else if (int16_case) {
    ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int16" : "Mean/Int16");
  } else {
    ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int8" : "Mean/Int8");
  }
  // Reset output data.
  size_t num_outputs = 1;
  for (int idx = 0; idx < output_num_dims; ++idx) {
    size_t current = static_cast<size_t>(output_dims[idx]);
    // Overflow prevention.
    if (current > 0 &&
        num_outputs > std::numeric_limits<size_t>::max() / current) {
      return false;
    }
    num_outputs *= current;
  }
  for (size_t idx = 0; idx < num_outputs; ++idx) {
    output_data[idx] = T();
    temp_sum[idx] = U();
  }

  // Return early when the input shape has a zero dim. This is done after
  // resetting the output tensor because there are cases where the input
  // tensor is empty but the output tensor is not; the output must still keep
  // the zero values written above.
  for (int i = 0; i < input_num_dims; ++i) {
    if (input_dims[i] == 0) return true;
  }

  // Resolve axis.
  int num_resolved_axis = 0;
  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
                   &num_resolved_axis)) {
    return false;
  }

  if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
                           output_num_dims, resolved_axis, num_resolved_axis,
                           temp_index, temp_sum)) {
    return false;
  }

  // Calculate the mean by dividing output_data by the number of aggregated
  // elements.
  size_t num_elements_in_axis = 1;
  for (int idx = 0; idx < num_resolved_axis; ++idx) {
    size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
    // Overflow prevention.
    if (current > (std::numeric_limits<size_t>::max() / num_elements_in_axis)) {
      return false;
    }
    num_elements_in_axis *= current;
  }

  if (num_elements_in_axis > 0) {
    const float scale = input_scale / output_scale;
    if (compute_sum) {
      // TODO(b/116341117): Eliminate float and do this completely in 8bit.
      const float bias = -input_zero_point * scale * num_elements_in_axis;
      for (size_t idx = 0; idx < num_outputs; ++idx) {
        const U value =
            static_cast<U>(TfLiteRound(temp_sum[idx] * scale + bias)) +
            output_zero_point;
        output_data[idx] = static_cast<T>(value);
      }
    } else {
      const float bias = -input_zero_point * scale;
      for (size_t idx = 0; idx < num_outputs; ++idx) {
        float float_mean = static_cast<float>(temp_sum[idx]) /
                           static_cast<float>(num_elements_in_axis);
        float result = TfLiteMin(
            TfLiteRound(float_mean * scale + bias) + output_zero_point,
            static_cast<float>(std::numeric_limits<T>::max()));
        result = TfLiteMax(result,
                           static_cast<float>(std::numeric_limits<T>::min()));
        output_data[idx] = static_cast<T>(result);
      }
    }
  }
  return true;
}
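
// A minimal usage sketch (hypothetical quantization parameters): an int8 mean
// over axis 1 of a {2, 3} tensor, accumulated in int32. With identical input
// and output quantization, each output is just the rounded mean of its slice.
//
//   const int8_t input[] = {2, 4, 6, 1, 3, 5};  // shape {2, 3}
//   const int input_dims[] = {2, 3};
//   const int output_dims[] = {2};
//   const int axis[] = {1};
//   int8_t output[2];
//   int32_t temp_sum[2];
//   int temp_index[2];
//   int resolved_axis[1];
//   QuantizedMeanOrSum<int8_t, int32_t>(
//       input, /*input_zero_point=*/0, /*input_scale=*/0.5f, input_dims, 2,
//       output, /*output_zero_point=*/0, /*output_scale=*/0.5f, output_dims,
//       1, axis, 1, /*keep_dims=*/false, temp_index, resolved_axis, temp_sum,
//       /*compute_sum=*/false);
//   // output == {4, 3}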

template <typename T>
inline bool QuantizedReduceProd(const T* input_data, int32_t input_zero_point,
                                const RuntimeShape& input_shape, T* output_data,
                                int32_t output_zero_point,
                                const RuntimeShape& output_shape,
                                const int* axis,
                                const int64_t num_axis_dimensions,
                                bool keep_dims, int* temp_index,
                                int* resolved_axis, int32_t* temp_prod,
                                int32_t scaling_multiplier, int scaling_shift) {
  const int32_t kMinValue = std::numeric_limits<T>::min();
  const int32_t kMaxValue = std::numeric_limits<T>::max();

  // Resolve axis.
  int num_resolved_axis = 0;
  if (!ResolveAxis(input_shape.DimensionsCount(), axis, num_axis_dimensions,
                   resolved_axis, &num_resolved_axis)) {
    return false;
  }

  // Calculate the reduced product by rescaling each multiplication step to
  // avoid an overflow.
  auto reducer_first = [&](T in) -> int32_t { return in - input_zero_point; };

  auto reducer_next = [&](int32_t current, T in) -> int32_t {
    const int64_t result =
        static_cast<int64_t>(current) * (in - input_zero_point);
    return MultiplyByQuantizedMultiplier(result, scaling_multiplier,
                                         scaling_shift);
  };

  if (!Reduce<T, int32_t>(
          input_data, input_shape.DimsData(), output_shape.DimsData(),
          input_shape.DimensionsCount(), output_shape.DimensionsCount(),
          resolved_axis, num_resolved_axis, temp_index, reducer_first,
          reducer_next, temp_prod)) {
    return false;
  }

  for (int i = 0; i < output_shape.FlatSize(); i++) {
    int32_t result =
        MultiplyByQuantizedMultiplier(static_cast<int64_t>(temp_prod[i]),
                                      scaling_multiplier, scaling_shift) +
        output_zero_point;
    result = std::min(std::max(result, kMinValue), kMaxValue);
    output_data[i] = static_cast<T>(result);
  }

  return true;
}
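
// Note on the scaling scheme (as implemented above): every multiply in
// 'reducer_next' rescales its int64 partial product back into int32 range
// with (scaling_multiplier, scaling_shift), and the final loop applies the
// same rescaling once more before adding output_zero_point, so a product of
// n elements is rescaled n times in total. The caller is expected to choose
// scaling_multiplier and scaling_shift accordingly.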

}  // namespace reference_ops

}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_