/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_

#include <algorithm>
#include <cstdint>
#include <functional>
#include <limits>
#include <type_traits>

#include "ruy/profiler/instrumentation.h"  // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/max.h"
#include "tensorflow/lite/kernels/internal/min.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
// Check if the reduction at index is the first one along the dimensions given
// in axis.
inline bool IsFirstReduction(const int* index, const int num_axis,
                             const int* axis) {
  if (num_axis == 0) {
    return true;
  }

  TFLITE_DCHECK(index != nullptr);
  TFLITE_DCHECK(axis != nullptr);
  for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
    if (index[axis[axis_idx]] != 0) {
      return false;
    }
  }

  return true;
}
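
// Illustrative sketch (not part of the upstream header): IsFirstReduction
// returns true exactly when the index is zero along every reduced axis. The
// helper below is hypothetical and exists only to demonstrate the contract.
inline void IsFirstReductionExample() {
  const int axis[] = {0, 2};
  const int first[] = {0, 5, 0};  // Zero along axes 0 and 2 -> first.
  const int later[] = {0, 5, 1};  // Nonzero along axis 2 -> not first.
  TFLITE_DCHECK(IsFirstReduction(first, 2, axis));
  TFLITE_DCHECK(!IsFirstReduction(later, 2, axis));
}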

namespace tflite {

namespace reference_ops {

// A generic reduce method that can be used for reduce_sum, reduce_mean, etc.
// This method iterates through input data and reduces elements along the
// dimensions given in axis.
template <typename In, typename Out>
inline bool Reduce(const In* input_data, const int* input_dims,
                   const int* output_dims, const int input_num_dims,
                   const int output_num_dims, const int* axis,
                   const int num_axis, int* input_iter,
                   Out reducer(Out current, const In in), Out* output_data) {
  // Reset input iterator.
  for (int idx = 0; idx < input_num_dims; ++idx) {
    input_iter[idx] = 0;
  }
  // Iterate through input_data.
  do {
    size_t input_offset =
        ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
    size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
                                               input_iter, num_axis, axis);
    output_data[output_offset] =
        reducer(output_data[output_offset], input_data[input_offset]);
  } while (NextIndex(input_num_dims, input_dims, input_iter));
  return true;
}
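
// Illustrative sketch (not part of the upstream header): summing a 2x3 input
// over axis 1 with the single-reducer Reduce overload. The function name and
// data are hypothetical. Reduce accumulates into output_data, so the caller
// must pre-initialize it (here to zero); on success output is {6, 15}.
inline bool ReduceRowSumsExample(float output[2]) {
  const float input_data[] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
  const int input_dims[] = {2, 3};
  const int output_dims[] = {2};
  const int axis[] = {1};
  int input_iter[2] = {0, 0};
  output[0] = 0.f;
  output[1] = 0.f;
  return Reduce<float, float>(
      input_data, input_dims, output_dims, /*input_num_dims=*/2,
      /*output_num_dims=*/1, axis, /*num_axis=*/1, input_iter,
      [](float current, const float in) -> float { return current + in; },
      output);
}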

// Similar to the above Reduce function but takes two reducer functions:
// 'reducer_first' is called with the first value of the reduction, and
// 'reducer_next' is then called for all the others.
template <typename In, typename Out>
inline bool Reduce(const In* input_data, const int* input_dims,
                   const int* output_dims, const int input_num_dims,
                   const int output_num_dims, const int* axis,
                   const int num_axis, int* input_iter,
                   const std::function<Out(In in)>& reducer_first,
                   const std::function<Out(Out current, In in)>& reducer_next,
                   Out* output_data) {
  // Reset input iterator.
  for (int idx = 0; idx < input_num_dims; ++idx) {
    input_iter[idx] = 0;
  }
  // Iterate through input_data.
  do {
    size_t input_offset =
        ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
    size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
                                               input_iter, num_axis, axis);
    if (IsFirstReduction(input_iter, num_axis, axis)) {
      output_data[output_offset] = reducer_first(input_data[input_offset]);
    } else {
      output_data[output_offset] =
          reducer_next(output_data[output_offset], input_data[input_offset]);
    }
  } while (NextIndex(input_num_dims, input_dims, input_iter));
  return true;
}
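
// Illustrative sketch (not part of the upstream header): the two-reducer
// overload removes the need to pre-initialize output_data, because
// 'reducer_first' seeds each output element from its first input. This
// hypothetical helper computes per-row maxima; on success output is {7, 2}.
inline bool ReduceRowMaxExample(int32_t output[2]) {
  const int8_t input_data[] = {3, -1, 7, -5, 0, 2};
  const int input_dims[] = {2, 3};
  const int output_dims[] = {2};
  const int axis[] = {1};
  int input_iter[2] = {0, 0};
  return Reduce<int8_t, int32_t>(
      input_data, input_dims, output_dims, /*input_num_dims=*/2,
      /*output_num_dims=*/1, axis, /*num_axis=*/1, input_iter,
      /*reducer_first=*/[](int8_t in) -> int32_t { return in; },
      /*reducer_next=*/
      [](int32_t current, int8_t in) -> int32_t {
        return in > current ? in : current;
      },
      output);
}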

// This method parses the input 'axis' to remove duplicates and handle negative
// values, and returns a valid 'out_axis'.
inline bool ResolveAxis(const int num_dims, const int* axis,
                        const int64_t num_axis, int* out_axis,
                        int* out_num_axis) {
  *out_num_axis = 0;  // Just in case.
  // Short-circuit axis resolution for scalars; the axis will go unused.
  if (num_dims == 0) {
    return true;
  }
  // O(n^2) is fine since out_num_axis should be really small, mostly <= 4.
  for (int64_t idx = 0; idx < num_axis; ++idx) {
    // Handle negative index. A positive index 'p_idx' can be represented as a
    // negative index 'n_idx' as: n_idx = p_idx - num_dims.
    // e.g. for num_dims=3, [0, 1, 2] is the same as [-3, -2, -1].
    int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
    TFLITE_DCHECK(current >= 0 && current < num_dims);
    if (current < 0 || current >= num_dims) {
      return false;
    }
    bool is_dup = false;
    for (int j = 0; j < *out_num_axis; ++j) {
      if (out_axis[j] == current) {
        is_dup = true;
        break;
      }
    }
    if (!is_dup) {
      out_axis[*out_num_axis] = current;
      *out_num_axis += 1;
    }
  }
  return true;
}
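
// Illustrative sketch (not part of the upstream header): for a 3-D tensor,
// the hypothetical axis list {-1, 2, 0} resolves to {2, 0} -- the negative
// index wraps around to 2 and the duplicate 2 is then dropped.
inline void ResolveAxisExample() {
  const int axis[] = {-1, 2, 0};
  int out_axis[3] = {0, 0, 0};
  int out_num_axis = 0;
  const bool ok = ResolveAxis(/*num_dims=*/3, axis, /*num_axis=*/3, out_axis,
                              &out_num_axis);
  TFLITE_DCHECK(ok);
  TFLITE_DCHECK(out_num_axis == 2);
  TFLITE_DCHECK(out_axis[0] == 2 && out_axis[1] == 0);
}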

// This method expects that output_data has been initialized.
template <typename In, typename Out>
inline bool ReduceSumImpl(const In* input_data, const int* input_dims,
                          const int* output_dims, const int input_num_dims,
                          const int output_num_dims, const int* axis,
                          const int num_axis, int* input_iter,
                          Out* output_data) {
  auto reducer = [](const Out current, const In in) -> Out {
    const Out actual_in = static_cast<Out>(in);
    return current + actual_in;
  };
  return Reduce<In, Out>(input_data, input_dims, output_dims, input_num_dims,
                         output_num_dims, axis, num_axis, input_iter, reducer,
                         output_data);
}
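
// Illustrative sketch (not part of the upstream header): ReduceSumImpl
// accumulates in the (typically wider) Out type, so int8 inputs can be summed
// into int32 without overflow. This hypothetical helper expects 400 in
// output[0], a value that does not fit in int8.
inline bool ReduceSumWideningExample(int32_t output[1]) {
  const int8_t input_data[] = {100, 100, 100, 100};
  const int input_dims[] = {4};
  const int output_dims[] = {1};
  const int axis[] = {0};
  int input_iter[1] = {0};
  output[0] = 0;  // ReduceSumImpl expects pre-initialized output.
  return ReduceSumImpl<int8_t, int32_t>(
      input_data, input_dims, output_dims, /*input_num_dims=*/1,
      /*output_num_dims=*/1, axis, /*num_axis=*/1, input_iter, output);
}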

template <typename T>
inline bool InitTensorDataForReduce(const int* dims, const int num_dims,
                                    const T init_value, T* data) {
  size_t num_elements = 1;
  for (int idx = 0; idx < num_dims; ++idx) {
    size_t current = static_cast<size_t>(dims[idx]);
    // Overflow prevention.
    if (current > 0 &&
        num_elements > std::numeric_limits<size_t>::max() / current) {
      return false;
    }
    num_elements *= current;
  }
  for (size_t idx = 0; idx < num_elements; ++idx) {
    data[idx] = init_value;
  }
  return true;
}
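
// Illustrative sketch (not part of the upstream header): seeding a 2x2 output
// with the identity element of a product reduction. The overflow guard above
// makes the call fail rather than wrap when the element count would exceed
// size_t.
inline bool InitForProdExample(float data[4]) {
  const int dims[] = {2, 2};
  return InitTensorDataForReduce(dims, /*num_dims=*/2, /*init_value=*/1.f,
                                 data);
}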

// Computes the generic value (i.e., sum/max/min/prod) of elements across
// dimensions given in axis. The caller must pass in an init_value and a
// reducer.
template <typename T>
inline bool ReduceGeneric(const T* input_data, const int* input_dims,
                          const int input_num_dims, T* output_data,
                          const int* output_dims, const int output_num_dims,
                          const int* axis, const int64_t num_axis_dimensions,
                          bool keep_dims, int* temp_index, int* resolved_axis,
                          T init_value,
                          T reducer(const T current, const T in)) {
  // Reset output data.
  if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
                               output_data)) {
    return false;
  }

  // Return early when the input shape has a zero dim. This is done after
  // initializing data for the output tensor because there are cases where the
  // input tensor is empty but the output tensor is not. In that case, the
  // output tensor should be filled with init_value.
  for (int i = 0; i < input_num_dims; ++i) {
    if (input_dims[i] == 0) return true;
  }

  // Resolve axis.
  int num_resolved_axis = 0;
  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
                   &num_resolved_axis)) {
    return false;
  }

  return Reduce<T, T>(input_data, input_dims, output_dims, input_num_dims,
                      output_num_dims, resolved_axis, num_resolved_axis,
                      temp_index, reducer, output_data);
}
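
// Illustrative sketch (not part of the upstream header): a reduce_max over
// axis 0 of a 2x2 input, seeding the output with the lowest float so that
// every input element can win the comparison; on success output is {4, 8}.
inline bool ReduceMaxExample(float output[2]) {
  const float input_data[] = {1.f, 8.f, 4.f, 2.f};
  const int input_dims[] = {2, 2};
  const int output_dims[] = {2};
  const int axis[] = {0};
  int temp_index[2];
  int resolved_axis[1];
  return ReduceGeneric<float>(
      input_data, input_dims, /*input_num_dims=*/2, output, output_dims,
      /*output_num_dims=*/1, axis, /*num_axis_dimensions=*/1,
      /*keep_dims=*/false, temp_index, resolved_axis,
      /*init_value=*/std::numeric_limits<float>::lowest(),
      [](const float current, const float in) -> float {
        return in > current ? in : current;
      });
}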

// Computes the mean of elements across dimensions given in axis.
// It does so in two stages: first it computes the sum of elements along the
// axis, then it divides by the number of elements in the axis.
template <typename T, typename U>
inline bool Mean(const T* input_data, const int* input_dims,
                 const int input_num_dims, T* output_data,
                 const int* output_dims, const int output_num_dims,
                 const int* axis, const int num_axis_dimensions, bool keep_dims,
                 int* temp_index, int* resolved_axis, U* temp_sum) {
  ruy::profiler::ScopeLabel label("Mean");
  // Reset output data.
  size_t num_outputs = 1;
  for (int idx = 0; idx < output_num_dims; ++idx) {
    size_t current = static_cast<size_t>(output_dims[idx]);
    // Overflow prevention; the 'current > 0' guard avoids dividing by zero
    // when an output dim is 0.
    if (current > 0 &&
        num_outputs > std::numeric_limits<size_t>::max() / current) {
      return false;
    }
    num_outputs *= current;
  }
  for (size_t idx = 0; idx < num_outputs; ++idx) {
    output_data[idx] = T();
    temp_sum[idx] = U();
  }

  // Resolve axis.
  int num_resolved_axis = 0;
  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
                   &num_resolved_axis)) {
    return false;
  }

  if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
                           output_num_dims, resolved_axis, num_resolved_axis,
                           temp_index, temp_sum)) {
    return false;
  }

  // Calculate the mean by dividing output_data by the number of aggregated
  // elements.
  size_t num_elements_in_axis = 1;
  for (int idx = 0; idx < num_resolved_axis; ++idx) {
    size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
    // Overflow prevention.
    if (current > (std::numeric_limits<size_t>::max() / num_elements_in_axis)) {
      return false;
    }
    num_elements_in_axis *= current;
  }

  if (num_elements_in_axis > 0) {
    for (size_t idx = 0; idx < num_outputs; ++idx) {
      output_data[idx] =
          static_cast<T>(temp_sum[idx] / static_cast<U>(num_elements_in_axis));
    }
  }
  return true;
}
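
// Illustrative sketch (not part of the upstream header): the mean over axis 1
// of a 2x4 float input with a float accumulator (U). temp_index, resolved_axis
// and temp_sum are caller-provided scratch buffers, as in the real kernels;
// on success output is {2.5, 6.5}.
inline bool MeanExample(float output[2]) {
  const float input_data[] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f};
  const int input_dims[] = {2, 4};
  const int output_dims[] = {2};
  const int axis[] = {1};
  int temp_index[2];
  int resolved_axis[1];
  float temp_sum[2];
  return Mean<float, float>(input_data, input_dims, /*input_num_dims=*/2,
                            output, output_dims, /*output_num_dims=*/1, axis,
                            /*num_axis_dimensions=*/1, /*keep_dims=*/false,
                            temp_index, resolved_axis, temp_sum);
}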

template <typename T>
inline void Mean(const tflite::MeanParams& op_params,
                 const RuntimeShape& unextended_input_shape,
                 const T* input_data,
                 const RuntimeShape& unextended_output_shape, T* output_data) {
  ruy::profiler::ScopeLabel label("Mean4D");

  // The current implementation only supports 4-dimensional inputs and
  // simultaneous reduction over width and height.
  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  const int output_batch = output_shape.Dims(0);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int output_depth = output_shape.Dims(3);

  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);

  TFLITE_CHECK_EQ(op_params.axis_count, 2);
  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
  TFLITE_CHECK_EQ(output_height, 1);
  TFLITE_CHECK_EQ(output_width, 1);

  for (int out_b = 0; out_b < output_batch; ++out_b) {
    for (int out_d = 0; out_d < output_depth; ++out_d) {
      float value = 0;
      for (int in_h = 0; in_h < input_height; ++in_h) {
        for (int in_w = 0; in_w < input_width; ++in_w) {
          value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
        }
      }
      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
          value / (input_width * input_height);
    }
  }
}
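
// Illustrative sketch (not part of the upstream header): global average
// pooling of a 1x2x2x1 float tensor via the height/width Mean overload,
// assuming RuntimeShape's initializer-list constructor. The axis order {1, 2}
// (or {2, 1}) and an output height/width of 1 are hard requirements checked
// above; the expected output is {2.5}.
inline void Mean4DExample(float output[1]) {
  tflite::MeanParams op_params;
  op_params.axis_count = 2;
  op_params.axis[0] = 1;
  op_params.axis[1] = 2;
  const float input_data[] = {1.f, 2.f, 3.f, 4.f};
  Mean<float>(op_params, RuntimeShape({1, 2, 2, 1}), input_data,
              RuntimeShape({1, 1, 1, 1}), output);
}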

inline void Mean(const tflite::MeanParams& op_params,
                 const RuntimeShape& unextended_input_shape,
                 const uint8_t* input_data, int32_t input_zero_point,
                 float input_scale, const RuntimeShape& unextended_output_shape,
                 uint8_t* output_data, int32_t output_zero_point,
                 float output_scale) {
  ruy::profiler::ScopeLabel label("Mean4D/Uint8");

  // The current implementation only supports 4-dimensional inputs and
  // simultaneous reduction over width and height.
  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);
  const int output_batch = output_shape.Dims(0);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int output_depth = output_shape.Dims(3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const float num_elements_in_axis = input_width * input_height;

  TFLITE_CHECK_EQ(op_params.axis_count, 2);
  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
  TFLITE_CHECK_EQ(output_height, 1);
  TFLITE_CHECK_EQ(output_width, 1);

  constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
  constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();

  float temp = input_zero_point * input_scale / output_scale;
  temp = temp > 0 ? temp + 0.5f : temp - 0.5f;
  int32_t bias = output_zero_point - static_cast<int32_t>(temp);
  double real_scale =
      static_cast<double>(input_scale / (num_elements_in_axis * output_scale));

  int32_t multiplier;
  int shift;
  QuantizeMultiplier(real_scale, &multiplier, &shift);
  for (int out_b = 0; out_b < output_batch; ++out_b) {
    for (int out_d = 0; out_d < output_depth; ++out_d) {
      int32_t acc = 0;
      for (int in_h = 0; in_h < input_height; ++in_h) {
        for (int in_w = 0; in_w < input_width; ++in_w) {
          acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
        }
      }
      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
      acc += bias;
      acc = std::min(std::max(acc, kMinValue), kMaxValue);
      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
          static_cast<uint8_t>(acc);
    }
  }
}
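
// Worked derivation (editorial note, not from the upstream sources) of the
// quantization arithmetic above. A real input value is
//   input_scale * (q - input_zero_point),
// so the mean over N = input_width * input_height elements, re-quantized to
// the output scale, is
//   q_out = (input_scale / (N * output_scale)) * sum(q)
//           - input_zero_point * input_scale / output_scale
//           + output_zero_point.
// The first term is 'real_scale' applied to 'acc' through the fixed-point
// multiplier; the last two terms are folded into the precomputed 'bias'.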

// Computes the mean or sum of elements across dimensions given in axis, for
// quantized values. It does so in two stages: first it computes the sum of
// elements along the axis, then it rescales and (for the mean) divides by the
// number of elements in the axis.
template <typename T, typename U>
inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
                               float input_scale, const int* input_dims,
                               const int input_num_dims, T* output_data,
                               int32_t output_zero_point, float output_scale,
                               const int* output_dims,
                               const int output_num_dims, const int* axis,
                               const int num_axis_dimensions, bool keep_dims,
                               int* temp_index, int* resolved_axis, U* temp_sum,
                               bool compute_sum) {
  const bool uint8_case = std::is_same<T, uint8_t>::value;
  const bool int16_case = std::is_same<T, int16_t>::value;
  if (uint8_case) {
    ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8");
  } else if (int16_case) {
    ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int16" : "Mean/Int16");
  } else {
    ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int8" : "Mean/Int8");
  }
  // Reset output data.
  size_t num_outputs = 1;
  for (int idx = 0; idx < output_num_dims; ++idx) {
    size_t current = static_cast<size_t>(output_dims[idx]);
    // Overflow prevention; the 'current > 0' guard avoids dividing by zero
    // when an output dim is 0.
    if (current > 0 &&
        num_outputs > std::numeric_limits<size_t>::max() / current) {
      return false;
    }
    num_outputs *= current;
  }
  for (size_t idx = 0; idx < num_outputs; ++idx) {
    output_data[idx] = T();
    temp_sum[idx] = U();
  }

  // Return early when the input shape has a zero dim. This is done after
  // initializing data for the output tensor because there are cases where the
  // input tensor is empty but the output tensor is not. In that case, the
  // output tensor should be filled with init_value.
  for (int i = 0; i < input_num_dims; ++i) {
    if (input_dims[i] == 0) return true;
  }

  // Resolve axis.
  int num_resolved_axis = 0;
  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
                   &num_resolved_axis)) {
    return false;
  }

  if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
                           output_num_dims, resolved_axis, num_resolved_axis,
                           temp_index, temp_sum)) {
    return false;
  }

  // Calculate the mean by dividing output_data by the number of aggregated
  // elements.
  size_t num_elements_in_axis = 1;
  for (int idx = 0; idx < num_resolved_axis; ++idx) {
    size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
    // Overflow prevention.
    if (current > (std::numeric_limits<size_t>::max() / num_elements_in_axis)) {
      return false;
    }
    num_elements_in_axis *= current;
  }

  if (num_elements_in_axis > 0) {
    const float scale = input_scale / output_scale;
    if (compute_sum) {
      // TODO(b/116341117): Eliminate float and do this completely in 8bit.
      const float bias = -input_zero_point * scale * num_elements_in_axis;
      for (size_t idx = 0; idx < num_outputs; ++idx) {
        const U value =
            static_cast<U>(TfLiteRound(temp_sum[idx] * scale + bias)) +
            output_zero_point;
        output_data[idx] = static_cast<T>(value);
      }
    } else {
      const float bias = -input_zero_point * scale;
      for (size_t idx = 0; idx < num_outputs; ++idx) {
        float float_mean = static_cast<float>(temp_sum[idx]) /
                           static_cast<float>(num_elements_in_axis);
        float result = TfLiteMin(
            TfLiteRound(float_mean * scale + bias) + output_zero_point,
            static_cast<float>(std::numeric_limits<T>::max()));
        result = TfLiteMax(result,
                           static_cast<float>(std::numeric_limits<T>::min()));
        output_data[idx] = static_cast<T>(result);
      }
    }
  }
  return true;
}
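
// Illustrative sketch (not part of the upstream header): a quantized mean
// over axis 0 of four int8 values with zero points of 0 and unit scales, so
// the expected output is simply {25}. The int32 temp_sum buffer holds the raw
// sum before it is rescaled from input_scale to output_scale.
inline bool QuantizedMeanExample(int8_t output[1]) {
  const int8_t input_data[] = {10, 20, 30, 40};
  const int input_dims[] = {4};
  const int output_dims[] = {1};
  const int axis[] = {0};
  int temp_index[1];
  int resolved_axis[1];
  int32_t temp_sum[1];
  return QuantizedMeanOrSum<int8_t, int32_t>(
      input_data, /*input_zero_point=*/0, /*input_scale=*/1.f, input_dims,
      /*input_num_dims=*/1, output, /*output_zero_point=*/0,
      /*output_scale=*/1.f, output_dims, /*output_num_dims=*/1, axis,
      /*num_axis_dimensions=*/1, /*keep_dims=*/false, temp_index,
      resolved_axis, temp_sum, /*compute_sum=*/false);
}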

template <typename T>
inline bool QuantizedReduceProd(const T* input_data, int32_t input_zero_point,
                                const RuntimeShape& input_shape, T* output_data,
                                int32_t output_zero_point,
                                const RuntimeShape& output_shape,
                                const int* axis,
                                const int64_t num_axis_dimensions,
                                bool keep_dims, int* temp_index,
                                int* resolved_axis, int32_t* temp_prod,
                                int32_t scaling_multiplier, int scaling_shift) {
  const int32_t kMinValue = std::numeric_limits<T>::min();
  const int32_t kMaxValue = std::numeric_limits<T>::max();

  // Resolve axis.
  int num_resolved_axis = 0;
  if (!ResolveAxis(input_shape.DimensionsCount(), axis, num_axis_dimensions,
                   resolved_axis, &num_resolved_axis)) {
    return false;
  }

  // Calculate the reduced product by rescaling each multiplication step to
  // avoid an overflow.
  auto reducer_first = [&](T in) -> int32_t { return in - input_zero_point; };

  auto reducer_next = [&](int32_t current, T in) -> int32_t {
    const int64_t result =
        static_cast<int64_t>(current) * (in - input_zero_point);
    return MultiplyByQuantizedMultiplier(result, scaling_multiplier,
                                         scaling_shift);
  };

  if (!Reduce<T, int32_t>(
          input_data, input_shape.DimsData(), output_shape.DimsData(),
          input_shape.DimensionsCount(), output_shape.DimensionsCount(),
          resolved_axis, num_resolved_axis, temp_index, reducer_first,
          reducer_next, temp_prod)) {
    return false;
  }

  for (int i = 0; i < output_shape.FlatSize(); i++) {
    int32_t result =
        MultiplyByQuantizedMultiplier(static_cast<int64_t>(temp_prod[i]),
                                      scaling_multiplier, scaling_shift) +
        output_zero_point;
    result = std::min(std::max(result, kMinValue), kMaxValue);
    output_data[i] = static_cast<T>(result);
  }

  return true;
}
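
// Illustrative sketch (not part of the upstream header): a quantized product
// over all elements of a 1-D int8 tensor, assuming RuntimeShape's
// initializer-list constructor. 'scaling_multiplier' and 'scaling_shift'
// would normally be precomputed with QuantizeMultiplier from the combined
// input/output scales; they are taken as given here.
inline bool QuantizedReduceProdExample(int8_t output[1],
                                       int32_t scaling_multiplier,
                                       int scaling_shift) {
  const int8_t input_data[] = {2, 3, 4};
  const int axis[] = {0};
  int temp_index[1];
  int resolved_axis[1];
  int32_t temp_prod[1];
  return QuantizedReduceProd<int8_t>(
      input_data, /*input_zero_point=*/0, RuntimeShape({3}), output,
      /*output_zero_point=*/0, RuntimeShape({1}), axis,
      /*num_axis_dimensions=*/1, /*keep_dims=*/false, temp_index,
      resolved_axis, temp_prod, scaling_multiplier, scaling_shift);
}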

}  // namespace reference_ops

}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_