1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
16 #define TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
17 
18 #include <stddef.h>
19 #include <stdint.h>
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include <algorithm>
24 #include <cmath>
25 #include <complex>
26 #include <functional>
27 #include <initializer_list>
28 #include <limits>
29 #include <map>
30 #include <memory>
31 #include <string>
32 #include <tuple>
33 #include <type_traits>
34 #include <utility>
35 #include <vector>
36 
37 #include <gmock/gmock.h>
38 #include <gtest/gtest.h>
39 #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
40 #include "tensorflow/core/platform/logging.h"
41 #include "tensorflow/lite/core/api/op_resolver.h"
42 #include "tensorflow/lite/interpreter.h"
43 #include "tensorflow/lite/kernels/internal/tensor_utils.h"
44 #include "tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h"
45 #include "tensorflow/lite/schema/schema_generated.h"
46 #include "tensorflow/lite/string_type.h"
47 #include "tensorflow/lite/string_util.h"
48 #include "tensorflow/lite/testing/util.h"  // IWYU pragma: keep
49 #include "tensorflow/lite/tools/optimize/quantization_utils.h"
50 #include "tensorflow/lite/type_to_tflitetype.h"
51 
52 namespace tflite {
53 
54 // Returns gmock matchers that check that the elements of a float vector match
55 // the given values to within the given tolerance.
56 std::vector<::testing::Matcher<float>> ArrayFloatNear(
57     const std::vector<float>& values, float max_abs_error = 1e-5);
58 
59 // Returns gmock matchers that check that the elements of a complex vector
60 // match the given values to within the given tolerance.
61 std::vector<::testing::Matcher<std::complex<float>>> ArrayComplex64Near(
62     const std::vector<std::complex<float>>& values, float max_abs_error = 1e-5);
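// For illustration, these are typically combined with gmock's
// ElementsAreArray, e.g.:
//   EXPECT_THAT(m.ExtractVector<float>(output),
//               ElementsAreArray(ArrayFloatNear({0.1f, 0.2f}, 1e-4)));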
63 
64 template <typename T>
65 inline std::vector<T> Quantize(const std::vector<float>& data, float scale,
66                                int32_t zero_point) {
67   std::vector<T> q;
68   for (const auto& f : data) {
69     q.push_back(static_cast<T>(std::max<float>(
70         std::numeric_limits<T>::min(),
71         std::min<float>(std::numeric_limits<T>::max(),
72                         std::round(zero_point + (f / scale))))));
73   }
74   return q;
75 }
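// A worked example (for illustration): Quantize<uint8_t>({-0.5f, 0.0f, 1.0f},
// /*scale=*/0.5f, /*zero_point=*/1) rounds and clamps to {0, 1, 3}.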
76 
77 template <typename T>
78 inline std::vector<float> Dequantize(const std::vector<T>& data, float scale,
79                                      int32_t zero_point) {
80   std::vector<float> f;
81   f.reserve(data.size());
82   for (const T& q : data) {
83     f.push_back(scale * (q - zero_point));
84   }
85   return f;
86 }
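// A worked example (for illustration): Dequantize<uint8_t>({0, 1, 3},
// /*scale=*/0.5f, /*zero_point=*/1) maps back to {-0.5f, 0.0f, 1.0f}.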
87 
88 // A test model that contains a single operator. All operator inputs and
89 // outputs are external to the model, so the tests can directly access them.
90 // Typical usage:
91 //    SingleOpModel m;
92 //    int a = m.AddInput({TensorType_FLOAT32, a_shape});
93 //    int b = m.AddInput({TensorType_FLOAT32, b_shape});
94 //    int c = m.AddOutput({TensorType_FLOAT32, {}});
95 //    m.SetBuiltinOp(...);
96 //    m.BuildInterpreter({GetShape(a), GetShape(b)});
97 //    m.PopulateTensor(a, {...});
98 //    m.PopulateTensor(b, {...});
99 //    m.Invoke();
100 //    EXPECT_THAT(m.ExtractVector<float>(c), ElementsAreArray(ArrayFloatNear({...})));
101 //
102 
103 // A helper struct to construct test tensors. This is particularly useful for
104 // quantized tensors, which must have their scale and zero_point defined before
105 // the actual data is known. This mimics what happens in practice: quantization
106 // parameters are calculated during training or post-training.
107 struct TensorData {
108   // NOLINTNEXTLINE
109   TensorData(TensorType type = TensorType_FLOAT32, std::vector<int> shape = {},
110              float min = 0.0f, float max = 0.0f, float scale = 0.0f,
111              int32_t zero_point = 0, bool per_channel_quantization = false,
112              std::vector<float> per_channel_quantization_scales = {},
113              std::vector<int64_t> per_channel_quantization_offsets = {},
114              int32_t channel_index = 0, std::vector<int> traversal_order = {},
115              std::vector<TfLiteDimensionType> format = {},
116              std::vector<int> block_size = {}, std::vector<int> block_map = {},
117              std::vector<int> shape_signature = {})
118       : type(type),
119         shape(shape),
120         min(min),
121         max(max),
122         scale(scale),
123         zero_point(zero_point),
124         per_channel_quantization(per_channel_quantization),
125         per_channel_quantization_scales(
126             std::move(per_channel_quantization_scales)),
127         per_channel_quantization_offsets(
128             std::move(per_channel_quantization_offsets)),
129         channel_index(channel_index),
130         traversal_order(traversal_order),
131         format(format),
132         block_size(block_size),
133         block_map(block_map),
134         shape_signature(shape_signature) {}
135   TensorType type;
136   std::vector<int> shape;
137   float min;
138   float max;
139   float scale;
140   int32_t zero_point;
141   bool per_channel_quantization;
142   std::vector<float> per_channel_quantization_scales;
143   std::vector<int64_t> per_channel_quantization_offsets;
144   int32_t channel_index;
145   std::vector<int> traversal_order;
146   std::vector<TfLiteDimensionType> format;
147   std::vector<int> block_size;
148   std::vector<int> block_map;
149   std::vector<int> shape_signature;
150 };
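// For illustration, a hypothetical asymmetrically quantized uint8 input whose
// scale and zero_point are derived later from the given (min, max) range:
//   TensorData input = {TensorType_UINT8, /*shape=*/{1, 2, 2, 1},
//                       /*min=*/-63.5f, /*max=*/64.0f};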
151 
152 class SingleOpResolver : public OpResolver {
153  public:
154   SingleOpResolver(const BuiltinOperator op, TfLiteRegistration* registration,
155                    int version = 1)
156       : op_(op), registration_(*registration) {
157     registration_.builtin_code = static_cast<int32_t>(op);
158     registration_.version = version;
159   }
160   const TfLiteRegistration* FindOp(BuiltinOperator op,
161                                    int version) const override {
162     if (op == op_) {
163       return &registration_;
164     }
165     return nullptr;
166   }
167   const TfLiteRegistration* FindOp(const char* op, int version) const override {
168     return nullptr;
169   }
170 
171  private:
172   const BuiltinOperator op_;
173   TfLiteRegistration registration_;
174 };
175 
176 class SingleOpModel {
177  public:
178   SingleOpModel() {}
179   ~SingleOpModel();
180 
181   // Set a delegate that is applied right after graph is prepared. This is
182   // useful for testing other runtimes like NN API or GPU.
183   // Note: the caller still owns the memory of the passed-in `delegate`.
184   void SetDelegate(TfLiteDelegate* delegate) {
185     delegate_ = delegate;
186     // As this is a manually-set TF Lite delegate, we assume the intention of
187     // the test is to test against that particular delegate, so we bypass
188     // applying the TfLite default delegates (i.e. the XNNPACK delegate).
189     if (delegate_ != nullptr) {
190       SetBypassDefaultDelegates();
191     }
192   }
193 
194   TfLiteStatus ApplyDelegate();
195 
196   // Copying or assignment is disallowed to simplify ownership semantics.
197   SingleOpModel(const SingleOpModel&) = delete;
198   SingleOpModel& operator=(const SingleOpModel&) = delete;
199 
200   // Add a TensorType input tensor and return its index.
201   int AddInput(const TensorData& t);
202   int AddVariableInput(const TensorData& t);
203 
204   int AddIntermediate(TensorType type, const std::vector<float>& scale,
205                       const std::vector<int64_t>& zero_point);
206 
207   // Templated version of AddConstInput() taking pointer and size.
208   template <typename T>
209   int AddConstInput(const TensorData& t, const T* data, size_t size) {
210     int id = 0;
211     if (t.per_channel_quantization) {
212       id = AddTensorPerChannelQuant(t, data, size);
213     } else {
214       id = AddTensor(t, data, size);
215     }
216     inputs_.push_back(id);
217     return id;
218   }
219 
220   // Templated version of AddConstInput() taking vector and shape.
221   template <typename T>
222   int AddConstInput(TensorType type, const std::vector<T>& data,
223                     std::initializer_list<int> shape) {
224     return AddConstInput(TensorData{type, shape}, data.data(), data.size());
225   }
226 
227   // Templated version of AddConstInput() taking TensorType, initializer_list
228   // and shape.
229   template <typename T>
230   int AddConstInput(TensorType type, std::initializer_list<T> data,
231                     std::initializer_list<int> shape) {
232     return AddConstInput<T>(TensorData{type, shape}, data.begin(), data.size());
233   }
234 
235   // Templated version of AddConstInput() taking TensorData, initializer_list
236   // and shape.
237   template <typename T>
238   int AddConstInput(const TensorData& t, std::initializer_list<T> data) {
239     return AddConstInput(t, data.begin(), data.size());
240   }
241 
242   // Templated version of AddConstInput() taking TensorData and vector.
243   template <typename T>
244   int AddConstInput(const TensorData& t, const std::vector<T>& data) {
245     return AddConstInput(t, data.data(), data.size());
246   }
247 
248   // TODO(b/166202747): Use a better way to do type specialization. Reduce
249   // duplicate code in the two functions below.
250   int AddConstSparseInput(const TensorData& t,
251                           const std::vector<int8_t>& data) {
252     int id = tensors_.size();
253     const int dims_count = t.traversal_order.size();
254     std::vector<int8_t> dense_data(data);
255 
256     tflite::internal::sparsity::FormatConverter<int8_t> converter(
257         t.shape, t.traversal_order, t.format, t.block_size, t.block_map);
258     converter.DenseToSparse(dense_data.data());
259 
260     const auto& dim_metadata = converter.GetDimMetadata();
261     const auto& sparse_data = converter.GetData();
262 
263     // Build sparsity parameter.
264     std::vector<flatbuffers::Offset<DimensionMetadata>> fb_dim_metadata(
265         dims_count);
266     for (int i = 0; i < dims_count; i++) {
267       const int metadata_idx = 2 * i;
268       if (i < t.shape.size() &&
269           t.format[t.traversal_order[i]] == kTfLiteDimSparseCSR) {
270         auto array_segments =
271             CreateInt32Vector(builder_, builder_.CreateVector<int>(
272                                             dim_metadata[metadata_idx]))
273                 .Union();
274         auto array_indices =
275             CreateInt32Vector(builder_, builder_.CreateVector<int>(
276                                             dim_metadata[metadata_idx + 1]))
277                 .Union();
278         fb_dim_metadata[i] = CreateDimensionMetadata(
279             builder_, DimensionType_SPARSE_CSR, 0,
280             SparseIndexVector_Int32Vector, array_segments,
281             SparseIndexVector_Int32Vector, array_indices);
282       } else {
283         fb_dim_metadata[i] = CreateDimensionMetadata(
284             builder_, DimensionType_DENSE, dim_metadata[metadata_idx][0]);
285       }
286     }
287 
288     flatbuffers::Offset<SparsityParameters> s_param = CreateSparsityParameters(
289         builder_, builder_.CreateVector<int>(t.traversal_order),
290         builder_.CreateVector<int>(t.block_map),
291         builder_.CreateVector(fb_dim_metadata));
292 
293     int buffer_id = 0;
294     if (!data.empty()) {
295       // Initialize buffers list with empty buffer to allow for non-const
296       // tensors.
297       if (buffers_.empty()) {
298         buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
299       }
300 
301       // Add compressed data as a Buffer to buffers list.
302       buffer_id = buffers_.size();
303       auto data_buffer = builder_.CreateVector(
304           reinterpret_cast<const uint8_t*>(sparse_data.data()),
305           sparse_data.size());
306       buffers_.push_back(CreateBuffer(builder_, data_buffer));
307     }
308 
309     tensors_.push_back(CreateTensor(
310         builder_, builder_.CreateVector<int>(t.shape), t.type,
311         /*buffer=*/buffer_id,
312         /*name=*/0, /*quantization=*/0, /*is_variable=*/false, s_param));
313 
314     inputs_.push_back(id);
315     tensor_data_[id] = t;
316 
317     return id;
318   }
319 
320   // Add a constant sparse tensor as input.
321   template <typename T>
322   int AddConstSparseInput(const TensorData& t, const std::vector<T>& data,
323                           bool symmetric_quantize = false) {
324     int id = tensors_.size();
325     const int dims_count = t.traversal_order.size();
326     std::vector<T> dense_data(data);
327 
328     tflite::internal::sparsity::FormatConverter<T> converter(
329         t.shape, t.traversal_order, t.format, t.block_size, t.block_map);
330     converter.DenseToSparse(dense_data.data());
331 
332     const auto dim_metadata = converter.GetDimMetadata();
333     const auto sparse_data = converter.GetData();
334 
335     // Build sparsity parameter.
336     std::vector<flatbuffers::Offset<DimensionMetadata>> fb_dim_metadata(
337         dims_count);
338     for (int i = 0; i < dims_count; i++) {
339       const int metadata_idx = 2 * i;
340       if (i < t.shape.size() &&
341           t.format[t.traversal_order[i]] == kTfLiteDimSparseCSR) {
342         auto array_segments =
343             CreateInt32Vector(builder_,
344                               builder_.CreateVector(dim_metadata[metadata_idx]))
345                 .Union();
346         auto array_indices =
347             CreateInt32Vector(
348                 builder_, builder_.CreateVector(dim_metadata[metadata_idx + 1]))
349                 .Union();
350         fb_dim_metadata[i] = CreateDimensionMetadata(
351             builder_, DimensionType_SPARSE_CSR, 0,
352             SparseIndexVector_Int32Vector, array_segments,
353             SparseIndexVector_Int32Vector, array_indices);
354       } else {
355         fb_dim_metadata[i] = CreateDimensionMetadata(
356             builder_, DimensionType_DENSE, dim_metadata[metadata_idx][0]);
357       }
358     }
359 
360     flatbuffers::Offset<SparsityParameters> s_param = CreateSparsityParameters(
361         builder_, builder_.CreateVector(t.traversal_order),
362         builder_.CreateVector(t.block_map),
363         builder_.CreateVector(fb_dim_metadata));
364 
365     flatbuffers::Offset<QuantizationParameters> q_params = 0;
366     int buffer_id = 0;
367     if (!data.empty()) {
368       // Initialize buffers list with empty buffer to allow for non-const
369       // tensors.
370       if (buffers_.empty()) {
371         buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
372       }
373 
374       // Add compressed data as a Buffer to buffers list.
375       buffer_id = buffers_.size();
376       // When the quantization parameter is set for the added tensor, we
377       // quantize the given data.
378       bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);
379       if (symmetric_quantize) {
380         const int length = sparse_data.size();
381         std::vector<int8_t> q(length);
382         float min, max, scaling_factor;
383         tensor_utils::SymmetricQuantizeFloats(
384             sparse_data.data(), length, q.data(), &min, &max, &scaling_factor);
385         std::vector<float> scales{scaling_factor};
386         std::vector<int64_t> zero_points{0};
387         q_params = CreateQuantizationParameters(
388             builder_, 0, 0, builder_.CreateVector<float>(scales),
389             builder_.CreateVector<int64_t>(zero_points));
390         auto data_buffer = builder_.CreateVector(
391             reinterpret_cast<const uint8_t*>(q.data()), q.size());
392         buffers_.push_back(CreateBuffer(builder_, data_buffer));
393       } else if (is_quantized) {
394         CHECK_EQ(t.type, TensorType_INT8)
395             << "Only INT8 quantization is supported for sparsified tensors";
396         auto q = Quantize<int8_t>(sparse_data, t.scale, t.zero_point);
397         std::vector<float> scales{t.scale};
398         std::vector<int64_t> zero_points{0};
399         q_params = CreateQuantizationParameters(
400             builder_, t.min, t.max, builder_.CreateVector<float>(scales),
401             builder_.CreateVector<int64_t>(zero_points));
402         auto data_buffer = builder_.CreateVector(
403             reinterpret_cast<const uint8_t*>(q.data()), q.size());
404         buffers_.push_back(CreateBuffer(builder_, data_buffer));
405       } else {
406         auto data_buffer = builder_.CreateVector(
407             reinterpret_cast<const uint8_t*>(sparse_data.data()),
408             sizeof(T) * sparse_data.size());
409         buffers_.push_back(CreateBuffer(builder_, data_buffer));
410       }
411     }
412 
413     tensors_.push_back(
414         CreateTensor(builder_, builder_.CreateVector<int>(t.shape),
415                      symmetric_quantize ? TensorType_INT8 : t.type,
416                      /*buffer=*/buffer_id,
417                      /*name=*/0, q_params, /*is_variable=*/false, s_param));
418 
419     inputs_.push_back(id);
420     tensor_data_[id] = t;
421 
422     return id;
423   }
424 
425   // Add a null input tensor (optional input) and return kTfLiteOptionalTensor.
426   int AddNullInput();
427 
428   // Add a TensorType output tensor and return its index.
429   int AddOutput(const TensorData& t);
430 
431   template <typename T>
432   void QuantizeAndPopulate(int index, const std::vector<float>& data) {
433     TfLiteTensor* t = interpreter_->tensor(index);
434     auto q = Quantize<T>(data, t->params.scale, t->params.zero_point);
435     PopulateTensor(index, 0, q.data(), q.data() + q.size());
436   }
437 
438   void SymmetricQuantizeAndPopulate(int index, const std::vector<float>& data) {
439     std::vector<int8_t> q = QuantizeTensor(index, data);
440     PopulateTensor(index, /*offset=*/0, reinterpret_cast<uint8_t*>(q.data()),
441                    reinterpret_cast<uint8_t*>(q.data() + q.size()));
442   }
443 
444   void SignedSymmetricQuantizeAndPopulate(int index,
445                                           const std::vector<float>& data) {
446     std::vector<int8_t> q = QuantizeTensor(index, data);
447     PopulateTensor(index, /*offset=*/0, q.data(), q.data() + q.size());
448   }
449 
450   // Quantize and populate data for filter with per channel quantization.
451   void PerChannelSymmetricQuantizeAndPopulate(
452       int index, const std::vector<float>& input_data) {
453     TfLiteTensor* t = interpreter_->tensor(index);
454     auto* params =
455         reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
456     const int channel_index = params->quantized_dimension;
457 
458     std::vector<int32_t> shape(t->dims->size);
459     for (size_t i = 0; i < shape.size(); ++i) {
460       shape[i] = t->dims->data[i];
461     }
462     const int32_t num_inputs = input_data.size();
463     const int32_t num_channel = shape[channel_index];
464     std::vector<int8_t> quantized_output(num_inputs);
465     std::vector<float> scales_inv(num_channel);
466     for (int i = 0; i < num_channel; ++i) {
467       const float scale = params->scale->size == 1 ? params->scale->data[0]
468                                                    : params->scale->data[i];
469       scales_inv[i] = 1.0f / scale;
470     }
471     optimize::utils::SymmetricPerChannelQuantizeValues(
472         input_data.data(), scales_inv, shape, channel_index, &quantized_output);
473 
474     PopulateTensor(index, /*offset=*/0, quantized_output.data(),
475                    quantized_output.data() + quantized_output.size());
476   }
477 
478   template <typename T>
479   void PerChannelQuantizeBiasPopulateTensor(
480       const std::vector<float>& input_data, int index,
481       TfLiteAffineQuantization* params) {
482     const int32_t num_inputs = input_data.size();
483     std::vector<T> quantized_output(num_inputs);
484     for (int i = 0; i < num_inputs; ++i) {
485       const float scale = params->scale->size == 1 ? params->scale->data[0]
486                                                    : params->scale->data[i];
487       quantized_output[i] = input_data[i] / scale;
488     }
489   }
490 
491   template <typename T>
492   void PerChannelQuantizeBiasPopulateTensor(
493       int index, const std::vector<float>& input_data,
494       const TfLiteAffineQuantization* params) {
495     const int32_t num_inputs = input_data.size();
496     std::vector<T> quantized_output(num_inputs);
497     for (int i = 0; i < num_inputs; ++i) {
498       const float scale = params->scale->size == 1 ? params->scale->data[0]
499                                                    : params->scale->data[i];
500       quantized_output[i] = input_data[i] / scale;
501     }
502     PopulateTensor(index, /*offset=*/0, quantized_output.data(),
503                    quantized_output.data() + quantized_output.size());
504   }
505 
506   // Quantize and populate data for bias with per channel quantization.
507   void PerChannelQuantizeBias(int index, const std::vector<float>& input_data) {
508     TfLiteTensor* t = interpreter_->tensor(index);
509     auto* params =
510         reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
511     CHECK(t->type == kTfLiteInt32 || t->type == kTfLiteInt64);
512     if (t->type == kTfLiteInt32) {
513       PerChannelQuantizeBiasPopulateTensor<int32_t>(index, input_data, params);
514     } else {
515       PerChannelQuantizeBiasPopulateTensor<int64_t>(index, input_data, params);
516     }
517   }
518 
519   const std::vector<int>& GetShape(int id) { return tensor_data_.at(id).shape; }
520 
521   float GetScale(int id) { return tensor_data_.at(id).scale; }
522   int32_t GetZeroPoint(int id) { return tensor_data_.at(id).zero_point; }
523 
524   // Define the operator in this model.
525   void SetBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
526                     flatbuffers::Offset<void> builtin_options);
527   void SetCustomOp(const string& name,
528                    const std::vector<uint8_t>& custom_option,
529                    const std::function<TfLiteRegistration*()>& registration);
530 
531   // Allocate tensors and apply delegate.
532   // Note that this is called by default in BuildInterpreter().
533   void AllocateAndDelegate(bool apply_delegate);
534 
535   // Build the interpreter for this model. Also, resize and allocate all
536   // tensors given the shapes of the inputs.
537   // Note, if `allocate_and_delegate` is `false`, then the value of
538   // `apply_delegate` is ignored.
539   void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
540                         int num_threads, bool allow_fp32_relax_to_fp16,
541                         bool apply_delegate, bool allocate_and_delegate = true);
542 
543   void BuildInterpreter(std::vector<std::vector<int>> input_shapes);
544 
545   // Executes inference and returns the status code.
546   TfLiteStatus Invoke();
547 
548   void PopulateStringTensor(int index, const std::vector<string>& content) {
549     auto tensor = interpreter_->tensor(index);
550     DynamicBuffer buf;
551     for (const string& s : content) {
552       buf.AddString(s.data(), s.length());
553     }
554     buf.WriteToTensor(tensor, /*new_shape=*/nullptr);
555   }
556 
557   // Populate the tensor given its index.
558   // TODO(b/110696148) clean up and merge with vector-taking variant below.
559   template <typename T>
560   void PopulateTensor(int index, const std::initializer_list<T>& data) {
561     T* v = interpreter_->typed_tensor<T>(index);
562     if (!v) {
563       auto* t = interpreter_->tensor(index);
564       CHECK(t) << "No tensor with index " << index << ".";
565       CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
566       CHECK_EQ(t->type, typeToTfLiteType<T>())
567           << "Type mismatch for tensor with index " << index << ". Requested "
568           << TfLiteTypeGetName(typeToTfLiteType<T>()) << ", got "
569           << TfLiteTypeGetName(t->type) << ".";
570       LOG(FATAL) << "Unknown tensor error.";
571     }
572     for (const T& f : data) {
573       *v = f;
574       ++v;
575     }
576   }
577 
578   // Populate the tensor given its index.
579   // TODO(b/110696148) clean up and merge with initializer_list-taking variant
580   // above.
581   template <typename T>
582   void PopulateTensor(int index, const std::vector<T>& data) {
583     T* v = interpreter_->typed_tensor<T>(index);
584     if (!v) {
585       auto* t = interpreter_->tensor(index);
586       CHECK(t) << "No tensor with index " << index << ".";
587       CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
588       CHECK_EQ(t->type, typeToTfLiteType<T>())
589           << "Type mismatch for tensor with index " << index << ". Requested "
590           << TfLiteTypeGetName(typeToTfLiteType<T>()) << ", got "
591           << TfLiteTypeGetName(t->type) << ".";
592       LOG(FATAL) << "Unknown tensor error.";
593     }
594     for (const T& f : data) {
595       *v = f;
596       ++v;
597     }
598   }
599 
600   // Partially populate the tensor, starting at the given offset.
601   template <typename T>
602   void PopulateTensor(int index, int offset, T* begin, T* end) {
603     T* v = interpreter_->typed_tensor<T>(index);
604     if (!v) {
605       auto* t = interpreter_->tensor(index);
606       CHECK(t) << "No tensor with index " << index << ".";
607       CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
608       CHECK(v) << "Type mismatch for tensor with index " << index
609                << ". Requested " << typeToTfLiteType<T>() << ", got "
610                << t->type;
611     }
612     memcpy(v + offset, begin, (end - begin) * sizeof(T));
613   }
614 
615   // Return a vector with the flattened contents of a tensor.
616   template <typename T>
617   std::vector<T> ExtractVector(int index) const {
618     const T* v = interpreter_->typed_tensor<T>(index);
619     const auto* tensor = interpreter_->tensor(index);
620     CHECK(v) << "Could not extract vector at index: " << index;
621     int tensor_size;
622     if (tensor->sparsity) {
623       // Getting the size of the sparse buffer this way is based on the
624       // assumption that the last dimension of the tensor is a compressed
625       // dimension.
626       tensor_size = tensor->sparsity
627                         ->dim_metadata[tensor->sparsity->dim_metadata_size - 1]
628                         .array_indices->size;
629     } else {
630       tensor_size = GetTensorSize(index);
631     }
632 
633     return std::vector<T>(v, v + tensor_size);
634   }
635 
636   // Return the TFLite model buffer, only available after BuildInterpreter.
637   const uint8_t* GetModelBuffer() { return builder_.GetBufferPointer(); }
638 
639   std::vector<int> GetTensorShape(int index) {
640     std::vector<int> result;
641     TfLiteTensor* t = interpreter_->tensor(index);
642     result.reserve(t->dims->size);
643     for (int i = 0; i < t->dims->size; ++i) {
644       result.push_back(t->dims->data[i]);
645     }
646     return result;
647   }
648 
649   // Sets the number of threads available to the interpreter.
650   // Reconstructs the interpreter if `reset_interpreter` is true.
651   void SetNumThreads(int num_threads, bool reset_interpreter = false) {
652     CHECK(interpreter_ != nullptr);
653     if (reset_interpreter) {
654       // Reconstruct the interpreter, as the number of threads may affect
655       // internal state, e.g. scratch buffer allocation.
656       BuildInterpreter(input_shapes_, num_threads, allow_fp32_relax_to_fp16_,
657                        apply_delegate_, allocate_and_delegate_);
658     }
659     interpreter_->SetNumThreads(num_threads);
660   }
661 
662   void SetResolver(std::unique_ptr<OpResolver> resolver) {
663     resolver_ = std::move(resolver);
664   }
665 
666   // Indicate whether the test has the NNAPI delegate applied.
667   static bool GetForceUseNnapi();
668   int CountOpsExecutedByCpuKernel();
669 
670  protected:
671   int32_t GetTensorSize(int index) const;
672 
673   // Tell TF Lite runtime to skip applying default delegates (i.e. XNNPACK
674   // delegate) when handling this op-level model.
675   void SetBypassDefaultDelegates() { bypass_default_delegates_ = true; }
676 
677   flatbuffers::FlatBufferBuilder builder_;
678   std::unique_ptr<tflite::Interpreter> interpreter_;
679   std::unique_ptr<OpResolver> resolver_;
680 
681   std::vector<flatbuffers::Offset<OperatorCode>> opcodes_;
682   std::vector<flatbuffers::Offset<Operator>> operators_;
683   std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;
684 
685   template <typename T>
686   int AddTensor(TensorData t, const T* data, size_t size,
687                 bool is_variable = false) {
688     int id = tensors_.size();
689 
690     // This is slightly different depending on whether we are adding a
691     // quantized or a regular tensor.
692     bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);
693 
694     flatbuffers::Offset<QuantizationParameters> q_params = 0;
695 
696     if (is_quantized) {
697       if (t.min != 0 || t.max != 0) {
698         if (t.type == TensorType_UINT8) {
699           std::tie(t.scale, t.zero_point) =
700               QuantizationParams<uint8_t>(t.min, t.max);
701         } else if (t.type == TensorType_INT8) {
702           std::tie(t.scale, t.zero_point) =
703               QuantizationParams<int8_t>(t.min, t.max);
704         } else if (t.type == TensorType_INT32) {
705           std::tie(t.scale, t.zero_point) =
706               QuantizationParams<int32_t>(t.min, t.max);
707         } else if (t.type == TensorType_INT16) {
708           std::tie(t.scale, t.zero_point) =
709               QuantizationParams<int16_t>(t.min, t.max);
710         } else {
711           LOG(FATAL) << "No support for the requested quantized type";
712         }
713         t.min = 0;
714         t.max = 0;
715       }
716 
717       std::vector<float> scales{t.scale};
718       std::vector<int64_t> zero_points{t.zero_point};
719       q_params = CreateQuantizationParameters(
720           builder_, /*min=*/0, /*max=*/0, builder_.CreateVector<float>(scales),
721           builder_.CreateVector<int64_t>(zero_points));
722     }
723 
724     int buffer_id = 0;
725     if (size) {
726       // Initialize buffers list with empty buffer to allow for non-const
727       // tensors.
728       if (buffers_.empty()) {
729         buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
730       }
731 
732       builder_.ForceVectorAlignment(size, sizeof(T), 16);
733       // Add data as a Buffer to buffers list.
734       buffer_id = buffers_.size();
735       auto data_buffer = builder_.CreateVector(
736           reinterpret_cast<const uint8_t*>(data), sizeof(T) * size);
737       buffers_.push_back(CreateBuffer(builder_, data_buffer));
738     }
739 
740     tensors_.push_back(CreateTensor(
741         builder_, builder_.CreateVector<int>(t.shape), t.type,
742         /*buffer=*/buffer_id,
743         /*name=*/0, q_params, is_variable,
744         /*sparsity=*/0, builder_.CreateVector<int>(t.shape_signature)));
745 
746     tensor_data_[id] = t;
747 
748     return id;
749   }
750 
751  private:
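  // Computes (scale, zero_point) from a (min, max) range for type T. As a
  // worked example (for illustration): for T = uint8_t and (min, max) =
  // (-1.0f, 1.0f), scale = 2 / 255 and the nudged zero_point is 128.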
752   template <typename T>
753   std::pair<float, int32_t> QuantizationParams(float f_min, float f_max) {
754     int32_t zero_point = 0;
755     float scale = 0;
756     const T qmin = std::numeric_limits<T>::min();
757     const T qmax = std::numeric_limits<T>::max();
758     const float qmin_double = qmin;
759     const float qmax_double = qmax;
760     // 0 should always be a representable value. Let's assume that the initial
761     // min,max range contains 0.
762     CHECK_LE(f_min, 0);
763     CHECK_GE(f_max, 0);
764     if (f_min == f_max) {
765       // Special case where the min,max range is a point. Should be {0}.
766       CHECK_EQ(f_min, 0);
767       CHECK_EQ(f_max, 0);
768       return {scale, zero_point};
769     }
770 
771     // General case.
772     //
773     // First determine the scale.
774     scale = (f_max - f_min) / (qmax_double - qmin_double);
775 
776     // Zero-point computation.
777     // First the initial floating-point computation. The zero-point can be
778     // determined from solving an affine equation for any known pair
779     // (real value, corresponding quantized value).
780     // We know two such pairs: (rmin, qmin) and (rmax, qmax).
781     // The arithmetic error on the zero point computed from either pair
782     // will be roughly machine_epsilon * (sum of absolute values of terms)
783     // so we want to use the variant that adds the smaller terms.
784     const float zero_point_from_min = qmin_double - f_min / scale;
785     const float zero_point_from_max = qmax_double - f_max / scale;
786 
787     const float zero_point_from_min_error =
788         std::abs(qmin_double) + std::abs(f_min / scale);
789 
790     const float zero_point_from_max_error =
791         std::abs(qmax_double) + std::abs(f_max / scale);
792 
793     const float zero_point_double =
794         zero_point_from_min_error < zero_point_from_max_error
795             ? zero_point_from_min
796             : zero_point_from_max;
797 
798     // Now we need to nudge the zero point to be an integer
799     // (our zero points are integer, and this is motivated by the requirement
800     // to be able to represent the real value "0" exactly as a quantized value,
801     // which is required in multiple places, for example in Im2col with SAME
802     //  padding).
803 
804     T nudged_zero_point = 0;
805     if (zero_point_double < qmin_double) {
806       nudged_zero_point = qmin;
807     } else if (zero_point_double > qmax_double) {
808       nudged_zero_point = qmax;
809     } else {
810       nudged_zero_point = static_cast<T>(std::round(zero_point_double));
811     }
812 
813     // The zero point should always be within the range of quantized values,
814     // [qmin, qmax].
815     CHECK_GE(nudged_zero_point, qmin);
816     CHECK_LE(nudged_zero_point, qmax);
817 
818     zero_point = nudged_zero_point;
819     // finally, return the values
820     return {scale, zero_point};
821   }
822 
823   int AddTensorPerChannelQuant(const TensorData& t) {
824     // type does not matter when adding empty data.
825     return AddTensorPerChannelQuant<uint8_t>(t, nullptr, 0);
826   }
827 
828   template <typename T>
829   int AddTensorPerChannelQuant(const TensorData& t, const T* data,
830                                size_t size) {
831     const int id = tensors_.size();
832     flatbuffers::Offset<QuantizationParameters> q_params = 0;
833     q_params = CreateQuantizationParameters(
834         builder_, /*min=*/0, /*max=*/0,
835         /*scale=*/
836         builder_.CreateVector<float>(t.per_channel_quantization_scales),
837         /*zero point=*/
838         builder_.CreateVector<int64_t>(t.per_channel_quantization_offsets),
839         QuantizationDetails_NONE, 0, t.channel_index);
840 
841     int buffer_id = 0;
842     if (size) {
843       // Initialize buffers list with empty buffer to allow for non-const
844       // tensors.
845       if (buffers_.empty()) {
846         buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
847       }
848 
849       // Add data as a Buffer to buffers list.
850       buffer_id = buffers_.size();
851       auto data_buffer = builder_.CreateVector(
852           reinterpret_cast<const uint8_t*>(data), sizeof(T) * size);
853       buffers_.push_back(CreateBuffer(builder_, data_buffer));
854     }
855 
856     tensors_.push_back(
857         CreateTensor(builder_, builder_.CreateVector<int>(t.shape), t.type,
858                      /*buffer=*/buffer_id,
859                      /*name=*/0, q_params, /*is_variable=*/false));
860     tensor_data_[id] = t;
861     return id;
862   }
863 
864   std::vector<int8_t> QuantizeTensor(int index,
865                                      const std::vector<float>& data) {
866     TfLiteTensor* t = interpreter_->tensor(index);
867     const int length = data.size();
868     std::vector<int8_t> q(length);
869     float min, max, scaling_factor;
870     tensor_utils::SymmetricQuantizeFloats(data.data(), length, q.data(), &min,
871                                           &max, &scaling_factor);
872     // Update quantization params.
873     t->params.scale = scaling_factor;
874     t->params.zero_point = 0;
875     // Populate the new quantization params.
876     TfLiteQuantizationFree(&t->quantization);
877     t->quantization.type = kTfLiteAffineQuantization;
878     auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
879         malloc(sizeof(TfLiteAffineQuantization)));
880     affine_quantization->quantized_dimension = 0;
881     affine_quantization->scale = TfLiteFloatArrayCreate(1);
882     affine_quantization->zero_point = TfLiteIntArrayCreate(1);
883     affine_quantization->scale->data[0] = scaling_factor;
884     affine_quantization->zero_point->data[0] = 0;
885     t->quantization.params = affine_quantization;
886     return q;
887   }
888 
889   // Checks if acceleration has been done as expected.
890   // Currently supports only NNAPI.
891   // It verifies if the test was configured to run with NNAPI acceleration
892   // or not (SetForceUseNnapi(true)).
893   // If so, it checks that:
894   // - the test case has been listed in the list of nnapi-accelerated cases
895   // - the test is running on a device (NNAPI has been loaded)
896   //
897   // The list of nnapi-accelerated test cases is a file containing regexes that
898   // include or exclude specific test cases, plus the minimum Android SDK version
899   // for which acceleration should be enabled. For example:
900   // To enable the test BorderFloat in TopKV2OpTest only from
901   // android_sdk_version 29:
902   //
903   // TopKV2OpTest/BorderFloat,29
904   //
905   // And to have it always excluded while enabling all other Float tests
906   // (the order of the rules is important, the first one matching is used):
907   //
908   // -TopKV2OpTest/BorderFloat
909   // TopKV2OpTest/.+Float
910 
911   void ValidateAcceleration();
912 
913   // If the test was configured to use NNAPI and NNAPI was actually loaded,
914   // checks if the single operation in the model has been accelerated.
915   void ExpectOpAcceleratedWithNnapi(const std::string& test_id);
916 
917   std::map<int, TensorData> tensor_data_;
918   std::vector<int32_t> inputs_;
919   std::vector<int32_t> intermediates_;
920   std::vector<int32_t> outputs_;
921   std::vector<flatbuffers::Offset<Tensor>> tensors_;
922   std::vector<flatbuffers::Offset<Buffer>> buffers_;
923   TfLiteDelegate* delegate_ = nullptr;  // Does not own the memory.
924   std::vector<std::vector<int>> input_shapes_;
925   int num_applied_delegates_ = 0;
926   bool allow_fp32_relax_to_fp16_ = false;
927   bool apply_delegate_ = true;
928   bool allocate_and_delegate_ = true;
929 
930   // Whether to bypass the application of TF Lite default delegates (i.e.
931   // XNNPACK delegate) at runtime.
932   bool bypass_default_delegates_ = false;
933 };
934 
935 // Populate string tensors.
936 template <>
937 inline void SingleOpModel::PopulateTensor<string>(
938     int index, const std::initializer_list<string>& data) {
939   PopulateStringTensor(index, data);
940 }
941 
942 // Base class for single op unit tests.
943 // The tests are parameterized to test multiple kernels for a single op.
944 // The parameters are strings like "optimized" and "reference" to have better
945 // readability in test reports.
946 //
947 // To use this class:
948 // * Define a constant map from strings to TfLiteRegistration.
949 // * Implement a test class that inherits SingleOpTest.
950 // * Instantiate the test cases with SingleOpTest::GetKernelTags helper
951 //   function.
952 // * Call GetRegistration to get the TfLiteRegistration to be used before
953 //   building the interpreter.
954 class SingleOpTest : public ::testing::TestWithParam<string> {
955  public:
956   static std::vector<string> GetKernelTags(
957       const std::map<string, TfLiteRegistration*>& kernel_map) {
958     std::vector<string> tags;
959     tags.reserve(kernel_map.size());
960     for (const auto& it : kernel_map) {
961       tags.push_back(it.first);
962     }
963     return tags;
964   }
965 
966  protected:
967   virtual const std::map<string, TfLiteRegistration*>& GetKernelMap() = 0;
968   TfLiteRegistration* GetRegistration() {
969     return GetKernelMap().at(GetParam());
970   }
971 };
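// A minimal sketch of the usage pattern described above SingleOpTest (the
// kKernelMap and MyOpTest names below are hypothetical):
//   const std::map<string, TfLiteRegistration*>* kKernelMap = ...;
//   class MyOpTest : public SingleOpTest {
//    protected:
//     const std::map<string, TfLiteRegistration*>& GetKernelMap() override {
//       return *kKernelMap;
//     }
//   };
//   INSTANTIATE_TEST_SUITE_P(
//       MyOpTests, MyOpTest,
//       ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap)));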
972 
973 // Returns the corresponding TensorType given the type T.
974 template <typename T>
975 TensorType GetTensorType() {
976   if (std::is_same<T, float>::value) return TensorType_FLOAT32;
977   if (std::is_same<T, TfLiteFloat16>::value) return TensorType_FLOAT16;
978   if (std::is_same<T, double>::value) return TensorType_FLOAT64;
979   if (std::is_same<T, int8_t>::value) return TensorType_INT8;
980   if (std::is_same<T, int16_t>::value) return TensorType_INT16;
981   if (std::is_same<T, uint16_t>::value) return TensorType_UINT16;
982   if (std::is_same<T, int32_t>::value) return TensorType_INT32;
983   if (std::is_same<T, uint32_t>::value) return TensorType_UINT32;
984   if (std::is_same<T, int64_t>::value) return TensorType_INT64;
985   if (std::is_same<T, uint8_t>::value) return TensorType_UINT8;
986   if (std::is_same<T, string>::value) return TensorType_STRING;
987   if (std::is_same<T, bool>::value) return TensorType_BOOL;
988   return TensorType_MIN;  // default value
989 }
990 
991 // Strings have a special implementation that is in test_util.cc
992 template <>
993 std::vector<string> SingleOpModel::ExtractVector(int index) const;
994 
995 // The TypeUnion struct specializations hold a collection of related types.
996 // Each struct holds: 1. a primitive type (e.g. float), 2. a TensorType (e.g.
997 // TensorType_FLOAT32), and 3. a TfLiteType (e.g. kTfLiteFloat32). The latter
998 // two are actually enum values and not raw types, but these specializations
999 // make it easy to use gUnit Typed Test Suite:
1000 // https://github.com/google/googletest/blob/master/googletest/docs/advanced.md#typed-tests
1001 template <typename T>
1002 struct TypeUnion;
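// A minimal sketch of driving a gUnit typed test suite with TypeUnion (the
// fixture and type list below are hypothetical):
//   template <typename T>
//   class ArithmeticOpTest : public ::testing::Test {};
//   using TestTypes = ::testing::Types<TypeUnion<float>, TypeUnion<int32_t>>;
//   TYPED_TEST_SUITE(ArithmeticOpTest, TestTypes);
//   TYPED_TEST(ArithmeticOpTest, SomeCase) {
//     using ScalarType = typename TypeParam::ScalarType;
//     // ... build a SingleOpModel using TypeParam::tensor_type ...
//   }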
1003 
1004 template <>
1005 struct TypeUnion<float> {
1006  public:
1007   // NOLINTNEXTLINE
1008   static constexpr TensorType tensor_type = TensorType::TensorType_FLOAT32;
1009   // NOLINTNEXTLINE
1010   static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteFloat32;
1011   typedef float ScalarType;
1012 };
1013 
1014 template <>
1015 struct TypeUnion<int32_t> {
1016  public:
1017   // NOLINTNEXTLINE
1018   static constexpr TensorType tensor_type = TensorType::TensorType_INT32;
1019   // NOLINTNEXTLINE
1020   static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt32;
1021   typedef int32_t ScalarType;
1022 };
1023 
1024 template <>
1025 struct TypeUnion<uint32_t> {
1026  public:
1027   // NOLINTNEXTLINE
1028   static constexpr TensorType tensor_type = TensorType::TensorType_UINT32;
1029   // NOLINTNEXTLINE
1030   static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteUInt32;
1031   typedef uint32_t ScalarType;
1032 };
1033 
1034 template <>
1035 struct TypeUnion<int16_t> {
1036  public:
1037   // NOLINTNEXTLINE
1038   static constexpr TensorType tensor_type = TensorType::TensorType_INT16;
1039   // NOLINTNEXTLINE
1040   static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt16;
1041   typedef int16_t ScalarType;
1042 };
1043 
1044 template <>
1045 struct TypeUnion<uint16_t> {
1046  public:
1047   // NOLINTNEXTLINE
1048   static constexpr TensorType tensor_type = TensorType::TensorType_UINT16;
1049   // NOLINTNEXTLINE
1050   static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteUInt16;
1051   typedef uint16_t ScalarType;
1052 };
1053 
1054 template <>
1055 struct TypeUnion<int8_t> {
1056  public:
1057   // NOLINTNEXTLINE
1058   static constexpr TensorType tensor_type = TensorType::TensorType_INT8;
1059   // NOLINTNEXTLINE
1060   static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt8;
1061   typedef int8_t ScalarType;
1062 };
1063 
1064 template <>
1065 struct TypeUnion<uint8_t> {
1066  public:
1067   // NOLINTNEXTLINE
1068   static constexpr TensorType tensor_type = TensorType::TensorType_UINT8;
1069   // NOLINTNEXTLINE
1070   static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteUInt8;
1071   typedef uint8_t ScalarType;
1072 };
1073 
1074 class MultiOpModel : public SingleOpModel {
1075  public:
1076   MultiOpModel() : SingleOpModel() {}
1077   ~MultiOpModel() {}
1078 
1079   void AddBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
1080                     const flatbuffers::Offset<void>& builtin_options,
1081                     const std::vector<int32_t>& inputs,
1082                     const std::vector<int32_t>& outputs);
1083 
1084   void AddCustomOp(const string& name,
1085                    const std::vector<uint8_t>& custom_option,
1086                    const std::function<TfLiteRegistration*()>& registration,
1087                    const std::vector<int32_t>& inputs,
1088                    const std::vector<int32_t>& outputs);
1089 
1090   template <typename T>
1091   int AddInnerTensor(TensorData t) {
1092     return AddTensor<T>(t, {}, false);
1093   }
1094 };
1095 }  // namespace tflite
1096 
1097 #endif  // TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
1098