/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
#define TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <cmath>
#include <complex>
#include <functional>
#include <initializer_list>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/string_type.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/testing/util.h"  // IWYU pragma: keep
#include "tensorflow/lite/tools/optimize/quantization_utils.h"
#include "tensorflow/lite/type_to_tflitetype.h"

namespace tflite {

// A gmock matcher that checks that the elements of a float vector match to
// within a given tolerance.
std::vector<::testing::Matcher<float>> ArrayFloatNear(
    const std::vector<float>& values, float max_abs_error = 1e-5);

// A gmock matcher that checks that the elements of a complex vector match to
// within a given tolerance.
std::vector<::testing::Matcher<std::complex<float>>> ArrayComplex64Near(
    const std::vector<std::complex<float>>& values, float max_abs_error = 1e-5);

template <typename T>
inline std::vector<T> Quantize(const std::vector<float>& data, float scale,
                               int32_t zero_point) {
  std::vector<T> q;
  for (const auto& f : data) {
    q.push_back(static_cast<T>(std::max<float>(
        std::numeric_limits<T>::min(),
        std::min<float>(std::numeric_limits<T>::max(),
                        std::round(zero_point + (f / scale))))));
  }
  return q;
}

template <typename T>
inline std::vector<float> Dequantize(const std::vector<T>& data, float scale,
                                     int32_t zero_point) {
  std::vector<float> f;
  f.reserve(data.size());
  for (const T& q : data) {
    f.push_back(scale * (q - zero_point));
  }
  return f;
}
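
// For illustration, a hypothetical round trip with scale = 0.5 and
// zero_point = 0 (values chosen purely for this example):
//   Quantize<int8_t>({1.0f, -2.0f}, 0.5f, 0)  -> {2, -4}
//   Dequantize<int8_t>({2, -4}, 0.5f, 0)      -> {1.0f, -2.0f}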

// A test model that contains a single operator. All operator inputs and
// outputs are external to the model, so the tests can directly access them.
// Typical usage:
//    SingleOpModel m;
//    int a = m.AddInput({TensorType_FLOAT32, a_shape});
//    int b = m.AddInput({TensorType_FLOAT32, b_shape});
//    int c = m.AddOutput({TensorType_FLOAT32, {}});
//    m.SetBuiltinOp(...);
//    m.BuildInterpreter({GetShape(a), GetShape(b)});
//    m.PopulateTensor(a, {...});
//    m.PopulateTensor(b, {...});
//    m.Invoke();
//    EXPECT_THAT(m.ExtractVector<float>(c), ArrayFloatNear({...}));
//

// A helper struct to construct test tensors. This is particularly useful for
// quantized tensors, which must have their scale and zero_point defined before
// the actual data is known. This mimics what happens in practice: quantization
// parameters are calculated during training or post-training.
struct TensorData {
  // NOLINTNEXTLINE
  TensorData(TensorType type = TensorType_FLOAT32, std::vector<int> shape = {},
             float min = 0.0f, float max = 0.0f, float scale = 0.0f,
             int32_t zero_point = 0, bool per_channel_quantization = false,
             std::vector<float> per_channel_quantization_scales = {},
             std::vector<int64_t> per_channel_quantization_offsets = {},
             int32_t channel_index = 0, std::vector<int> traversal_order = {},
             std::vector<TfLiteDimensionType> format = {},
             std::vector<int> block_size = {}, std::vector<int> block_map = {},
             std::vector<int> shape_signature = {})
      : type(type),
        shape(shape),
        min(min),
        max(max),
        scale(scale),
        zero_point(zero_point),
        per_channel_quantization(per_channel_quantization),
        per_channel_quantization_scales(
            std::move(per_channel_quantization_scales)),
        per_channel_quantization_offsets(
            std::move(per_channel_quantization_offsets)),
        channel_index(channel_index),
        traversal_order(traversal_order),
        format(format),
        block_size(block_size),
        block_map(block_map),
        shape_signature(shape_signature) {}
  TensorType type;
  std::vector<int> shape;
  float min;
  float max;
  float scale;
  int32_t zero_point;
  bool per_channel_quantization;
  std::vector<float> per_channel_quantization_scales;
  std::vector<int64_t> per_channel_quantization_offsets;
  int32_t channel_index;
  std::vector<int> traversal_order;
  std::vector<TfLiteDimensionType> format;
  std::vector<int> block_size;
  std::vector<int> block_map;
  std::vector<int> shape_signature;
};
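
// For example, a quantized tensor can be described by its min/max range (the
// scale and zero point are then derived when the tensor is added to a model),
// while a float tensor only needs a type and shape. Shapes and ranges below
// are illustrative only:
//   TensorData quantized = {TensorType_UINT8, {1, 2, 2, 1}, -63.5f, 64.0f};
//   TensorData plain = {TensorType_FLOAT32, {1, 2, 2, 1}};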

class SingleOpResolver : public OpResolver {
 public:
  SingleOpResolver(const BuiltinOperator op, TfLiteRegistration* registration,
                   int version = 1)
      : op_(op), registration_(*registration) {
    registration_.builtin_code = static_cast<int32_t>(op);
    registration_.version = version;
  }
  const TfLiteRegistration* FindOp(BuiltinOperator op,
                                   int version) const override {
    if (op == op_) {
      return &registration_;
    }
    return nullptr;
  }
  const TfLiteRegistration* FindOp(const char* op, int version) const override {
    return nullptr;
  }

 private:
  const BuiltinOperator op_;
  TfLiteRegistration registration_;
};

class SingleOpModel {
 public:
  SingleOpModel() {}
  ~SingleOpModel();

  // Set a delegate that is applied right after the graph is prepared. This is
  // useful for testing other runtimes like NN API or GPU.
  // Note: the caller still owns the memory of the passed-in `delegate`.
  void SetDelegate(TfLiteDelegate* delegate) {
    delegate_ = delegate;
    // As this is a manually-set TF Lite delegate, we assume the intention of
    // the test is to test against the particular delegate, hence bypassing
    // applying TfLite default delegates (i.e. the XNNPACK delegate).
    if (delegate_ != nullptr) {
      SetBypassDefaultDelegates();
    }
  }

  TfLiteStatus ApplyDelegate();

  // Copying or assignment is disallowed to simplify ownership semantics.
  SingleOpModel(const SingleOpModel&) = delete;
  SingleOpModel& operator=(const SingleOpModel&) = delete;

  // Add a TensorType input tensor and return its index.
  int AddInput(const TensorData& t);
  int AddVariableInput(const TensorData& t);

  int AddIntermediate(TensorType type, const std::vector<float>& scale,
                      const std::vector<int64_t>& zero_point);

  // Templated version of AddConstInput() taking pointer and size.
  template <typename T>
  int AddConstInput(const TensorData& t, const T* data, size_t size) {
    int id = 0;
    if (t.per_channel_quantization) {
      id = AddTensorPerChannelQuant(t, data, size);
    } else {
      id = AddTensor(t, data, size);
    }
    inputs_.push_back(id);
    return id;
  }

  // Templated version of AddConstInput() taking vector and shape.
  template <typename T>
  int AddConstInput(TensorType type, const std::vector<T>& data,
                    std::initializer_list<int> shape) {
    return AddConstInput(TensorData{type, shape}, data.data(), data.size());
  }

  // Templated version of AddConstInput() taking TensorType, initializer_list
  // and shape.
  template <typename T>
  int AddConstInput(TensorType type, std::initializer_list<T> data,
                    std::initializer_list<int> shape) {
    return AddConstInput<T>(TensorData{type, shape}, data.begin(), data.size());
  }

  // Templated version of AddConstInput() taking TensorData and
  // initializer_list.
  template <typename T>
  int AddConstInput(const TensorData& t, std::initializer_list<T> data) {
    return AddConstInput(t, data.begin(), data.size());
  }

  // Templated version of AddConstInput() taking TensorData and vector.
  template <typename T>
  int AddConstInput(const TensorData& t, const std::vector<T>& data) {
    return AddConstInput(t, data.data(), data.size());
  }
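
  // As an example (names, values, and shapes here are illustrative, not from
  // any particular kernel test), a constant axis tensor could be added as:
  //   int axis = m.AddConstInput(TensorType_INT32, /*data=*/{0}, /*shape=*/{1});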

  // TODO(b/166202747): Use a better way to do type specialization. Reduce
  // duplicate code in the two functions below.
  int AddConstSparseInput(const TensorData& t,
                          const std::vector<int8_t>& data) {
    int id = tensors_.size();
    const int dims_count = t.traversal_order.size();
    std::vector<int8_t> dense_data(data);

    tflite::internal::sparsity::FormatConverter<int8_t> converter(
        t.shape, t.traversal_order, t.format, t.block_size, t.block_map);
    converter.DenseToSparse(dense_data.data());

    const auto& dim_metadata = converter.GetDimMetadata();
    const auto& sparse_data = converter.GetData();

    // Build sparsity parameter.
    std::vector<flatbuffers::Offset<DimensionMetadata>> fb_dim_metadata(
        dims_count);
    for (int i = 0; i < dims_count; i++) {
      const int metadata_idx = 2 * i;
      if (i < t.shape.size() &&
          t.format[t.traversal_order[i]] == kTfLiteDimSparseCSR) {
        auto array_segments =
            CreateInt32Vector(builder_, builder_.CreateVector<int>(
                                            dim_metadata[metadata_idx]))
                .Union();
        auto array_indices =
            CreateInt32Vector(builder_, builder_.CreateVector<int>(
                                            dim_metadata[metadata_idx + 1]))
                .Union();
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_SPARSE_CSR, 0,
            SparseIndexVector_Int32Vector, array_segments,
            SparseIndexVector_Int32Vector, array_indices);
      } else {
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_DENSE, dim_metadata[metadata_idx][0]);
      }
    }

    flatbuffers::Offset<SparsityParameters> s_param = CreateSparsityParameters(
        builder_, builder_.CreateVector<int>(t.traversal_order),
        builder_.CreateVector<int>(t.block_map),
        builder_.CreateVector(fb_dim_metadata));

    int buffer_id = 0;
    if (!data.empty()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add compressed data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer = builder_.CreateVector(
          reinterpret_cast<const uint8_t*>(sparse_data.data()),
          sparse_data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(CreateTensor(
        builder_, builder_.CreateVector<int>(t.shape), t.type,
        /*buffer=*/buffer_id,
        /*name=*/0, /*quantization=*/0, /*is_variable=*/false, s_param));

    inputs_.push_back(id);
    tensor_data_[id] = t;

    return id;
  }

  // Add a constant sparse tensor as input.
  template <typename T>
  int AddConstSparseInput(const TensorData& t, const std::vector<T>& data,
                          bool symmetric_quantize = false) {
    int id = tensors_.size();
    const int dims_count = t.traversal_order.size();
    std::vector<T> dense_data(data);

    tflite::internal::sparsity::FormatConverter<T> converter(
        t.shape, t.traversal_order, t.format, t.block_size, t.block_map);
    converter.DenseToSparse(dense_data.data());

    const auto dim_metadata = converter.GetDimMetadata();
    const auto sparse_data = converter.GetData();

    // Build sparsity parameter.
    std::vector<flatbuffers::Offset<DimensionMetadata>> fb_dim_metadata(
        dims_count);
    for (int i = 0; i < dims_count; i++) {
      const int metadata_idx = 2 * i;
      if (i < t.shape.size() &&
          t.format[t.traversal_order[i]] == kTfLiteDimSparseCSR) {
        auto array_segments =
            CreateInt32Vector(builder_,
                              builder_.CreateVector(dim_metadata[metadata_idx]))
                .Union();
        auto array_indices =
            CreateInt32Vector(
                builder_, builder_.CreateVector(dim_metadata[metadata_idx + 1]))
                .Union();
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_SPARSE_CSR, 0,
            SparseIndexVector_Int32Vector, array_segments,
            SparseIndexVector_Int32Vector, array_indices);
      } else {
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_DENSE, dim_metadata[metadata_idx][0]);
      }
    }

    flatbuffers::Offset<SparsityParameters> s_param = CreateSparsityParameters(
        builder_, builder_.CreateVector(t.traversal_order),
        builder_.CreateVector(t.block_map),
        builder_.CreateVector(fb_dim_metadata));

    flatbuffers::Offset<QuantizationParameters> q_params = 0;
    int buffer_id = 0;
    if (!data.empty()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add compressed data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      // When the quantization parameter is set for the added tensor, we
      // quantize the given data.
      bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);
      if (symmetric_quantize) {
        const int length = sparse_data.size();
        std::vector<int8_t> q(length);
        float min, max, scaling_factor;
        tensor_utils::SymmetricQuantizeFloats(
            sparse_data.data(), length, q.data(), &min, &max, &scaling_factor);
        std::vector<float> scales{scaling_factor};
        std::vector<int64_t> zero_points{0};
        q_params = CreateQuantizationParameters(
            builder_, 0, 0, builder_.CreateVector<float>(scales),
            builder_.CreateVector<int64_t>(zero_points));
        auto data_buffer = builder_.CreateVector(
            reinterpret_cast<const uint8_t*>(q.data()), q.size());
        buffers_.push_back(CreateBuffer(builder_, data_buffer));
      } else if (is_quantized) {
        CHECK_EQ(t.type, TensorType_INT8)
            << "The INT8 quantization is only supported for sparsified tensor";
        auto q = Quantize<int8_t>(sparse_data, t.scale, t.zero_point);
        std::vector<float> scales{t.scale};
        std::vector<int64_t> zero_points{0};
        q_params = CreateQuantizationParameters(
            builder_, t.min, t.max, builder_.CreateVector<float>(scales),
            builder_.CreateVector<int64_t>(zero_points));
        auto data_buffer = builder_.CreateVector(
            reinterpret_cast<const uint8_t*>(q.data()), q.size());
        buffers_.push_back(CreateBuffer(builder_, data_buffer));
      } else {
        auto data_buffer = builder_.CreateVector(
            reinterpret_cast<const uint8_t*>(sparse_data.data()),
            sizeof(T) * sparse_data.size());
        buffers_.push_back(CreateBuffer(builder_, data_buffer));
      }
    }

    tensors_.push_back(
        CreateTensor(builder_, builder_.CreateVector<int>(t.shape),
                     symmetric_quantize ? TensorType_INT8 : t.type,
                     /*buffer=*/buffer_id,
                     /*name=*/0, q_params, /*is_variable=*/false, s_param));

    inputs_.push_back(id);
    tensor_data_[id] = t;

    return id;
  }

  // Add a null input tensor (optional input) and return kTfLiteOptionalTensor.
  int AddNullInput();

  // Add a TensorType output tensor and return its index.
  int AddOutput(const TensorData& t);

  template <typename T>
  void QuantizeAndPopulate(int index, const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto q = Quantize<T>(data, t->params.scale, t->params.zero_point);
    PopulateTensor(index, 0, q.data(), q.data() + q.size());
  }
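
  // A sketch of typical usage in a quantized test (indices and values are
  // illustrative only): feed float data through QuantizeAndPopulate() and
  // read the result back with ExtractVector() plus Dequantize():
  //   m.QuantizeAndPopulate<uint8_t>(input, {-0.8f, 0.2f, 0.9f});
  //   m.Invoke();
  //   auto out = Dequantize<uint8_t>(m.ExtractVector<uint8_t>(output),
  //                                  m.GetScale(output), m.GetZeroPoint(output));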

  void SymmetricQuantizeAndPopulate(int index, const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, reinterpret_cast<uint8_t*>(q.data()),
                   reinterpret_cast<uint8_t*>(q.data() + q.size()));
  }

  void SignedSymmetricQuantizeAndPopulate(int index,
                                          const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, q.data(), q.data() + q.size());
  }

  // Quantize and populate data for filter with per channel quantization.
  void PerChannelSymmetricQuantizeAndPopulate(
      int index, const std::vector<float>& input_data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    const int channel_index = params->quantized_dimension;

    std::vector<int32_t> shape(t->dims->size);
    for (size_t i = 0; i < shape.size(); ++i) {
      shape[i] = t->dims->data[i];
    }
    const int32_t num_inputs = input_data.size();
    const int32_t num_channel = shape[channel_index];
    std::vector<int8_t> quantized_output(num_inputs);
    std::vector<float> scales_inv(num_channel);
    for (int i = 0; i < num_channel; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      scales_inv[i] = 1.0f / scale;
    }
    optimize::utils::SymmetricPerChannelQuantizeValues(
        input_data.data(), scales_inv, shape, channel_index, &quantized_output);

    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }

  template <typename T>
  void PerChannelQuantizeBiasPopulateTensor(
      const std::vector<float>& input_data, int index,
      TfLiteAffineQuantization* params) {
    const int32_t num_inputs = input_data.size();
    std::vector<T> quantized_output(num_inputs);
    for (int i = 0; i < num_inputs; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      quantized_output[i] = input_data[i] / scale;
    }
  }

  template <typename T>
  void PerChannelQuantizeBiasPopulateTensor(
      int index, const std::vector<float>& input_data,
      const TfLiteAffineQuantization* params) {
    const int32_t num_inputs = input_data.size();
    std::vector<T> quantized_output(num_inputs);
    for (int i = 0; i < num_inputs; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      quantized_output[i] = input_data[i] / scale;
    }
    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }

  // Quantize and populate data for bias with per channel quantization.
  void PerChannelQuantizeBias(int index, const std::vector<float>& input_data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    CHECK(t->type == kTfLiteInt32 || t->type == kTfLiteInt64);
    if (t->type == kTfLiteInt32) {
      PerChannelQuantizeBiasPopulateTensor<int32_t>(index, input_data, params);
    } else {
      PerChannelQuantizeBiasPopulateTensor<int64_t>(index, input_data, params);
    }
  }

  const std::vector<int>& GetShape(int id) { return tensor_data_.at(id).shape; }

  float GetScale(int id) { return tensor_data_.at(id).scale; }
  int32_t GetZeroPoint(int id) { return tensor_data_.at(id).zero_point; }

  // Define the operator in this model.
  void SetBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
                    flatbuffers::Offset<void> builtin_options);
  void SetCustomOp(const string& name,
                   const std::vector<uint8_t>& custom_option,
                   const std::function<TfLiteRegistration*()>& registration);

  // Allocate tensors and apply delegate.
  // Note that this is called by default in BuildInterpreter().
  void AllocateAndDelegate(bool apply_delegate);

  // Build the interpreter for this model. Also, resize and allocate all
  // tensors given the shapes of the inputs.
  // Note: if `allocate_and_delegate` is `false`, then the value of
  // `apply_delegate` is ignored.
  void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
                        int num_threads, bool allow_fp32_relax_to_fp16,
                        bool apply_delegate, bool allocate_and_delegate = true);

  void BuildInterpreter(std::vector<std::vector<int>> input_shapes);

  // Executes inference and returns the status code.
  TfLiteStatus Invoke();

  void PopulateStringTensor(int index, const std::vector<string>& content) {
    auto tensor = interpreter_->tensor(index);
    DynamicBuffer buf;
    for (const string& s : content) {
      buf.AddString(s.data(), s.length());
    }
    buf.WriteToTensor(tensor, /*new_shape=*/nullptr);
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with vector-taking variant below.
  template <typename T>
  void PopulateTensor(int index, const std::initializer_list<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK_EQ(t->type, typeToTfLiteType<T>())
          << "Type mismatch for tensor with index " << index << ". Requested "
          << TfLiteTypeGetName(typeToTfLiteType<T>()) << ", got "
          << TfLiteTypeGetName(t->type) << ".";
      LOG(FATAL) << "Unknown tensor error.";
    }
    for (const T& f : data) {
      *v = f;
      ++v;
    }
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with initializer_list-taking variant
  // above.
  template <typename T>
  void PopulateTensor(int index, const std::vector<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK_EQ(t->type, typeToTfLiteType<T>())
          << "Type mismatch for tensor with index " << index << ". Requested "
          << TfLiteTypeGetName(typeToTfLiteType<T>()) << ", got "
          << TfLiteTypeGetName(t->type) << ".";
      LOG(FATAL) << "Unknown tensor error.";
    }
    for (const T& f : data) {
      *v = f;
      ++v;
    }
  }

  // Partially populate the tensor, starting at the given offset.
  template <typename T>
  void PopulateTensor(int index, int offset, T* begin, T* end) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK(v) << "Type mismatch for tensor with index " << index
               << ". Requested " << typeToTfLiteType<T>() << ", got "
               << t->type;
    }
    memcpy(v + offset, begin, (end - begin) * sizeof(T));
  }

  // Return a vector with the flattened contents of a tensor.
  template <typename T>
  std::vector<T> ExtractVector(int index) const {
    const T* v = interpreter_->typed_tensor<T>(index);
    const auto* tensor = interpreter_->tensor(index);
    CHECK(v) << "Could not extract vector at index: " << index;
    int tensor_size;
    if (tensor->sparsity) {
      // Getting the size of the sparse buffer this way is based on the
      // assumption that the last dimension of the tensor is a compressed
      // dimension.
      tensor_size = tensor->sparsity
                        ->dim_metadata[tensor->sparsity->dim_metadata_size - 1]
                        .array_indices->size;
    } else {
      tensor_size = GetTensorSize(index);
    }

    return std::vector<T>(v, v + tensor_size);
  }

  // Return the TFLite model buffer, only available after BuildInterpreter.
  const uint8_t* GetModelBuffer() { return builder_.GetBufferPointer(); }

  std::vector<int> GetTensorShape(int index) {
    std::vector<int> result;
    TfLiteTensor* t = interpreter_->tensor(index);
    result.reserve(t->dims->size);
    for (int i = 0; i < t->dims->size; ++i) {
      result.push_back(t->dims->data[i]);
    }
    return result;
  }

  // Sets the number of threads available to the interpreter.
  // Reconstructs the interpreter if `reset_interpreter` is true.
  void SetNumThreads(int num_threads, bool reset_interpreter = false) {
    CHECK(interpreter_ != nullptr);
    if (reset_interpreter) {
      // Reconstruct the interpreter as the number of threads may affect
      // internal state, e.g. scratch buffer allocation.
      BuildInterpreter(input_shapes_, num_threads, allow_fp32_relax_to_fp16_,
                       apply_delegate_, allocate_and_delegate_);
    }
    interpreter_->SetNumThreads(num_threads);
  }

  void SetResolver(std::unique_ptr<OpResolver> resolver) {
    resolver_ = std::move(resolver);
  }

  // Indicate whether the test has the NNAPI delegate applied.
  static bool GetForceUseNnapi();
  int CountOpsExecutedByCpuKernel();

 protected:
  int32_t GetTensorSize(int index) const;

  // Tell TF Lite runtime to skip applying default delegates (i.e. the XNNPACK
  // delegate) when handling this op-level model.
  void SetBypassDefaultDelegates() { bypass_default_delegates_ = true; }

  flatbuffers::FlatBufferBuilder builder_;
  std::unique_ptr<tflite::Interpreter> interpreter_;
  std::unique_ptr<OpResolver> resolver_;

  std::vector<flatbuffers::Offset<OperatorCode>> opcodes_;
  std::vector<flatbuffers::Offset<Operator>> operators_;
  std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;

  template <typename T>
  int AddTensor(TensorData t, const T* data, size_t size,
                bool is_variable = false) {
    int id = tensors_.size();

    // This is slightly different depending on whether we are adding a
    // quantized or a regular tensor.
    bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);

    flatbuffers::Offset<QuantizationParameters> q_params = 0;

    if (is_quantized) {
      if (t.min != 0 || t.max != 0) {
        if (t.type == TensorType_UINT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<uint8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT32) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int32_t>(t.min, t.max);
        } else if (t.type == TensorType_INT16) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int16_t>(t.min, t.max);
        } else {
          LOG(FATAL) << "No support for the requested quantized type";
        }
        t.min = 0;
        t.max = 0;
      }

      std::vector<float> scales{t.scale};
      std::vector<int64_t> zero_points{t.zero_point};
      q_params = CreateQuantizationParameters(
          builder_, /*min=*/0, /*max=*/0, builder_.CreateVector<float>(scales),
          builder_.CreateVector<int64_t>(zero_points));
    }

    int buffer_id = 0;
    if (size) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      builder_.ForceVectorAlignment(size, sizeof(T), 16);
      // Add data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer = builder_.CreateVector(
          reinterpret_cast<const uint8_t*>(data), sizeof(T) * size);
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(CreateTensor(
        builder_, builder_.CreateVector<int>(t.shape), t.type,
        /*buffer=*/buffer_id,
        /*name=*/0, q_params, is_variable,
        /*sparsity=*/0, builder_.CreateVector<int>(t.shape_signature)));

    tensor_data_[id] = t;

    return id;
  }

 private:
  template <typename T>
  std::pair<float, int32_t> QuantizationParams(float f_min, float f_max) {
    int32_t zero_point = 0;
    float scale = 0;
    const T qmin = std::numeric_limits<T>::min();
    const T qmax = std::numeric_limits<T>::max();
    const float qmin_double = qmin;
    const float qmax_double = qmax;
    // 0 should always be a representable value. Let's assume that the initial
    // min,max range contains 0.
    CHECK_LE(f_min, 0);
    CHECK_GE(f_max, 0);
    if (f_min == f_max) {
      // Special case where the min,max range is a point. Should be {0}.
      CHECK_EQ(f_min, 0);
      CHECK_EQ(f_max, 0);
      return {scale, zero_point};
    }

    // General case.
    //
    // First determine the scale.
    scale = (f_max - f_min) / (qmax_double - qmin_double);

    // Zero-point computation.
    // First the initial floating-point computation. The zero-point can be
    // determined from solving an affine equation for any known pair
    // (real value, corresponding quantized value).
    // We know two such pairs: (rmin, qmin) and (rmax, qmax).
    // The arithmetic error on the zero point computed from either pair
    // will be roughly machine_epsilon * (sum of absolute values of terms)
    // so we want to use the variant that adds the smaller terms.
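    // As a hypothetical worked example: for uint8 with f_min = -1.0f and
    // f_max = 1.0f, scale = 2.0f / 255 (~0.00784); both candidate zero points
    // below evaluate to 127.5, and the nudging step further down rounds this
    // to a final zero point of 128. (Values are illustrative only.)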
    const float zero_point_from_min = qmin_double - f_min / scale;
    const float zero_point_from_max = qmax_double - f_max / scale;

    const float zero_point_from_min_error =
        std::abs(qmin_double) + std::abs(f_min / scale);

    const float zero_point_from_max_error =
        std::abs(qmax_double) + std::abs(f_max / scale);

    const float zero_point_double =
        zero_point_from_min_error < zero_point_from_max_error
            ? zero_point_from_min
            : zero_point_from_max;

    // Now we need to nudge the zero point to be an integer
    // (our zero points are integer, and this is motivated by the requirement
    // to be able to represent the real value "0" exactly as a quantized value,
    // which is required in multiple places, for example in Im2col with SAME
    // padding).

    T nudged_zero_point = 0;
    if (zero_point_double < qmin_double) {
      nudged_zero_point = qmin;
    } else if (zero_point_double > qmax_double) {
      nudged_zero_point = qmax;
    } else {
      nudged_zero_point = static_cast<T>(std::round(zero_point_double));
    }

    // The zero point should always be within the quantized range [qmin, qmax].
    CHECK_GE(nudged_zero_point, qmin);
    CHECK_LE(nudged_zero_point, qmax);

    zero_point = nudged_zero_point;
    // Finally, return the values.
    return {scale, zero_point};
  }

  int AddTensorPerChannelQuant(const TensorData& t) {
    // type does not matter when adding empty data.
    return AddTensorPerChannelQuant<uint8_t>(t, nullptr, 0);
  }

  template <typename T>
  int AddTensorPerChannelQuant(const TensorData& t, const T* data,
                               size_t size) {
    const int id = tensors_.size();
    flatbuffers::Offset<QuantizationParameters> q_params = 0;
    q_params = CreateQuantizationParameters(
        builder_, /*min=*/0, /*max=*/0,
        /*scale=*/
        builder_.CreateVector<float>(t.per_channel_quantization_scales),
        /*zero point=*/
        builder_.CreateVector<int64_t>(t.per_channel_quantization_offsets),
        QuantizationDetails_NONE, 0, t.channel_index);

    int buffer_id = 0;
    if (size) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer = builder_.CreateVector(
          reinterpret_cast<const uint8_t*>(data), sizeof(T) * size);
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(
        CreateTensor(builder_, builder_.CreateVector<int>(t.shape), t.type,
                     /*buffer=*/buffer_id,
                     /*name=*/0, q_params, /*is_variable=*/false));
    tensor_data_[id] = t;
    return id;
  }

  std::vector<int8_t> QuantizeTensor(int index,
                                     const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    const int length = data.size();
    std::vector<int8_t> q(length);
    float min, max, scaling_factor;
    tensor_utils::SymmetricQuantizeFloats(data.data(), length, q.data(), &min,
                                          &max, &scaling_factor);
    // Update quantization params.
    t->params.scale = scaling_factor;
    t->params.zero_point = 0;
    // Populate the new quantization params.
    TfLiteQuantizationFree(&t->quantization);
    t->quantization.type = kTfLiteAffineQuantization;
    auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
        malloc(sizeof(TfLiteAffineQuantization)));
    affine_quantization->quantized_dimension = 0;
    affine_quantization->scale = TfLiteFloatArrayCreate(1);
    affine_quantization->zero_point = TfLiteIntArrayCreate(1);
    affine_quantization->scale->data[0] = scaling_factor;
    affine_quantization->zero_point->data[0] = 0;
    t->quantization.params = affine_quantization;
    return q;
  }

  // Checks if acceleration has been done as expected.
  // Currently supports only NNAPI.
  // It verifies if the test was configured to run with NNAPI acceleration
  // or not (SetForceUseNnapi(true)).
  // In the affirmative case it checks that:
  // - the test case has been listed in the list of nnapi-accelerated cases
  // - the test is running on a device (NNAPI has been loaded)
  //
  // The list of nnapi-accelerated test cases is a file containing regexes to
  // include or exclude specific test cases, plus the minimum Android SDK
  // version the acceleration should be enabled for. For example:
  // To enable the test BorderFloat in TopKV2OpTest only from
  // android_sdk_version 29:
  //
  // TopKV2OpTest/BorderFloat,29
  //
  // And to have it always excluded while enabling all other Float tests
  // (the order of the rules is important, the first one matching is used):
  //
  // -TopKV2OpTest/BorderFloat
  // TopKV2OpTest/.+Float

  void ValidateAcceleration();

  // If the test was configured to use NNAPI and NNAPI was actually loaded,
  // checks if the single operation in the model has been accelerated.
  void ExpectOpAcceleratedWithNnapi(const std::string& test_id);

  std::map<int, TensorData> tensor_data_;
  std::vector<int32_t> inputs_;
  std::vector<int32_t> intermediates_;
  std::vector<int32_t> outputs_;
  std::vector<flatbuffers::Offset<Tensor>> tensors_;
  std::vector<flatbuffers::Offset<Buffer>> buffers_;
  TfLiteDelegate* delegate_ = nullptr;  // Does not own the memory.
  std::vector<std::vector<int>> input_shapes_;
  int num_applied_delegates_ = 0;
  bool allow_fp32_relax_to_fp16_ = false;
  bool apply_delegate_ = true;
  bool allocate_and_delegate_ = true;

  // Whether to bypass the application of TF Lite default delegates (i.e. the
  // XNNPACK delegate) at runtime.
  bool bypass_default_delegates_ = false;
};

// Populate string tensors.
template <>
inline void SingleOpModel::PopulateTensor<string>(
    int index, const std::initializer_list<string>& data) {
  PopulateStringTensor(index, data);
}

// Base class for single op unit tests.
// The tests are parameterized to test multiple kernels for a single op.
// The parameters are strings like "optimized" and "reference" to have better
// readability in test reports.
//
// To use this class:
// * Define a constant map from strings to TfLiteRegistration.
// * Implement a test class that inherits SingleOpTest.
// * Instantiate the test cases with the SingleOpTest::GetKernelTags helper
//   function.
// * Call GetRegistration to get the TfLiteRegistration to be used before
//   building the interpreter.
class SingleOpTest : public ::testing::TestWithParam<string> {
 public:
  static std::vector<string> GetKernelTags(
      const std::map<string, TfLiteRegistration*>& kernel_map) {
    std::vector<string> tags;
    tags.reserve(kernel_map.size());
    for (const auto& it : kernel_map) {
      tags.push_back(it.first);
    }
    return tags;
  }

 protected:
  virtual const std::map<string, TfLiteRegistration*>& GetKernelMap() = 0;
  TfLiteRegistration* GetRegistration() {
    return GetKernelMap().at(GetParam());
  }
};
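
// A minimal sketch of the instantiation step described above (class and map
// names here are placeholders, not from any real kernel test):
//   const std::map<string, TfLiteRegistration*>* kKernelMap = ...;
//   INSTANTIATE_TEST_SUITE_P(
//       MyOp, MyOpTest,
//       ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap)));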

// Returns the corresponding TensorType given the type T.
template <typename T>
TensorType GetTensorType() {
  if (std::is_same<T, float>::value) return TensorType_FLOAT32;
  if (std::is_same<T, TfLiteFloat16>::value) return TensorType_FLOAT16;
  if (std::is_same<T, double>::value) return TensorType_FLOAT64;
  if (std::is_same<T, int8_t>::value) return TensorType_INT8;
  if (std::is_same<T, int16_t>::value) return TensorType_INT16;
  if (std::is_same<T, uint16_t>::value) return TensorType_UINT16;
  if (std::is_same<T, int32_t>::value) return TensorType_INT32;
  if (std::is_same<T, uint32_t>::value) return TensorType_UINT32;
  if (std::is_same<T, int64_t>::value) return TensorType_INT64;
  if (std::is_same<T, uint8_t>::value) return TensorType_UINT8;
  if (std::is_same<T, string>::value) return TensorType_STRING;
  if (std::is_same<T, bool>::value) return TensorType_BOOL;
  return TensorType_MIN;  // default value
}

// Strings have a special implementation that is in test_util.cc.
template <>
std::vector<string> SingleOpModel::ExtractVector(int index) const;

// The TypeUnion struct specializations hold a collection of related types.
// Each struct holds: 1. a primitive type (e.g. float), 2. a TensorType (e.g.
// TensorType_FLOAT32), and 3. a TfLiteType (e.g. kTfLiteFloat32). The latter
// two are actually enum values and not raw types, but these specializations
// make it easy to use the gUnit Typed Test Suite:
// https://github.com/google/googletest/blob/master/googletest/docs/advanced.md#typed-tests
template <typename T>
struct TypeUnion;
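
// A sketch of how these specializations can drive a typed test suite (type
// and test names here are illustrative only):
//   template <typename TypeUnionT>
//   class MyTypedOpTest : public ::testing::Test {};
//   using MyTypes = ::testing::Types<TypeUnion<float>, TypeUnion<int8_t>>;
//   TYPED_TEST_SUITE(MyTypedOpTest, MyTypes);
//   TYPED_TEST(MyTypedOpTest, Works) {
//     using ScalarType = typename TypeParam::ScalarType;
//     SingleOpModel m;  // ... build with TypeParam::tensor_type ...
//   }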

template <>
struct TypeUnion<float> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_FLOAT32;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteFloat32;
  typedef float ScalarType;
};

template <>
struct TypeUnion<int32_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_INT32;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt32;
  typedef int32_t ScalarType;
};

template <>
struct TypeUnion<uint32_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_UINT32;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteUInt32;
  typedef uint32_t ScalarType;
};

template <>
struct TypeUnion<int16_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_INT16;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt16;
  typedef int16_t ScalarType;
};

template <>
struct TypeUnion<uint16_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_UINT16;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteUInt16;
  typedef uint16_t ScalarType;
};

template <>
struct TypeUnion<int8_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_INT8;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt8;
  typedef int8_t ScalarType;
};

template <>
struct TypeUnion<uint8_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_UINT8;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteUInt8;
  typedef uint8_t ScalarType;
};

class MultiOpModel : public SingleOpModel {
 public:
  MultiOpModel() : SingleOpModel() {}
  ~MultiOpModel() {}

  void AddBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
                    const flatbuffers::Offset<void>& builtin_options,
                    const std::vector<int32_t>& inputs,
                    const std::vector<int32_t>& outputs);

  void AddCustomOp(const string& name,
                   const std::vector<uint8_t>& custom_option,
                   const std::function<TfLiteRegistration*()>& registration,
                   const std::vector<int32_t>& inputs,
                   const std::vector<int32_t>& outputs);

  template <typename T>
  int AddInnerTensor(TensorData t) {
    return AddTensor<T>(t, {}, false);
  }
};
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_