/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
#define TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <cmath>
#include <complex>
#include <functional>
#include <initializer_list>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/string_type.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/testing/util.h"  // IWYU pragma: keep
#include "tensorflow/lite/tools/optimize/quantization_utils.h"
#include "tensorflow/lite/tools/optimize/sparsity/format_converter.h"
#include "tensorflow/lite/type_to_tflitetype.h"

namespace tflite {

// A gmock matcher that checks that the elements of a float vector match the
// expected values to within a given tolerance.
std::vector<::testing::Matcher<float>> ArrayFloatNear(
    const std::vector<float>& values, float max_abs_error = 1e-5);
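
// Illustrative use (the model and tensor names are assumed): the returned
// matchers are typically wrapped in ElementsAreArray when comparing a model
// output against float expectations:
//   EXPECT_THAT(m.ExtractVector<float>(output),
//               ::testing::ElementsAreArray(ArrayFloatNear({0.1f, 0.2f}, 1e-4)));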

// A gmock matcher that checks that the elements of a complex vector match the
// expected values to within a given tolerance.
std::vector<::testing::Matcher<std::complex<float>>> ArrayComplex64Near(
    const std::vector<std::complex<float>>& values, float max_abs_error = 1e-5);

template <typename T>
inline std::vector<T> Quantize(const std::vector<float>& data, float scale,
                               int32_t zero_point) {
  std::vector<T> q;
  for (const auto& f : data) {
    q.push_back(static_cast<T>(std::max<float>(
        std::numeric_limits<T>::min(),
        std::min<float>(std::numeric_limits<T>::max(),
                        std::round(zero_point + (f / scale))))));
  }
  return q;
}

template <typename T>
inline std::vector<float> Dequantize(const std::vector<T>& data, float scale,
                                     int32_t zero_point) {
  std::vector<float> f;
  f.reserve(data.size());
  for (const T& q : data) {
    f.push_back(scale * (q - zero_point));
  }
  return f;
}
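
// Illustrative round trip (scale and zero_point values assumed): with
// scale = 0.5 and zero_point = 0,
//   Quantize<int8_t>({1.0f, -2.0f}, 0.5f, 0)  yields   {2, -4}, and
//   Dequantize<int8_t>({2, -4}, 0.5f, 0)      recovers {1.0f, -2.0f}.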

// A test model that contains a single operator. All operator inputs and
// outputs are external to the model, so the tests can directly access them.
// Typical usage:
//    SingleOpModel m;
//    int a = m.AddInput({TensorType_FLOAT32, a_shape});
//    int b = m.AddInput({TensorType_FLOAT32, b_shape});
//    int c = m.AddOutput({TensorType_FLOAT32, {}});
//    m.SetBuiltinOp(...);
//    m.BuildInterpreter({GetShape(a), GetShape(b)});
//    m.PopulateTensor(a, {...});
//    m.PopulateTensor(b, {...});
//    m.Invoke();
//    EXPECT_THAT(m.ExtractVector<float>(c), ArrayFloatNear({...}));
//

// A helper struct to construct test tensors. This is particularly useful for
// quantized tensors, which must have their scale and zero_point defined before
// the actual data is known. This mimics what happens in practice: quantization
// parameters are calculated during training or post-training.
struct TensorData {
  // NOLINTNEXTLINE
  TensorData(TensorType type = TensorType_FLOAT32, std::vector<int> shape = {},
             float min = 0.0f, float max = 0.0f, float scale = 0.0f,
             int32_t zero_point = 0, bool per_channel_quantization = false,
             std::vector<float> per_channel_quantization_scales = {},
             std::vector<int64_t> per_channel_quantization_offsets = {},
             int32_t channel_index = 0, std::vector<int> traversal_order = {},
             std::vector<TfLiteDimensionType> format = {},
             std::vector<int> block_size = {}, std::vector<int> block_map = {},
             std::vector<int> shape_signature = {})
      : type(type),
        shape(shape),
        min(min),
        max(max),
        scale(scale),
        zero_point(zero_point),
        per_channel_quantization(per_channel_quantization),
        per_channel_quantization_scales(
            std::move(per_channel_quantization_scales)),
        per_channel_quantization_offsets(
            std::move(per_channel_quantization_offsets)),
        channel_index(channel_index),
        traversal_order(traversal_order),
        format(format),
        block_size(block_size),
        block_map(block_map),
        shape_signature(shape_signature) {}
  TensorType type;
  std::vector<int> shape;
  float min;
  float max;
  float scale;
  int32_t zero_point;
  bool per_channel_quantization;
  std::vector<float> per_channel_quantization_scales;
  std::vector<int64_t> per_channel_quantization_offsets;
  int32_t channel_index;
  std::vector<int> traversal_order;
  std::vector<TfLiteDimensionType> format;
  std::vector<int> block_size;
  std::vector<int> block_map;
  std::vector<int> shape_signature;
};
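
// Illustrative construction (shape and range values assumed): an
// asymmetrically quantized uint8 input whose scale/zero_point are derived
// from the [min, max] range when the tensor is added to a SingleOpModel:
//   TensorData input{TensorType_UINT8, /*shape=*/{1, 2, 2, 1},
//                    /*min=*/-63.5f, /*max=*/64.0f};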

class SingleOpResolver : public OpResolver {
 public:
  SingleOpResolver(const BuiltinOperator op, TfLiteRegistration* registration,
                   int version = 1)
      : op_(op), registration_(*registration) {
    registration_.builtin_code = static_cast<int32_t>(op);
    registration_.version = version;
  }
  const TfLiteRegistration* FindOp(BuiltinOperator op,
                                   int version) const override {
    if (op == op_) {
      return &registration_;
    }
    return nullptr;
  }
  const TfLiteRegistration* FindOp(const char* op, int version) const override {
    return nullptr;
  }

 private:
  const BuiltinOperator op_;
  TfLiteRegistration registration_;
};
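
// Illustrative use (the registration pointer is assumed to come from the
// kernel under test, e.g. a SingleOpTest::GetRegistration() call):
//   m.SetResolver(std::make_unique<SingleOpResolver>(BuiltinOperator_ADD,
//                                                    GetRegistration()));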

class SingleOpModel {
 public:
  SingleOpModel() {}
  ~SingleOpModel();

  // Set a delegate that is applied right after the graph is prepared. This is
  // useful for testing other runtimes like NN API or GPU.
  void SetDelegate(TfLiteDelegate* delegate) { delegate_ = delegate; }

  TfLiteStatus ApplyDelegate();

  // Copying or assignment is disallowed to simplify ownership semantics.
  SingleOpModel(const SingleOpModel&) = delete;
  SingleOpModel& operator=(const SingleOpModel&) = delete;

  // Add a TensorType input tensor and return its index.
  int AddInput(const TensorData& t);
  int AddVariableInput(const TensorData& t);

  int AddIntermediate(TensorType type, const std::vector<float>& scale,
                      const std::vector<int64_t>& zero_point);

  // Templated version of AddConstInput().
  template <typename T>
  int AddConstInput(const TensorData& t, std::initializer_list<T> data) {
    int id = 0;
    if (t.per_channel_quantization) {
      id = AddTensorPerChannelQuant(t, data);
    } else {
      id = AddTensor(t, data);
    }
    inputs_.push_back(id);
    return id;
  }
  template <typename T>
  int AddConstInput(TensorType type, std::initializer_list<T> data,
                    std::initializer_list<int> shape) {
    return AddConstInput(TensorData{type, shape}, data);
  }
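
  // Illustrative use (data and shape assumed): add a 2x2 constant float input.
  //   m.AddConstInput<float>(TensorType_FLOAT32, {1.f, 2.f, 3.f, 4.f}, {2, 2});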

  // TODO(b/166202747): Use a better way to do type specialization. Reduce
  // duplicate code in the two functions below.
  int AddConstSparseInput(const TensorData& t,
                          const std::vector<int8_t>& data) {
    int id = tensors_.size();
    const int dims_count = t.traversal_order.size();
    std::vector<int8_t> dense_data(data);

    tflite::optimize::sparsity::FormatConverter<int8_t> converter(
        t.shape, t.traversal_order, t.format, t.block_size, t.block_map);
    converter.DenseToSparse(dense_data.data());

    const auto& dim_metadata = converter.GetDimMetadata();
    const auto& sparse_data = converter.GetData();

    // Build sparsity parameter.
    std::vector<flatbuffers::Offset<DimensionMetadata>> fb_dim_metadata(
        dims_count);
    for (int i = 0; i < dims_count; i++) {
      const int metadata_idx = 2 * i;
      if (i < t.shape.size() &&
          t.format[t.traversal_order[i]] == kTfLiteDimSparseCSR) {
        auto array_segments =
            CreateInt32Vector(builder_,
                              builder_.CreateVector(dim_metadata[metadata_idx]))
                .Union();
        auto array_indices =
            CreateInt32Vector(
                builder_, builder_.CreateVector(dim_metadata[metadata_idx + 1]))
                .Union();
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_SPARSE_CSR, 0,
            SparseIndexVector_Int32Vector, array_segments,
            SparseIndexVector_Int32Vector, array_indices);
      } else {
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_DENSE, dim_metadata[metadata_idx][0]);
      }
    }

    flatbuffers::Offset<SparsityParameters> s_param = CreateSparsityParameters(
        builder_, builder_.CreateVector(t.traversal_order),
        builder_.CreateVector(t.block_map),
        builder_.CreateVector(fb_dim_metadata));

    int buffer_id = 0;
    if (!data.empty()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add compressed data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer = builder_.CreateVector(
          reinterpret_cast<const uint8_t*>(sparse_data.data()),
          sparse_data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(CreateTensor(
        builder_, builder_.CreateVector<int>(t.shape), t.type,
        /*buffer=*/buffer_id,
        /*name=*/0, /*quantization=*/0, /*is_variable=*/false, s_param));

    inputs_.push_back(id);
    tensor_data_[id] = t;

    return id;
  }

  // Add a constant sparse tensor as input.
  template <typename T>
  int AddConstSparseInput(const TensorData& t, const std::vector<T>& data,
                          bool symmetric_quantize = false) {
    int id = tensors_.size();
    const int dims_count = t.traversal_order.size();
    std::vector<T> dense_data(data);

    tflite::optimize::sparsity::FormatConverter<T> converter(
        t.shape, t.traversal_order, t.format, t.block_size, t.block_map);
    converter.DenseToSparse(dense_data.data());

    const auto dim_metadata = converter.GetDimMetadata();
    const auto sparse_data = converter.GetData();

    // Build sparsity parameter.
    std::vector<flatbuffers::Offset<DimensionMetadata>> fb_dim_metadata(
        dims_count);
    for (int i = 0; i < dims_count; i++) {
      const int metadata_idx = 2 * i;
      if (i < t.shape.size() &&
          t.format[t.traversal_order[i]] == kTfLiteDimSparseCSR) {
        auto array_segments =
            CreateInt32Vector(builder_,
                              builder_.CreateVector(dim_metadata[metadata_idx]))
                .Union();
        auto array_indices =
            CreateInt32Vector(
                builder_, builder_.CreateVector(dim_metadata[metadata_idx + 1]))
                .Union();
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_SPARSE_CSR, 0,
            SparseIndexVector_Int32Vector, array_segments,
            SparseIndexVector_Int32Vector, array_indices);
      } else {
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_DENSE, dim_metadata[metadata_idx][0]);
      }
    }

    flatbuffers::Offset<SparsityParameters> s_param = CreateSparsityParameters(
        builder_, builder_.CreateVector(t.traversal_order),
        builder_.CreateVector(t.block_map),
        builder_.CreateVector(fb_dim_metadata));

    flatbuffers::Offset<QuantizationParameters> q_params = 0;
    int buffer_id = 0;
    if (!data.empty()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add compressed data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      if (symmetric_quantize) {
        const int length = sparse_data.size();
        std::vector<int8_t> q(length);
        float min, max, scaling_factor;
        tensor_utils::SymmetricQuantizeFloats(
            sparse_data.data(), length, q.data(), &min, &max, &scaling_factor);
        q_params = CreateQuantizationParameters(
            builder_, 0, 0, builder_.CreateVector<float>({scaling_factor}),
            builder_.CreateVector<int64_t>({0}));
        auto data_buffer = builder_.CreateVector(
            reinterpret_cast<const uint8_t*>(q.data()), q.size());
        buffers_.push_back(CreateBuffer(builder_, data_buffer));
      } else {
        auto data_buffer = builder_.CreateVector(
            reinterpret_cast<const uint8_t*>(sparse_data.data()),
            sizeof(T) * sparse_data.size());
        buffers_.push_back(CreateBuffer(builder_, data_buffer));
      }
    }

    tensors_.push_back(
        CreateTensor(builder_, builder_.CreateVector<int>(t.shape),
                     symmetric_quantize ? TensorType_INT8 : t.type,
                     /*buffer=*/buffer_id,
                     /*name=*/0, q_params, /*is_variable=*/false, s_param));

    inputs_.push_back(id);
    tensor_data_[id] = t;

    return id;
  }

  // Add a null input tensor (optional input) and return kTfLiteOptionalTensor.
  int AddNullInput();

  // Add a TensorType output tensor and return its index.
  int AddOutput(const TensorData& t);

  template <typename T>
  void QuantizeAndPopulate(int index, const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto q = Quantize<T>(data, t->params.scale, t->params.zero_point);
    PopulateTensor(index, 0, q.data(), q.data() + q.size());
  }
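
  // Illustrative use (input index and values assumed): quantize float data
  // into a uint8 input whose scale/zero_point were derived from its
  // TensorData min/max:
  //   m.QuantizeAndPopulate<uint8_t>(input, {-0.8f, 0.2f, 0.9f, 0.7f});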

  void SymmetricQuantizeAndPopulate(int index, const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, reinterpret_cast<uint8_t*>(q.data()),
                   reinterpret_cast<uint8_t*>(q.data() + q.size()));
  }

  void SignedSymmetricQuantizeAndPopulate(int index,
                                          const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, q.data(), q.data() + q.size());
  }

  // Quantize and populate data for filter with per channel quantization.
  void PerChannelSymmetricQuantizeAndPopulate(
      int index, const std::vector<float>& input_data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    const int channel_index = params->quantized_dimension;

    std::vector<int32_t> shape(t->dims->size);
    for (size_t i = 0; i < shape.size(); ++i) {
      shape[i] = t->dims->data[i];
    }
    const int32_t num_inputs = input_data.size();
    const int32_t num_channel = shape[channel_index];
    std::vector<int8_t> quantized_output(num_inputs);
    std::vector<float> scales_inv(num_channel);
    for (int i = 0; i < num_channel; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      scales_inv[i] = 1.0f / scale;
    }
    optimize::utils::SymmetricPerChannelQuantizeValues(
        input_data.data(), scales_inv, shape, channel_index, &quantized_output);

    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }

  template <typename T>
  void PerChannelQuantizeBiasPopulateTensor(
      const std::vector<float>& input_data, int index,
      TfLiteAffineQuantization* params) {
    const int32_t num_inputs = input_data.size();
    std::vector<T> quantized_output(num_inputs);
    for (int i = 0; i < num_inputs; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      quantized_output[i] = input_data[i] / scale;
    }
  }

  template <typename T>
  void PerChannelQuantizeBiasPopulateTensor(
      int index, const std::vector<float>& input_data,
      const TfLiteAffineQuantization* params) {
    const int32_t num_inputs = input_data.size();
    std::vector<T> quantized_output(num_inputs);
    for (int i = 0; i < num_inputs; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      quantized_output[i] = input_data[i] / scale;
    }
    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }

  // Quantize and populate data for bias with per channel quantization.
  void PerChannelQuantizeBias(int index, const std::vector<float>& input_data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    CHECK(t->type == kTfLiteInt32 || t->type == kTfLiteInt64);
    if (t->type == kTfLiteInt32) {
      PerChannelQuantizeBiasPopulateTensor<int32_t>(index, input_data, params);
    } else {
      PerChannelQuantizeBiasPopulateTensor<int64_t>(index, input_data, params);
    }
  }

  const std::vector<int>& GetShape(int id) { return tensor_data_.at(id).shape; }

  float GetScale(int id) { return tensor_data_.at(id).scale; }
  int32_t GetZeroPoint(int id) { return tensor_data_.at(id).zero_point; }

  // Define the operator in this model.
  void SetBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
                    flatbuffers::Offset<void> builtin_options);
  void SetCustomOp(const string& name,
                   const std::vector<uint8_t>& custom_option,
                   const std::function<TfLiteRegistration*()>& registration);

  // Allocate tensors and apply delegate.
  // Note that this is called by default in BuildInterpreter().
  void AllocateAndDelegate(bool apply_delegate);

  // Build the interpreter for this model. Also, resize and allocate all
  // tensors given the shapes of the inputs.
  // Note: 'apply_delegate' also serves to tell whether default TfLite delegates
  // should be applied implicitly for a test case. For example, when testing the
  // specific implementation of a TfLite delegate, it might be necessary to set
  // this to false.
  void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
                        int num_threads, bool allow_fp32_relax_to_fp16,
                        bool apply_delegate, bool allocate_and_delegate = true);

  void BuildInterpreter(std::vector<std::vector<int>> input_shapes);

  // Executes inference, asserting success.
  void Invoke();

  // Executes inference *without* asserting success.
  TfLiteStatus InvokeUnchecked();

  void PopulateStringTensor(int index, const std::vector<string>& content) {
    auto tensor = interpreter_->tensor(index);
    DynamicBuffer buf;
    for (const string& s : content) {
      buf.AddString(s.data(), s.length());
    }
    buf.WriteToTensor(tensor, /*new_shape=*/nullptr);
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with vector-taking variant below.
  template <typename T>
  void PopulateTensor(int index, const std::initializer_list<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK_EQ(t->type, typeToTfLiteType<T>())
          << "Type mismatch for tensor with index " << index << ". Requested "
          << TfLiteTypeGetName(typeToTfLiteType<T>()) << ", got "
          << TfLiteTypeGetName(t->type) << ".";
      LOG(FATAL) << "Unknown tensor error.";
    }
    for (const T& f : data) {
      *v = f;
      ++v;
    }
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with initializer_list-taking variant
  // above.
  template <typename T>
  void PopulateTensor(int index, const std::vector<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK_EQ(t->type, typeToTfLiteType<T>())
          << "Type mismatch for tensor with index " << index << ". Requested "
          << TfLiteTypeGetName(typeToTfLiteType<T>()) << ", got "
          << TfLiteTypeGetName(t->type) << ".";
      LOG(FATAL) << "Unknown tensor error.";
    }
    for (const T& f : data) {
      *v = f;
      ++v;
    }
  }

  // Partially populate the tensor, starting at the given offset.
  template <typename T>
  void PopulateTensor(int index, int offset, T* begin, T* end) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK(v) << "Type mismatch for tensor with index " << index
               << ". Requested " << typeToTfLiteType<T>() << ", got "
               << t->type;
    }
    memcpy(v + offset, begin, (end - begin) * sizeof(T));
  }

  // Return a vector with the flattened contents of a tensor.
  template <typename T>
  std::vector<T> ExtractVector(int index) const {
    const T* v = interpreter_->typed_tensor<T>(index);
    const auto* tensor = interpreter_->tensor(index);
    CHECK(v) << "Could not extract vector at index: " << index;
    int tensor_size;
    if (tensor->sparsity) {
      // Getting the size of the sparse buffer this way is based on the
      // assumption that the last dimension of the tensor is a compressed
      // dimension.
      tensor_size = tensor->sparsity
                        ->dim_metadata[tensor->sparsity->dim_metadata_size - 1]
                        .array_indices->size;
    } else {
      tensor_size = GetTensorSize(index);
    }

    return std::vector<T>(v, v + tensor_size);
  }
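
  // Illustrative use (output index assumed): read back a quantized output and
  // dequantize it for comparison against float expectations:
  //   auto q = m.ExtractVector<uint8_t>(output);
  //   auto f = Dequantize<uint8_t>(q, m.GetScale(output), m.GetZeroPoint(output));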

  // Return the TFLite model buffer, only available after BuildInterpreter.
  const uint8_t* GetModelBuffer() { return builder_.GetBufferPointer(); }

  std::vector<int> GetTensorShape(int index) {
    std::vector<int> result;
    TfLiteTensor* t = interpreter_->tensor(index);
    result.reserve(t->dims->size);
    for (int i = 0; i < t->dims->size; ++i) {
      result.push_back(t->dims->data[i]);
    }
    return result;
  }

  void SetNumThreads(int num_threads) {
    CHECK(interpreter_ != nullptr);
    interpreter_->SetNumThreads(num_threads);
  }

  void SetResolver(std::unique_ptr<OpResolver> resolver) {
    resolver_ = std::move(resolver);
  }

  // Indicate whether the test has the NNAPI delegate applied.
  static bool GetForceUseNnapi();
  int CountOpsExecutedByCpuKernel();

 protected:
  int32_t GetTensorSize(int index) const;

  flatbuffers::FlatBufferBuilder builder_;
  std::unique_ptr<tflite::Interpreter> interpreter_;
  std::unique_ptr<OpResolver> resolver_;

  std::vector<flatbuffers::Offset<OperatorCode>> opcodes_;
  std::vector<flatbuffers::Offset<Operator>> operators_;
  std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;

  template <typename T>
  int AddTensor(TensorData t, std::initializer_list<T> data,
                bool is_variable = false) {
    int id = tensors_.size();

    // This is slightly different depending on whether we are adding a
    // quantized or a regular tensor.
    bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);

    flatbuffers::Offset<QuantizationParameters> q_params = 0;

    if (is_quantized) {
      if (t.min != 0 || t.max != 0) {
        if (t.type == TensorType_UINT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<uint8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT32) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int32_t>(t.min, t.max);
        } else if (t.type == TensorType_INT16) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int16_t>(t.min, t.max);
        } else {
          LOG(FATAL) << "No support for the requested quantized type";
        }
        t.min = 0;
        t.max = 0;
      }

      q_params = CreateQuantizationParameters(
          builder_, /*min=*/0, /*max=*/0,
          builder_.CreateVector<float>({t.scale}),
          builder_.CreateVector<int64_t>({t.zero_point}));
    }

    int buffer_id = 0;
    if (data.size()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer =
          builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
                                sizeof(T) * data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(CreateTensor(
        builder_, builder_.CreateVector<int>(t.shape), t.type,
        /*buffer=*/buffer_id,
        /*name=*/0, q_params, is_variable,
        /*sparsity=*/0, builder_.CreateVector<int>(t.shape_signature)));

    tensor_data_[id] = t;

    return id;
  }

 private:
  template <typename T>
  std::pair<float, int32_t> QuantizationParams(float f_min, float f_max) {
    int32_t zero_point = 0;
    float scale = 0;
    const T qmin = std::numeric_limits<T>::min();
    const T qmax = std::numeric_limits<T>::max();
    const float qmin_double = qmin;
    const float qmax_double = qmax;
    // 0 should always be a representable value. Let's assume that the initial
    // min,max range contains 0.
    CHECK_LE(f_min, 0);
    CHECK_GE(f_max, 0);
    if (f_min == f_max) {
      // Special case where the min,max range is a point. Should be {0}.
      CHECK_EQ(f_min, 0);
      CHECK_EQ(f_max, 0);
      return {scale, zero_point};
    }

    // General case.
    //
    // First determine the scale.
    scale = (f_max - f_min) / (qmax_double - qmin_double);

    // Zero-point computation.
    // First the initial floating-point computation. The zero point can be
    // determined from solving an affine equation for any known pair
    // (real value, corresponding quantized value).
    // We know two such pairs: (rmin, qmin) and (rmax, qmax).
    // The arithmetic error on the zero point computed from either pair
    // will be roughly machine_epsilon * (sum of absolute values of terms),
    // so we want to use the variant that adds the smaller terms.
    const float zero_point_from_min = qmin_double - f_min / scale;
    const float zero_point_from_max = qmax_double - f_max / scale;

    const float zero_point_from_min_error =
        std::abs(qmin_double) + std::abs(f_min / scale);

    const float zero_point_from_max_error =
        std::abs(qmax_double) + std::abs(f_max / scale);

    const float zero_point_double =
        zero_point_from_min_error < zero_point_from_max_error
            ? zero_point_from_min
            : zero_point_from_max;

    // Now we need to nudge the zero point to be an integer
    // (our zero points are integer, and this is motivated by the requirement
    // to be able to represent the real value "0" exactly as a quantized value,
    // which is required in multiple places, for example in Im2col with SAME
    // padding).

    T nudged_zero_point = 0;
    if (zero_point_double < qmin_double) {
      nudged_zero_point = qmin;
    } else if (zero_point_double > qmax_double) {
      nudged_zero_point = qmax;
    } else {
      nudged_zero_point = static_cast<T>(std::round(zero_point_double));
    }

    // The zero point should always be within the range of quantized values,
    // [qmin, qmax].
    CHECK_GE(nudged_zero_point, qmin);
    CHECK_LE(nudged_zero_point, qmax);

    zero_point = nudged_zero_point;
    // Finally, return the values.
    return {scale, zero_point};
  }
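
  // Worked example (range values assumed): for uint8 with f_min = -1.0f and
  // f_max = 1.0f, scale = 2.0 / 255 (about 0.00784); both candidate zero
  // points evaluate to 127.5, which is nudged to the integer 128, so the real
  // value 0.0 maps exactly onto a representable quantized value.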

  int AddTensorPerChannelQuant(const TensorData& t) {
    // Type does not matter when adding empty data.
    return AddTensorPerChannelQuant<uint8_t>(t, {});
  }

  template <typename T>
  int AddTensorPerChannelQuant(const TensorData& t,
                               const std::initializer_list<T>& data) {
    const int id = tensors_.size();
    flatbuffers::Offset<QuantizationParameters> q_params = 0;
    q_params = CreateQuantizationParameters(
        builder_, /*min=*/0, /*max=*/0,
        /*scale=*/
        builder_.CreateVector<float>(t.per_channel_quantization_scales),
        /*zero point=*/
        builder_.CreateVector<int64_t>(t.per_channel_quantization_offsets),
        QuantizationDetails_NONE, 0, t.channel_index);

    int buffer_id = 0;
    if (data.size()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer =
          builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
                                sizeof(T) * data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(
        CreateTensor(builder_, builder_.CreateVector<int>(t.shape), t.type,
                     /*buffer=*/buffer_id,
                     /*name=*/0, q_params, /*is_variable=*/false));
    tensor_data_[id] = t;
    return id;
  }

  std::vector<int8_t> QuantizeTensor(int index,
                                     const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    const int length = data.size();
    std::vector<int8_t> q(length);
    float min, max, scaling_factor;
    tensor_utils::SymmetricQuantizeFloats(data.data(), length, q.data(), &min,
                                          &max, &scaling_factor);
    // Update quantization params.
    t->params.scale = scaling_factor;
    t->params.zero_point = 0;
    // Populate the new quantization params.
    TfLiteQuantizationFree(&t->quantization);
    t->quantization.type = kTfLiteAffineQuantization;
    auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
        malloc(sizeof(TfLiteAffineQuantization)));
    affine_quantization->quantized_dimension = 0;
    affine_quantization->scale = TfLiteFloatArrayCreate(1);
    affine_quantization->zero_point = TfLiteIntArrayCreate(1);
    affine_quantization->scale->data[0] = scaling_factor;
    affine_quantization->zero_point->data[0] = 0;
    t->quantization.params = affine_quantization;
    return q;
  }

  // Checks if acceleration has been done as expected.
  // Currently supports only NNAPI.
  // It verifies whether the test was configured to run with NNAPI acceleration
  // (SetForceUseNnapi(true)) or not.
  // If it was, it checks that:
  // - the test case has been listed in the list of nnapi-accelerated cases
  // - the test is running on a device (NNAPI has been loaded)
  //
  // The list of nnapi-accelerated test cases is a file containing regular
  // expressions that include or exclude specific test cases, plus the minimum
  // Android SDK version the acceleration should be enabled for. For example,
  // to enable the test BorderFloat in TopKV2OpTest only from
  // android_sdk_version 29:
  //
  // TopKV2OpTest/BorderFloat,29
  //
  // And to have it always excluded while enabling all other Float tests
  // (the order of the rules is important, the first one matching is used):
  //
  // -TopKV2OpTest/BorderFloat
  // TopKV2OpTest/.+Float

  void ValidateAcceleration();

  // If the test was configured to use NNAPI and NNAPI was actually loaded,
  // checks if the single operation in the model has been accelerated.
  void ExpectOpAcceleratedWithNnapi(const std::string& test_id);

  std::map<int, TensorData> tensor_data_;
  std::vector<int32_t> inputs_;
  std::vector<int32_t> intermediates_;
  std::vector<int32_t> outputs_;
  std::vector<flatbuffers::Offset<Tensor>> tensors_;
  std::vector<flatbuffers::Offset<Buffer>> buffers_;
  TfLiteDelegate* delegate_ = nullptr;
  int num_applied_delegates_ = 0;
};

// Populate string tensors.
template <>
inline void SingleOpModel::PopulateTensor<string>(
    int index, const std::initializer_list<string>& data) {
  PopulateStringTensor(index, data);
}

// Base class for single op unit tests.
// The tests are parameterized to test multiple kernels for a single op.
// The parameters are strings like "optimized" and "reference" for better
// readability in test reports.
//
// To use this class:
// * Define a constant map from strings to TfLiteRegistration.
// * Implement a test class that inherits SingleOpTest.
// * Instantiate the test cases with the SingleOpTest::GetKernelTags helper
//   function.
// * Call GetRegistration to get the TfLiteRegistration to be used before
//   building the interpreter.
class SingleOpTest : public ::testing::TestWithParam<string> {
 public:
  static std::vector<string> GetKernelTags(
      const std::map<string, TfLiteRegistration*>& kernel_map) {
    std::vector<string> tags;
    tags.reserve(kernel_map.size());
    for (const auto& it : kernel_map) {
      tags.push_back(it.first);
    }
    return tags;
  }

 protected:
  virtual const std::map<string, TfLiteRegistration*>& GetKernelMap() = 0;
  TfLiteRegistration* GetRegistration() {
    return GetKernelMap().at(GetParam());
  }
};
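
// Illustrative wiring (kernel map contents and test fixture are assumed):
//   const std::map<string, TfLiteRegistration*>* kKernelMap = ...;
//   class MyOpTest : public SingleOpTest {
//    protected:
//     const std::map<string, TfLiteRegistration*>& GetKernelMap() override {
//       return *kKernelMap;
//     }
//   };
//   INSTANTIATE_TEST_SUITE_P(
//       MyOpTest, MyOpTest,
//       ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap)));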

// Returns the corresponding TensorType given the type T.
template <typename T>
TensorType GetTensorType() {
  if (std::is_same<T, float>::value) return TensorType_FLOAT32;
  if (std::is_same<T, TfLiteFloat16>::value) return TensorType_FLOAT16;
  if (std::is_same<T, double>::value) return TensorType_FLOAT64;
  if (std::is_same<T, int8_t>::value) return TensorType_INT8;
  if (std::is_same<T, int16_t>::value) return TensorType_INT16;
  if (std::is_same<T, int32_t>::value) return TensorType_INT32;
  if (std::is_same<T, uint32_t>::value) return TensorType_UINT32;
  if (std::is_same<T, int64_t>::value) return TensorType_INT64;
  if (std::is_same<T, uint8_t>::value) return TensorType_UINT8;
  if (std::is_same<T, string>::value) return TensorType_STRING;
  if (std::is_same<T, bool>::value) return TensorType_BOOL;
  return TensorType_MIN;  // default value
}
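
// Illustrative use: GetTensorType<int8_t>() returns TensorType_INT8, which is
// handy when a type-parameterized test needs to build a TensorData for the
// scalar type under test, e.g. {GetTensorType<TypeParam>(), {1, 4}}.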

// Strings have a special implementation that is in test_util.cc
template <>
std::vector<string> SingleOpModel::ExtractVector(int index) const;

// The TypeUnion struct specializations hold a collection of related types.
// Each struct holds: 1. a primitive type (e.g. float), 2. a TensorType (e.g.
// TensorType_FLOAT32), and 3. a TfLiteType (e.g. kTfLiteFloat32). The latter
// two are actually enum values and not raw types, but these specializations
// make it easy to use gUnit Typed Test Suite:
// https://github.com/google/googletest/blob/master/googletest/docs/advanced.md#typed-tests
template <typename T>
struct TypeUnion;
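
// Illustrative typed-test wiring (fixture and type list are assumed):
//   template <typename T>
//   class QuantizedOpTest : public ::testing::Test {};
//   using QuantizedTypes = ::testing::Types<int8_t, uint8_t, int16_t>;
//   TYPED_TEST_SUITE(QuantizedOpTest, QuantizedTypes);
//   TYPED_TEST(QuantizedOpTest, Example) {
//     constexpr TensorType tt = TypeUnion<TypeParam>::tensor_type;
//     // ... build a SingleOpModel using tt ...
//   }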

template <>
struct TypeUnion<float> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_FLOAT32;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteFloat32;
  typedef float ScalarType;
};

template <>
struct TypeUnion<int32_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_INT32;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt32;
  typedef int32_t ScalarType;
};

template <>
struct TypeUnion<uint32_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_UINT32;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteUInt32;
  typedef uint32_t ScalarType;
};

template <>
struct TypeUnion<int16_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_INT16;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt16;
  typedef int16_t ScalarType;
};

template <>
struct TypeUnion<int8_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_INT8;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt8;
  typedef int8_t ScalarType;
};

template <>
struct TypeUnion<uint8_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_UINT8;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteUInt8;
  typedef uint8_t ScalarType;
};

class MultiOpModel : public SingleOpModel {
 public:
  MultiOpModel() : SingleOpModel() {}
  ~MultiOpModel() {}

  void AddBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
                    const flatbuffers::Offset<void>& builtin_options,
                    const std::vector<int32_t>& inputs,
                    const std::vector<int32_t>& outputs);

  void AddCustomOp(const string& name,
                   const std::vector<uint8_t>& custom_option,
                   const std::function<TfLiteRegistration*()>& registration,
                   const std::vector<int32_t>& inputs,
                   const std::vector<int32_t>& outputs);

  template <typename T>
  int AddInnerTensor(TensorData t) {
    return AddTensor<T>(t, {}, false);
  }
};
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_