/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
#define TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <cmath>
#include <complex>
#include <functional>
#include <initializer_list>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/string_type.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/testing/util.h"  // IWYU pragma: keep
#include "tensorflow/lite/tools/optimize/quantization_utils.h"
#include "tensorflow/lite/tools/optimize/sparsity/format_converter.h"
#include "tensorflow/lite/type_to_tflitetype.h"

namespace tflite {

// A gmock matcher that checks that elements of a float vector match to within
// a given tolerance.
std::vector<::testing::Matcher<float>> ArrayFloatNear(
    const std::vector<float>& values, float max_abs_error = 1e-5);

// A gmock matcher that checks that elements of a complex vector match to
// within a given tolerance.
std::vector<::testing::Matcher<std::complex<float>>> ArrayComplex64Near(
    const std::vector<std::complex<float>>& values, float max_abs_error = 1e-5);
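// Example (illustrative; `output_data` stands for any float vector produced by
// a test, e.g. the result of SingleOpModel::ExtractVector<float>(); analogous
// usage applies to ArrayComplex64Near):
//    EXPECT_THAT(output_data,
//                ::testing::ElementsAreArray(ArrayFloatNear({0.1f, 0.2f})));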

template <typename T>
inline std::vector<T> Quantize(const std::vector<float>& data, float scale,
                               int32_t zero_point) {
  std::vector<T> q;
  for (const auto& f : data) {
    q.push_back(static_cast<T>(std::max<float>(
        std::numeric_limits<T>::min(),
        std::min<float>(std::numeric_limits<T>::max(),
                        std::round(zero_point + (f / scale))))));
  }
  return q;
}

template <typename T>
inline std::vector<float> Dequantize(const std::vector<T>& data, float scale,
                                     int32_t zero_point) {
  std::vector<float> f;
  f.reserve(data.size());
  for (const T& q : data) {
    f.push_back(scale * (q - zero_point));
  }
  return f;
}
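
// Example (illustrative): with scale = 0.5f and zero_point = 0,
//    Quantize<int8_t>({1.0f, -2.0f}, 0.5f, 0) yields {2, -4}, and
//    Dequantize<int8_t>({2, -4}, 0.5f, 0) recovers {1.0f, -2.0f}.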

// A test model that contains a single operator. All operator inputs and
// outputs are external to the model, so the tests can directly access them.
// Typical usage:
//    SingleOpModel m;
//    int a = m.AddInput({TensorType_FLOAT32, a_shape});
//    int b = m.AddInput({TensorType_FLOAT32, b_shape});
//    int c = m.AddOutput({TensorType_FLOAT32, {}});
//    m.SetBuiltinOp(...);
//    m.BuildInterpreter({GetShape(a), GetShape(b)});
//    m.PopulateTensor(a, {...});
//    m.PopulateTensor(b, {...});
//    m.Invoke();
//    EXPECT_THAT(m.ExtractVector<float>(c),
//                ElementsAreArray(ArrayFloatNear({...})));
//

// A helper struct to construct test tensors. This is particularly useful for
// quantized tensors, which must have their scale and zero_point defined before
// the actual data is known. This mimics what happens in practice: quantization
// parameters are calculated during training or post-training.
struct TensorData {
  // NOLINTNEXTLINE
  TensorData(TensorType type = TensorType_FLOAT32, std::vector<int> shape = {},
             float min = 0.0f, float max = 0.0f, float scale = 0.0f,
             int32_t zero_point = 0, bool per_channel_quantization = false,
             std::vector<float> per_channel_quantization_scales = {},
             std::vector<int64_t> per_channel_quantization_offsets = {},
             int32_t channel_index = 0, std::vector<int> traversal_order = {},
             std::vector<TfLiteDimensionType> format = {},
             std::vector<int> block_size = {}, std::vector<int> block_map = {},
             std::vector<int> shape_signature = {})
      : type(type),
        shape(shape),
        min(min),
        max(max),
        scale(scale),
        zero_point(zero_point),
        per_channel_quantization(per_channel_quantization),
        per_channel_quantization_scales(
            std::move(per_channel_quantization_scales)),
        per_channel_quantization_offsets(
            std::move(per_channel_quantization_offsets)),
        channel_index(channel_index),
        traversal_order(traversal_order),
        format(format),
        block_size(block_size),
        block_map(block_map),
        shape_signature(shape_signature) {}
  TensorType type;
  std::vector<int> shape;
  float min;
  float max;
  float scale;
  int32_t zero_point;
  bool per_channel_quantization;
  std::vector<float> per_channel_quantization_scales;
  std::vector<int64_t> per_channel_quantization_offsets;
  int32_t channel_index;
  std::vector<int> traversal_order;
  std::vector<TfLiteDimensionType> format;
  std::vector<int> block_size;
  std::vector<int> block_map;
  std::vector<int> shape_signature;
};
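
// Example (illustrative): an asymmetrically quantized uint8 tensor can be
// described by just its [min, max] range; scale and zero_point are then
// derived when the tensor is added to a model:
//    TensorData input{TensorType_UINT8, /*shape=*/{1, 2, 2, 1},
//                     /*min=*/-63.5f, /*max=*/64.0f};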

class SingleOpResolver : public OpResolver {
 public:
  SingleOpResolver(const BuiltinOperator op, TfLiteRegistration* registration,
                   int version = 1)
      : op_(op), registration_(*registration) {
    registration_.builtin_code = static_cast<int32_t>(op);
    registration_.version = version;
  }
  const TfLiteRegistration* FindOp(BuiltinOperator op,
                                   int version) const override {
    if (op == op_) {
      return &registration_;
    }
    return nullptr;
  }
  const TfLiteRegistration* FindOp(const char* op, int version) const override {
    return nullptr;
  }

 private:
  const BuiltinOperator op_;
  TfLiteRegistration registration_;
};

class SingleOpModel {
 public:
  SingleOpModel() {}
  ~SingleOpModel();

  // Set a delegate that is applied right after the graph is prepared. This is
  // useful for testing other runtimes like NN API or GPU.
  // Note: the caller still owns the memory of the passed-in `delegate`.
  void SetDelegate(TfLiteDelegate* delegate) { delegate_ = delegate; }

  TfLiteStatus ApplyDelegate();

  // Copying or assignment is disallowed to simplify ownership semantics.
  SingleOpModel(const SingleOpModel&) = delete;
  SingleOpModel& operator=(const SingleOpModel&) = delete;

  // Add a TensorType input tensor and return its index.
  int AddInput(const TensorData& t);
  int AddVariableInput(const TensorData& t);

  int AddIntermediate(TensorType type, const std::vector<float>& scale,
                      const std::vector<int64_t>& zero_point);

  // Templated version of AddConstInput().
  template <typename T>
  int AddConstInput(const TensorData& t, std::initializer_list<T> data) {
    int id = 0;
    if (t.per_channel_quantization) {
      id = AddTensorPerChannelQuant(t, data);
    } else {
      id = AddTensor(t, data);
    }
    inputs_.push_back(id);
    return id;
  }
  template <typename T>
  int AddConstInput(TensorType type, std::initializer_list<T> data,
                    std::initializer_list<int> shape) {
    return AddConstInput(TensorData{type, shape}, data);
  }
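  // Example (illustrative): adding a constant int32 tensor of shape {1}
  // holding the value 0, e.g. for an `axis` input:
  //    int axis = m.AddConstInput<int32_t>(TensorType_INT32, /*data=*/{0},
  //                                        /*shape=*/{1});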

  // TODO(b/166202747): Use a better way to do type specialization. Reduce
  // duplicate code in the two functions below.
  int AddConstSparseInput(const TensorData& t,
                          const std::vector<int8_t>& data) {
    int id = tensors_.size();
    const int dims_count = t.traversal_order.size();
    std::vector<int8_t> dense_data(data);

    tflite::optimize::sparsity::FormatConverter<int8_t> converter(
        t.shape, t.traversal_order, t.format, t.block_size, t.block_map);
    converter.DenseToSparse(dense_data.data());

    const auto& dim_metadata = converter.GetDimMetadata();
    const auto& sparse_data = converter.GetData();

    // Build sparsity parameter.
    std::vector<flatbuffers::Offset<DimensionMetadata>> fb_dim_metadata(
        dims_count);
    for (int i = 0; i < dims_count; i++) {
      const int metadata_idx = 2 * i;
      if (i < t.shape.size() &&
          t.format[t.traversal_order[i]] == kTfLiteDimSparseCSR) {
        auto array_segments =
            CreateInt32Vector(builder_,
                              builder_.CreateVector(dim_metadata[metadata_idx]))
                .Union();
        auto array_indices =
            CreateInt32Vector(
                builder_, builder_.CreateVector(dim_metadata[metadata_idx + 1]))
                .Union();
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_SPARSE_CSR, 0,
            SparseIndexVector_Int32Vector, array_segments,
            SparseIndexVector_Int32Vector, array_indices);
      } else {
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_DENSE, dim_metadata[metadata_idx][0]);
      }
    }

    flatbuffers::Offset<SparsityParameters> s_param = CreateSparsityParameters(
        builder_, builder_.CreateVector(t.traversal_order),
        builder_.CreateVector(t.block_map),
        builder_.CreateVector(fb_dim_metadata));

    int buffer_id = 0;
    if (!data.empty()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add compressed data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer = builder_.CreateVector(
          reinterpret_cast<const uint8_t*>(sparse_data.data()),
          sparse_data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(CreateTensor(
        builder_, builder_.CreateVector<int>(t.shape), t.type,
        /*buffer=*/buffer_id,
        /*name=*/0, /*quantization=*/0, /*is_variable=*/false, s_param));

    inputs_.push_back(id);
    tensor_data_[id] = t;

    return id;
  }

  // Add a constant sparse tensor as input.
  template <typename T>
  int AddConstSparseInput(const TensorData& t, const std::vector<T>& data,
                          bool symmetric_quantize = false) {
    int id = tensors_.size();
    const int dims_count = t.traversal_order.size();
    std::vector<T> dense_data(data);

    tflite::optimize::sparsity::FormatConverter<T> converter(
        t.shape, t.traversal_order, t.format, t.block_size, t.block_map);
    converter.DenseToSparse(dense_data.data());

    const auto dim_metadata = converter.GetDimMetadata();
    const auto sparse_data = converter.GetData();

    // Build sparsity parameter.
    std::vector<flatbuffers::Offset<DimensionMetadata>> fb_dim_metadata(
        dims_count);
    for (int i = 0; i < dims_count; i++) {
      const int metadata_idx = 2 * i;
      if (i < t.shape.size() &&
          t.format[t.traversal_order[i]] == kTfLiteDimSparseCSR) {
        auto array_segments =
            CreateInt32Vector(builder_,
                              builder_.CreateVector(dim_metadata[metadata_idx]))
                .Union();
        auto array_indices =
            CreateInt32Vector(
                builder_, builder_.CreateVector(dim_metadata[metadata_idx + 1]))
                .Union();
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_SPARSE_CSR, 0,
            SparseIndexVector_Int32Vector, array_segments,
            SparseIndexVector_Int32Vector, array_indices);
      } else {
        fb_dim_metadata[i] = CreateDimensionMetadata(
            builder_, DimensionType_DENSE, dim_metadata[metadata_idx][0]);
      }
    }

    flatbuffers::Offset<SparsityParameters> s_param = CreateSparsityParameters(
        builder_, builder_.CreateVector(t.traversal_order),
        builder_.CreateVector(t.block_map),
        builder_.CreateVector(fb_dim_metadata));

    flatbuffers::Offset<QuantizationParameters> q_params = 0;
    int buffer_id = 0;
    if (!data.empty()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add compressed data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      if (symmetric_quantize) {
        const int length = sparse_data.size();
        std::vector<int8_t> q(length);
        float min, max, scaling_factor;
        tensor_utils::SymmetricQuantizeFloats(
            sparse_data.data(), length, q.data(), &min, &max, &scaling_factor);
        q_params = CreateQuantizationParameters(
            builder_, 0, 0, builder_.CreateVector<float>({scaling_factor}),
            builder_.CreateVector<int64_t>({0}));
        auto data_buffer = builder_.CreateVector(
            reinterpret_cast<const uint8_t*>(q.data()), q.size());
        buffers_.push_back(CreateBuffer(builder_, data_buffer));
      } else {
        auto data_buffer = builder_.CreateVector(
            reinterpret_cast<const uint8_t*>(sparse_data.data()),
            sizeof(T) * sparse_data.size());
        buffers_.push_back(CreateBuffer(builder_, data_buffer));
      }
    }

    tensors_.push_back(
        CreateTensor(builder_, builder_.CreateVector<int>(t.shape),
                     symmetric_quantize ? TensorType_INT8 : t.type,
                     /*buffer=*/buffer_id,
                     /*name=*/0, q_params, /*is_variable=*/false, s_param));

    inputs_.push_back(id);
    tensor_data_[id] = t;

    return id;
  }

  // Add a null input tensor (optional input) and return kTfLiteOptionalTensor.
  int AddNullInput();

  // Add a TensorType output tensor and return its index.
  int AddOutput(const TensorData& t);

  template <typename T>
  void QuantizeAndPopulate(int index, const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto q = Quantize<T>(data, t->params.scale, t->params.zero_point);
    PopulateTensor(index, 0, q.data(), q.data() + q.size());
  }

  void SymmetricQuantizeAndPopulate(int index, const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, reinterpret_cast<uint8_t*>(q.data()),
                   reinterpret_cast<uint8_t*>(q.data() + q.size()));
  }

  void SignedSymmetricQuantizeAndPopulate(int index,
                                          const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, q.data(), q.data() + q.size());
  }

  // Quantize and populate data for filter with per channel quantization.
  void PerChannelSymmetricQuantizeAndPopulate(
      int index, const std::vector<float>& input_data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    const int channel_index = params->quantized_dimension;

    std::vector<int32_t> shape(t->dims->size);
    for (size_t i = 0; i < shape.size(); ++i) {
      shape[i] = t->dims->data[i];
    }
    const int32_t num_inputs = input_data.size();
    const int32_t num_channel = shape[channel_index];
    std::vector<int8_t> quantized_output(num_inputs);
    std::vector<float> scales_inv(num_channel);
    for (int i = 0; i < num_channel; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      scales_inv[i] = 1.0f / scale;
    }
    optimize::utils::SymmetricPerChannelQuantizeValues(
        input_data.data(), scales_inv, shape, channel_index, &quantized_output);

    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }

  template <typename T>
  void PerChannelQuantizeBiasPopulateTensor(
      const std::vector<float>& input_data, int index,
      TfLiteAffineQuantization* params) {
    const int32_t num_inputs = input_data.size();
    std::vector<T> quantized_output(num_inputs);
    for (int i = 0; i < num_inputs; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      quantized_output[i] = input_data[i] / scale;
    }
  }

  template <typename T>
  void PerChannelQuantizeBiasPopulateTensor(
      int index, const std::vector<float>& input_data,
      const TfLiteAffineQuantization* params) {
    const int32_t num_inputs = input_data.size();
    std::vector<T> quantized_output(num_inputs);
    for (int i = 0; i < num_inputs; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      quantized_output[i] = input_data[i] / scale;
    }
    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }

  // Quantize and populate data for bias with per channel quantization.
  void PerChannelQuantizeBias(int index, const std::vector<float>& input_data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    CHECK(t->type == kTfLiteInt32 || t->type == kTfLiteInt64);
    if (t->type == kTfLiteInt32) {
      PerChannelQuantizeBiasPopulateTensor<int32_t>(index, input_data, params);
    } else {
      PerChannelQuantizeBiasPopulateTensor<int64_t>(index, input_data, params);
    }
  }

  const std::vector<int>& GetShape(int id) { return tensor_data_.at(id).shape; }

  float GetScale(int id) { return tensor_data_.at(id).scale; }
  int32_t GetZeroPoint(int id) { return tensor_data_.at(id).zero_point; }

  // Define the operator in this model.
  void SetBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
                    flatbuffers::Offset<void> builtin_options);
  void SetCustomOp(const string& name,
                   const std::vector<uint8_t>& custom_option,
                   const std::function<TfLiteRegistration*()>& registration);

  // Allocate tensors and apply delegate.
  // Note that this is called by default in BuildInterpreter().
  void AllocateAndDelegate(bool apply_delegate);

  // Build the interpreter for this model. Also, resize and allocate all
  // tensors given the shapes of the inputs.
  // Note: if `allocate_and_delegate` is `false`, then the value of
  // `apply_delegate` is ignored.
  void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
                        int num_threads, bool allow_fp32_relax_to_fp16,
                        bool apply_delegate, bool allocate_and_delegate = true);

  void BuildInterpreter(std::vector<std::vector<int>> input_shapes);

  // Executes inference, asserting success.
  void Invoke();

  // Executes inference *without* asserting success.
  TfLiteStatus InvokeUnchecked();

  void PopulateStringTensor(int index, const std::vector<string>& content) {
    auto tensor = interpreter_->tensor(index);
    DynamicBuffer buf;
    for (const string& s : content) {
      buf.AddString(s.data(), s.length());
    }
    buf.WriteToTensor(tensor, /*new_shape=*/nullptr);
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with vector-taking variant below.
  template <typename T>
  void PopulateTensor(int index, const std::initializer_list<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK_EQ(t->type, typeToTfLiteType<T>())
          << "Type mismatch for tensor with index " << index << ". Requested "
          << TfLiteTypeGetName(typeToTfLiteType<T>()) << ", got "
          << TfLiteTypeGetName(t->type) << ".";
      LOG(FATAL) << "Unknown tensor error.";
    }
    for (const T& f : data) {
      *v = f;
      ++v;
    }
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with initializer_list-taking variant
  // above.
  template <typename T>
  void PopulateTensor(int index, const std::vector<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK_EQ(t->type, typeToTfLiteType<T>())
          << "Type mismatch for tensor with index " << index << ". Requested "
          << TfLiteTypeGetName(typeToTfLiteType<T>()) << ", got "
          << TfLiteTypeGetName(t->type) << ".";
      LOG(FATAL) << "Unknown tensor error.";
    }
    for (const T& f : data) {
      *v = f;
      ++v;
    }
  }

  // Partially populate the tensor, starting at the given offset.
  template <typename T>
  void PopulateTensor(int index, int offset, T* begin, T* end) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK(v) << "Type mismatch for tensor with index " << index
               << ". Requested " << typeToTfLiteType<T>() << ", got "
               << t->type;
    }
    memcpy(v + offset, begin, (end - begin) * sizeof(T));
  }

  // Return a vector with the flattened contents of a tensor.
  template <typename T>
  std::vector<T> ExtractVector(int index) const {
    const T* v = interpreter_->typed_tensor<T>(index);
    const auto* tensor = interpreter_->tensor(index);
    CHECK(v) << "Could not extract vector at index: " << index;
    int tensor_size;
    if (tensor->sparsity) {
      // Getting the size of the sparse buffer this way is based on the
      // assumption that the last dimension of the tensor is a compressed
      // dimension.
      tensor_size = tensor->sparsity
                        ->dim_metadata[tensor->sparsity->dim_metadata_size - 1]
                        .array_indices->size;
    } else {
      tensor_size = GetTensorSize(index);
    }

    return std::vector<T>(v, v + tensor_size);
  }

  // Return the TFLite model buffer, only available after BuildInterpreter.
  const uint8_t* GetModelBuffer() { return builder_.GetBufferPointer(); }

  std::vector<int> GetTensorShape(int index) {
    std::vector<int> result;
    TfLiteTensor* t = interpreter_->tensor(index);
    result.reserve(t->dims->size);
    for (int i = 0; i < t->dims->size; ++i) {
      result.push_back(t->dims->data[i]);
    }
    return result;
  }

  void SetNumThreads(int num_threads) {
    CHECK(interpreter_ != nullptr);
    interpreter_->SetNumThreads(num_threads);
  }

  void SetResolver(std::unique_ptr<OpResolver> resolver) {
    resolver_ = std::move(resolver);
  }

  // Indicate whether the test has the NNAPI delegate applied.
  static bool GetForceUseNnapi();
  int CountOpsExecutedByCpuKernel();

 protected:
  int32_t GetTensorSize(int index) const;

  flatbuffers::FlatBufferBuilder builder_;
  std::unique_ptr<tflite::Interpreter> interpreter_;
  std::unique_ptr<OpResolver> resolver_;

  std::vector<flatbuffers::Offset<OperatorCode>> opcodes_;
  std::vector<flatbuffers::Offset<Operator>> operators_;
  std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;

  template <typename T>
  int AddTensor(TensorData t, std::initializer_list<T> data,
                bool is_variable = false) {
    int id = tensors_.size();

    // This is slightly different depending on whether we are adding a
    // quantized or a regular tensor.
    bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);

    flatbuffers::Offset<QuantizationParameters> q_params = 0;

    if (is_quantized) {
      if (t.min != 0 || t.max != 0) {
        if (t.type == TensorType_UINT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<uint8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT32) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int32_t>(t.min, t.max);
        } else if (t.type == TensorType_INT16) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int16_t>(t.min, t.max);
        } else {
          LOG(FATAL) << "No support for the requested quantized type";
        }
        t.min = 0;
        t.max = 0;
      }

      q_params = CreateQuantizationParameters(
          builder_, /*min=*/0, /*max=*/0,
          builder_.CreateVector<float>({t.scale}),
          builder_.CreateVector<int64_t>({t.zero_point}));
    }

    int buffer_id = 0;
    if (data.size()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer =
          builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
                                sizeof(T) * data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(CreateTensor(
        builder_, builder_.CreateVector<int>(t.shape), t.type,
        /*buffer=*/buffer_id,
        /*name=*/0, q_params, is_variable,
        /*sparsity=*/0, builder_.CreateVector<int>(t.shape_signature)));

    tensor_data_[id] = t;

    return id;
  }

 private:
  template <typename T>
  std::pair<float, int32_t> QuantizationParams(float f_min, float f_max) {
    int32_t zero_point = 0;
    float scale = 0;
    const T qmin = std::numeric_limits<T>::min();
    const T qmax = std::numeric_limits<T>::max();
    const float qmin_double = qmin;
    const float qmax_double = qmax;
    // 0 should always be a representable value. Let's assume that the initial
    // min,max range contains 0.
    CHECK_LE(f_min, 0);
    CHECK_GE(f_max, 0);
    if (f_min == f_max) {
      // Special case where the min,max range is a point. Should be {0}.
      CHECK_EQ(f_min, 0);
      CHECK_EQ(f_max, 0);
      return {scale, zero_point};
    }

    // General case.
    //
    // First determine the scale.
    scale = (f_max - f_min) / (qmax_double - qmin_double);

    // Zero-point computation.
    // First the initial floating-point computation. The zero-point can be
    // determined from solving an affine equation for any known pair
    // (real value, corresponding quantized value).
    // We know two such pairs: (rmin, qmin) and (rmax, qmax).
    // The arithmetic error on the zero point computed from either pair
    // will be roughly machine_epsilon * (sum of absolute values of terms)
    // so we want to use the variant that adds the smaller terms.
    const float zero_point_from_min = qmin_double - f_min / scale;
    const float zero_point_from_max = qmax_double - f_max / scale;

    const float zero_point_from_min_error =
        std::abs(qmin_double) + std::abs(f_min / scale);

    const float zero_point_from_max_error =
        std::abs(qmax_double) + std::abs(f_max / scale);

    const float zero_point_double =
        zero_point_from_min_error < zero_point_from_max_error
            ? zero_point_from_min
            : zero_point_from_max;

    // Now we need to nudge the zero point to be an integer
    // (our zero points are integer, and this is motivated by the requirement
    // to be able to represent the real value "0" exactly as a quantized value,
    // which is required in multiple places, for example in Im2col with SAME
    // padding).
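    // Worked example (illustrative): for T = uint8_t and [f_min, f_max] =
    // [-1.0f, 1.0f], scale = 2 / 255 ~ 0.00784; both zero-point candidates
    // equal 127.5, which is nudged to the integer 128 below.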

    T nudged_zero_point = 0;
    if (zero_point_double < qmin_double) {
      nudged_zero_point = qmin;
    } else if (zero_point_double > qmax_double) {
      nudged_zero_point = qmax;
    } else {
      nudged_zero_point = static_cast<T>(std::round(zero_point_double));
    }

    // The zero point should always be in the range of quantized value,
    // [qmin, qmax].
    CHECK_GE(nudged_zero_point, qmin);
    CHECK_LE(nudged_zero_point, qmax);

    zero_point = nudged_zero_point;
    // Finally, return the values.
    return {scale, zero_point};
  }

  int AddTensorPerChannelQuant(const TensorData& t) {
    // Type does not matter when adding empty data.
    return AddTensorPerChannelQuant<uint8_t>(t, {});
  }

  template <typename T>
  int AddTensorPerChannelQuant(const TensorData& t,
                               const std::initializer_list<T>& data) {
    const int id = tensors_.size();
    flatbuffers::Offset<QuantizationParameters> q_params = 0;
    q_params = CreateQuantizationParameters(
        builder_, /*min=*/0, /*max=*/0,
        /*scale=*/
        builder_.CreateVector<float>(t.per_channel_quantization_scales),
        /*zero point=*/
        builder_.CreateVector<int64_t>(t.per_channel_quantization_offsets),
        QuantizationDetails_NONE, 0, t.channel_index);

    int buffer_id = 0;
    if (data.size()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer =
          builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
                                sizeof(T) * data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(
        CreateTensor(builder_, builder_.CreateVector<int>(t.shape), t.type,
                     /*buffer=*/buffer_id,
                     /*name=*/0, q_params, /*is_variable=*/false));
    tensor_data_[id] = t;
    return id;
  }

  std::vector<int8_t> QuantizeTensor(int index,
                                     const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    const int length = data.size();
    std::vector<int8_t> q(length);
    float min, max, scaling_factor;
    tensor_utils::SymmetricQuantizeFloats(data.data(), length, q.data(), &min,
                                          &max, &scaling_factor);
    // Update quantization params.
    t->params.scale = scaling_factor;
    t->params.zero_point = 0;
    // Populate the new quantization params.
    TfLiteQuantizationFree(&t->quantization);
    t->quantization.type = kTfLiteAffineQuantization;
    auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
        malloc(sizeof(TfLiteAffineQuantization)));
    affine_quantization->quantized_dimension = 0;
    affine_quantization->scale = TfLiteFloatArrayCreate(1);
    affine_quantization->zero_point = TfLiteIntArrayCreate(1);
    affine_quantization->scale->data[0] = scaling_factor;
    affine_quantization->zero_point->data[0] = 0;
    t->quantization.params = affine_quantization;
    return q;
  }

  // Checks if acceleration has been done as expected.
  // Currently supports only NNAPI.
  // It verifies whether the test was configured to run with NNAPI acceleration
  // (SetForceUseNnapi(true)). If so, it checks that:
  // - the test case has been listed in the list of nnapi-accelerated cases
  // - the test is running on a device (NNAPI has been loaded)
  //
  // The list of nnapi-accelerated test cases is a file containing regexes to
  // include or exclude specific test cases, plus the minimum Android SDK
  // version the acceleration should be enabled for. For example:
  // To enable the test BorderFloat in TopKV2OpTest only from
  // android_sdk_version 29:
  //
  // TopKV2OpTest/BorderFloat,29
  //
  // And to have it always excluded while enabling all other Float tests
  // (the order of the rules is important; the first one matching is used):
  //
  // -TopKV2OpTest/BorderFloat
  // TopKV2OpTest/.+Float

  void ValidateAcceleration();

  // If the test was configured to use NNAPI and NNAPI was actually loaded,
  // checks if the single operation in the model has been accelerated.
  void ExpectOpAcceleratedWithNnapi(const std::string& test_id);

  std::map<int, TensorData> tensor_data_;
  std::vector<int32_t> inputs_;
  std::vector<int32_t> intermediates_;
  std::vector<int32_t> outputs_;
  std::vector<flatbuffers::Offset<Tensor>> tensors_;
  std::vector<flatbuffers::Offset<Buffer>> buffers_;
  TfLiteDelegate* delegate_ = nullptr;  // Doesn't own the memory.
  int num_applied_delegates_ = 0;
};

// Populate string tensors.
template <>
inline void SingleOpModel::PopulateTensor<string>(
    int index, const std::initializer_list<string>& data) {
  PopulateStringTensor(index, data);
}

// Base class for single op unit tests.
// The tests are parameterized to test multiple kernels for a single op.
// The parameters are strings like "optimized" and "reference" to have better
// readability in test reports.
//
// To use this class:
// * Define a constant map from strings to TfLiteRegistration.
// * Implement a test class that inherits SingleOpTest.
// * Instantiate the test cases with SingleOpTest::GetKernelTags helper
//   function.
// * Call GetRegistration to get the TfLiteRegistration to be used before
//   building the interpreter.
class SingleOpTest : public ::testing::TestWithParam<string> {
 public:
  static std::vector<string> GetKernelTags(
      const std::map<string, TfLiteRegistration*>& kernel_map) {
    std::vector<string> tags;
    tags.reserve(kernel_map.size());
    for (const auto& it : kernel_map) {
      tags.push_back(it.first);
    }
    return tags;
  }

 protected:
  virtual const std::map<string, TfLiteRegistration*>& GetKernelMap() = 0;
  TfLiteRegistration* GetRegistration() {
    return GetKernelMap().at(GetParam());
  }
};
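
// A minimal usage sketch (illustrative; `kKernelMap`, `MyOpTest`,
// `Register_MY_OP_REF` and `Register_MY_OP` are hypothetical names, not part
// of this header):
//
//    const std::map<string, TfLiteRegistration*> kKernelMap = {
//        {"Reference", Register_MY_OP_REF()},
//        {"Optimized", Register_MY_OP()},
//    };
//    class MyOpTest : public SingleOpTest {
//     protected:
//      const std::map<string, TfLiteRegistration*>& GetKernelMap() override {
//        return kKernelMap;
//      }
//    };
//    INSTANTIATE_TEST_SUITE_P(
//        MyOpTests, MyOpTest,
//        ::testing::ValuesIn(SingleOpTest::GetKernelTags(kKernelMap)));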

// Returns the corresponding TensorType given the type T.
template <typename T>
TensorType GetTensorType() {
  if (std::is_same<T, float>::value) return TensorType_FLOAT32;
  if (std::is_same<T, TfLiteFloat16>::value) return TensorType_FLOAT16;
  if (std::is_same<T, double>::value) return TensorType_FLOAT64;
  if (std::is_same<T, int8_t>::value) return TensorType_INT8;
  if (std::is_same<T, int16_t>::value) return TensorType_INT16;
  if (std::is_same<T, int32_t>::value) return TensorType_INT32;
  if (std::is_same<T, uint32_t>::value) return TensorType_UINT32;
  if (std::is_same<T, int64_t>::value) return TensorType_INT64;
  if (std::is_same<T, uint8_t>::value) return TensorType_UINT8;
  if (std::is_same<T, string>::value) return TensorType_STRING;
  if (std::is_same<T, bool>::value) return TensorType_BOOL;
  return TensorType_MIN;  // default value
}

// Strings have a special implementation that is in test_util.cc
template <>
std::vector<string> SingleOpModel::ExtractVector(int index) const;

// The TypeUnion struct specializations hold a collection of related types.
// Each struct holds: 1. a primitive type (e.g. float), 2. a TensorType (e.g.
// TensorType_FLOAT32), and 3. a TfLiteType (e.g. kTfLiteFloat32). The latter
// two are actually enum values and not raw types, but these specializations
// make it easy to use gUnit Typed Test Suite:
// https://github.com/google/googletest/blob/master/googletest/docs/advanced.md#typed-tests
template <typename T>
struct TypeUnion;

template <>
struct TypeUnion<float> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_FLOAT32;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteFloat32;
  typedef float ScalarType;
};

template <>
struct TypeUnion<int32_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_INT32;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt32;
  typedef int32_t ScalarType;
};

template <>
struct TypeUnion<uint32_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_UINT32;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteUInt32;
  typedef uint32_t ScalarType;
};

template <>
struct TypeUnion<int16_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_INT16;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt16;
  typedef int16_t ScalarType;
};

template <>
struct TypeUnion<int8_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_INT8;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteInt8;
  typedef int8_t ScalarType;
};

template <>
struct TypeUnion<uint8_t> {
 public:
  // NOLINTNEXTLINE
  static constexpr TensorType tensor_type = TensorType::TensorType_UINT8;
  // NOLINTNEXTLINE
  static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteUInt8;
  typedef uint8_t ScalarType;
};
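
// A minimal typed-test sketch using TypeUnion (illustrative; `QuantizedOpTest`
// is a hypothetical fixture name):
//
//    template <typename TypeUnionT>
//    class QuantizedOpTest : public ::testing::Test {};
//    using QuantizedTypes =
//        ::testing::Types<TypeUnion<int8_t>, TypeUnion<uint8_t>>;
//    TYPED_TEST_SUITE(QuantizedOpTest, QuantizedTypes);
//    TYPED_TEST(QuantizedOpTest, SomeCase) {
//      using ScalarType = typename TypeParam::ScalarType;
//      // Build a SingleOpModel using TypeParam::tensor_type and ScalarType.
//    }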

class MultiOpModel : public SingleOpModel {
 public:
  MultiOpModel() : SingleOpModel() {}
  ~MultiOpModel() {}

  void AddBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
                    const flatbuffers::Offset<void>& builtin_options,
                    const std::vector<int32_t>& inputs,
                    const std::vector<int32_t>& outputs);

  void AddCustomOp(const string& name,
                   const std::vector<uint8_t>& custom_option,
                   const std::function<TfLiteRegistration*()>& registration,
                   const std::vector<int32_t>& inputs,
                   const std::vector<int32_t>& outputs);

  template <typename T>
  int AddInnerTensor(TensorData t) {
    return AddTensor<T>(t, {}, false);
  }
};
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_