/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
#define TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_

#include <cmath>
#include <complex>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/testing/util.h"
#include "tensorflow/lite/tools/optimize/quantization_utils.h"
#include "tensorflow/lite/tools/optimize/sparsity/format_converter.h"

namespace tflite {

// A gmock matcher that checks that elements of a float vector match to within
// a given tolerance.
std::vector<::testing::Matcher<float>> ArrayFloatNear(
    const std::vector<float>& values, float max_abs_error = 1e-5);

// A gmock matcher that checks that elements of a complex vector match to
// within a given tolerance.
std::vector<::testing::Matcher<std::complex<float>>> ArrayComplex64Near(
    const std::vector<std::complex<float>>& values, float max_abs_error = 1e-5);
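// Example usage sketch (the tensor index `out` and the expected values are
// hypothetical):
//   EXPECT_THAT(m.ExtractVector<float>(out),
//               ::testing::ElementsAreArray(ArrayFloatNear({1.0f, 2.0f}, 1e-4)));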

template <typename T>
inline std::vector<T> Quantize(const std::vector<float>& data, float scale,
                               int32_t zero_point) {
  std::vector<T> q;
  for (const auto& f : data) {
    q.push_back(static_cast<T>(std::max<float>(
        std::numeric_limits<T>::min(),
        std::min<float>(std::numeric_limits<T>::max(),
                        std::round(zero_point + (f / scale))))));
  }
  return q;
}
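// Illustrative example (parameter values assumed for this sketch): with
// scale = 0.5 and zero_point = 128,
//   Quantize<uint8_t>({0.0f, 1.0f, -1.0f}, 0.5f, 128)
// yields {128, 130, 126}, with results clamped to [0, 255].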

template <typename T>
inline std::vector<float> Dequantize(const std::vector<T>& data, float scale,
                                     int32_t zero_point) {
  std::vector<float> f;
  f.reserve(data.size());
  for (const T& q : data) {
    f.push_back(scale * (q - zero_point));
  }
  return f;
}
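// Illustrative example (same assumed parameters as above): with scale = 0.5
// and zero_point = 128,
//   Dequantize<uint8_t>({128, 130, 126}, 0.5f, 128)
// yields {0.0f, 1.0f, -1.0f}.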

// A test model that contains a single operator. All operator inputs and
// outputs are external to the model, so the tests can directly access them.
// Typical usage:
//   SingleOpModel m;
//   int a = m.AddInput({TensorType_FLOAT32, a_shape});
//   int b = m.AddInput({TensorType_FLOAT32, b_shape});
//   int c = m.AddOutput({TensorType_FLOAT32, {}});
//   m.SetBuiltinOp(...);
//   m.BuildInterpreter({GetShape(a), GetShape(b)});
//   m.PopulateTensor(a, {...});
//   m.PopulateTensor(b, {...});
//   m.Invoke();
//   EXPECT_THAT(m.ExtractVector<float>(c),
//               ElementsAreArray(ArrayFloatNear({...})));
//

// A helper struct to construct test tensors. This is particularly useful for
// quantized tensors, which must have their scale and zero_point defined before
// the actual data is known. This mimics what happens in practice: quantization
// parameters are calculated during training or post-training.
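// Example constructions (shapes and ranges here are purely illustrative):
//   TensorData float_t{TensorType_FLOAT32, {1, 2, 2, 1}};
//   // A uint8 tensor whose scale/zero_point will be derived from [min, max]
//   // when the tensor is added to the model:
//   TensorData quant_t{TensorType_UINT8, {1, 2, 2, 1}, -1.0f, 1.0f};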
struct TensorData {
  TensorData(TensorType type = TensorType_FLOAT32, std::vector<int> shape = {},
             float min = 0.0f, float max = 0.0f, float scale = 0.0f,
             int32_t zero_point = 0, bool per_channel_quantization = false,
             std::vector<float> per_channel_quantization_scales = {},
             std::vector<int64_t> per_channel_quantization_offsets = {},
             int32_t channel_index = 0)
      : type(type),
        shape(shape),
        min(min),
        max(max),
        scale(scale),
        zero_point(zero_point),
        per_channel_quantization(per_channel_quantization),
        per_channel_quantization_scales(
            std::move(per_channel_quantization_scales)),
        per_channel_quantization_offsets(
            std::move(per_channel_quantization_offsets)),
        channel_index(channel_index) {}
  TensorType type;
  std::vector<int> shape;
  float min;
  float max;
  float scale;
  int32_t zero_point;
  bool per_channel_quantization;
  std::vector<float> per_channel_quantization_scales;
  std::vector<int64_t> per_channel_quantization_offsets;
  int32_t channel_index;
};

class SingleOpResolver : public OpResolver {
 public:
  SingleOpResolver(const BuiltinOperator op, TfLiteRegistration* registration,
                   int version = 1)
      : op_(op), registration_(*registration) {
    registration_.builtin_code = static_cast<int32_t>(op);
    registration_.version = version;
  }
  const TfLiteRegistration* FindOp(BuiltinOperator op,
                                   int version) const override {
    if (op == op_) {
      return &registration_;
    }
    return nullptr;
  }
  const TfLiteRegistration* FindOp(const char* op, int version) const override {
    return nullptr;
  }

 private:
  const BuiltinOperator op_;
  TfLiteRegistration registration_;
};

class SingleOpModel {
 public:
  SingleOpModel() {}
  ~SingleOpModel();

  // Set a function callback that is run right after the graph is prepared and
  // that allows applying external delegates. This is useful for testing other
  // runtimes like the NN API or GPU.
  void SetApplyDelegate(std::function<void(Interpreter*)> apply_delegate_fn) {
    apply_delegate_fn_ = apply_delegate_fn;
  }
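  // Example sketch (the delegate object `my_delegate` is assumed to exist and
  // to outlive the model):
  //   m.SetApplyDelegate([&](Interpreter* interpreter) {
  //     interpreter->ModifyGraphWithDelegate(my_delegate);
  //   });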

  void ApplyDelegate();

  // Copying or assignment is disallowed to simplify ownership semantics.
  SingleOpModel(const SingleOpModel&) = delete;
  SingleOpModel& operator=(const SingleOpModel&) = delete;

  // Add a TensorType input tensor and return its index.
  int AddInput(TensorType type, bool is_variable = false) {
    return AddInput(TensorData{type}, is_variable);
  }
  int AddInput(const TensorData& t, bool is_variable = false);

  int AddIntermediate(TensorType type, const std::vector<float>& scale,
                      const std::vector<int64_t>& zero_point);

  // Templated version of AddConstInput().
  template <typename T>
  int AddConstInput(const TensorData& t, std::initializer_list<T> data) {
    int id = 0;
    if (t.per_channel_quantization) {
      id = AddTensorPerChannelQuant(t, data);
    } else {
      id = AddTensor(t, data);
    }
    inputs_.push_back(id);
    return id;
  }
  template <typename T>
  int AddConstInput(TensorType type, std::initializer_list<T> data,
                    std::initializer_list<int> shape) {
    return AddConstInput(TensorData{type, shape}, data);
  }

  // Add a constant sparse tensor as input. For unit test purposes, we choose
  // to compress all dimensions and traverse them in the original order.
  template <typename T>
  int AddConstSparseInput(TensorType type, std::initializer_list<int> shape,
                          std::initializer_list<T> data) {
    return AddSparseTensor(TensorData{type, shape}, data);
  }

  // Add a null input tensor (optional input) and return kTfLiteOptionalTensor.
  int AddNullInput();

  // Add a TensorType output tensor and return its index.
  int AddOutput(TensorType type) { return AddOutput(TensorData{type}); }
  int AddOutput(const TensorData& t);

  template <typename T>
  void QuantizeAndPopulate(int index, const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto q = Quantize<T>(data, t->params.scale, t->params.zero_point);
    PopulateTensor(index, 0, q.data(), q.data() + q.size());
  }
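  // Example sketch (the input index and float values are hypothetical; the
  // tensor's scale/zero_point must already be set, e.g. via its TensorData
  // min/max):
  //   m.QuantizeAndPopulate<uint8_t>(input, {0.0f, 0.5f, 1.0f});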

  void SymmetricQuantizeAndPopulate(int index, const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, reinterpret_cast<uint8_t*>(q.data()),
                   reinterpret_cast<uint8_t*>(q.data() + q.size()));
  }

  void SignedSymmetricQuantizeAndPopulate(int index,
                                          const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, q.data(), q.data() + q.size());
  }

  // Quantize and populate data for a filter with per-channel quantization.
  void PerChannelSymmetricQuantizeAndPopulate(
      int index, const std::vector<float>& input_data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    const int channel_index = params->quantized_dimension;

    std::vector<int32_t> shape(t->dims->size);
    for (size_t i = 0; i < shape.size(); ++i) {
      shape[i] = t->dims->data[i];
    }
    const int32_t num_inputs = input_data.size();
    const int32_t num_channel = shape[channel_index];
    std::vector<int8_t> quantized_output(num_inputs);
    std::vector<float> scales_inv(num_channel);
    for (int i = 0; i < num_channel; ++i) {
      const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                   : params->scale->data[i];
      scales_inv[i] = 1.0f / scale;
    }
    optimize::utils::SymmetricPerChannelQuantizeValues(
        input_data.data(), scales_inv, shape, channel_index, &quantized_output);

    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }

  // Quantize and populate data for a bias with per-channel quantization.
  void PerChannelQuantizeBias(int index, const std::vector<float>& input_data) {
    const int32_t num_inputs = input_data.size();
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    CHECK(t->type == kTfLiteInt32 || t->type == kTfLiteInt64);
    if (t->type == kTfLiteInt32) {
      std::vector<int32_t> quantized_output(num_inputs);
      for (int i = 0; i < num_inputs; ++i) {
        const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                     : params->scale->data[i];
        quantized_output[i] = input_data[i] / scale;
      }
      PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                     quantized_output.data() + quantized_output.size());
    } else {
      std::vector<int64_t> quantized_output(num_inputs);
      for (int i = 0; i < num_inputs; ++i) {
        const float scale = params->scale->size == 1 ? params->scale->data[0]
                                                     : params->scale->data[i];
        quantized_output[i] = input_data[i] / scale;
      }
      PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                     quantized_output.data() + quantized_output.size());
    }
  }

  const std::vector<int>& GetShape(int id) { return tensor_data_.at(id).shape; }

  float GetScale(int id) { return tensor_data_.at(id).scale; }
  int32_t GetZeroPoint(int id) { return tensor_data_.at(id).zero_point; }

  // Define the operator in this model.
  void SetBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
                    flatbuffers::Offset<void> builtin_options);
  void SetCustomOp(const string& name,
                   const std::vector<uint8_t>& custom_option,
                   const std::function<TfLiteRegistration*()>& registration);

  // Build the interpreter for this model. Also, resize and allocate all
  // tensors given the shapes of the inputs.
  void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
                        int num_threads, bool allow_fp32_relax_to_fp16,
                        bool apply_delegate = true);

  void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
                        int num_threads);

  void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
                        bool allow_fp32_relax_to_fp16, bool apply_delegate);

  void BuildInterpreter(std::vector<std::vector<int>> input_shapes);

  // Executes inference, asserting success.
  void Invoke();

  // Executes inference *without* asserting success.
  TfLiteStatus InvokeUnchecked();

  void PopulateStringTensor(int index, const std::vector<string>& content) {
    auto tensor = interpreter_->tensor(index);
    DynamicBuffer buf;
    for (const string& s : content) {
      buf.AddString(s.data(), s.length());
    }
    buf.WriteToTensor(tensor, /*new_shape=*/nullptr);
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with vector-taking variant below.
  template <typename T>
  void PopulateTensor(int index, const std::initializer_list<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK_EQ(t->type, typeToTfLiteType<T>())
          << "Type mismatch for tensor with index " << index << ". Requested "
          << TfLiteTypeGetName(typeToTfLiteType<T>()) << ", got "
          << TfLiteTypeGetName(t->type) << ".";
      LOG(FATAL) << "Unknown tensor error.";
    }
    for (const T& f : data) {
      *v = f;
      ++v;
    }
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with initializer_list-taking variant
  // above.
  template <typename T>
  void PopulateTensor(int index, const std::vector<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK_EQ(t->type, typeToTfLiteType<T>())
          << "Type mismatch for tensor with index " << index << ". Requested "
          << TfLiteTypeGetName(typeToTfLiteType<T>()) << ", got "
          << TfLiteTypeGetName(t->type) << ".";
      LOG(FATAL) << "Unknown tensor error.";
    }
    for (const T& f : data) {
      *v = f;
      ++v;
    }
  }

  // Partially populate the tensor, starting at the given offset.
  template <typename T>
  void PopulateTensor(int index, int offset, T* begin, T* end) {
    T* v = interpreter_->typed_tensor<T>(index);
    memcpy(v + offset, begin, (end - begin) * sizeof(T));
  }

  // Return a vector with the flattened contents of a tensor.
  template <typename T>
  std::vector<T> ExtractVector(int index) const {
    const T* v = interpreter_->typed_tensor<T>(index);
    const auto* tensor = interpreter_->tensor(index);
    CHECK(v);
    int tensor_size;
    if (tensor->sparsity) {
      // Getting the size of the sparse buffer this way is based on the
      // assumption that the last dimension of the tensor is a compressed
      // dimension.
      tensor_size = tensor->sparsity
                        ->dim_metadata[tensor->sparsity->dim_metadata_size - 1]
                        .array_indices->size;
    } else {
      tensor_size = GetTensorSize(index);
    }

    return std::vector<T>(v, v + tensor_size);
  }

  std::vector<int> GetTensorShape(int index) {
    std::vector<int> result;
    TfLiteTensor* t = interpreter_->tensor(index);
    result.reserve(t->dims->size);
    for (int i = 0; i < t->dims->size; ++i) {
      result.push_back(t->dims->data[i]);
    }
    return result;
  }

  void SetNumThreads(int num_threads) {
    CHECK(interpreter_ != nullptr);
    interpreter_->SetNumThreads(num_threads);
  }

  void SetResolver(std::unique_ptr<OpResolver> resolver) {
    resolver_ = std::move(resolver);
  }

  // Enables NNAPI delegate application during interpreter creation.
  static void SetForceUseNnapi(bool use_nnapi);
  static bool GetForceUseNnapi();
  int CountOpsExecutedByCpuKernel();

 protected:
  int32_t GetTensorSize(int index) const;

  flatbuffers::FlatBufferBuilder builder_;
  std::unique_ptr<tflite::Interpreter> interpreter_;
  std::unique_ptr<OpResolver> resolver_;

  std::vector<flatbuffers::Offset<OperatorCode>> opcodes_;
  std::vector<flatbuffers::Offset<Operator>> operators_;
  std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;

  template <typename T>
  int AddTensor(TensorData t, std::initializer_list<T> data,
                bool is_variable = false) {
    int id = tensors_.size();

    // This is slightly different depending on whether we are adding a
    // quantized or a regular tensor.
    bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);

    flatbuffers::Offset<QuantizationParameters> q_params = 0;

    if (is_quantized) {
      if (t.min != 0 || t.max != 0) {
        if (t.type == TensorType_UINT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<uint8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT32) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int32_t>(t.min, t.max);
        } else if (t.type == TensorType_INT16) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int16_t>(t.min, t.max);
        } else {
          LOG(FATAL) << "No support for the requested quantized type";
        }
        t.min = 0;
        t.max = 0;
      }

      q_params = CreateQuantizationParameters(
          builder_, /*min=*/0, /*max=*/0,
          builder_.CreateVector<float>({t.scale}),
          builder_.CreateVector<int64_t>({t.zero_point}));
    }

    int buffer_id = 0;
    if (data.size()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer =
          builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
                                sizeof(T) * data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(CreateTensor(builder_,
                                    builder_.CreateVector<int>(t.shape), t.type,
                                    /*buffer=*/buffer_id,
                                    /*name=*/0, q_params, is_variable));

    tensor_data_[id] = t;

    return id;
  }

 private:
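  // Calculates the scale and zero point for an asymmetric quantization from a
  // real-valued [f_min, f_max] range, then nudges the zero point to an
  // integer. Illustrative example (assumed values): for T = uint8_t and
  // [f_min, f_max] = [-1.0f, 1.0f], scale = 2 / 255 and the nudged zero point
  // is round(0 - (-1.0f / scale)) = round(127.5) = 128.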
  template <typename T>
  std::pair<float, int32_t> QuantizationParams(float f_min, float f_max) {
    int32_t zero_point = 0;
    float scale = 0;
    const T qmin = std::numeric_limits<T>::min();
    const T qmax = std::numeric_limits<T>::max();
    const float qmin_double = qmin;
    const float qmax_double = qmax;
    // 0 should always be a representable value. Let's assume that the initial
    // min,max range contains 0.
    CHECK_LE(f_min, 0);
    CHECK_GE(f_max, 0);
    if (f_min == f_max) {
      // Special case where the min,max range is a point. Should be {0}.
      CHECK_EQ(f_min, 0);
      CHECK_EQ(f_max, 0);
      return {scale, zero_point};
    }

    // General case.
    //
    // First determine the scale.
    scale = (f_max - f_min) / (qmax_double - qmin_double);

    // Zero-point computation.
    // First the initial floating-point computation. The zero-point can be
    // determined from solving an affine equation for any known pair
    // (real value, corresponding quantized value).
    // We know two such pairs: (rmin, qmin) and (rmax, qmax).
    // The arithmetic error on the zero point computed from either pair
    // will be roughly machine_epsilon * (sum of absolute values of terms)
    // so we want to use the variant that adds the smaller terms.
    const float zero_point_from_min = qmin_double - f_min / scale;
    const float zero_point_from_max = qmax_double - f_max / scale;

    const float zero_point_from_min_error =
        std::abs(qmin_double) + std::abs(f_min / scale);

    const float zero_point_from_max_error =
        std::abs(qmax_double) + std::abs(f_max / scale);

    const float zero_point_double =
        zero_point_from_min_error < zero_point_from_max_error
            ? zero_point_from_min
            : zero_point_from_max;

    // Now we need to nudge the zero point to be an integer
    // (our zero points are integer, and this is motivated by the requirement
    // to be able to represent the real value "0" exactly as a quantized value,
    // which is required in multiple places, for example in Im2col with SAME
    // padding).

    T nudged_zero_point = 0;
    if (zero_point_double < qmin_double) {
      nudged_zero_point = qmin;
    } else if (zero_point_double > qmax_double) {
      nudged_zero_point = qmax;
    } else {
      nudged_zero_point = static_cast<T>(std::round(zero_point_double));
    }

    // The zero point should always be in the range of quantized values,
    // [qmin, qmax].
    CHECK_GE(nudged_zero_point, qmin);
    CHECK_LE(nudged_zero_point, qmax);

    zero_point = nudged_zero_point;
    // Finally, return the values.
    return {scale, zero_point};
  }

  int AddTensorPerChannelQuant(const TensorData& t) {
    // Type does not matter when adding empty data.
    return AddTensorPerChannelQuant<uint8_t>(t, {});
  }

  template <typename T>
  int AddTensorPerChannelQuant(const TensorData& t,
                               const std::initializer_list<T>& data) {
    const int id = tensors_.size();
    flatbuffers::Offset<QuantizationParameters> q_params = 0;
    q_params = CreateQuantizationParameters(
        builder_, /*min=*/0, /*max=*/0,
        /*scale=*/
        builder_.CreateVector<float>(t.per_channel_quantization_scales),
        /*zero point=*/
        builder_.CreateVector<int64_t>(t.per_channel_quantization_offsets),
        QuantizationDetails_NONE, 0, t.channel_index);

    int buffer_id = 0;
    if (data.size()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer =
          builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
                                sizeof(T) * data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(
        CreateTensor(builder_, builder_.CreateVector<int>(t.shape), t.type,
                     /*buffer=*/buffer_id,
                     /*name=*/0, q_params, /*is_variable=*/false));
    tensor_data_[id] = t;
    return id;
  }

  template <typename T>
  int AddSparseTensor(const TensorData& t, std::initializer_list<T> data) {
    int id = tensors_.size();
    const auto& shape = t.shape;
    const int dims_count = shape.size();
    std::vector<TfLiteDimensionType> format(dims_count);
    std::vector<int> traversal_order(dims_count);
    std::vector<T> dense_data(data);

    // Compress all dimensions and traverse them in the original order.
    for (int i = 0; i < dims_count; i++) {
      format[i] = kTfLiteDimSparseCSR;
      traversal_order[i] = i;
    }

    tflite::optimize::sparsity::FormatConverter<T> converter(
        shape, traversal_order, format);
    converter.DenseToSparse(dense_data.data());

    const auto& dim_metadata = converter.GetDimMetadata();
    const auto& sparse_data = converter.GetData();

    // Build sparsity parameter.
    std::vector<flatbuffers::Offset<DimensionMetadata>> fb_dim_metadata(
        dims_count);
    for (int i = 0; i < dims_count; i++) {
      const int metadata_idx = 2 * i;
      fb_dim_metadata[i] = CreateDimensionMetadata(
          builder_, DimensionType_SPARSE_CSR, 0,
          builder_.CreateVector(dim_metadata[metadata_idx]),
          builder_.CreateVector(dim_metadata[metadata_idx + 1]));
    }

    flatbuffers::Offset<SparsityParameters> s_param = CreateSparsityParameters(
        builder_, builder_.CreateVector(traversal_order), 0,
        builder_.CreateVector(fb_dim_metadata));

    int buffer_id = 0;
    if (data.size()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add compressed data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer = builder_.CreateVector(
          reinterpret_cast<const uint8_t*>(sparse_data.data()),
          sizeof(T) * sparse_data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(CreateTensor(
        builder_, builder_.CreateVector<int>(t.shape), t.type,
        /*buffer=*/buffer_id,
        /*name=*/0, /*quantization=*/0, /*is_variable=*/false, s_param));

    inputs_.push_back(id);
    tensor_data_[id] = t;

    return id;
  }

  std::vector<int8_t> QuantizeTensor(int index,
                                     const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    const int length = data.size();
    std::vector<int8_t> q(length);
    float min, max, scaling_factor;
    tensor_utils::SymmetricQuantizeFloats(data.data(), length, q.data(), &min,
                                          &max, &scaling_factor);
    // Update quantization params.
    t->params.scale = scaling_factor;
    t->params.zero_point = 0;
    // Populate the new quantization params.
    TfLiteQuantizationFree(&t->quantization);
    t->quantization.type = kTfLiteAffineQuantization;
    auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
        malloc(sizeof(TfLiteAffineQuantization)));
    affine_quantization->quantized_dimension = 0;
    affine_quantization->scale = TfLiteFloatArrayCreate(1);
    affine_quantization->zero_point = TfLiteIntArrayCreate(1);
    affine_quantization->scale->data[0] = scaling_factor;
    affine_quantization->zero_point->data[0] = 0;
    t->quantization.params = affine_quantization;
    return q;
  }

  // Checks if acceleration has been done as expected.
  // Currently supports only NNAPI.
  // It verifies if the test was configured to run with NNAPI acceleration
  // or not (SetForceUseNnapi(true)).
  // If it was, it checks that:
  // - the test case has been listed in the list of nnapi-accelerated cases
  // - the test is running on a device (NNAPI has been loaded)
  //
  // The list of nnapi-accelerated test cases is a file containing regexes that
  // include or exclude specific test cases, plus the minimum Android SDK
  // version the acceleration should be enabled from. For example, to enable
  // the test BorderFloat in TopKV2OpTest only from android_sdk_version 29:
  //
  // TopKV2OpTest/BorderFloat,29
  //
  // And to have it always excluded while enabling all other Float tests
  // (the order of the rules is important, the first one matching is used):
  //
  // -TopKV2OpTest/BorderFloat
  // TopKV2OpTest/.+Float

  void ValidateAcceleration();

  // If the test was configured to use NNAPI and NNAPI was actually loaded,
  // checks if the single operation in the model has been accelerated.
  void ExpectOpAcceleratedWithNnapi(const std::string& test_id);

  std::map<int, TensorData> tensor_data_;
  std::vector<int32_t> inputs_;
  std::vector<int32_t> intermediates_;
  std::vector<int32_t> outputs_;
  std::vector<flatbuffers::Offset<Tensor>> tensors_;
  std::vector<flatbuffers::Offset<Buffer>> buffers_;
  // A function pointer that gets called after the interpreter is created but
  // before evaluation happens. This is useful for applying a delegate.
  std::function<void(Interpreter*)> apply_delegate_fn_;
};

// Populate string tensors.
template <>
inline void SingleOpModel::PopulateTensor<string>(
    int index, const std::initializer_list<string>& data) {
  PopulateStringTensor(index, data);
}

// Base class for single op unit tests.
// The tests are parameterized to test multiple kernels for a single op.
// The parameters are strings like "optimized" and "reference" to have better
// readability in test reports.
//
// To use this class:
// * Define a constant map from strings to TfLiteRegistration.
// * Implement a test class that inherits SingleOpTest.
// * Instantiate the test cases with SingleOpTest::GetKernelTags helper
//   function.
// * Call GetRegistration to get the TfLiteRegistration to be used before
//   building the interpreter.
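//
// Example instantiation sketch (the kernel map contents and names below are
// hypothetical):
//   const std::map<string, TfLiteRegistration*>& MyOpKernelMap() {
//     static const auto* kMap = new std::map<string, TfLiteRegistration*>(
//         {{"reference", ops::builtin::Register_ADD()}});
//     return *kMap;
//   }
//   class MyOpTest : public SingleOpTest {
//    protected:
//     const std::map<string, TfLiteRegistration*>& GetKernelMap() override {
//       return MyOpKernelMap();
//     }
//   };
//   INSTANTIATE_TEST_SUITE_P(
//       MyOpKernels, MyOpTest,
//       ::testing::ValuesIn(SingleOpTest::GetKernelTags(MyOpKernelMap())));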
class SingleOpTest : public ::testing::TestWithParam<string> {
 public:
  static std::vector<string> GetKernelTags(
      const std::map<string, TfLiteRegistration*>& kernel_map) {
    std::vector<string> tags;
    tags.reserve(kernel_map.size());
    for (const auto& it : kernel_map) {
      tags.push_back(it.first);
    }
    return tags;
  }

 protected:
  virtual const std::map<string, TfLiteRegistration*>& GetKernelMap() = 0;
  TfLiteRegistration* GetRegistration() {
    return GetKernelMap().at(GetParam());
  }
};

// Returns the corresponding TensorType given the type T.
template <typename T>
TensorType GetTensorType() {
  if (std::is_same<T, float>::value) return TensorType_FLOAT32;
  if (std::is_same<T, TfLiteFloat16>::value) return TensorType_FLOAT16;
  if (std::is_same<T, int8_t>::value) return TensorType_INT8;
  if (std::is_same<T, int16_t>::value) return TensorType_INT16;
  if (std::is_same<T, int32_t>::value) return TensorType_INT32;
  if (std::is_same<T, int64_t>::value) return TensorType_INT64;
  if (std::is_same<T, uint8_t>::value) return TensorType_UINT8;
  if (std::is_same<T, string>::value) return TensorType_STRING;
  return TensorType_MIN;  // default value
}

// Strings have a special implementation that is in test_util.cc.
template <>
std::vector<string> SingleOpModel::ExtractVector(int index) const;

// The TypeUnion struct specializations hold a collection of related types.
// Each struct holds: 1. a primitive type (e.g. float), 2. a TensorType (e.g.
// TensorType_FLOAT32), and 3. a TfLiteType (e.g. kTfLiteFloat32). The latter
// two are actually enum values and not raw types, but these specializations
// make it easy to use the googletest typed test suite:
// https://github.com/google/googletest/blob/master/googletest/docs/advanced.md#typed-tests
template <typename T>
struct TypeUnion;
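
// Example typed-test sketch (the fixture, test names, and chosen types are
// purely illustrative):
//   template <typename TypeUnionT>
//   class MyTypedOpTest : public ::testing::Test {};
//   using MyTypes = ::testing::Types<TypeUnion<float>, TypeUnion<int8_t>>;
//   TYPED_TEST_SUITE(MyTypedOpTest, MyTypes);
//   TYPED_TEST(MyTypedOpTest, DoesSomething) {
//     using T = typename TypeParam::ScalarType;
//     // ... build a model using TypeParam::tensor_type and T ...
//   }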

template <>
struct TypeUnion<float> {
 public:
  static const TensorType tensor_type = TensorType::TensorType_FLOAT32;
  static const TfLiteType tflite_type = TfLiteType::kTfLiteFloat32;
  typedef float ScalarType;
};

template <>
struct TypeUnion<int32_t> {
 public:
  static const TensorType tensor_type = TensorType::TensorType_INT32;
  static const TfLiteType tflite_type = TfLiteType::kTfLiteInt32;
  typedef int32_t ScalarType;
};

template <>
struct TypeUnion<int16_t> {
 public:
  static const TensorType tensor_type = TensorType::TensorType_INT16;
  static const TfLiteType tflite_type = TfLiteType::kTfLiteInt16;
  typedef int16_t ScalarType;
};

template <>
struct TypeUnion<int8_t> {
 public:
  static const TensorType tensor_type = TensorType::TensorType_INT8;
  static const TfLiteType tflite_type = TfLiteType::kTfLiteInt8;
  typedef int8_t ScalarType;
};

template <>
struct TypeUnion<uint8_t> {
 public:
  static const TensorType tensor_type = TensorType::TensorType_UINT8;
  static const TfLiteType tflite_type = TfLiteType::kTfLiteUInt8;
  typedef uint8_t ScalarType;
};

class MultiOpModel : public SingleOpModel {
 public:
  MultiOpModel() : SingleOpModel() {}
  ~MultiOpModel() {}

  void AddBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
                    const flatbuffers::Offset<void>& builtin_options,
                    const std::vector<int32_t>& inputs,
                    const std::vector<int32_t>& outputs);

  void AddCustomOp(const string& name,
                   const std::vector<uint8_t>& custom_option,
                   const std::function<TfLiteRegistration*()>& registration,
                   const std::vector<int32_t>& inputs,
                   const std::vector<int32_t>& outputs);

  template <typename T>
  int AddInnerTensor(TensorData t) {
    return AddTensor<T>(t, {}, false);
  }
};
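
// Example sketch of a two-op graph built with MultiOpModel (the ops, option
// types, shapes, and values below are illustrative, not prescriptive):
//   MultiOpModel m;
//   int in = m.AddInput({TensorType_FLOAT32, {1, 4}});
//   int mid = m.AddInnerTensor<float>({TensorType_FLOAT32, {1, 4}});
//   int out = m.AddOutput({TensorType_FLOAT32, {1, 4}});
//   m.AddBuiltinOp(BuiltinOperator_RELU, BuiltinOptions_NONE, 0, {in}, {mid});
//   m.AddBuiltinOp(BuiltinOperator_RELU6, BuiltinOptions_NONE, 0, {mid}, {out});
//   m.BuildInterpreter({m.GetShape(in)});
//   m.PopulateTensor<float>(in, {-1.0f, 0.5f, 3.0f, 8.0f});
//   m.Invoke();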

}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_