/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <gtest/gtest.h>
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/fully_connected.h"
#include "tensorflow/lite/kernels/test_util.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

namespace tflite {

namespace ops {
namespace builtin {

TfLiteRegistration* Register_CONVOLUTION_REF();
TfLiteRegistration* Register_DEQUANTIZE();

}  // namespace builtin
}  // namespace ops

namespace {

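// Helper that builds a single-op model and applies the NNAPI delegate through
// a StatefulNnApiDelegate, keeping the delegation status around so tests can
// assert that compilation succeeded.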
class SingleOpModelWithNNAPI : public SingleOpModel {
 public:
  SingleOpModelWithNNAPI() = default;
  void Init(const NnApi* nnapi) {
    options_.disallow_nnapi_cpu = false;
    stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options_));
    SetDelegate(stateful_delegate_.get());
  }

  StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); }

  void SetBufferHandle(int index, TfLiteBufferHandle handle) {
    interpreter_->SetBufferHandle(index, handle, stateful_delegate_.get());
  }
  TfLiteStatus GetCompilationStatus() { return compilation_status_; }

 protected:
  std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
  StatefulNnApiDelegate::Options options_;
  TfLiteStatus compilation_status_;
};

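// Hybrid fully-connected model: float32 activations with symmetrically
// quantized int8 weights. Used to verify which NNAPI operand types the
// delegate creates for hybrid operators.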
class HybridFullyConnectedOpModel : public SingleOpModelWithNNAPI {
 public:
  HybridFullyConnectedOpModel(const NnApi* nnapi, int units, int batches,
                              const TensorData& input,
                              const TensorData& weights,
                              const TensorData& output = {TensorType_FLOAT32},
                              bool asymmetric_inputs = false)
      : batches_(batches), units_(units) {
    SingleOpModelWithNNAPI::Init(nnapi);
    int total_input_size = 1;
    for (size_t i = 0; i < input.shape.size(); ++i) {
      total_input_size *= input.shape[i];
    }
    input_size_ = total_input_size / batches_;

    input_ = AddInput(input);
    weights_ = AddInput(weights);

    TensorData bias{TensorType_FLOAT32, {units_}};
    bias_ = AddInput(bias);

    output_ = AddOutput(output);

    auto options = CreateFullyConnectedOptions(
                       builder_, ActivationFunctionType_RELU,
                       tflite::FullyConnectedOptionsWeightsFormat_DEFAULT,
                       false, asymmetric_inputs)
                       .Union();
    SetBuiltinOp(BuiltinOperator_FULLY_CONNECTED,
                 BuiltinOptions_FullyConnectedOptions, options);
    resolver_ = absl::make_unique<SingleOpResolver>(
        BuiltinOperator_FULLY_CONNECTED,
        ops::builtin::Register_FULLY_CONNECTED_PIE());
    BuildInterpreter({GetShape(input_), GetShape(weights_), GetShape(bias_)},
                     /*num_threads=*/-1,
                     /*allow_fp32_relax_to_fp16=*/false,
                     /*apply_delegate=*/false);
    compilation_status_ = ApplyDelegate();
  }
  void SetBias(const std::vector<float>& f) { PopulateTensor(bias_, f); }
  void SetWeights(const std::vector<float>& data) {
    SymmetricQuantizeAndPopulate(weights_, data);
  }
  void SetSignedWeights(std::initializer_list<float> f) {
    SignedSymmetricQuantizeAndPopulate(weights_, f);
  }

  void SetInput(const std::vector<float>& f) { PopulateTensor(input_, f); }
  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }

  int input_size() { return input_size_; }
  int num_units() { return units_; }
  int num_batches() { return batches_; }

 protected:
  int input_;
  int weights_;
  int bias_;
  int output_;

  int batches_;
  int units_;
  int input_size_;
};

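// Test fixture that stubs ANeuralNetworksModel_addOperand in the NNAPI mock
// and counts how many operands of each NNAPI type the delegate adds. The
// tests below assert on these counts to check how TFLite tensor types are
// mapped at different Android SDK levels.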
struct NnApiSignedQuantizationTest
    : ::tflite::delegate::nnapi::NnApiDelegateMockTest {
  static void SetUpTestSuite() { tensors_count = new std::map<int, int>(); }
  void SetUp() override {
    ::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp();
    nnapi_mock_->StubAddOperandWith(
        [](ANeuralNetworksModel* model,
           const ANeuralNetworksOperandType* type) -> int {
          const auto nn_tensor_type = type->type;
          if (tensors_count->find(nn_tensor_type) == tensors_count->end()) {
            tensors_count->insert({nn_tensor_type, 0});
          }
          tensors_count->at(nn_tensor_type)++;
          return ANEURALNETWORKS_NO_ERROR;
        });
  }
  void TearDown() override { tensors_count->clear(); }
  static void TearDownTestSuite() {
    delete tensors_count;
    tensors_count = nullptr;
  }
  static std::map<int, int>* tensors_count;
};
std::map<int, int>* NnApiSignedQuantizationTest::tensors_count = nullptr;

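// Hybrid fully connected: on both SDK 29 and SDK 30 the int8 weights are
// expected to be added as a TENSOR_QUANT8_SYMM operand feeding the weight
// dequantization inserted by the delegate.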
TEST_F(NnApiSignedQuantizationTest,
       HybridFullyConnectedMapsToSignedSymmOnSdk29) {
  nnapi_mock_->SetAndroidSdkVersion(29);

  HybridFullyConnectedOpModel m(
      nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2,
      /*input=*/{TensorType_FLOAT32, {2, 10}},
      /*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0});
  m.SetSignedWeights({
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
  });
  m.SetBias({1, 2, 3});
  m.SetInput({
      1, 2, 3, 4, 5, 6, 7, 8, -9, -10,  // b = 0
      1, 2, 3, 4, 5, 6, 7, -8, 9, -10,  // b = 1
  });

  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 3);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            4);  // fc_input, fc_weights, fc_bias, fc_output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1);  // activation
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            1);  // dequantize_weights_input
}

TEST_F(NnApiSignedQuantizationTest,
       HybridFullyConnectedMapsToSignedSymmOnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);

  HybridFullyConnectedOpModel m(
      nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2,
      /*input=*/{TensorType_FLOAT32, {2, 10}},
      /*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0});
  m.SetSignedWeights({
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
  });
  m.SetBias({1, 2, 3});
  m.SetInput({
      1, 2, 3, 4, 5, 6, 7, 8, -9, -10,  // b = 0
      1, 2, 3, 4, 5, 6, 7, -8, 9, -10,  // b = 1
  });

  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 3);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            4);  // fc_input, fc_weights, fc_bias, fc_output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1);  // activation
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            1);  // dequantize_weights_input
}

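// Convolution model parameterized on the filter storage type. Builds the
// bias tensor to match the input/filter quantization (per-tensor or
// per-channel).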
template <typename FilterType>
class BaseConvolutionOpModel : public SingleOpModelWithNNAPI {
 public:
  BaseConvolutionOpModel(
      const NnApi* nnapi, TfLiteRegistration* registration,
      const TensorData& input, const TensorData& filter,
      const TensorData& output, int stride_width = 2, int stride_height = 2,
      enum Padding padding = Padding_VALID,
      enum ActivationFunctionType activation = ActivationFunctionType_NONE,
      int dilation_width_factor = 1, int dilation_height_factor = 1,
      std::initializer_list<FilterType> filter_data = {}) {
    SingleOpModelWithNNAPI::Init(nnapi);

    input_ = AddInput(input);

    if (filter_data.size()) {
      filter_ = AddConstInput(filter, filter_data);
    } else {
      filter_ = AddInput(filter);
    }

    int bias_size = GetShape(filter_)[0];
    if (input.type == TensorType_FLOAT32) {
      bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
    } else {
      // This is a quantized version. The scale of 'bias' depends on the scales
      // of input and filter. Supposedly this is correctly set during quantized
      // training.
      if (filter.per_channel_quantization) {
        // Per-channel quantization.
        std::vector<float> bias_scale(
            filter.per_channel_quantization_scales.size());
        std::vector<int64_t> bias_zero_points(
            filter.per_channel_quantization_scales.size());
        for (size_t i = 0; i < filter.per_channel_quantization_scales.size();
             ++i) {
          bias_scale[i] =
              input.scale * filter.per_channel_quantization_scales[i];
          bias_zero_points[i] = 0;
        }
        tflite::TensorType bias_type = TensorType_INT32;
        if (input.type == TensorType_INT16) {
          // In the 16-bit case the bias type is int64.
          bias_type = TensorType_INT64;
        }
        TensorData bias{bias_type,
                        {bias_size},
                        /*min=*/0,
                        /*max=*/0,
                        /*scale=*/0,
                        /*zero_point=*/0,
                        true,
                        /*per_channel_quantization_scales=*/bias_scale,
                        /*per_channel_quantization_offsets=*/bias_zero_points,
                        /*channel_index=*/0};
        bias_ = AddInput(bias);
      } else {
        // Per-tensor quantization.
        auto bias_scale = GetScale(input_) * GetScale(filter_);
        TensorData bias{TensorType_INT32, {bias_size}, 0, 0, bias_scale};
        bias_ = AddInput(bias);
      }
    }

    output_ = AddOutput(output);

    SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions,
                 CreateConv2DOptions(
                     builder_, padding, stride_width, stride_height,
                     activation, dilation_width_factor, dilation_height_factor)
                     .Union());

    resolver_ = absl::make_unique<SingleOpResolver>(BuiltinOperator_CONV_2D,
                                                    registration);
    BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)},
                     /*num_threads=*/-1,
                     /*allow_fp32_relax_to_fp16=*/false,
                     /*apply_delegate=*/false);
    compilation_status_ = ApplyDelegate();
  }

 protected:
  int input_;
  int filter_;
  int bias_;
  int output_;
};

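// Per-tensor quantized convolution with uint8 activations and filter.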
class QuantizedConvolutionOpModel : public BaseConvolutionOpModel<uint8_t> {
 public:
  using BaseConvolutionOpModel::BaseConvolutionOpModel;

  void SetInput(std::initializer_list<float> data) {
    QuantizeAndPopulate<uint8_t>(input_, data);
  }

  void SetFilter(std::initializer_list<float> data) {
    QuantizeAndPopulate<uint8_t>(filter_, data);
  }

  void SetBias(std::initializer_list<float> data) {
    QuantizeAndPopulate<int32_t>(bias_, data);
  }

  std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); }
  std::vector<float> GetDequantizedOutput() {
    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
                               GetScale(output_), GetZeroPoint(output_));
  }
};

TEST_F(NnApiSignedQuantizationTest,
       Conv2DUnsignedPerTensorMapsToUnsignedOnSdk29) {
  QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(),
                                ops::builtin::Register_CONVOLUTION_REF(),
                                {TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
                                {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
                                {TensorType_UINT8, {}, -127, 128});
  m.SetInput({
      // First batch
      1, 1, 1, 1,  // row = 1
      2, 2, 2, 2,  // row = 2
      // Second batch
      1, 2, 3, 4,  // row = 1
      1, 2, 3, 4,  // row = 2
  });
  m.SetFilter({
      1, 2, 3, 4,    // first 2x2 filter
      -1, 1, -1, 1,  // second 2x2 filter
      -1, -1, 1, 1,  // third 2x2 filter
  });
  m.SetBias({1, 2, 3});

  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 3);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            3);  // input, filter, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

TEST_F(NnApiSignedQuantizationTest,
       Conv2dUnsignedPerTensorMapsToUnsignedOnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(),
                                ops::builtin::Register_CONVOLUTION_REF(),
                                {TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
                                {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
                                {TensorType_UINT8, {}, -127, 128});
  m.SetInput({
      // First batch
      1, 1, 1, 1,  // row = 1
      2, 2, 2, 2,  // row = 2
      // Second batch
      1, 2, 3, 4,  // row = 1
      1, 2, 3, 4,  // row = 2
  });
  m.SetFilter({
      1, 2, 3, 4,    // first 2x2 filter
      -1, 1, -1, 1,  // second 2x2 filter
      -1, -1, 1, 1,  // third 2x2 filter
  });
  m.SetBias({1, 2, 3});

  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 3);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            3);  // input, filter, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

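// Per-channel quantized convolution with int8 activations and a symmetric
// per-channel int8 filter.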
class PerChannelQuantizedConvolutionOpModel
    : public BaseConvolutionOpModel<int8_t> {
 public:
  using BaseConvolutionOpModel::BaseConvolutionOpModel;

  void SetInput(std::initializer_list<float> data) {
    QuantizeAndPopulate<int8_t>(input_, data);
  }

  void SetFilter(std::initializer_list<float> data) {
    PerChannelSymmetricQuantizeAndPopulate(filter_, data);
  }

  void SetBias(std::initializer_list<float> data) {
    PerChannelQuantizeBias(bias_, data);
  }

  std::vector<int8_t> GetOutput() { return ExtractVector<int8_t>(output_); }
  std::vector<float> GetDequantizedOutput() {
    return Dequantize<int8_t>(ExtractVector<int8_t>(output_),
                              GetScale(output_), GetZeroPoint(output_));
  }
};

TEST_F(NnApiSignedQuantizationTest,
       Conv2dSignedPerTensorMapsToUnsignedOnSdk29) {
  nnapi_mock_->SetAndroidSdkVersion(29);
  PerChannelQuantizedConvolutionOpModel m(
      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
      {TensorType_INT8,
       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
       {2, 2, 2, 2},
       0,
       0,
       0,
       0,
       /*per_channel_quantization=*/true,
       /*per_channel_quantization_scales=*/{1},
       /*per_channel_quantization_offsets=*/{0},
       /*channel_index=*/0},
      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
      /*stride_width=*/1, /*stride_height=*/1);
  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });
  m.SetFilter(
      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
      {
          1, 2,  // out channel = 0, y = 0, x = 0
          3, 4,  // out channel = 0, y = 0, x = 1
          3, 4,  // out channel = 0, y = 1, x = 0
          5, 6,  // out channel = 0, y = 1, x = 1
          7, 8,  // out channel = 1, y = 0, x = 0
          5, 6,  // out channel = 1, y = 0, x = 1
          3, 4,  // out channel = 1, y = 1, x = 0
          1, 2,  // out channel = 1, y = 1, x = 1
      });
  m.SetBias({3, -2});

  // Invoke and verify output.
  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 3);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            3);  // input, filter, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

TEST_F(NnApiSignedQuantizationTest,
       Conv2dSignedPerTensorMapsToSignedOnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  PerChannelQuantizedConvolutionOpModel m(
      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
      {TensorType_INT8,
       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
       {2, 2, 2, 2},
       0,
       0,
       0,
       0,
       /*per_channel_quantization=*/true,
       /*per_channel_quantization_scales=*/{1},
       /*per_channel_quantization_offsets=*/{0},
       /*channel_index=*/0},
      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
      /*stride_width=*/1, /*stride_height=*/1);
  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });
  m.SetFilter(
      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
      {
          1, 2,  // out channel = 0, y = 0, x = 0
          3, 4,  // out channel = 0, y = 0, x = 1
          3, 4,  // out channel = 0, y = 1, x = 0
          5, 6,  // out channel = 0, y = 1, x = 1
          7, 8,  // out channel = 1, y = 0, x = 0
          5, 6,  // out channel = 1, y = 0, x = 1
          3, 4,  // out channel = 1, y = 1, x = 0
          1, 2,  // out channel = 1, y = 1, x = 1
      });
  m.SetBias({3, -2});

  // Invoke and verify output.
  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 3);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            3);  // input, filter, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

TEST_F(NnApiSignedQuantizationTest,
       Conv2dSignedPerChannelMapsToUnsignedOnSdk29) {
  PerChannelQuantizedConvolutionOpModel m(
      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
      {TensorType_INT8,
       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
       {2, 2, 2, 2},
       0,
       0,
       0,
       0,
       /*per_channel_quantization=*/true,
       /*per_channel_quantization_scales=*/{1, 2},
       /*per_channel_quantization_offsets=*/{0, 0},
       /*channel_index=*/0},
      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
      /*stride_width=*/1, /*stride_height=*/1);
  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });
  m.SetFilter(
      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
      {
          1, 2,  // out channel = 0, y = 0, x = 0
          3, 4,  // out channel = 0, y = 0, x = 1
          3, 4,  // out channel = 0, y = 1, x = 0
          5, 6,  // out channel = 0, y = 1, x = 1
          7, 8,  // out channel = 1, y = 0, x = 0
          5, 6,  // out channel = 1, y = 0, x = 1
          3, 4,  // out channel = 1, y = 1, x = 0
          1, 2,  // out channel = 1, y = 1, x = 1
      });
  m.SetBias({3, -2});

  // Invoke and verify output.
  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 4);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            2);  // input, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
            1);  // filter
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

TEST_F(NnApiSignedQuantizationTest, Conv2dSignedPerChannelMapsToSignedOnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  PerChannelQuantizedConvolutionOpModel m(
      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
      {TensorType_INT8,
       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
       {2, 2, 2, 2},
       0,
       0,
       0,
       0,
       /*per_channel_quantization=*/true,
       /*per_channel_quantization_scales=*/{1, 2},
       /*per_channel_quantization_offsets=*/{0, 0},
       /*channel_index=*/0},
      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
      /*stride_width=*/1, /*stride_height=*/1);
  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });
  m.SetFilter(
      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
      {
          1, 2,  // out channel = 0, y = 0, x = 0
          3, 4,  // out channel = 0, y = 0, x = 1
          3, 4,  // out channel = 0, y = 1, x = 0
          5, 6,  // out channel = 0, y = 1, x = 1
          7, 8,  // out channel = 1, y = 0, x = 0
          5, 6,  // out channel = 1, y = 0, x = 1
          3, 4,  // out channel = 1, y = 1, x = 0
          1, 2,  // out channel = 1, y = 1, x = 1
      });
  m.SetBias({3, -2});

  // Invoke and verify output.
  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 4);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            2);  // input, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
            1);  // filter
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

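// Single QUANTIZE op model: float32 input, quantized output.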
class QuantizeOpModel : public SingleOpModelWithNNAPI {
 public:
  QuantizeOpModel(const NnApi* nnapi, const TensorData& input,
                  const TensorData& output) {
    SingleOpModelWithNNAPI::Init(nnapi);
    input_ = AddInput(input);
    output_ = AddOutput(output);
    SetBuiltinOp(BuiltinOperator_QUANTIZE, BuiltinOptions_QuantizeOptions,
                 CreateQuantizeOptions(builder_).Union());

    BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1,
                     /*allow_fp32_relax_to_fp16=*/false,
                     /*apply_delegate=*/false);
    compilation_status_ = ApplyDelegate();
  }

  void SetInput(std::initializer_list<float> data) {
    PopulateTensor(input_, data);
  }

  template <typename T>
  void SetInputAndQuantize(std::initializer_list<float> data) {
    QuantizeAndPopulate<T>(input_, data);
  }

  template <typename T>
  std::vector<T> GetOutput() {
    return ExtractVector<T>(output_);
  }

 private:
  int input_;
  int output_;
};

TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk29) {
  // [-63.5, 64] -> scale=0.5, zero_point=127 for UINT8
  QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
                    {TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127});

  m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            1);  // output
}

TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  // [-63.5, 64] -> scale=0.5, zero_point=127 for UINT8
  QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
                    {TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127});

  m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            1);  // output
}

// Quantize with Int8 output is only supported since SDK level 30.
TEST_F(NnApiSignedQuantizationTest, QuantizeInt8MapsToInt8OnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  // [-63.5, 64] -> scale=0.5, zero_point=-1 for INT8
  QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
                    {TensorType_INT8, {2, 5}, 0, 0, 0.5, -1});

  m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            1);  // output
}

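// Single DEQUANTIZE op model: quantized input, float32 output. The kernel
// version is passed through so tests can exercise int8 support (version 2).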
class DequantizeOpModel : public SingleOpModelWithNNAPI {
 public:
  DequantizeOpModel(const NnApi* nnapi, TensorType type,
                    std::initializer_list<int> shape, float scale,
                    int32_t zero_point, int version) {
    SingleOpModelWithNNAPI::Init(nnapi);
    const TensorData input_tensor_data = {type, shape, 0, 0, scale, zero_point};
    input_ = AddInput(input_tensor_data);
    output_ = AddOutput({TensorType_FLOAT32, shape});
    SetBuiltinOp(BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions,
                 CreateDequantizeOptions(builder_).Union());

    resolver_ = absl::make_unique<SingleOpResolver>(
        BuiltinOperator_DEQUANTIZE, ops::builtin::Register_DEQUANTIZE(),
        version);

    BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1,
                     /*allow_fp32_relax_to_fp16=*/false,
                     /*apply_delegate=*/false);
    compilation_status_ = ApplyDelegate();
  }

  template <typename T>
  void SetInput(std::initializer_list<T> data) {
    PopulateTensor(input_, data);
  }

  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }

 private:
  int input_;
  int output_;
};

TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk29) {
  // [-63.5, 64] -> scale=0.5, zero_point=127 for UINT8
  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5,
                      127, 1);

  m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // output
}

TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  // [-63.5, 64] -> scale=0.5, zero_point=127 for UINT8
  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5,
                      127, 1);

  m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // output
}

// Dequantize with Int8 input is only supported for symmetric quantization on
// SDK level 29.
TEST_F(NnApiSignedQuantizationTest,
       DequantizeTestInt8SymmMapsToInt8SymmOnSdk29) {
  // [-63.5, 64] -> scale=0.5, zero_point=0 for INT8
  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, 0,
                      2);

  m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // output
}

// Dequantize with asymmetric Int8 input is only supported since SDK level 30.
TEST_F(NnApiSignedQuantizationTest, DequantizeTestInt8MapsToInt8OnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  // [-63.5, 64] -> scale=0.5, zero_point=-1 for INT8
  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, -1,
                      2);

  m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // output
}

}  // namespace
}  // namespace tflite