/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <sys/mman.h>

#include <algorithm>
#include <array>
#include <cstdint>
#include <iterator>
#include <memory>
#include <numeric>
#include <ostream>
#include <string>
#include <unordered_set>
#include <vector>

#include <gtest/gtest.h>
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/test_util.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

namespace tflite {
namespace {

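// Single ADD op model. The delegate is created and applied explicitly in
// Init() (BuildInterpreter is called with apply_delegate=false) so that
// tests can inspect the result through GetCompilationStatus().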
class FloatAddOpModel : public SingleOpModel {
 public:
  FloatAddOpModel() = default;

  void Init(const NnApi* nnapi, tflite::StatefulNnApiDelegate::Options options,
            const TensorData& input1, const TensorData& input2,
            const TensorData& output, ActivationFunctionType activation_type,
            bool allow_fp32_relax_to_fp16 = false) {
    stateful_delegate_ =
        std::make_unique<StatefulNnApiDelegate>(nnapi, options);
    SetDelegate(stateful_delegate_.get());

    input1_ = AddInput(input1);
    input2_ = AddInput(input2);
    output_ = AddOutput(output);
    SetBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                 CreateAddOptions(builder_, activation_type).Union());
    BuildInterpreter({GetShape(input1_), GetShape(input2_)}, /*num_threads=*/-1,
                     allow_fp32_relax_to_fp16, /*apply_delegate=*/false);
    compilation_status_ = ApplyDelegate();
  }

  int input1() { return input1_; }
  int input2() { return input2_; }

  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }

  TfLiteStatus GetCompilationStatus() { return compilation_status_; }

 protected:
  int input1_;
  int input2_;
  int output_;

 private:
  std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
  TfLiteStatus compilation_status_;
};

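// Fixture mocking an NNAPI implementation that exposes three devices with
// handles 1, 2 and 3, named "dsp", "gpu" and "nnapi-reference" respectively,
// and that reports the model's single operation as supported.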
struct NnApiDeviceSelectionTest
    : ::tflite::delegate::nnapi::NnApiDelegateMockTest {
  void SetUp() override {
    ::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp();
    nnapi_mock_->GetDeviceCountReturnsCount<3>();
    nnapi_mock_->StubGetDeviceWith(
        [](uint32_t devIndex, ANeuralNetworksDevice** device) -> int {
          *device = reinterpret_cast<ANeuralNetworksDevice*>(devIndex + 1);
          return 0;
        });
    nnapi_mock_->StubGetDeviceNameWith(
        [](const ANeuralNetworksDevice* device, const char** name) -> int {
          if (device == reinterpret_cast<ANeuralNetworksDevice*>(1)) {
            *name = "dsp";
          } else if (device == reinterpret_cast<ANeuralNetworksDevice*>(2)) {
            *name = "gpu";
          } else {
            *name = "nnapi-reference";
          }
          return ANEURALNETWORKS_NO_ERROR;
        });
    nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
        [](const ANeuralNetworksModel* model,
           const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
           bool* supportedOps) -> int {
          supportedOps[0] = true;
          return ANEURALNETWORKS_NO_ERROR;
        });
  }

  void InitWithOptions(tflite::StatefulNnApiDelegate::Options options) {
    m.Init(nnapi_mock_->GetNnApi(), options, {TensorType_FLOAT32, {1, 2, 2, 1}},
           {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
           ActivationFunctionType_NONE);
    m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.7, 0.8});
    m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.5});
  }

  FloatAddOpModel m;
};

TEST_F(NnApiDeviceSelectionTest, DoesntSetDevicesWhenCpuAllowed) {
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        EXPECT_TRUE(false) << "Should not call createForDevices";
        return 1;
      });

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = false;
  InitWithOptions(options);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}

TEST_F(NnApiDeviceSelectionTest, SetsDeviceBasedOnOptions) {
  nnapi_mock_->CompilationCreateReturns<1>();
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        EXPECT_EQ(numDevices, 1);
        EXPECT_EQ(devices[0], reinterpret_cast<ANeuralNetworksDevice*>(1));
        if (numDevices != 1 ||
            devices[0] != reinterpret_cast<ANeuralNetworksDevice*>(1)) {
          return 1;
        } else {
          *compilation = reinterpret_cast<ANeuralNetworksCompilation*>(3);
          return ANEURALNETWORKS_NO_ERROR;
        }
      });

  tflite::StatefulNnApiDelegate::Options options;
  options.accelerator_name = "dsp";
  InitWithOptions(options);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}

TEST_F(NnApiDeviceSelectionTest, DisallowsCPUBasedOnOptions) {
  nnapi_mock_->CompilationCreateReturns<1>();
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        EXPECT_EQ(numDevices, 2);
        EXPECT_EQ(devices[0], reinterpret_cast<ANeuralNetworksDevice*>(1));
        EXPECT_EQ(devices[1], reinterpret_cast<ANeuralNetworksDevice*>(2));
        if (numDevices != 2 ||
            devices[0] != reinterpret_cast<ANeuralNetworksDevice*>(1) ||
            devices[1] != reinterpret_cast<ANeuralNetworksDevice*>(2)) {
          return 1;
        } else {
          *compilation = reinterpret_cast<ANeuralNetworksCompilation*>(3);
          return ANEURALNETWORKS_NO_ERROR;
        }
      });

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = true;
  InitWithOptions(options);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}

TEST_F(NnApiDeviceSelectionTest,
       DoesNotDelegateIfOnlyReferenceDeviceIsAvailable_CpuEnabled) {
  // Only nnapi-reference is available on the device.
  nnapi_mock_->GetDeviceCountReturnsCount<1>();
  nnapi_mock_->GetDeviceNameReturnsName("nnapi-reference");

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = false;
  InitWithOptions(options);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}

TEST_F(NnApiDeviceSelectionTest,
       DoesNotDelegateIfOnlyReferenceDeviceIsAvailable_CpuDisabled) {
  // Only nnapi-reference is available on the device.
  nnapi_mock_->GetDeviceCountReturnsCount<1>();
  nnapi_mock_->GetDeviceNameReturnsName("nnapi-reference");

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = true;
  InitWithOptions(options);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}

struct UnsupportedOperationOnDeviceTest
    : ::tflite::delegate::nnapi::NnApiDelegateMockTest {};

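// Mixin owning a StatefulNnApiDelegate configured either for a named target
// accelerator or, when no name is given, with the option to disallow the
// NNAPI CPU fallback (nnapi-reference).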
class AcceleratedModel {
 public:
  StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); }

 protected:
  // Builds a delegate with a target accelerator name.
  AcceleratedModel(const NnApi* nnapi, const std::string& accelerator_name,
                   int max_nnapi_partitions = 0) {
    StatefulNnApiDelegate::Options options;
    options.accelerator_name = accelerator_name.c_str();
    options.max_number_delegated_partitions = max_nnapi_partitions;
    stateful_delegate_ =
        std::make_unique<StatefulNnApiDelegate>(nnapi, options);
  }

  // Builds a delegate with no target accelerator name; the NNAPI CPU fallback
  // implementation can be disabled through the disallow_nnapi_cpu flag.
  AcceleratedModel(const NnApi* nnapi, bool disallow_nnapi_cpu,
                   int max_nnapi_partitions = 0) {
    StatefulNnApiDelegate::Options options;
    options.disallow_nnapi_cpu = disallow_nnapi_cpu;
    options.max_number_delegated_partitions = max_nnapi_partitions;
    stateful_delegate_ =
        std::make_unique<StatefulNnApiDelegate>(nnapi, options);
  }

 private:
  std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
};

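// Single ARG_MAX op model. ARG_MAX is only available from NNAPI 1.2
// (feature level 29), which makes it convenient for testing that delegation
// decisions are driven by the device's feature level.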
class ArgMaxOpModel : public SingleOpModel, public AcceleratedModel {
 public:
  ArgMaxOpModel(std::initializer_list<int> input_shape, TensorType input_type,
                int axis_value, TensorType output_type, const NnApi* nnapi,
                const char* device_name)
      : SingleOpModel(), AcceleratedModel(nnapi, device_name) {
    Init(input_shape, input_type, axis_value, output_type);
  }

  ArgMaxOpModel(std::initializer_list<int> input_shape, TensorType input_type,
                int axis_value, TensorType output_type, const NnApi* nnapi,
                bool disallow_nnapi_cpu)
      : SingleOpModel(), AcceleratedModel(nnapi, disallow_nnapi_cpu) {
    Init(input_shape, input_type, axis_value, output_type);
  }

  int input() const { return input_; }

 protected:
  int input_;
  int axis_;
  int output_;

  void Init(std::initializer_list<int> input_shape, TensorType input_type,
            int axis_value, TensorType output_type) {
    SetDelegate(GetDelegate());
    input_ = AddInput(input_type);
    axis_ = AddConstInput(TensorType_INT32, {axis_value}, {1});
    output_ = AddOutput(output_type);

    SetBuiltinOp(BuiltinOperator_ARG_MAX, BuiltinOptions_ArgMaxOptions,
                 CreateArgMaxOptions(builder_, output_type).Union());
    BuildInterpreter({input_shape, {1}});
  }
};

TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldUseDeviceFeatureLevelWhenSpecifyingTargetDevice) {
  nnapi_mock_->SetAndroidSdkVersion(29);
  nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/28);
  // Stubbing this here so that, in the first case, the delegate is not
  // applied because the device feature level is too low, not because the
  // operations are unsupported by the device.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 1, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                  TensorType_INT32, nnapi_mock_->GetNnApi(), "test-device");
  m.PopulateTensor<float>(m.input(), {0.1, 0.9, 0.7, 0.3});
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1)
      << "Expected ArgMax not to be delegated since it is not supported "
         "before NNAPI 1.2 and the device declares support for NNAPI 1.1 "
         "only.";

  nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/29);

  ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                   TensorType_INT32, nnapi_mock_->GetNnApi(), "test-device");
  m1.PopulateTensor<float>(m1.input(), {0.1, 0.9, 0.7, 0.3});
  ASSERT_EQ(m1.Invoke(), kTfLiteOk);

  EXPECT_EQ(m1.CountOpsExecutedByCpuKernel(), 0)
      << "Expected ArgMax to be delegated since it is supported in NNAPI 1.2.";
}

TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldUseDeviceFeatureLevelWhenDisablingCPU) {
  nnapi_mock_->SetAndroidSdkVersion(29);
  nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/28);
  // Stubbing this here so that, in the first case, the delegate is not
  // applied because the device feature level is too low, not because the
  // operations are unsupported by the device.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 1, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                  TensorType_INT32, nnapi_mock_->GetNnApi(),
                  /*disallow_nnapi_cpu=*/true);
  m.PopulateTensor<float>(m.input(), {0.1, 0.9, 0.7, 0.3});
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1)
      << "Expected ArgMax not to be delegated since it is not supported "
         "before NNAPI 1.2 and the device declares support for NNAPI 1.1 "
         "only.";

  ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                   TensorType_INT32, nnapi_mock_->GetNnApi(),
                   /*disallow_nnapi_cpu=*/false);
  m1.PopulateTensor<float>(m1.input(), {0.1, 0.9, 0.7, 0.3});
  ASSERT_EQ(m1.Invoke(), kTfLiteOk);

  EXPECT_EQ(m1.CountOpsExecutedByCpuKernel(), 0)
      << "Expected ArgMax to be delegated since the NNAPI CPU implementation "
         "is enabled.";

  nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/29);

  ArgMaxOpModel m2({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                   TensorType_INT32, nnapi_mock_->GetNnApi(),
                   /*disallow_nnapi_cpu=*/true);
  m2.PopulateTensor<float>(m2.input(), {0.1, 0.9, 0.7, 0.3});
  ASSERT_EQ(m2.Invoke(), kTfLiteOk);

  EXPECT_EQ(m2.CountOpsExecutedByCpuKernel(), 0)
      << "Expected ArgMax to be delegated since it is supported in NNAPI 1.2.";
}

// This is a model with two ops:
//
//  input1 ---->
//                ADD --
//  input2 ---->       |
//                      -->
//                         SUB --> output
//  input3 --------------->
//
class AddSubOpsAcceleratedModel : public MultiOpModel, public AcceleratedModel {
 public:
  AddSubOpsAcceleratedModel(const TensorData& input1, const TensorData& input2,
                            const TensorData& input3, const TensorData& output,
                            ActivationFunctionType activation_type,
                            const NnApi* nnapi,
                            const std::string& accelerator_name,
                            bool allow_fp32_relax_to_fp16 = false)
      : MultiOpModel(), AcceleratedModel(nnapi, accelerator_name) {
    SetDelegate(GetDelegate());
    Init(input1, input2, input3, output, activation_type,
         allow_fp32_relax_to_fp16);
  }

  int input1() { return input1_; }
  int input2() { return input2_; }
  int input3() { return input3_; }

  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }

 protected:
  int input1_;
  int input2_;
  int input3_;
  int output_;

 private:
  // Performs initialization logic shared across all constructors.
  void Init(const TensorData& input1, const TensorData& input2,
            const TensorData& input3, const TensorData& output,
            ActivationFunctionType activation_type,
            bool allow_fp32_relax_to_fp16 = false) {
    input1_ = AddInput(input1);
    input2_ = AddInput(input2);
    input3_ = AddInput(input3);
    const int add_output = AddInnerTensor<float>(output);
    output_ = AddOutput(output);
    AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                 CreateAddOptions(builder_, activation_type).Union(),
                 {input1_, input2_}, {add_output});
    AddBuiltinOp(BuiltinOperator_SUB, BuiltinOptions_SubOptions,
                 CreateSubOptions(builder_, activation_type).Union(),
                 {add_output, input3_}, {output_});
    BuildInterpreter({GetShape(input1_), GetShape(input2_), GetShape(input3_)},
                     /*num_threads=*/-1, allow_fp32_relax_to_fp16,
                     /*apply_delegate=*/true);
  }
};

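// Counters are file-scope globals because the stubbed NNAPI callbacks must
// be capture-less lambdas (plain function pointers) and so cannot capture
// test state.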
int should_build_model_with_sup_ops_compilation_model_create_count = 0;
int should_build_model_with_sup_ops_add_operation_count = 0;
TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldBuildModelWithOnlyDeviceSupportedOps) {
  nnapi_mock_->SetNnapiSupportedDevice("test-device");

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // Report only the first op as supported, forcing the delegate to
        // build a second, pruned model; this also exercises the assertion
        // on model caching below.
        supportedOps[0] = true;
        supportedOps[1] = false;
        return ANEURALNETWORKS_NO_ERROR;
      });

  nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int {
    ++should_build_model_with_sup_ops_compilation_model_create_count;
    *model = reinterpret_cast<ANeuralNetworksModel*>(1);
    return ANEURALNETWORKS_NO_ERROR;
  });

  nnapi_mock_->StubAddOperationWith(
      [](ANeuralNetworksModel* model, ANeuralNetworksOperationType type,
         uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
         const uint32_t* outputs) -> int {
        ++should_build_model_with_sup_ops_add_operation_count;
        return ANEURALNETWORKS_NO_ERROR;
      });

  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
  ASSERT_EQ(should_build_model_with_sup_ops_compilation_model_create_count, 2)
      << "Model with unsupported operations has been cached";
  EXPECT_EQ(should_build_model_with_sup_ops_add_operation_count, 3)
      << "The second model should contain only one operation";
}

TEST_F(UnsupportedOperationOnDeviceTest, ShouldRunOnCpuIfDeviceSupportsNoOps) {
  nnapi_mock_->SetNnapiSupportedDevice("test-device");

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 2, false);
        return ANEURALNETWORKS_NO_ERROR;
      });

  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 2);
}

int should_cache_model_compilation_model_create_count = 0;
TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) {
  nnapi_mock_->SetNnapiSupportedDevice("test-device");

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 2, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int {
    ++should_cache_model_compilation_model_create_count;
    *model = reinterpret_cast<ANeuralNetworksModel*>(1);
    return ANEURALNETWORKS_NO_ERROR;
  });

  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
  EXPECT_EQ(should_cache_model_compilation_model_create_count, 1);
}

TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldNotApplySupportedOperationsFilterBeforeAndroidSdk29) {
  nnapi_mock_->SetAndroidSdkVersion(28, /*set_unsupported_ops_to_null=*/true);
  nnapi_mock_->ModelCreateReturns<0>();
  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  // Delegation succeeded without failures and all nodes have been delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
}

// This is a model with two ops:
//
//  input1 ----> HARD_SWISH ---->
//                                ADD --> output
//  input2 ---------------------->
//
class HardSwishAddOpsAcceleratedModel : public MultiOpModel,
                                        public AcceleratedModel {
 public:
  HardSwishAddOpsAcceleratedModel(const TensorData& input1,
                                  const TensorData& input2,
                                  const TensorData& output,
                                  ActivationFunctionType activation_type,
                                  const NnApi* nnapi,
                                  const std::string& accelerator_name,
                                  bool allow_fp32_relax_to_fp16 = false)
      : MultiOpModel(), AcceleratedModel(nnapi, accelerator_name) {
    SetDelegate(GetDelegate());
    Init(input1, input2, output, activation_type, allow_fp32_relax_to_fp16);
  }

  int input1() { return input1_; }
  int input2() { return input2_; }

  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }

 protected:
  int input1_;
  int input2_;
  int output_;

 private:
  // Performs initialization logic shared across all constructors.
  void Init(const TensorData& input1, const TensorData& input2,
            const TensorData& output, ActivationFunctionType activation_type,
            bool allow_fp32_relax_to_fp16 = false) {
    input1_ = AddInput(input1);
    input2_ = AddInput(input2);
    const int hard_swish_output = AddInnerTensor<float>(output);
    output_ = AddOutput(output);
    AddBuiltinOp(BuiltinOperator_HARD_SWISH, BuiltinOptions_HardSwishOptions,
                 CreateHardSwishOptions(builder_).Union(), {input1_},
                 {hard_swish_output});
    AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                 CreateAddOptions(builder_, activation_type).Union(),
                 {input1_, hard_swish_output}, {output_});
    BuildInterpreter({GetShape(input1_), GetShape(input2_)}, /*num_threads=*/-1,
                     allow_fp32_relax_to_fp16, /*apply_delegate=*/true);
  }
};

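// TFLite's HARD_SWISH has no single NNAPI counterpart and is lowered to
// multiple NNAPI operations. These tests verify that delegation decisions
// take the support of every constituent NNAPI op into account.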
struct TfLiteOpMappedToMultipleNnApiOps
    : ::tflite::delegate::nnapi::NnApiDelegateMockTest {};

TEST_F(TfLiteOpMappedToMultipleNnApiOps, AllConstituentOpsNotSupported) {
  nnapi_mock_->ModelCreateReturns<0>();

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // HardSwish is mapped to 4 NNAPI ops, none of which is supported.
        std::fill(supportedOps, supportedOps + 4, false);
        // After that we have the ADD op, which is supported.
        supportedOps[4] = true;
        return ANEURALNETWORKS_NO_ERROR;
      });

  HardSwishAddOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE,
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  // Delegation succeeded without failures: HardSwish has not been delegated,
  // but Add has been correctly delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}

TEST_F(TfLiteOpMappedToMultipleNnApiOps, NotAllConstituentOpsSupported) {
  nnapi_mock_->ModelCreateReturns<0>();
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // HardSwish is mapped to 4 NNAPI ops (the first 4), so we have 5
        // ops in the NNAPI model.
        std::fill(supportedOps, supportedOps + 5, true);
        // One of the NNAPI ops required by HardSwish is not supported.
        supportedOps[2] = false;
        return ANEURALNETWORKS_NO_ERROR;
      });

  HardSwishAddOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE,
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  // Delegation succeeded without failures: HardSwish has not been delegated,
  // but Add is delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}

TEST_F(TfLiteOpMappedToMultipleNnApiOps, AllConstituentOpsSupported) {
  nnapi_mock_->ModelCreateReturns<0>();
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // HardSwish is mapped to 4 NNAPI ops (the first 4), so we have 5
        // ops in the NNAPI model. All ops are supported by the accelerator.
        std::fill(supportedOps, supportedOps + 5, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  HardSwishAddOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE,
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  // Delegation succeeded without failures and all nodes have been delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
}

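// CONV_2D model with float activations and quantized (uint8) weights. When
// delegated, the NNAPI delegate adds a Dequantize operation for each
// quantized input; here that is the weights tensor only.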
class QuantizedWeightsConvolutionOpModel : public SingleOpModel,
                                           public AcceleratedModel {
 public:
  QuantizedWeightsConvolutionOpModel(
      const NnApi* nnapi, std::string accelerator_name,
      const TensorData& input, const TensorData& filter,
      const TensorData& output, int stride_width = 2, int stride_height = 2,
      enum Padding padding = Padding_VALID,
      enum ActivationFunctionType activation = ActivationFunctionType_NONE,
      int dilation_width_factor = 1, int dilation_height_factor = 1,
      int num_threads = -1, std::initializer_list<uint8_t> filter_data = {})
      : SingleOpModel(), AcceleratedModel(nnapi, accelerator_name) {
    SetDelegate(GetDelegate());

    input_ = AddInput(input);

    if (filter_data.size()) {
      filter_ = AddConstInput(filter, filter_data);
    } else {
      filter_ = AddInput(filter);
    }

    int bias_size = GetShape(filter_)[0];

    bias_ = AddInput({TensorType_FLOAT32, {bias_size}});

    output_ = AddOutput(output);

    SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions,
                 CreateConv2DOptions(
                     builder_, padding, stride_width, stride_height, activation,
                     dilation_width_factor, dilation_height_factor)
                     .Union());

    BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)},
                     num_threads, /*allow_fp32_relax_to_fp16=*/false,
                     /*apply_delegate=*/true);
  }

  void SetInput(std::initializer_list<float> data) {
    PopulateTensor(input_, data);
  }

  void SetFilter(std::initializer_list<float> data) {
    QuantizeAndPopulate<uint8_t>(filter_, data);
  }

  void SetBias(std::initializer_list<float> data) {
    PopulateTensor(bias_, data);
  }

  std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); }

  std::vector<float> GetDequantizedOutput() {
    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
                               GetScale(output_), GetZeroPoint(output_));
  }

 protected:
  int input_;
  int filter_;
  int bias_;
  int output_;
};

int quantized_conv2d_model_added_nnapi_ops_count = 0;
TEST_F(TfLiteOpMappedToMultipleNnApiOps,
       AddedDequantizationsAreAccountedInModelOps) {
  nnapi_mock_->ModelCreateReturns<0>();
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps,
                  supportedOps + quantized_conv2d_model_added_nnapi_ops_count,
                  true);
        return ANEURALNETWORKS_NO_ERROR;
      });
  nnapi_mock_->StubAddOperationWith(
      [](ANeuralNetworksModel* model, ANeuralNetworksOperationType type,
         uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
         const uint32_t* outputs) -> int {
        ++quantized_conv2d_model_added_nnapi_ops_count;
        return ANEURALNETWORKS_NO_ERROR;
      });

  QuantizedWeightsConvolutionOpModel m(
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device", {TensorType_FLOAT32, {2, 2, 4, 1}},
      {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64}, {TensorType_FLOAT32, {}});
  m.SetInput({
      // First batch
      1, 1, 1, 1,  // row = 1
      2, 2, 2, 2,  // row = 2
      // Second batch
      1, 2, 3, 4,  // row = 1
      1, 2, 3, 4,  // row = 2
  });
  m.SetFilter({
      1, 2, 3, 4,    // first 2x2 filter
      -1, 1, -1, 1,  // second 2x2 filter
      -1, -1, 1, 1,  // third 2x2 filter
  });
  m.SetBias({1, 2, 3});

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
  // When delegating a quantized Conv2D, a Dequantize operation is added to
  // the model for each quantized input. In our case one Dequantize op is
  // expected for the weights, resulting in a 2-op model.
  EXPECT_EQ(quantized_conv2d_model_added_nnapi_ops_count, 2);
}

// Model with a chain of no-op ADDs (each adding a zero tensor), interleaved
// with no-op custom nodes.
class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
 public:
  LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
                    const std::unordered_set<int>& custom_nodes_indexes,
                    const NnApi* nnapi, const std::string& accelerator_name,
                    int max_nnapi_partitions)
      : MultiOpModel(),
        AcceleratedModel(nnapi, accelerator_name, max_nnapi_partitions) {
    Init(input_shape, graph_size, custom_nodes_indexes);
  }

  LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
                    const std::unordered_set<int>& custom_nodes_indexes,
                    const NnApi* nnapi, int max_nnapi_partitions)
      : MultiOpModel(), AcceleratedModel(nnapi, false, max_nnapi_partitions) {
    Init(input_shape, graph_size, custom_nodes_indexes);
  }

  void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }

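  // Each partition claimed by the delegate is replaced in the execution plan
  // by a single node whose registration has a non-null delegate, so counting
  // such nodes counts the delegated NNAPI partitions.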
  int CountNnApiPartitions() {
    return std::count_if(
        std::begin(interpreter_->execution_plan()),
        std::end(interpreter_->execution_plan()), [this](const int node_index) {
          return interpreter_->node_and_registration(node_index)
                     ->first.delegate != nullptr;
        });
  }

 private:
  void Init(const std::vector<int>& input_shape, int graph_size,
            const std::unordered_set<int>& custom_nodes_indexes) {
    SetDelegate(GetDelegate());

    const TensorData tensor_data{TensorType_FLOAT32, input_shape};

    input_ = AddInput(tensor_data);
    zero_input_ = AddInput(tensor_data);

    std::vector<int> intermediate_outputs(graph_size - 1);
    std::generate(
        std::begin(intermediate_outputs), std::end(intermediate_outputs),
        [this, &tensor_data]() { return AddInnerTensor<float>(tensor_data); });

    output_ = AddOutput(tensor_data);

    AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                 CreateAddOptions(builder_).Union(), {input_, zero_input_},
                 {intermediate_outputs[0]});

    for (int i = 0; i < intermediate_outputs.size() - 1; i++) {
      if (custom_nodes_indexes.count(i + 1) == 1) {
        AddCustomOp("custom_no_op", {}, [this]() { return CustomNoOpNode(); },
                    {intermediate_outputs[i]}, {intermediate_outputs[i + 1]});
      } else {
        AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                     CreateAddOptions(builder_).Union(),
                     {intermediate_outputs[i], zero_input_},
                     {intermediate_outputs[i + 1]});
      }
    }

    AddBuiltinOp(
        BuiltinOperator_ADD, BuiltinOptions_AddOptions,
        CreateAddOptions(builder_).Union(),
        {intermediate_outputs[intermediate_outputs.size() - 1], zero_input_},
        {output_});

    BuildInterpreter({GetShape(input_), GetShape(zero_input_)});

    std::vector<float> zero(GetTensorSize(input_), 0.0);
    PopulateTensor(zero_input_, zero);
  }

  // Returns the registration of a custom node that simply copies its input
  // to its output.
  TfLiteRegistration* CustomNoOpNode() {
    static TfLiteRegistration no_op = {
        .init = [](TfLiteContext* context, const char* buffer,
                   size_t length) -> void* { return nullptr; },

        .free = [](TfLiteContext* context, void* buffer) -> void {},

        .prepare = [](TfLiteContext* context,
                      TfLiteNode* node) -> TfLiteStatus {
          if (node->inputs->size != 1 || node->outputs->size != 1) {
            return kTfLiteError;
          }

          return kTfLiteOk;
        },

        .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
          auto input_tensor = context->tensors[node->inputs->data[0]];
          auto output_tensor = context->tensors[node->outputs->data[0]];

          std::copy(input_tensor.data.raw,
                    input_tensor.data.raw + input_tensor.bytes,
                    output_tensor.data.raw);

          return kTfLiteOk;
        },

        .profiling_string = nullptr,
        .builtin_code = kTfLiteBuiltinDelegate,
        .custom_name = "NoOpTestDelegate",
        .version = 1,
    };

    return &no_op;
  }

  int input_;
  int zero_input_;
  int output_;
};

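// Records which node indices the stubbed GetSupportedOperationsForDevices
// callback should report as unsupported.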
class NodeFilter {
 public:
  void ConfigureSupportedNodes(
      int graph_size, const std::unordered_set<int>& unsupported_indexes) {
    graph_size_ = graph_size;
    unsupported_indexes_ = unsupported_indexes;
  }

  void SetNodeSupport(bool* supported_ops) {
    for (int i = 0; i < graph_size_; i++) {
      supported_ops[i] = (unsupported_indexes_.count(i) == 0);
    }
  }

 private:
  int graph_size_;
  std::unordered_set<int> unsupported_indexes_;
};

// The same node filter is used for all DelegatePartitionLimitTests because
// StubGetSupportedOperationsForDevicesWith expects a capture-less (C-style)
// function.
NodeFilter* DelegatePartitionLimitTestNodeFilter() {
  static NodeFilter* node_filter = new NodeFilter();
  return node_filter;
}

class DelegatePartitionLimitTest
    : public ::tflite::delegate::nnapi::NnApiDelegateMockTest {
 protected:
  // Configures the underlying graph to generate a set of NNAPI partitions
  // with the sizes specified in nnapi_partition_sizes, using the given
  // input_shape.
  void Init(int max_nnapi_partitions,
            const std::vector<int>& nnapi_partition_sizes,
            const std::vector<int>& input_shape,
            bool specify_accelerator = true) {
    // The number of nodes in the graph is the sum of the nodes in the NNAPI
    // partitions plus nnapi_partition_sizes.size() - 1 nodes that are not
    // supported by NNAPI and therefore split the graph into partitions.
    graph_size_ = std::accumulate(std::begin(nnapi_partition_sizes),
                                  std::end(nnapi_partition_sizes),
                                  nnapi_partition_sizes.size() - 1);

    std::unordered_set<int> unsupported_ops_idxs;
    int partition_node_idx = -1;
    for (int i = 0; i < nnapi_partition_sizes.size() - 1; i++) {
      partition_node_idx += nnapi_partition_sizes[i] + 1;
      unsupported_ops_idxs.insert(partition_node_idx);
    }

    if (specify_accelerator) {
      // Build a model that initially contains a single partition and is then
      // partitioned by checking the operations supported by the target
      // accelerator. This is needed because the stubbed
      // GetSupportedOperationsForDevices API has no way of knowing the size
      // of each partition.
      DelegatePartitionLimitTestNodeFilter()->ConfigureSupportedNodes(
          graph_size_, unsupported_ops_idxs);

      nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
          [](const ANeuralNetworksModel* model,
             const ANeuralNetworksDevice* const* devices, uint32_t num_devices,
             bool* supported_ops) -> int {
            DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(
                supported_ops);
            return ANEURALNETWORKS_NO_ERROR;
          });

      model_ = std::make_unique<LongIdentityModel>(
          input_shape, graph_size_,
          /*custom_nodes_indexes=*/std::unordered_set<int>(),
          nnapi_mock_->GetNnApi(),
          /*accelerator_name=*/"test-device", max_nnapi_partitions);
    } else {
      // Build a model containing custom nodes that won't be supported by the
      // delegate and will therefore generate the partitions.
      model_ = std::make_unique<LongIdentityModel>(
          input_shape, graph_size_, unsupported_ops_idxs,
          nnapi_mock_->GetNnApi(), max_nnapi_partitions);
    }
  }

  std::unique_ptr<LongIdentityModel> model_;

  int OriginalGraphSize() { return graph_size_; }

 private:
  int graph_size_;
};

TEST_F(DelegatePartitionLimitTest, ShouldDelegateOnePartitionOnly) {
  Init(/*max_nnapi_partitions=*/1,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 1);
}

TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfLimitIsZero) {
  Init(/*max_nnapi_partitions=*/0,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}

TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfLimitIsNegative) {
  Init(/*max_nnapi_partitions=*/-1,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}

TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfBelowLimit) {
  Init(/*max_nnapi_partitions=*/3,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}

TEST_F(DelegatePartitionLimitTest, ShouldDelegatePartitionWithHigherNodeCount) {
  int kLargestModelSize = 3;
  Init(/*max_nnapi_partitions=*/1,
       /*nnapi_partition_sizes=*/{kLargestModelSize, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 1);
  EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(),
            OriginalGraphSize() - kLargestModelSize);
}

TEST_F(DelegatePartitionLimitTest,
       ShouldDelegatePartitionsWithHigherNodeCount) {
  int kLargestModelSize = 5;
  int kSecondLargestModelSize = 4;
  Init(/*max_nnapi_partitions=*/2,
       /*nnapi_partition_sizes=*/
       {1, kLargestModelSize, 2, kSecondLargestModelSize},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
  EXPECT_EQ(
      model_->CountOpsExecutedByCpuKernel(),
      OriginalGraphSize() - (kLargestModelSize + kSecondLargestModelSize));
}

TEST_F(DelegatePartitionLimitTest,
       ShouldLimitPartitionsEvenWithoutAcceleratorNameSpecified) {
  int kLargestModelSize = 5;
  int kSecondLargestModelSize = 4;
  Init(/*max_nnapi_partitions=*/2,
       /*nnapi_partition_sizes=*/
       {1, kLargestModelSize, 2, kSecondLargestModelSize},
       /*input_shape=*/{1, 2, 2, 1}, /*specify_accelerator=*/false);

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
  EXPECT_EQ(
      model_->CountOpsExecutedByCpuKernel(),
      OriginalGraphSize() - (kLargestModelSize + kSecondLargestModelSize));
}

}  // namespace
}  // namespace tflite