/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <sys/mman.h>

#include <algorithm>
#include <array>
#include <cstdint>
#include <iterator>
#include <memory>
#include <numeric>
#include <ostream>
#include <unordered_set>
#include <vector>

#include <gtest/gtest.h>
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/test_util.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

namespace tflite {
namespace {

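// Single-op ADD model that builds a StatefulNnApiDelegate from the given
// NNAPI implementation and options, and records the status returned when the
// delegate is applied.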
class FloatAddOpModel : public SingleOpModel {
 public:
  FloatAddOpModel() = default;

  void Init(const NnApi* nnapi, tflite::StatefulNnApiDelegate::Options options,
            const TensorData& input1, const TensorData& input2,
            const TensorData& output, ActivationFunctionType activation_type,
            bool allow_fp32_relax_to_fp16 = false) {
    stateful_delegate_ =
        std::make_unique<StatefulNnApiDelegate>(nnapi, options);
    SetDelegate(stateful_delegate_.get());

    input1_ = AddInput(input1);
    input2_ = AddInput(input2);
    output_ = AddOutput(output);
    SetBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                 CreateAddOptions(builder_, activation_type).Union());
    BuildInterpreter({GetShape(input1_), GetShape(input2_)}, /*num_threads=*/-1,
                     allow_fp32_relax_to_fp16, /*apply_delegate=*/false);
    compilation_status_ = ApplyDelegate();
  }

  int input1() { return input1_; }
  int input2() { return input2_; }

  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }

  TfLiteStatus GetCompilationStatus() { return compilation_status_; }

 protected:
  int input1_;
  int input2_;
  int output_;

 private:
  std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
  TfLiteStatus compilation_status_;
};

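// Test fixture stubbing an NNAPI implementation that exposes three devices
// ("dsp", "gpu" and "nnapi-reference") to verify how the delegate selects
// its target devices based on the delegate options.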
struct NnApiDeviceSelectionTest
    : ::tflite::delegate::nnapi::NnApiDelegateMockTest {
  void SetUp() override {
    ::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp();
    nnapi_mock_->GetDeviceCountReturnsCount<3>();
    nnapi_mock_->StubGetDeviceWith(
        [](uint32_t devIndex, ANeuralNetworksDevice** device) -> int {
          *device = reinterpret_cast<ANeuralNetworksDevice*>(devIndex + 1);
          return 0;
        });
    nnapi_mock_->StubGetDeviceNameWith(
        [](const ANeuralNetworksDevice* device, const char** name) -> int {
          if (device == reinterpret_cast<ANeuralNetworksDevice*>(1)) {
            *name = "dsp";
          } else if (device == reinterpret_cast<ANeuralNetworksDevice*>(2)) {
            *name = "gpu";
          } else {
            *name = "nnapi-reference";
          }
          return ANEURALNETWORKS_NO_ERROR;
        });
    nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
        [](const ANeuralNetworksModel* model,
           const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
           bool* supportedOps) -> int {
          supportedOps[0] = true;
          return ANEURALNETWORKS_NO_ERROR;
        });
  }

  void InitWithOptions(tflite::StatefulNnApiDelegate::Options options) {
    m.Init(nnapi_mock_->GetNnApi(), options, {TensorType_FLOAT32, {1, 2, 2, 1}},
           {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
           ActivationFunctionType_NONE);
    m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.7, 0.8});
    m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.5});
  }

  FloatAddOpModel m;
};

TEST_F(NnApiDeviceSelectionTest, DoesntSetDevicesWhenCpuAllowed) {
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        EXPECT_TRUE(false) << "Should not call createForDevices";
        return 1;
      });

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = false;
  InitWithOptions(options);
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}

TEST_F(NnApiDeviceSelectionTest, SetsDeviceBasedOnOptions) {
  nnapi_mock_->CompilationCreateReturns<1>();
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        EXPECT_EQ(numDevices, 1);
        EXPECT_EQ(devices[0], reinterpret_cast<ANeuralNetworksDevice*>(1));
        if (numDevices != 1 ||
            devices[0] != reinterpret_cast<ANeuralNetworksDevice*>(1)) {
          return 1;
        } else {
          *compilation = reinterpret_cast<ANeuralNetworksCompilation*>(3);
          return ANEURALNETWORKS_NO_ERROR;
        }
      });

  tflite::StatefulNnApiDelegate::Options options;
  options.accelerator_name = "dsp";
  InitWithOptions(options);
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}

TEST_F(NnApiDeviceSelectionTest, DisallowsCPUBasedOnOptions) {
  nnapi_mock_->CompilationCreateReturns<1>();
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        EXPECT_EQ(numDevices, 2);
        EXPECT_EQ(devices[0], reinterpret_cast<ANeuralNetworksDevice*>(1));
        EXPECT_EQ(devices[1], reinterpret_cast<ANeuralNetworksDevice*>(2));
        if (numDevices != 2 ||
            devices[0] != reinterpret_cast<ANeuralNetworksDevice*>(1) ||
            devices[1] != reinterpret_cast<ANeuralNetworksDevice*>(2)) {
          return 1;
        } else {
          *compilation = reinterpret_cast<ANeuralNetworksCompilation*>(3);
          return ANEURALNETWORKS_NO_ERROR;
        }
      });

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = true;
  InitWithOptions(options);
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}

TEST_F(NnApiDeviceSelectionTest,
       DoesNotDelegateIfOnlyReferenceDeviceIsAvailable_CpuEnabled) {
  // Only nnapi-reference is available on device.
  nnapi_mock_->GetDeviceCountReturnsCount<1>();
  nnapi_mock_->GetDeviceNameReturnsName("nnapi-reference");

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = false;
  InitWithOptions(options);
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}

TEST_F(NnApiDeviceSelectionTest,
       DoesNotDelegateIfOnlyReferenceDeviceIsAvailable_CpuDisabled) {
  // Only nnapi-reference is available on device.
  nnapi_mock_->GetDeviceCountReturnsCount<1>();
  nnapi_mock_->GetDeviceNameReturnsName("nnapi-reference");

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = true;
  InitWithOptions(options);
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}

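// Fixture for tests verifying the delegate's behavior when some operations
// are not supported by the target device or by its feature level.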
struct UnsupportedOperationOnDeviceTest
    : ::tflite::delegate::nnapi::NnApiDelegateMockTest {};

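// Mixin owning a StatefulNnApiDelegate configured either with a target
// accelerator name or with the disallow_nnapi_cpu and partition limit
// options.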
class AcceleratedModel {
 public:
  StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); }

 protected:
  // Builds a delegate with the given target accelerator name.
  AcceleratedModel(const NnApi* nnapi, const std::string& accelerator_name,
                   int max_nnapi_partitions = 0) {
    StatefulNnApiDelegate::Options options;
    options.accelerator_name = accelerator_name.c_str();
    options.max_number_delegated_partitions = max_nnapi_partitions;
    stateful_delegate_ =
        std::make_unique<StatefulNnApiDelegate>(nnapi, options);
  }

  // Builds a delegate with no target accelerator name; the NNAPI CPU fallback
  // implementation can be disabled through the disallow_nnapi_cpu flag.
  AcceleratedModel(const NnApi* nnapi, bool disallow_nnapi_cpu,
                   int max_nnapi_partitions = 0) {
    StatefulNnApiDelegate::Options options;
    options.disallow_nnapi_cpu = disallow_nnapi_cpu;
    options.max_number_delegated_partitions = max_nnapi_partitions;
    stateful_delegate_ =
        std::make_unique<StatefulNnApiDelegate>(nnapi, options);
  }

 private:
  std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
};

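// Single-op ARG_MAX model used to exercise feature-level checks: ARG_MAX is
// only supported by NNAPI starting from version 1.2.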
class ArgMaxOpModel : public SingleOpModel, public AcceleratedModel {
 public:
  // Takes the device name as std::string so that the string-based
  // AcceleratedModel constructor is selected; a const char* argument would
  // prefer the bool (disallow_nnapi_cpu) overload through a standard
  // pointer-to-bool conversion.
  ArgMaxOpModel(std::initializer_list<int> input_shape, TensorType input_type,
                int axis_value, TensorType output_type, const NnApi* nnapi,
                const std::string& device_name)
      : SingleOpModel(), AcceleratedModel(nnapi, device_name) {
    Init(input_shape, input_type, axis_value, output_type);
  }

  ArgMaxOpModel(std::initializer_list<int> input_shape, TensorType input_type,
                int axis_value, TensorType output_type, const NnApi* nnapi,
                bool disallow_nnapi_cpu)
      : SingleOpModel(), AcceleratedModel(nnapi, disallow_nnapi_cpu) {
    Init(input_shape, input_type, axis_value, output_type);
  }

  int input() const { return input_; }

 protected:
  int input_;
  int axis_;
  int output_;

  void Init(std::initializer_list<int> input_shape, TensorType input_type,
            int axis_value, TensorType output_type) {
    SetDelegate(GetDelegate());
    input_ = AddInput(input_type);
    axis_ = AddConstInput(TensorType_INT32, {axis_value}, {1});
    output_ = AddOutput(output_type);

    SetBuiltinOp(BuiltinOperator_ARG_MAX, BuiltinOptions_ArgMaxOptions,
                 CreateArgMaxOptions(builder_, output_type).Union());
    BuildInterpreter({input_shape, {1}});
  }
};

TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldUseDeviceFeatureLevelWhenSpecifyingTargetDevice) {
  nnapi_mock_->SetAndroidSdkVersion(29);
  nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/28);
  // Stub the supported operations so that, in the first case, the delegate is
  // not applied because the device feature level is too low, not because the
  // operations are unsupported by the device.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 1, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                  TensorType_INT32, nnapi_mock_->GetNnApi(), "test-device");
  m.PopulateTensor<float>(m.input(), {0.1, 0.9, 0.7, 0.3});
  m.Invoke();

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1)
      << "Expected ArgMax not to be delegated since it is not supported "
         "before NNAPI 1.2 and the device declares support for NNAPI 1.1 "
         "only.";

  nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/29);

  ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                   TensorType_INT32, nnapi_mock_->GetNnApi(), "test-device");
  m1.PopulateTensor<float>(m1.input(), {0.1, 0.9, 0.7, 0.3});
  m1.Invoke();

  EXPECT_EQ(m1.CountOpsExecutedByCpuKernel(), 0)
      << "Expected ArgMax to be delegated since it is supported in NNAPI 1.2.";
}

TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldUseDeviceFeatureLevelWhenDisablingCPU) {
  nnapi_mock_->SetAndroidSdkVersion(29);
  nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/28);
  // Stub the supported operations so that, in the first case, the delegate is
  // not applied because the device feature level is too low, not because the
  // operations are unsupported by the device.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 1, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                  TensorType_INT32, nnapi_mock_->GetNnApi(),
                  /*disallow_nnapi_cpu=*/true);
  m.PopulateTensor<float>(m.input(), {0.1, 0.9, 0.7, 0.3});
  m.Invoke();

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1)
      << "Expected ArgMax not to be delegated since it is not supported "
         "before NNAPI 1.2 and the device declares support for NNAPI 1.1 "
         "only.";

  ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                   TensorType_INT32, nnapi_mock_->GetNnApi(),
                   /*disallow_nnapi_cpu=*/false);
  m1.PopulateTensor<float>(m1.input(), {0.1, 0.9, 0.7, 0.3});
  m1.Invoke();

  EXPECT_EQ(m1.CountOpsExecutedByCpuKernel(), 0)
      << "Expected ArgMax to be delegated since the NNAPI CPU implementation "
         "is enabled.";

  nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/29);

  ArgMaxOpModel m2({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                   TensorType_INT32, nnapi_mock_->GetNnApi(),
                   /*disallow_nnapi_cpu=*/true);
  m2.PopulateTensor<float>(m2.input(), {0.1, 0.9, 0.7, 0.3});
  m2.Invoke();

  EXPECT_EQ(m2.CountOpsExecutedByCpuKernel(), 0)
      << "Expected ArgMax to be delegated since it is supported in NNAPI 1.2.";
}

// This is a model with two ops:
//
//  input1 ---->
//                ADD --
//  input2 -->          |
//                       -->
//                           SUB --> output
//  input3 ----------------->
//
class AddSubOpsAcceleratedModel : public MultiOpModel, public AcceleratedModel {
 public:
  AddSubOpsAcceleratedModel(const TensorData& input1, const TensorData& input2,
                            const TensorData& input3, const TensorData& output,
                            ActivationFunctionType activation_type,
                            const NnApi* nnapi,
                            const std::string& accelerator_name,
                            bool allow_fp32_relax_to_fp16 = false)
      : MultiOpModel(), AcceleratedModel(nnapi, accelerator_name) {
    SetDelegate(GetDelegate());
    Init(input1, input2, input3, output, activation_type,
         allow_fp32_relax_to_fp16);
  }

  int input1() { return input1_; }
  int input2() { return input2_; }
  int input3() { return input3_; }

  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }

 protected:
  int input1_;
  int input2_;
  int input3_;
  int output_;

 private:
  // Performs initialization logic shared across all constructors.
  void Init(const TensorData& input1, const TensorData& input2,
            const TensorData& input3, const TensorData& output,
            ActivationFunctionType activation_type,
            bool allow_fp32_relax_to_fp16 = false) {
    input1_ = AddInput(input1);
    input2_ = AddInput(input2);
    input3_ = AddInput(input3);
    const int add_output = AddInnerTensor<float>(output);
    output_ = AddOutput(output);
    AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                 CreateAddOptions(builder_, activation_type).Union(),
                 {input1_, input2_}, {add_output});
    AddBuiltinOp(BuiltinOperator_SUB, BuiltinOptions_SubOptions,
                 CreateSubOptions(builder_, activation_type).Union(),
                 {add_output, input3_}, {output_});
    BuildInterpreter({GetShape(input1_), GetShape(input2_), GetShape(input3_)},
                     /*num_threads=*/-1, allow_fp32_relax_to_fp16,
                     /*apply_delegate=*/true);
  }
};

int should_build_model_with_sup_ops_compilation_model_create_count = 0;
int should_build_model_with_sup_ops_add_operation_count = 0;
TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldBuildModelWithOnlyDeviceSupportedOps) {
  nnapi_mock_->SetNnapiSupportedDevice("test-device");

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // Report only the first op as supported; this also exercises the
        // assertion on model caching below.
        supportedOps[0] = true;
        supportedOps[1] = false;
        return ANEURALNETWORKS_NO_ERROR;
      });

  nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int {
    ++should_build_model_with_sup_ops_compilation_model_create_count;
    *model = reinterpret_cast<ANeuralNetworksModel*>(1);
    return ANEURALNETWORKS_NO_ERROR;
  });

  nnapi_mock_->StubAddOperationWith(
      [](ANeuralNetworksModel* model, ANeuralNetworksOperationType type,
         uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
         const uint32_t* outputs) -> int {
        ++should_build_model_with_sup_ops_add_operation_count;
        return ANEURALNETWORKS_NO_ERROR;
      });

  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  m.Invoke();

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
  ASSERT_EQ(should_build_model_with_sup_ops_compilation_model_create_count, 2)
      << "A model with unsupported operations has been cached instead of "
         "being rebuilt with only the supported ones.";
  EXPECT_EQ(should_build_model_with_sup_ops_add_operation_count, 3)
      << "The second model should contain only one operation.";
}

TEST_F(UnsupportedOperationOnDeviceTest, ShouldRunOnCpuIfDeviceSupportsNoOps) {
  nnapi_mock_->SetNnapiSupportedDevice("test-device");

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 2, false);
        return ANEURALNETWORKS_NO_ERROR;
      });

  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  m.Invoke();

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 2);
}

int should_cache_model_compilation_model_create_count = 0;
TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) {
  nnapi_mock_->SetNnapiSupportedDevice("test-device");

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 2, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int {
    ++should_cache_model_compilation_model_create_count;
    *model = reinterpret_cast<ANeuralNetworksModel*>(1);
    return ANEURALNETWORKS_NO_ERROR;
  });

  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  m.Invoke();

  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
  EXPECT_EQ(should_cache_model_compilation_model_create_count, 1);
}

TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldNotApplySupportedOperationsFilterBeforeAndroidSdk29) {
  nnapi_mock_->SetAndroidSdkVersion(28, /*set_unsupported_ops_to_null=*/true);
  nnapi_mock_->ModelCreateReturns<0>();
  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  m.Invoke();

  // Delegation succeeded without failures and all nodes have been delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
}

// This is a model with two ops:
//
//  input1 ----> HARD_SWISH ---->
//                                ADD --> output
//  input2 ---------------------->
//
class HardSwishAddOpsAcceleratedModel : public MultiOpModel,
                                        public AcceleratedModel {
 public:
  HardSwishAddOpsAcceleratedModel(const TensorData& input1,
                                  const TensorData& input2,
                                  const TensorData& output,
                                  ActivationFunctionType activation_type,
                                  const NnApi* nnapi,
                                  const std::string& accelerator_name,
                                  bool allow_fp32_relax_to_fp16 = false)
      : MultiOpModel(), AcceleratedModel(nnapi, accelerator_name) {
    SetDelegate(GetDelegate());
    Init(input1, input2, output, activation_type, allow_fp32_relax_to_fp16);
  }

  int input1() { return input1_; }
  int input2() { return input2_; }

  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }

 protected:
  int input1_;
  int input2_;
  int output_;

 private:
  // Performs initialization logic shared across all constructors.
  void Init(const TensorData& input1, const TensorData& input2,
            const TensorData& output, ActivationFunctionType activation_type,
            bool allow_fp32_relax_to_fp16 = false) {
    input1_ = AddInput(input1);
    input2_ = AddInput(input2);
    const int hard_swish_output = AddInnerTensor<float>(output);
    output_ = AddOutput(output);
    AddBuiltinOp(BuiltinOperator_HARD_SWISH, BuiltinOptions_HardSwishOptions,
                 CreateHardSwishOptions(builder_).Union(), {input1_},
                 {hard_swish_output});
    // Per the diagram above, ADD consumes input2 and the HardSwish output.
    AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                 CreateAddOptions(builder_, activation_type).Union(),
                 {input2_, hard_swish_output}, {output_});
    BuildInterpreter({GetShape(input1_), GetShape(input2_)}, /*num_threads=*/-1,
                     allow_fp32_relax_to_fp16, /*apply_delegate=*/true);
  }
};

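// Tests for TFLite ops that the delegate maps to multiple NNAPI operations
// (e.g. HARD_SWISH): whether such an op is delegated depends on the support
// for all of its constituent NNAPI ops.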
struct TfLiteOpMappedToMultipleNnApiOps
    : ::tflite::delegate::nnapi::NnApiDelegateMockTest {};

TEST_F(TfLiteOpMappedToMultipleNnApiOps, AllConstituentOpsNotSupported) {
  nnapi_mock_->ModelCreateReturns<0>();

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // HardSwish is mapped to 4 NNAPI ops, none of which is supported.
        std::fill(supportedOps, supportedOps + 4, false);
        // After that we have the ADD op, which is supported.
        supportedOps[4] = true;
        return ANEURALNETWORKS_NO_ERROR;
      });

  HardSwishAddOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE,
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.Invoke();

  // Delegation succeeded without failures: HardSwish has not been delegated
  // but Add has been correctly delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}

TEST_F(TfLiteOpMappedToMultipleNnApiOps, NotAllConstituentOpsSupported) {
  nnapi_mock_->ModelCreateReturns<0>();
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // HardSwish is mapped to 4 NNAPI ops (the first 4 ones), so we have 5
        // ops in the NNAPI model.
        std::fill(supportedOps, supportedOps + 5, true);
        // One of the NNAPI ops required by HardSwish is not supported.
        supportedOps[2] = false;
        return ANEURALNETWORKS_NO_ERROR;
      });

  HardSwishAddOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE,
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.Invoke();

  // Delegation succeeded without failures: HardSwish has not been delegated
  // but Add is delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}

TEST_F(TfLiteOpMappedToMultipleNnApiOps, AllConstituentOpsSupported) {
  nnapi_mock_->ModelCreateReturns<0>();
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // HardSwish is mapped to 4 NNAPI ops (the first 4 ones), so we have 5
        // ops in the NNAPI model. All ops are supported by the accelerator.
        std::fill(supportedOps, supportedOps + 5, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  HardSwishAddOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE,
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.Invoke();

  // Delegation succeeded without failures and all nodes have been delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
}

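// Conv2D model with float input and quantized uint8 weights. When delegated,
// the weights require an extra Dequantize operation in the NNAPI model.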
class QuantizedWeightsConvolutionOpModel : public SingleOpModel,
                                           public AcceleratedModel {
 public:
  QuantizedWeightsConvolutionOpModel(
      const NnApi* nnapi, std::string accelerator_name, const TensorData& input,
      const TensorData& filter, const TensorData& output, int stride_width = 2,
      int stride_height = 2, enum Padding padding = Padding_VALID,
      enum ActivationFunctionType activation = ActivationFunctionType_NONE,
      int dilation_width_factor = 1, int dilation_height_factor = 1,
      int num_threads = -1, std::initializer_list<uint8_t> filter_data = {})
      : SingleOpModel(), AcceleratedModel(nnapi, accelerator_name) {
    SetDelegate(GetDelegate());

    input_ = AddInput(input);

    if (filter_data.size()) {
      filter_ = AddConstInput(filter, filter_data);
    } else {
      filter_ = AddInput(filter);
    }

    int bias_size = GetShape(filter_)[0];

    bias_ = AddInput({TensorType_FLOAT32, {bias_size}});

    output_ = AddOutput(output);

    SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions,
                 CreateConv2DOptions(
                     builder_, padding, stride_width, stride_height, activation,
                     dilation_width_factor, dilation_height_factor)
                     .Union());

    BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)},
                     num_threads, /*allow_fp32_relax_to_fp16=*/false,
                     /*apply_delegate=*/true);
  }

  void SetInput(std::initializer_list<float> data) {
    PopulateTensor(input_, data);
  }

  void SetFilter(std::initializer_list<float> data) {
    QuantizeAndPopulate<uint8_t>(filter_, data);
  }

  void SetBias(std::initializer_list<float> data) {
    PopulateTensor(bias_, data);
  }

  std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); }

  std::vector<float> GetDequantizedOutput() {
    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
                               GetScale(output_), GetZeroPoint(output_));
  }

 protected:
  int input_;
  int filter_;
  int bias_;
  int output_;
};

int quantized_conv2d_model_added_nnapi_ops_count = 0;
TEST_F(TfLiteOpMappedToMultipleNnApiOps,
       AddedDequantizationsAreAccountedInModelOps) {
  nnapi_mock_->ModelCreateReturns<0>();
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps,
                  supportedOps + quantized_conv2d_model_added_nnapi_ops_count,
                  true);
        return ANEURALNETWORKS_NO_ERROR;
      });
  nnapi_mock_->StubAddOperationWith(
      [](ANeuralNetworksModel* model, ANeuralNetworksOperationType type,
         uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
         const uint32_t* outputs) -> int {
        ++quantized_conv2d_model_added_nnapi_ops_count;
        return ANEURALNETWORKS_NO_ERROR;
      });

  QuantizedWeightsConvolutionOpModel m(
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device", {TensorType_FLOAT32, {2, 2, 4, 1}},
      {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64}, {TensorType_FLOAT32, {}});
  m.SetInput({
      // First batch
      1, 1, 1, 1,  // row = 1
      2, 2, 2, 2,  // row = 2
      // Second batch
      1, 2, 3, 4,  // row = 1
      1, 2, 3, 4,  // row = 2
  });
  m.SetFilter({
      1, 2, 3, 4,    // first 2x2 filter
      -1, 1, -1, 1,  // second 2x2 filter
      -1, -1, 1, 1,  // third 2x2 filter
  });
  m.SetBias({1, 2, 3});

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
  // When delegating a quantized Conv2D, a Dequantize operation is added to
  // the model for each quantized input. In this case one Dequantize op is
  // expected for the weights, resulting in a model with 2 NNAPI operations.
  EXPECT_EQ(quantized_conv2d_model_added_nnapi_ops_count, 2);
}

// Model with a chain of no-op ADD operations (each adds a zero tensor)
// interleaved with no-op custom nodes.
class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
 public:
  LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
                    const std::unordered_set<int>& custom_nodes_indexes,
                    const NnApi* nnapi, const std::string& accelerator_name,
                    int max_nnapi_partitions)
      : MultiOpModel(),
        AcceleratedModel(nnapi, accelerator_name, max_nnapi_partitions) {
    Init(input_shape, graph_size, custom_nodes_indexes);
  }

  LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
                    const std::unordered_set<int>& custom_nodes_indexes,
                    const NnApi* nnapi, int max_nnapi_partitions)
      : MultiOpModel(), AcceleratedModel(nnapi, false, max_nnapi_partitions) {
    Init(input_shape, graph_size, custom_nodes_indexes);
  }

  void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }

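  // Counts the NNAPI partitions, i.e. the nodes in the execution plan that
  // have been replaced by a delegate kernel.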
  int CountNnApiPartitions() {
    return std::count_if(
        std::begin(interpreter_->execution_plan()),
        std::end(interpreter_->execution_plan()), [this](const int node_index) {
          return interpreter_->node_and_registration(node_index)
                     ->first.delegate != nullptr;
        });
  }

 private:
  void Init(const std::vector<int>& input_shape, int graph_size,
            const std::unordered_set<int>& custom_nodes_indexes) {
    SetDelegate(GetDelegate());

    const TensorData tensor_data{TensorType_FLOAT32, input_shape};

    input_ = AddInput(tensor_data);
    zero_input_ = AddInput(tensor_data);

    std::vector<int> intermediate_outputs(graph_size - 1);
    std::generate(
        std::begin(intermediate_outputs), std::end(intermediate_outputs),
        [this, &tensor_data]() { return AddInnerTensor<float>(tensor_data); });

    output_ = AddOutput(tensor_data);

    AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                 CreateAddOptions(builder_).Union(), {input_, zero_input_},
                 {intermediate_outputs[0]});

    for (int i = 0; i < intermediate_outputs.size() - 1; i++) {
      if (custom_nodes_indexes.count(i + 1) == 1) {
        AddCustomOp("custom_no_op", {}, [this]() { return CustomNoOpNode(); },
                    {intermediate_outputs[i]}, {intermediate_outputs[i + 1]});
      } else {
        AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                     CreateAddOptions(builder_).Union(),
                     {intermediate_outputs[i], zero_input_},
                     {intermediate_outputs[i + 1]});
      }
    }

    AddBuiltinOp(
        BuiltinOperator_ADD, BuiltinOptions_AddOptions,
        CreateAddOptions(builder_).Union(),
        {intermediate_outputs[intermediate_outputs.size() - 1], zero_input_},
        {output_});

    BuildInterpreter({GetShape(input_), GetShape(zero_input_)});

    std::vector<float> zero(GetTensorSize(input_), 0.0);
    PopulateTensor(zero_input_, zero);
  }

  // Returns the registration of a custom node that simply copies its input to
  // its output.
  TfLiteRegistration* CustomNoOpNode() {
    static TfLiteRegistration no_op = {
        .init = [](TfLiteContext* context, const char* buffer,
                   size_t length) -> void* { return nullptr; },

        .free = [](TfLiteContext* context, void* buffer) -> void {},

        .prepare = [](TfLiteContext* context,
                      TfLiteNode* node) -> TfLiteStatus {
          if (node->inputs->size != 1 || node->outputs->size != 1) {
            return kTfLiteError;
          }

          return kTfLiteOk;
        },

        .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
          auto input_tensor = context->tensors[node->inputs->data[0]];
          auto output_tensor = context->tensors[node->outputs->data[0]];

          std::copy(input_tensor.data.raw,
                    input_tensor.data.raw + input_tensor.bytes,
                    output_tensor.data.raw);

          return kTfLiteOk;
        },

        .profiling_string = nullptr,
        .builtin_code = kTfLiteBuiltinDelegate,
        .custom_name = "NoOpTestDelegate",
        .version = 1,
    };

    return &no_op;
  }

  int input_;
  int zero_input_;
  int output_;
};

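// Marks a configurable set of node indexes as unsupported when the stubbed
// getSupportedOperationsForDevices implementation is queried.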
class NodeFilter {
 public:
  void ConfigureSupportedNodes(
      int graph_size, const std::unordered_set<int>& unsupported_indexes) {
    graph_size_ = graph_size;
    unsupported_indexes_ = unsupported_indexes;
  }

  void SetNodeSupport(bool* supported_ops) {
    for (int i = 0; i < graph_size_; i++) {
      supported_ops[i] = (unsupported_indexes_.count(i) == 0);
    }
  }

 private:
  int graph_size_;
  std::unordered_set<int> unsupported_indexes_;
};

// The same node filter is shared by all DelegatePartitionLimitTests because
// StubGetSupportedOperationsForDevicesWith expects a capture-less (C-style)
// function, which cannot reference per-test state.
NodeFilter* DelegatePartitionLimitTestNodeFilter() {
  static NodeFilter* node_filter = new NodeFilter();
  return node_filter;
}

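// Tests verifying that the delegate honors the
// max_number_delegated_partitions option when selecting the NNAPI partitions
// to delegate.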
class DelegatePartitionLimitTest
    : public ::tflite::delegate::nnapi::NnApiDelegateMockTest {
 protected:
  // Configures the underlying graph to generate a set of NNAPI partitions
  // with the sizes specified in nnapi_partition_sizes and the given
  // input_shape.
  void Init(int max_nnapi_partitions,
            const std::vector<int>& nnapi_partition_sizes,
            const std::vector<int>& input_shape,
            bool specify_accelerator = true) {
    // The number of graph nodes is the sum of the nodes in the NNAPI
    // partitions plus nnapi_partition_sizes.size() - 1 nodes that are not
    // supported by NNAPI and therefore split the graph into the partitions.
    graph_size_ = std::accumulate(std::begin(nnapi_partition_sizes),
                                  std::end(nnapi_partition_sizes),
                                  nnapi_partition_sizes.size() - 1);

    std::unordered_set<int> unsupported_ops_idxs;
    int partition_node_idx = -1;
    for (int i = 0; i < nnapi_partition_sizes.size() - 1; i++) {
      partition_node_idx += nnapi_partition_sizes[i] + 1;
      unsupported_ops_idxs.insert(partition_node_idx);
    }

    if (specify_accelerator) {
      // Build a model that initially contains a single partition and is then
      // partitioned by checking the operations supported by the target
      // accelerator. This is needed because the stubbed
      // GetSupportedOperationsForDevices API has no way of knowing the size
      // of each partition.
      DelegatePartitionLimitTestNodeFilter()->ConfigureSupportedNodes(
          graph_size_, unsupported_ops_idxs);

      nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
          [](const ANeuralNetworksModel* model,
             const ANeuralNetworksDevice* const* devices, uint32_t num_devices,
             bool* supported_ops) -> int {
            DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(
                supported_ops);
            return ANEURALNETWORKS_NO_ERROR;
          });

      model_ = std::make_unique<LongIdentityModel>(
          input_shape, graph_size_,
          /*custom_nodes_indexes=*/std::unordered_set<int>(),
          nnapi_mock_->GetNnApi(),
          /*accelerator_name=*/"test-device", max_nnapi_partitions);
    } else {
      // Build a model containing custom nodes that won't be supported
      // by the delegate and will generate the partitions.
      model_ = std::make_unique<LongIdentityModel>(
          input_shape, graph_size_, unsupported_ops_idxs,
          nnapi_mock_->GetNnApi(), max_nnapi_partitions);
    }
  }

  std::unique_ptr<LongIdentityModel> model_;

  int OriginalGraphSize() { return graph_size_; }

 private:
  int graph_size_;
};

TEST_F(DelegatePartitionLimitTest, ShouldDelegateOnePartitionOnly) {
  Init(/*max_nnapi_partitions=*/1,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 1);
}

TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfLimitIsZero) {
  Init(/*max_nnapi_partitions=*/0,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}

TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfLimitIsNegative) {
  Init(/*max_nnapi_partitions=*/-1,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}

TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfBelowLimit) {
  Init(/*max_nnapi_partitions=*/3,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}

TEST_F(DelegatePartitionLimitTest, ShouldDelegatePartitionWithHigherNodeCount) {
  const int kLargestModelSize = 3;
  Init(/*max_nnapi_partitions=*/1,
       /*nnapi_partition_sizes=*/{kLargestModelSize, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 1);
  EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(),
            OriginalGraphSize() - kLargestModelSize);
}

TEST_F(DelegatePartitionLimitTest,
       ShouldDelegatePartitionsWithHigherNodeCount) {
  const int kLargestModelSize = 5;
  const int kSecondLargestModelSize = 4;
  Init(/*max_nnapi_partitions=*/2,
       /*nnapi_partition_sizes=*/
       {1, kLargestModelSize, 2, kSecondLargestModelSize},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
  EXPECT_EQ(
      model_->CountOpsExecutedByCpuKernel(),
      OriginalGraphSize() - (kLargestModelSize + kSecondLargestModelSize));
}

TEST_F(DelegatePartitionLimitTest,
       ShouldLimitPartitionsEvenWithoutAcceleratorNameSpecified) {
  const int kLargestModelSize = 5;
  const int kSecondLargestModelSize = 4;
  Init(/*max_nnapi_partitions=*/2,
       /*nnapi_partition_sizes=*/
       {1, kLargestModelSize, 2, kSecondLargestModelSize},
       /*input_shape=*/{1, 2, 2, 1}, /*specify_accelerator=*/false);

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
  EXPECT_EQ(
      model_->CountOpsExecutedByCpuKernel(),
      OriginalGraphSize() - (kLargestModelSize + kSecondLargestModelSize));
}

}  // namespace
}  // namespace tflite