1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include <sys/mman.h>
16 
17 #include <algorithm>
18 #include <array>
19 #include <cstdint>
20 #include <iterator>
21 #include <memory>
22 #include <numeric>
23 #include <ostream>
24 #include <string>
25 #include <unordered_set>
26 #include <vector>
27 
28 #include <gtest/gtest.h>
29 #include "tensorflow/lite/builtin_ops.h"
30 #include "tensorflow/lite/c/common.h"
31 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
32 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
33 #include "tensorflow/lite/interpreter.h"
34 #include "tensorflow/lite/kernels/test_util.h"
35 #include "tensorflow/lite/model.h"
36 #include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
37 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
38 
39 namespace tflite {
40 namespace {
41 
42 class FloatAddOpModel : public SingleOpModel {
43  public:
44   FloatAddOpModel() = default;
Init(const NnApi * nnapi,tflite::StatefulNnApiDelegate::Options options,const TensorData & input1,const TensorData & input2,const TensorData & output,ActivationFunctionType activation_type,bool allow_fp32_relax_to_fp16=false)45   void Init(const NnApi* nnapi, tflite::StatefulNnApiDelegate::Options options,
46             const TensorData& input1, const TensorData& input2,
47             const TensorData& output, ActivationFunctionType activation_type,
48             bool allow_fp32_relax_to_fp16 = false) {
49     stateful_delegate_ =
50         std::make_unique<StatefulNnApiDelegate>(nnapi, options);
51     SetDelegate(stateful_delegate_.get());
52 
53     input1_ = AddInput(input1);
54     input2_ = AddInput(input2);
55     output_ = AddOutput(output);
56     SetBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
57                  CreateAddOptions(builder_, activation_type).Union());
58     BuildInterpreter({GetShape(input1_), GetShape(input2_)}, /*num_threads=*/-1,
59                      allow_fp32_relax_to_fp16, /*apply_delegate=*/false);
60     compilation_status_ = ApplyDelegate();
61   }
62 
input1()63   int input1() { return input1_; }
input2()64   int input2() { return input2_; }
65 
GetOutput()66   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
67 
GetCompilationStatus()68   TfLiteStatus GetCompilationStatus() { return compilation_status_; }
69 
70  protected:
71   int input1_;
72   int input2_;
73   int output_;
74 
75  private:
76   std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
77   TfLiteStatus compilation_status_;
78 };
79 
80 struct NnApiDeviceSelectionTest
81     : ::tflite::delegate::nnapi::NnApiDelegateMockTest {
SetUptflite::__anone115d1810111::NnApiDeviceSelectionTest82   void SetUp() override {
83     ::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp();
84     nnapi_mock_->GetDeviceCountReturnsCount<3>();
85     nnapi_mock_->StubGetDeviceWith(
86         [](uint32_t devIndex, ANeuralNetworksDevice** device) -> int {
87           *device = reinterpret_cast<ANeuralNetworksDevice*>(devIndex + 1);
88           return 0;
89         });
90     nnapi_mock_->StubGetDeviceNameWith(
91         [](const ANeuralNetworksDevice* device, const char** name) -> int {
92           if (device == reinterpret_cast<ANeuralNetworksDevice*>(1)) {
93             *name = "dsp";
94           } else if (device == reinterpret_cast<ANeuralNetworksDevice*>(2)) {
95             *name = "gpu";
96           } else {
97             *name = "nnapi-reference";
98           }
99           return ANEURALNETWORKS_NO_ERROR;
100         });
101     nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
102         [](const ANeuralNetworksModel* model,
103            const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
104            bool* supportedOps) -> int {
105           supportedOps[0] = true;
106           return ANEURALNETWORKS_NO_ERROR;
107         });
108   }
InitWithOptionstflite::__anone115d1810111::NnApiDeviceSelectionTest109   void InitWithOptions(tflite::StatefulNnApiDelegate::Options options) {
110     m.Init(nnapi_mock_->GetNnApi(), options, {TensorType_FLOAT32, {1, 2, 2, 1}},
111            {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
112            ActivationFunctionType_NONE);
113     m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.7, 0.8});
114     m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.5});
115   }
116   FloatAddOpModel m;
117 };
118 
// With the NNAPI CPU allowed and no accelerator name set by the test, the
// delegate must not compile for an explicit device list.
TEST_F(NnApiDeviceSelectionTest, DoesntSetDevicesWhenCpuAllowed) {
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        // Merely reaching this stub is the failure: record it (non-fatally,
        // since this lambda returns a value) and report an error to the caller.
        ADD_FAILURE() << "Should not call createForDevices";
        return 1;
      });

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = false;
  InitWithOptions(options);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}
134 
// Requesting the "dsp" accelerator by name must compile for exactly that one
// device (fake handle 1 in the fixture).
TEST_F(NnApiDeviceSelectionTest, SetsDeviceBasedOnOptions) {
  // NOTE(review): presumably makes the device-less compilation entry point
  // fail, so only the createForDevices path can succeed -- confirm in the mock.
  nnapi_mock_->CompilationCreateReturns<1>();
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* /*model*/,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        EXPECT_EQ(numDevices, 1);
        EXPECT_EQ(devices[0], reinterpret_cast<ANeuralNetworksDevice*>(1));

        const bool only_dsp_requested =
            numDevices == 1 &&
            devices[0] == reinterpret_cast<ANeuralNetworksDevice*>(1);
        if (!only_dsp_requested) {
          return 1;
        }
        // Any non-null fake handle will do for the compilation object.
        *compilation = reinterpret_cast<ANeuralNetworksCompilation*>(3);
        return ANEURALNETWORKS_NO_ERROR;
      });

  tflite::StatefulNnApiDelegate::Options delegate_options;
  delegate_options.accelerator_name = "dsp";
  InitWithOptions(delegate_options);

  ASSERT_EQ(m.Invoke(), kTfLiteOk);
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}
158 
// Disallowing the NNAPI CPU must compile for the two non-reference devices
// only (fake handles 1 and 2, i.e. "dsp" and "gpu" in the fixture).
TEST_F(NnApiDeviceSelectionTest, DisallowsCPUBasedOnOptions) {
  // NOTE(review): presumably makes the device-less compilation entry point
  // fail, so only the createForDevices path can succeed -- confirm in the mock.
  nnapi_mock_->CompilationCreateReturns<1>();
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* /*model*/,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        EXPECT_EQ(numDevices, 2);
        EXPECT_EQ(devices[0], reinterpret_cast<ANeuralNetworksDevice*>(1));
        EXPECT_EQ(devices[1], reinterpret_cast<ANeuralNetworksDevice*>(2));

        const bool accelerators_only =
            numDevices == 2 &&
            devices[0] == reinterpret_cast<ANeuralNetworksDevice*>(1) &&
            devices[1] == reinterpret_cast<ANeuralNetworksDevice*>(2);
        if (!accelerators_only) {
          return 1;
        }
        // Any non-null fake handle will do for the compilation object.
        *compilation = reinterpret_cast<ANeuralNetworksCompilation*>(3);
        return ANEURALNETWORKS_NO_ERROR;
      });

  tflite::StatefulNnApiDelegate::Options delegate_options;
  delegate_options.disallow_nnapi_cpu = true;
  InitWithOptions(delegate_options);

  ASSERT_EQ(m.Invoke(), kTfLiteOk);
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}
184 
// When the only device NNAPI reports is the reference implementation, the
// single ADD op is expected to stay on the TFLite CPU kernel, here with the
// NNAPI CPU allowed.
TEST_F(NnApiDeviceSelectionTest,
       DoesNotDelegateIfOnlyReferenceDeviceIsAvailable_CpuEnabled) {
  // Override the fixture's three devices: expose nnapi-reference alone.
  nnapi_mock_->GetDeviceCountReturnsCount<1>();
  nnapi_mock_->GetDeviceNameReturnsName("nnapi-reference");

  tflite::StatefulNnApiDelegate::Options delegate_options;
  delegate_options.disallow_nnapi_cpu = false;
  InitWithOptions(delegate_options);

  ASSERT_EQ(m.Invoke(), kTfLiteOk);
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}
198 
// When the only device NNAPI reports is the reference implementation, the
// single ADD op is expected to stay on the TFLite CPU kernel, here with the
// NNAPI CPU disallowed.
TEST_F(NnApiDeviceSelectionTest,
       DoesNotDelegateIfOnlyReferenceDeviceIsAvailable_CpuDisabled) {
  // Override the fixture's three devices: expose nnapi-reference alone.
  nnapi_mock_->GetDeviceCountReturnsCount<1>();
  nnapi_mock_->GetDeviceNameReturnsName("nnapi-reference");

  tflite::StatefulNnApiDelegate::Options delegate_options;
  delegate_options.disallow_nnapi_cpu = true;
  InitWithOptions(delegate_options);

  ASSERT_EQ(m.Invoke(), kTfLiteOk);
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}
212 
213 struct UnsupportedOperationOnDeviceTest
214     : ::tflite::delegate::nnapi::NnApiDelegateMockTest {};
215 
216 class AcceleratedModel {
217  public:
GetDelegate()218   StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); }
219 
220  protected:
221   // build a delegate with a target accelerator name.
AcceleratedModel(const NnApi * nnapi,const std::string & accelerator_name,int max_nnapi_partitions=0)222   AcceleratedModel(const NnApi* nnapi, const std::string& accelerator_name,
223                    int max_nnapi_partitions = 0) {
224     StatefulNnApiDelegate::Options options;
225     options.accelerator_name = accelerator_name.c_str();
226     options.max_number_delegated_partitions = max_nnapi_partitions;
227     stateful_delegate_ =
228         std::make_unique<StatefulNnApiDelegate>(nnapi, options);
229   }
230 
231   // build a delegate with no target accelerator name, can disable the NNAPI CPU
232   // fallback implementation using the disallow_nnapi_cpu flag.
AcceleratedModel(const NnApi * nnapi,bool disallow_nnapi_cpu,int max_nnapi_partitions=0)233   AcceleratedModel(const NnApi* nnapi, bool disallow_nnapi_cpu,
234                    int max_nnapi_partitions = 0) {
235     StatefulNnApiDelegate::Options options;
236     options.disallow_nnapi_cpu = disallow_nnapi_cpu;
237     options.max_number_delegated_partitions = max_nnapi_partitions;
238     stateful_delegate_ =
239         std::make_unique<StatefulNnApiDelegate>(nnapi, options);
240   }
241 
242  private:
243   std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
244 };
245 
246 class ArgMaxOpModel : public SingleOpModel, public AcceleratedModel {
247  public:
ArgMaxOpModel(std::initializer_list<int> input_shape,TensorType input_type,int axis_value,TensorType output_type,const NnApi * nnapi,const char * device_name)248   ArgMaxOpModel(std::initializer_list<int> input_shape, TensorType input_type,
249                 int axis_value, TensorType output_type, const NnApi* nnapi,
250                 const char* device_name)
251       : SingleOpModel(), AcceleratedModel(nnapi, device_name) {
252     Init(input_shape, input_type, axis_value, output_type);
253   }
254 
ArgMaxOpModel(std::initializer_list<int> input_shape,TensorType input_type,int axis_value,TensorType output_type,const NnApi * nnapi,bool disallow_nnapi_cpu)255   ArgMaxOpModel(std::initializer_list<int> input_shape, TensorType input_type,
256                 int axis_value, TensorType output_type, const NnApi* nnapi,
257                 bool disallow_nnapi_cpu)
258       : SingleOpModel(), AcceleratedModel(nnapi, disallow_nnapi_cpu) {
259     Init(input_shape, input_type, axis_value, output_type);
260   }
261 
input() const262   int input() const { return input_; }
263 
264  protected:
265   int input_;
266   int axis_;
267   int output_;
268 
Init(std::initializer_list<int> input_shape,TensorType input_type,int axis_value,TensorType output_type)269   void Init(std::initializer_list<int> input_shape, TensorType input_type,
270             int axis_value, TensorType output_type) {
271     SetDelegate(GetDelegate());
272     input_ = AddInput(input_type);
273     axis_ = AddConstInput(TensorType_INT32, {axis_value}, {1});
274     output_ = AddOutput(output_type);
275 
276     SetBuiltinOp(BuiltinOperator_ARG_MAX, BuiltinOptions_ArgMaxOptions,
277                  CreateArgMaxOptions(builder_, output_type).Union());
278     BuildInterpreter({input_shape, {1}});
279   }
280 };
281 
// Delegation of ARG_MAX to a named device must follow the feature level
// declared by that device: not delegated at level 28, delegated at level 29.
TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldUseDeviceFeatureLevelWhenSpecifyingTargetDevice) {
  nnapi_mock_->SetAndroidSdkVersion(29);
  nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/28);
  // Setting this here because I want the delegate not to be applied in the
  // first case because the feature level is not high enough and not because the
  // operations are not supported by the device.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 1, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                  TensorType_INT32, nnapi_mock_->GetNnApi(), "test-device");
  m.PopulateTensor<float>(m.input(), {0.1, 0.9, 0.7, 0.3});
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1)
      << "Expected Max not to be delegates since it not supported before NNAPI "
         "1.2 and device declares to support only NNAPI 1.1.";

  nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/29);

  ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                   TensorType_INT32, nnapi_mock_->GetNnApi(), "test-device");
  // Use m1's own input index (the original passed m.input() here).
  m1.PopulateTensor<float>(m1.input(), {0.1, 0.9, 0.7, 0.3});
  ASSERT_EQ(m1.Invoke(), kTfLiteOk);

  EXPECT_EQ(m1.CountOpsExecutedByCpuKernel(), 0)
      << "Expected Max op to be delegated since it is supported in NNAPI 1.2.";
}
316 
// With no target accelerator, delegation of ARG_MAX must depend on the device
// feature level when the NNAPI CPU is disallowed, and must happen regardless
// of it when the NNAPI CPU is allowed.
TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldUseDeviceFeatureLevelWhenDisablingCPU) {
  nnapi_mock_->SetAndroidSdkVersion(29);
  nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/28);
  // Setting this here because I want the delegate not to be applied in the
  // first case because the feature level is not high enough and not because the
  // operations are not supported by the device.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 1, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  // Device at feature level 28, NNAPI CPU disallowed: stays on the CPU kernel.
  ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                  TensorType_INT32, nnapi_mock_->GetNnApi(),
                  /*disallow_nnapi_cpu=*/true);
  m.PopulateTensor<float>(m.input(), {0.1, 0.9, 0.7, 0.3});
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1)
      << "Expected Max not to be delegates since it not supported before NNAPI "
         "1.2 and device declares to support only NNAPI 1.1.";

  // Same device, NNAPI CPU allowed: delegated.
  ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                   TensorType_INT32, nnapi_mock_->GetNnApi(),
                   /*disallow_nnapi_cpu=*/false);
  // Use m1's own input index (the original passed m.input() here).
  m1.PopulateTensor<float>(m1.input(), {0.1, 0.9, 0.7, 0.3});
  ASSERT_EQ(m1.Invoke(), kTfLiteOk);

  EXPECT_EQ(m1.CountOpsExecutedByCpuKernel(), 0)
      << "Expected Max op to be delegated since we enabled NNAPI CPU "
         "implementation.";

  nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/29);

  // Device at feature level 29, NNAPI CPU disallowed: delegated.
  ArgMaxOpModel m2({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                   TensorType_INT32, nnapi_mock_->GetNnApi(),
                   /*disallow_nnapi_cpu=*/true);
  // Use m2's own input index (the original passed m.input() here).
  m2.PopulateTensor<float>(m2.input(), {0.1, 0.9, 0.7, 0.3});
  ASSERT_EQ(m2.Invoke(), kTfLiteOk);

  EXPECT_EQ(m2.CountOpsExecutedByCpuKernel(), 0)
      << "Expected Max op to be delegated since it is supported in NNAPI 1.2.";
}
363 
364 // This is a model with two ops:
365 //
366 //  input1 ---->
367 //                ADD --
368 //  input2   -->        |
369 //                       -->
370 //                          SUB --> output
371 //  input3 ---------------->
372 //
373 class AddSubOpsAcceleratedModel : public MultiOpModel, public AcceleratedModel {
374  public:
AddSubOpsAcceleratedModel(const TensorData & input1,const TensorData & input2,const TensorData & input3,const TensorData & output,ActivationFunctionType activation_type,const NnApi * nnapi,const std::string & accelerator_name,bool allow_fp32_relax_to_fp16=false)375   AddSubOpsAcceleratedModel(const TensorData& input1, const TensorData& input2,
376                             const TensorData& input3, const TensorData& output,
377                             ActivationFunctionType activation_type,
378                             const NnApi* nnapi,
379                             const std::string& accelerator_name,
380                             bool allow_fp32_relax_to_fp16 = false)
381       : MultiOpModel(), AcceleratedModel(nnapi, accelerator_name) {
382     SetDelegate(GetDelegate());
383     Init(input1, input2, input3, output, activation_type,
384          allow_fp32_relax_to_fp16);
385   }
386 
input1()387   int input1() { return input1_; }
input2()388   int input2() { return input2_; }
input3()389   int input3() { return input3_; }
390 
GetOutput()391   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
392 
393  protected:
394   int input1_;
395   int input2_;
396   int input3_;
397   int output_;
398 
399  private:
400   // Performs initialization logic shared across all constructors.
Init(const TensorData & input1,const TensorData & input2,const TensorData & input3,const TensorData & output,ActivationFunctionType activation_type,bool allow_fp32_relax_to_fp16=false)401   void Init(const TensorData& input1, const TensorData& input2,
402             const TensorData& input3, const TensorData& output,
403             ActivationFunctionType activation_type,
404             bool allow_fp32_relax_to_fp16 = false) {
405     input1_ = AddInput(input1);
406     input2_ = AddInput(input2);
407     input3_ = AddInput(input3);
408     const int add_output = AddInnerTensor<float>(output);
409     output_ = AddOutput(output);
410     AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
411                  CreateAddOptions(builder_, activation_type).Union(),
412                  {input1_, input2_}, {add_output});
413     AddBuiltinOp(BuiltinOperator_SUB, BuiltinOptions_SubOptions,
414                  CreateSubOptions(builder_, activation_type).Union(),
415                  {add_output, input3_}, {output_});
416     BuildInterpreter({GetShape(input1_), GetShape(input2_), GetShape(input3_)},
417                      /*num_threads=*/-1, allow_fp32_relax_to_fp16,
418                      /*apply_delegate=*/true);
419   }
420 };
421 
// Call counters updated by the stubs of the test below. The stubs are
// capture-less lambdas, so the counts are kept in globals.
int should_build_model_with_sup_ops_compilation_model_create_count = 0;
int should_build_model_with_sup_ops_add_operation_count = 0;
// When the device supports only the first of the two operations, a second
// NNAPI model containing only the supported operation must be built.
TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldBuildModelWithOnlyDeviceSupportedOps) {
  // Restart the counters on every run: the assertions below check exact
  // totals, which would otherwise be wrong whenever this test body executes
  // more than once in the same process (e.g. with --gtest_repeat).
  should_build_model_with_sup_ops_compilation_model_create_count = 0;
  should_build_model_with_sup_ops_add_operation_count = 0;

  nnapi_mock_->SetNnapiSupportedDevice("test-device");

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // Returning the first as supported since this will leverage
        // the assertion on caching.
        supportedOps[0] = true;
        supportedOps[1] = false;
        return ANEURALNETWORKS_NO_ERROR;
      });

  nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int {
    ++should_build_model_with_sup_ops_compilation_model_create_count;
    *model = reinterpret_cast<ANeuralNetworksModel*>(1);
    return ANEURALNETWORKS_NO_ERROR;
  });

  nnapi_mock_->StubAddOperationWith(
      [](ANeuralNetworksModel* model, ANeuralNetworksOperationType type,
         uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
         const uint32_t* outputs) -> int {
        ++should_build_model_with_sup_ops_add_operation_count;
        return ANEURALNETWORKS_NO_ERROR;
      });

  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
  ASSERT_EQ(should_build_model_with_sup_ops_compilation_model_create_count, 2)
      << "Model with unsupported operations has been cached";
  EXPECT_EQ(should_build_model_with_sup_ops_add_operation_count, 3)
      << "The second model should contain only one operation";
}
471 
// If the device reports neither operation as supported, both must run on the
// TFLite CPU kernels.
TEST_F(UnsupportedOperationOnDeviceTest, ShouldRunOnCpuIfDeviceSupportsNoOps) {
  nnapi_mock_->SetNnapiSupportedDevice("test-device");

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* /*model*/,
         const ANeuralNetworksDevice* const* /*devices*/,
         uint32_t /*numDevices*/, bool* supportedOps) -> int {
        // Two operations (ADD and SUB), none of them supported.
        std::fill_n(supportedOps, 2, false);
        return ANEURALNETWORKS_NO_ERROR;
      });

  const TensorData operand_spec{TensorType_FLOAT32, {1, 2, 2, 1}};
  const TensorData result_spec{TensorType_FLOAT32, {}};
  AddSubOpsAcceleratedModel m(operand_spec, operand_spec, operand_spec,
                              result_spec, ActivationFunctionType_NONE,
                              nnapi_mock_->GetNnApi(),
                              /*accelerator_name=*/"test-device");

  const std::vector<float> first_values = {-2.0, 0.2, 0.7, 0.9};
  const std::vector<float> other_values = {0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), first_values);
  m.PopulateTensor<float>(m.input2(), other_values);
  m.PopulateTensor<float>(m.input3(), other_values);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 2);
}
497 
// Number of NNAPI model creations observed by the test below. The stub is a
// capture-less lambda, so the count is kept in a global.
int should_cache_model_compilation_model_create_count = 0;
// When every operation is supported, the NNAPI model must be created only
// once.
TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) {
  // Restart the counter on every run: the assertion below checks an exact
  // total, which would otherwise be wrong whenever this test body executes
  // more than once in the same process (e.g. with --gtest_repeat).
  should_cache_model_compilation_model_create_count = 0;

  nnapi_mock_->SetNnapiSupportedDevice("test-device");

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 2, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int {
    ++should_cache_model_compilation_model_create_count;
    *model = reinterpret_cast<ANeuralNetworksModel*>(1);
    return ANEURALNETWORKS_NO_ERROR;
  });

  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
  EXPECT_EQ(should_cache_model_compilation_model_create_count, 1);
}
531 
// On Android SDK 28 the supported-operations query is not available (the mock
// is asked to null it out), so every node is expected to be delegated.
TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldNotApplySupportedOperationsFilterBeforeAndroidSdk29) {
  nnapi_mock_->SetAndroidSdkVersion(28, /*set_unsupported_ops_to_null=*/true);
  nnapi_mock_->ModelCreateReturns<0>();

  const TensorData operand_spec{TensorType_FLOAT32, {1, 2, 2, 1}};
  const TensorData result_spec{TensorType_FLOAT32, {}};
  AddSubOpsAcceleratedModel m(operand_spec, operand_spec, operand_spec,
                              result_spec, ActivationFunctionType_NONE,
                              nnapi_mock_->GetNnApi(),
                              /*accelerator_name=*/"test-device");

  const std::vector<float> first_values = {-2.0, 0.2, 0.7, 0.9};
  const std::vector<float> other_values = {0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), first_values);
  m.PopulateTensor<float>(m.input2(), other_values);
  m.PopulateTensor<float>(m.input3(), other_values);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  // Delegation succeeded without failures and all nodes have been delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
}
551 
552 // This is a model with two ops:
553 //
554 //  input1 ----> HARD_SWISH ---->
555 //                                ADD --> output
556 //  input2 ---------------------->
557 //
558 class HardSwishAddOpsAcceleratedModel : public MultiOpModel,
559                                         public AcceleratedModel {
560  public:
HardSwishAddOpsAcceleratedModel(const TensorData & input1,const TensorData & input2,const TensorData & output,ActivationFunctionType activation_type,const NnApi * nnapi,const std::string & accelerator_name,bool allow_fp32_relax_to_fp16=false)561   HardSwishAddOpsAcceleratedModel(const TensorData& input1,
562                                   const TensorData& input2,
563                                   const TensorData& output,
564                                   ActivationFunctionType activation_type,
565                                   const NnApi* nnapi,
566                                   const std::string& accelerator_name,
567                                   bool allow_fp32_relax_to_fp16 = false)
568       : MultiOpModel(), AcceleratedModel(nnapi, accelerator_name) {
569     SetDelegate(GetDelegate());
570     Init(input1, input2, output, activation_type, allow_fp32_relax_to_fp16);
571   }
572 
input1()573   int input1() { return input1_; }
input2()574   int input2() { return input2_; }
575 
GetOutput()576   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
577 
578  protected:
579   int input1_;
580   int input2_;
581   int output_;
582 
583  private:
584   // Performs initialization logic shared across all constructors.
Init(const TensorData & input1,const TensorData & input2,const TensorData & output,ActivationFunctionType activation_type,bool allow_fp32_relax_to_fp16=false)585   void Init(const TensorData& input1, const TensorData& input2,
586             const TensorData& output, ActivationFunctionType activation_type,
587             bool allow_fp32_relax_to_fp16 = false) {
588     input1_ = AddInput(input1);
589     input2_ = AddInput(input2);
590     const int hard_swish_output = AddInnerTensor<float>(output);
591     output_ = AddOutput(output);
592     AddBuiltinOp(BuiltinOperator_HARD_SWISH, BuiltinOptions_HardSwishOptions,
593                  CreateHardSwishOptions(builder_).Union(), {input1_},
594                  {hard_swish_output});
595     AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
596                  CreateAddOptions(builder_, activation_type).Union(),
597                  {input1_, hard_swish_output}, {output_});
598     BuildInterpreter({GetShape(input1_), GetShape(input2_)}, /*num_threads=*/-1,
599                      allow_fp32_relax_to_fp16, /*apply_delegate=*/true);
600   }
601 };
602 
603 struct TfLiteOpMappedToMultipleNnApiOps
604     : ::tflite::delegate::nnapi::NnApiDelegateMockTest {};
605 
// None of the NNAPI ops that make up HardSwish is supported, while ADD is:
// only ADD must be delegated.
TEST_F(TfLiteOpMappedToMultipleNnApiOps, AllCostituentOpsNotSupported) {
  nnapi_mock_->ModelCreateReturns<0>();

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* /*model*/,
         const ANeuralNetworksDevice* const* /*devices*/,
         uint32_t /*numDevices*/, bool* supportedOps) -> int {
        // HardSwish is mapped to 4 NNAPI ops, none of which supported.
        std::fill_n(supportedOps, 4, false);
        // After that we have the ADD op that is supported.
        supportedOps[4] = true;
        return ANEURALNETWORKS_NO_ERROR;
      });

  const TensorData operand_spec{TensorType_FLOAT32, {1, 2, 2, 1}};
  const TensorData result_spec{TensorType_FLOAT32, {}};
  HardSwishAddOpsAcceleratedModel m(operand_spec, operand_spec, result_spec,
                                    ActivationFunctionType_NONE,
                                    nnapi_mock_->GetNnApi(),
                                    /*accelerator_name=*/"test-device");

  const std::vector<float> first_values = {-2.0, 0.2, 0.7, 0.9};
  const std::vector<float> second_values = {0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), first_values);
  m.PopulateTensor<float>(m.input2(), second_values);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  // Delegation succeeded without failures and HardSwish has not been delegated
  // but Add has been correctly delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}
635 
// A single unsupported NNAPI op among those that make up HardSwish must keep
// the whole HardSwish node on the CPU, while ADD is still delegated.
TEST_F(TfLiteOpMappedToMultipleNnApiOps, NotAllConstitutentOpsSupported) {
  nnapi_mock_->ModelCreateReturns<0>();
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* /*model*/,
         const ANeuralNetworksDevice* const* /*devices*/,
         uint32_t /*numDevices*/, bool* supportedOps) -> int {
        // HardSwish is mapped to 4 NNAPI ops (the first 4 ones), so we have 5
        // ops in the NNAPI model.
        std::fill_n(supportedOps, 5, true);
        // One of the NNAPI ops required by HardSwish is not supported.
        supportedOps[2] = false;
        return ANEURALNETWORKS_NO_ERROR;
      });

  const TensorData operand_spec{TensorType_FLOAT32, {1, 2, 2, 1}};
  const TensorData result_spec{TensorType_FLOAT32, {}};
  HardSwishAddOpsAcceleratedModel m(operand_spec, operand_spec, result_spec,
                                    ActivationFunctionType_NONE,
                                    nnapi_mock_->GetNnApi(),
                                    /*accelerator_name=*/"test-device");

  const std::vector<float> first_values = {-2.0, 0.2, 0.7, 0.9};
  const std::vector<float> second_values = {0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), first_values);
  m.PopulateTensor<float>(m.input2(), second_values);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  // Delegation succeeded without failures. HardSwish has not been delegated
  // but Add is delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}
665 
// Checks that the whole model is delegated when the accelerator supports
// every NNAPI op generated for it.
TEST_F(TfLiteOpMappedToMultipleNnApiOps, AllConstitutentOpsSupported) {
  nnapi_mock_->ModelCreateReturns<0>();
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* /*model*/,
         const ANeuralNetworksDevice* const* /*devices*/,
         uint32_t /*numDevices*/, bool* supportedOps) -> int {
        // HardSwish is mapped to 4 NNAPI ops (the first 4 ones), so the NNAPI
        // model has 5 ops; the accelerator supports all of them.
        constexpr int kNnApiOpsCount = 5;
        std::fill_n(supportedOps, kNnApiOpsCount, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  HardSwishAddOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE,
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");

  const std::vector<float> first_operand{-2.0, 0.2, 0.7, 0.9};
  m.PopulateTensor<float>(m.input1(), first_operand);
  const std::vector<float> second_operand{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input2(), second_operand);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  // Delegation succeeded without failures and all nodes have been delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
}
693 
694 class QuantizedWeightsConvolutionOpModel : public SingleOpModel,
695                                            public AcceleratedModel {
696  public:
QuantizedWeightsConvolutionOpModel(const NnApi * nnapi,std::string accelerator_name,const TensorData & input,const TensorData & filter,const TensorData & output,int stride_width=2,int stride_height=2,enum Padding padding=Padding_VALID,enum ActivationFunctionType activation=ActivationFunctionType_NONE,int dilation_width_factor=1,int dilation_height_factor=1,int num_threads=-1,std::initializer_list<uint8_t> filter_data={})697   QuantizedWeightsConvolutionOpModel(
698       const NnApi* nnapi, std::string accelerator_name, const TensorData& input,
699       const TensorData& filter, const TensorData& output, int stride_width = 2,
700       int stride_height = 2, enum Padding padding = Padding_VALID,
701       enum ActivationFunctionType activation = ActivationFunctionType_NONE,
702       int dilation_width_factor = 1, int dilation_height_factor = 1,
703       int num_threads = -1, std::initializer_list<uint8_t> filter_data = {})
704       : SingleOpModel(), AcceleratedModel(nnapi, accelerator_name) {
705     SetDelegate(GetDelegate());
706 
707     input_ = AddInput(input);
708 
709     if (filter_data.size()) {
710       filter_ = AddConstInput(filter, filter_data);
711     } else {
712       filter_ = AddInput(filter);
713     }
714 
715     int bias_size = GetShape(filter_)[0];
716 
717     bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
718 
719     output_ = AddOutput(output);
720 
721     SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions,
722                  CreateConv2DOptions(
723                      builder_, padding, stride_width, stride_height, activation,
724                      dilation_width_factor, dilation_height_factor)
725                      .Union());
726 
727     BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)},
728                      num_threads, /*allow_fp32_relax_to_fp16=*/false,
729                      /*apply_delegate=*/true);
730   }
731 
SetInput(std::initializer_list<float> data)732   void SetInput(std::initializer_list<float> data) {
733     PopulateTensor(input_, data);
734   }
735 
SetFilter(std::initializer_list<float> data)736   void SetFilter(std::initializer_list<float> data) {
737     QuantizeAndPopulate<uint8_t>(filter_, data);
738   }
739 
SetBias(std::initializer_list<float> data)740   void SetBias(std::initializer_list<float> data) {
741     PopulateTensor(input_, data);
742   }
743 
GetOutput()744   std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); }
GetDequantizedOutput()745   std::vector<float> GetDequantizedOutput() {
746     return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
747                                GetScale(output_), GetZeroPoint(output_));
748   }
749 
750  protected:
751   int input_;
752   int filter_;
753   int bias_;
754   int output_;
755 };
756 
// Number of NNAPI operations added to the model by the test below. It is a
// file-level variable because the NNAPI stubs are capture-less lambdas and
// cannot reference test-local state.
int quantized_conv2d_model_added_nnapi_ops_count = 0;

// Checks that the Dequantize ops the delegate adds for quantized inputs are
// counted among the NNAPI model operations.
TEST_F(TfLiteOpMappedToMultipleNnApiOps,
       AddedDequantizationsAreAccountedInModelOps) {
  // Start from a clean count so that the expectation below still holds when
  // the test is executed more than once in the same process.
  quantized_conv2d_model_added_nnapi_ops_count = 0;

  nnapi_mock_->ModelCreateReturns<0>();
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // Every operation added so far is reported as supported.
        std::fill(supportedOps,
                  supportedOps + quantized_conv2d_model_added_nnapi_ops_count,
                  true);
        return ANEURALNETWORKS_NO_ERROR;
      });
  nnapi_mock_->StubAddOperationWith(
      [](ANeuralNetworksModel* model, ANeuralNetworksOperationType type,
         uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
         const uint32_t* outputs) -> int {
        ++quantized_conv2d_model_added_nnapi_ops_count;
        return ANEURALNETWORKS_NO_ERROR;
      });

  QuantizedWeightsConvolutionOpModel m(
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device", {TensorType_FLOAT32, {2, 2, 4, 1}},
      {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64}, {TensorType_FLOAT32, {}});
  m.SetInput({
      // First batch
      1, 1, 1, 1,  // row = 1
      2, 2, 2, 2,  // row = 2
      // Second batch
      1, 2, 3, 4,  // row = 1
      1, 2, 3, 4,  // row = 2
  });
  m.SetFilter({
      1, 2, 3, 4,    // first 2x2 filter
      -1, 1, -1, 1,  // second 2x2 filter
      -1, -1, 1, 1,  // third 2x2 filter
  });
  m.SetBias({1, 2, 3});

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
  // When delegating a quantized Conv2D, a Dequantize operation is added to
  // the model for each quantized input.
  // In our case 1 Dequantize op for the weights is expected, generating
  // a 2 ops model.
  EXPECT_EQ(quantized_conv2d_model_added_nnapi_ops_count, 2);
}
804 
805 // Model with a chain of no-op (add with zero operations)
806 // interleaved with no-op custom nodes.
807 class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
808  public:
LongIdentityModel(const std::vector<int> & input_shape,int graph_size,const std::unordered_set<int> & custom_nodes_indexes,const NnApi * nnapi,const std::string & accelerator_name,int max_nnapi_partitions)809   LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
810                     const std::unordered_set<int>& custom_nodes_indexes,
811                     const NnApi* nnapi, const std::string& accelerator_name,
812                     int max_nnapi_partitions)
813       : MultiOpModel(),
814         AcceleratedModel(nnapi, accelerator_name, max_nnapi_partitions) {
815     Init(input_shape, graph_size, custom_nodes_indexes);
816   }
817 
LongIdentityModel(const std::vector<int> & input_shape,int graph_size,const std::unordered_set<int> & custom_nodes_indexes,const NnApi * nnapi,int max_nnapi_partitions)818   LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
819                     const std::unordered_set<int>& custom_nodes_indexes,
820                     const NnApi* nnapi, int max_nnapi_partitions)
821       : MultiOpModel(), AcceleratedModel(nnapi, false, max_nnapi_partitions) {
822     Init(input_shape, graph_size, custom_nodes_indexes);
823   }
824 
SetInput(std::vector<float> value)825   void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }
826 
CountNnApiPartitions()827   int CountNnApiPartitions() {
828     return std::count_if(
829         std::begin(interpreter_->execution_plan()),
830         std::end(interpreter_->execution_plan()), [this](const int node_index) {
831           return interpreter_->node_and_registration(node_index)
832                      ->first.delegate != nullptr;
833         });
834   }
835 
836  private:
Init(const std::vector<int> & input_shape,int graph_size,const std::unordered_set<int> & custom_nodes_indexes)837   void Init(const std::vector<int>& input_shape, int graph_size,
838             const std::unordered_set<int>& custom_nodes_indexes) {
839     SetDelegate(GetDelegate());
840 
841     const TensorData tensor_data{TensorType_FLOAT32, input_shape};
842 
843     input_ = AddInput(tensor_data);
844     zero_input_ = AddInput(tensor_data);
845 
846     std::vector<int> intermediate_outputs(graph_size - 1);
847     std::generate(
848         std::begin(intermediate_outputs), std::end(intermediate_outputs),
849         [this, &tensor_data]() { return AddInnerTensor<float>(tensor_data); });
850 
851     output_ = AddOutput(tensor_data);
852 
853     AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
854                  CreateAddOptions(builder_).Union(), {input_, zero_input_},
855                  {intermediate_outputs[0]});
856 
857     for (int i = 0; i < intermediate_outputs.size() - 1; i++) {
858       if (custom_nodes_indexes.count(i + 1) == 1) {
859         AddCustomOp("custom_no_op", {}, [this]() { return CustomNoOpNode(); },
860                     {intermediate_outputs[i]}, {intermediate_outputs[i + 1]});
861       } else {
862         AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
863                      CreateAddOptions(builder_).Union(),
864                      {intermediate_outputs[i], zero_input_},
865                      {intermediate_outputs[i + 1]});
866       }
867     }
868 
869     AddBuiltinOp(
870         BuiltinOperator_ADD, BuiltinOptions_AddOptions,
871         CreateAddOptions(builder_).Union(),
872         {intermediate_outputs[intermediate_outputs.size() - 1], zero_input_},
873         {output_});
874 
875     BuildInterpreter({GetShape(input_), GetShape(zero_input_)});
876 
877     std::vector<float> zero(GetTensorSize(input_), 0.0);
878     PopulateTensor(zero_input_, zero);
879   }
880 
881   // Return the registration of a custom node simply copying input to output.
CustomNoOpNode()882   TfLiteRegistration* CustomNoOpNode() {
883     static TfLiteRegistration no_op = {
884         .init = [](TfLiteContext* context, const char* buffer,
885                    size_t length) -> void* { return nullptr; },
886 
887         .free = [](TfLiteContext* context, void* buffer) -> void {},
888 
889         .prepare = [](TfLiteContext* context,
890                       TfLiteNode* node) -> TfLiteStatus {
891           if (node->inputs->size != 1 || node->outputs->size != 1) {
892             return kTfLiteError;
893           }
894 
895           return kTfLiteOk;
896         },
897 
898         .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
899           auto input_tensor = context->tensors[node->inputs->data[0]];
900           auto output_tensor = context->tensors[node->outputs->data[0]];
901 
902           std::copy(input_tensor.data.raw,
903                     input_tensor.data.raw + input_tensor.bytes,
904                     output_tensor.data.raw);
905 
906           return kTfLiteOk;
907         },
908 
909         .profiling_string = nullptr,
910         .builtin_code = kTfLiteBuiltinDelegate,
911         .custom_name = "NoOpTestDelegate",
912         .version = 1,
913     };
914 
915     return &no_op;
916   }
917   int input_;
918   int zero_input_;
919   int output_;
920 };
921 
// Records which operation indexes of a graph are unsupported and writes the
// resulting per-operation support flags into a caller-provided array.
class NodeFilter {
 public:
  // Stores the number of operations in the graph and the set of operation
  // indexes to report as unsupported.
  void ConfigureSupportedNodes(
      int graph_size, const std::unordered_set<int>& unsupported_indexes) {
    graph_size_ = graph_size;
    unsupported_indexes_ = unsupported_indexes;
  }

  // Writes one flag per configured operation into `supported_ops`: false for
  // the indexes configured as unsupported, true otherwise. The array must
  // hold at least as many entries as the configured graph size. Nothing is
  // written if the filter has not been configured.
  void SetNodeSupport(bool* supported_ops) const {
    for (int i = 0; i < graph_size_; i++) {
      supported_ops[i] = (unsupported_indexes_.count(i) == 0);
    }
  }

 private:
  // Zero until configured so that an unconfigured filter is a no-op instead
  // of reading an indeterminate size.
  int graph_size_ = 0;
  std::unordered_set<int> unsupported_indexes_;
};
940 
941 // Using the same node filter for all DelegatePartitionLimitTests
942 // because StubGetSupportedOperationsForDevicesWith wants a C function.
DelegatePartitionLimitTestNodeFilter()943 NodeFilter* DelegatePartitionLimitTestNodeFilter() {
944   static NodeFilter* node_filter = new NodeFilter();
945   return node_filter;
946 }
947 
948 class DelegatePartitionLimitTest
949     : public ::tflite::delegate::nnapi::NnApiDelegateMockTest {
950  protected:
951   // Configure the underlying graph to generate a set of nnapi partition
952   // with the sizes specified in nnapi_partition_sizes and the given
953   // input_shape.
Init(int max_nnapi_partitions,const std::vector<int> & nnapi_partition_sizes,const std::vector<int> & input_shape,bool specify_accelerator=true)954   void Init(int max_nnapi_partitions,
955             const std::vector<int>& nnapi_partition_sizes,
956             const std::vector<int>& input_shape,
957             bool specify_accelerator = true) {
958     // The graph will have as number of nodes the sum of nodes in the NNAPI
959     // partitions plus nnapi_partition_sizes.size() - 1 nodes that will be
960     // not supported by NNAPI and will cause the
961     graph_size_ = std::accumulate(std::begin(nnapi_partition_sizes),
962                                   std::end(nnapi_partition_sizes),
963                                   nnapi_partition_sizes.size() - 1);
964 
965     std::unordered_set<int> unsupported_ops_idxs;
966     int partition_node_idx = -1;
967     for (int i = 0; i < nnapi_partition_sizes.size() - 1; i++) {
968       partition_node_idx += nnapi_partition_sizes[i] + 1;
969       unsupported_ops_idxs.insert(partition_node_idx);
970     }
971 
972     if (specify_accelerator) {
973       // Building a model that will contain initially a single partition
974       // and will get then partitioned by checking the operations supported
975       // by the target accelerator.
976       // This because I am not able to know the size of each partition in my
977       // stubbed GetSupportedOperationsForDevices API.
978       DelegatePartitionLimitTestNodeFilter()->ConfigureSupportedNodes(
979           graph_size_, unsupported_ops_idxs);
980 
981       nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
982           [](const ANeuralNetworksModel* model,
983              const ANeuralNetworksDevice* const* devices, uint32_t num_devices,
984              bool* supported_ops) -> int {
985             DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(
986                 supported_ops);
987             return ANEURALNETWORKS_NO_ERROR;
988           });
989 
990       model_ = std::make_unique<LongIdentityModel>(
991           input_shape, graph_size_,
992           /*custom_nodes_indexes=*/std::unordered_set<int>(),
993           nnapi_mock_->GetNnApi(),
994           /*accelerator_name=*/"test-device", max_nnapi_partitions);
995     } else {
996       // Building a model containing custom nodes that won't be supported
997       // by the delegate and generate the partitions.
998       model_ = std::make_unique<LongIdentityModel>(
999           input_shape, graph_size_, unsupported_ops_idxs,
1000           nnapi_mock_->GetNnApi(), max_nnapi_partitions);
1001     }
1002   }
1003 
1004   std::unique_ptr<LongIdentityModel> model_;
1005 
OriginalGraphSize()1006   int OriginalGraphSize() { return graph_size_; }
1007 
1008  private:
1009   int graph_size_;
1010 };
1011 
// The graph is configured with two NNAPI partitions (3 and 2 nodes) but at
// most one may be delegated.
TEST_F(DelegatePartitionLimitTest, ShouldDelegateOnePartitionOnly) {
  const std::vector<int> partition_sizes{3, 2};
  const std::vector<int> tensor_shape{1, 2, 2, 1};
  Init(/*max_nnapi_partitions=*/1, partition_sizes, tensor_shape);

  EXPECT_EQ(model_->CountNnApiPartitions(), 1);
}
1019 
// With a limit of zero both configured NNAPI partitions are expected to be
// delegated.
TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfLimitIsZero) {
  const std::vector<int> partition_sizes{3, 2};
  const std::vector<int> tensor_shape{1, 2, 2, 1};
  Init(/*max_nnapi_partitions=*/0, partition_sizes, tensor_shape);

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}
1028 
// With a negative limit both configured NNAPI partitions are expected to be
// delegated.
TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfLimitIsNegative) {
  // The limit has to be negative here: passing 0 would only repeat the
  // ShouldDelegateAllPossiblePartitionsIfLimitIsZero scenario and leave the
  // negative case untested.
  Init(/*max_nnapi_partitions=*/-1,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}
1037 
// The limit (3) is larger than the number of configured NNAPI partitions
// (2), so both are expected to be delegated.
TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfBelowLimit) {
  const std::vector<int> partition_sizes{3, 2};
  const std::vector<int> tensor_shape{1, 2, 2, 1};
  Init(/*max_nnapi_partitions=*/3, partition_sizes, tensor_shape);

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}
1046 
// With a limit of one partition, the nodes left on CPU are expected to be
// all the nodes of the graph except those of the largest partition.
TEST_F(DelegatePartitionLimitTest, ShouldDelegatePartitionWithHigherNodeCount) {
  constexpr int kLargestModelSize = 3;
  Init(/*max_nnapi_partitions=*/1,
       /*nnapi_partition_sizes=*/{kLargestModelSize, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 1);

  const int expected_cpu_ops = OriginalGraphSize() - kLargestModelSize;
  EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), expected_cpu_ops);
}
1057 
// With a limit of two partitions, the nodes left on CPU are expected to be
// all the nodes of the graph except those of the two largest partitions.
TEST_F(DelegatePartitionLimitTest,
       ShouldDelegatePartitionsWithHigherNodeCount) {
  constexpr int kLargestModelSize = 5;
  constexpr int kSecondLargestModelSize = 4;
  Init(/*max_nnapi_partitions=*/2,
       /*nnapi_partition_sizes=*/
       {1, kLargestModelSize, 2, kSecondLargestModelSize},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);

  // 5 + 4 = 9 nodes belong to the two delegated partitions.
  const int delegated_nodes = kLargestModelSize + kSecondLargestModelSize;
  EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(),
            OriginalGraphSize() - delegated_nodes);
}
1070 
// Same scenario as the two-partition limit test, but the partitions come
// from custom nodes in the graph and no accelerator name is specified.
TEST_F(DelegatePartitionLimitTest,
       ShouldLimitPartitionsEvenWithoutAcceleratorNameSpecified) {
  constexpr int kLargestModelSize = 5;
  constexpr int kSecondLargestModelSize = 4;
  const std::vector<int> partition_sizes{1, kLargestModelSize, 2,
                                         kSecondLargestModelSize};
  const std::vector<int> tensor_shape{1, 2, 2, 1};
  Init(/*max_nnapi_partitions=*/2, partition_sizes, tensor_shape,
       /*specify_accelerator=*/false);

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);

  const int delegated_nodes = kLargestModelSize + kSecondLargestModelSize;
  EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(),
            OriginalGraphSize() - delegated_nodes);
}
1085 
1086 }  // namespace
1087 }  // namespace tflite
1088