• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include <sys/mman.h>
16 
17 #include <algorithm>
18 #include <array>
19 #include <cstdint>
20 #include <iterator>
21 #include <memory>
22 #include <numeric>
23 #include <ostream>
24 #include <unordered_set>
25 #include <vector>
26 
27 #include <gtest/gtest.h>
28 #include "tensorflow/lite/builtin_ops.h"
29 #include "tensorflow/lite/c/common.h"
30 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
31 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
32 #include "tensorflow/lite/interpreter.h"
33 #include "tensorflow/lite/kernels/test_util.h"
34 #include "tensorflow/lite/model.h"
35 #include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
36 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
37 
38 namespace tflite {
39 namespace {
40 
41 class FloatAddOpModel : public SingleOpModel {
42  public:
43   FloatAddOpModel() = default;
Init(const NnApi * nnapi,tflite::StatefulNnApiDelegate::Options options,const TensorData & input1,const TensorData & input2,const TensorData & output,ActivationFunctionType activation_type,bool allow_fp32_relax_to_fp16=false)44   void Init(const NnApi* nnapi, tflite::StatefulNnApiDelegate::Options options,
45             const TensorData& input1, const TensorData& input2,
46             const TensorData& output, ActivationFunctionType activation_type,
47             bool allow_fp32_relax_to_fp16 = false) {
48     stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
49     SetDelegate(stateful_delegate_.get());
50 
51     input1_ = AddInput(input1);
52     input2_ = AddInput(input2);
53     output_ = AddOutput(output);
54     SetBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
55                  CreateAddOptions(builder_, activation_type).Union());
56     BuildInterpreter({GetShape(input1_), GetShape(input2_)}, /*num_threads=*/-1,
57                      allow_fp32_relax_to_fp16, /*apply_delegate=*/false);
58     compilation_status_ = ApplyDelegate();
59   }
60 
input1()61   int input1() { return input1_; }
input2()62   int input2() { return input2_; }
63 
GetOutput()64   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
65 
GetCompilationStatus()66   TfLiteStatus GetCompilationStatus() { return compilation_status_; }
67 
68  protected:
69   int input1_;
70   int input2_;
71   int output_;
72 
73  private:
74   std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
75   TfLiteStatus compilation_status_;
76 };
77 
78 struct NnApiDeviceSelectionTest
79     : ::tflite::delegate::nnapi::NnApiDelegateMockTest {
SetUptflite::__anone5dffd370111::NnApiDeviceSelectionTest80   void SetUp() override {
81     ::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp();
82     nnapi_mock_->GetDeviceCountReturnsCount<3>();
83     nnapi_mock_->StubGetDeviceWith(
84         [](uint32_t devIndex, ANeuralNetworksDevice** device) -> int {
85           *device = reinterpret_cast<ANeuralNetworksDevice*>(devIndex + 1);
86           return 0;
87         });
88     nnapi_mock_->StubGetDeviceNameWith(
89         [](const ANeuralNetworksDevice* device, const char** name) -> int {
90           if (device == reinterpret_cast<ANeuralNetworksDevice*>(1)) {
91             *name = "dsp";
92           } else if (device == reinterpret_cast<ANeuralNetworksDevice*>(2)) {
93             *name = "gpu";
94           } else {
95             *name = "nnapi-reference";
96           }
97           return ANEURALNETWORKS_NO_ERROR;
98         });
99     nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
100         [](const ANeuralNetworksModel* model,
101            const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
102            bool* supportedOps) -> int {
103           supportedOps[0] = true;
104           return ANEURALNETWORKS_NO_ERROR;
105         });
106   }
InitWithOptionstflite::__anone5dffd370111::NnApiDeviceSelectionTest107   void InitWithOptions(tflite::StatefulNnApiDelegate::Options options) {
108     m.Init(nnapi_mock_->GetNnApi(), options, {TensorType_FLOAT32, {1, 2, 2, 1}},
109            {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
110            ActivationFunctionType_NONE);
111     m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.7, 0.8});
112     m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.5});
113   }
114   FloatAddOpModel m;
115 };
116 
TEST_F(NnApiDeviceSelectionTest, DoesntSetDevicesWhenCpuAllowed) {
  // With the NNAPI CPU implementation allowed, no explicit device list should
  // be built, so CompilationCreateForDevices must never be reached.
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        ADD_FAILURE() << "Should not call createForDevices";
        return 1;
      });

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = false;
  InitWithOptions(options);
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}
132 
TEST_F(NnApiDeviceSelectionTest, SetsDeviceBasedOnOptions) {
  nnapi_mock_->CompilationCreateReturns<1>();
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        // Exactly the "dsp" device (handle 1) must be requested.
        EXPECT_EQ(numDevices, 1);
        EXPECT_EQ(devices[0], reinterpret_cast<ANeuralNetworksDevice*>(1));
        const bool as_expected =
            numDevices == 1 &&
            devices[0] == reinterpret_cast<ANeuralNetworksDevice*>(1);
        if (!as_expected) return 1;
        *compilation = reinterpret_cast<ANeuralNetworksCompilation*>(3);
        return ANEURALNETWORKS_NO_ERROR;
      });

  tflite::StatefulNnApiDelegate::Options options;
  options.accelerator_name = "dsp";
  InitWithOptions(options);
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}
156 
TEST_F(NnApiDeviceSelectionTest, DisallowsCPUBasedOnOptions) {
  nnapi_mock_->CompilationCreateReturns<1>();
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        // With the CPU excluded, both hardware devices ("dsp" handle 1 and
        // "gpu" handle 2) should be listed, but not nnapi-reference.
        EXPECT_EQ(numDevices, 2);
        EXPECT_EQ(devices[0], reinterpret_cast<ANeuralNetworksDevice*>(1));
        EXPECT_EQ(devices[1], reinterpret_cast<ANeuralNetworksDevice*>(2));
        const bool as_expected =
            numDevices == 2 &&
            devices[0] == reinterpret_cast<ANeuralNetworksDevice*>(1) &&
            devices[1] == reinterpret_cast<ANeuralNetworksDevice*>(2);
        if (!as_expected) return 1;
        *compilation = reinterpret_cast<ANeuralNetworksCompilation*>(3);
        return ANEURALNETWORKS_NO_ERROR;
      });

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = true;
  InitWithOptions(options);
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}
182 
TEST_F(NnApiDeviceSelectionTest,
       DoesNotDelegateIfOnlyReferenceDeviceIsAvailable_CpuEnabled) {
  // Simulate a system where nnapi-reference is the only NNAPI device.
  nnapi_mock_->GetDeviceCountReturnsCount<1>();
  nnapi_mock_->GetDeviceNameReturnsName("nnapi-reference");

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = false;
  InitWithOptions(options);
  m.Invoke();

  // Delegation is skipped: the single ADD op stays on the TFLite CPU kernel.
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}
196 
TEST_F(NnApiDeviceSelectionTest,
       DoesNotDelegateIfOnlyReferenceDeviceIsAvailable_CpuDisabled) {
  // Simulate a system where nnapi-reference is the only NNAPI device.
  nnapi_mock_->GetDeviceCountReturnsCount<1>();
  nnapi_mock_->GetDeviceNameReturnsName("nnapi-reference");

  tflite::StatefulNnApiDelegate::Options options;
  options.disallow_nnapi_cpu = true;
  InitWithOptions(options);
  m.Invoke();

  // Same outcome as the CpuEnabled variant: nothing is delegated.
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}
210 
211 struct UnsupportedOperationOnDeviceTest
212     : ::tflite::delegate::nnapi::NnApiDelegateMockTest {};
213 
214 class AcceleratedModel {
215  public:
GetDelegate()216   StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); }
217 
218  protected:
219   // build a delegate with a target accelerator name.
AcceleratedModel(const NnApi * nnapi,const std::string & accelerator_name,int max_nnapi_partitions=0)220   AcceleratedModel(const NnApi* nnapi, const std::string& accelerator_name,
221                    int max_nnapi_partitions = 0) {
222     StatefulNnApiDelegate::Options options;
223     options.accelerator_name = accelerator_name.c_str();
224     options.max_number_delegated_partitions = max_nnapi_partitions;
225     stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
226   }
227 
228   // build a delegate with no target accelerator name, can disable the NNAPI CPU
229   // fallback implementation using the disallow_nnapi_cpu flag.
AcceleratedModel(const NnApi * nnapi,bool disallow_nnapi_cpu,int max_nnapi_partitions=0)230   AcceleratedModel(const NnApi* nnapi, bool disallow_nnapi_cpu,
231                    int max_nnapi_partitions = 0) {
232     StatefulNnApiDelegate::Options options;
233     options.disallow_nnapi_cpu = disallow_nnapi_cpu;
234     options.max_number_delegated_partitions = max_nnapi_partitions;
235     stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
236   }
237 
238  private:
239   std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
240 };
241 
242 class ArgMaxOpModel : public SingleOpModel, public AcceleratedModel {
243  public:
ArgMaxOpModel(std::initializer_list<int> input_shape,TensorType input_type,int axis_value,TensorType output_type,const NnApi * nnapi,const char * device_name)244   ArgMaxOpModel(std::initializer_list<int> input_shape, TensorType input_type,
245                 int axis_value, TensorType output_type, const NnApi* nnapi,
246                 const char* device_name)
247       : SingleOpModel(), AcceleratedModel(nnapi, device_name) {
248     Init(input_shape, input_type, axis_value, output_type);
249   }
250 
ArgMaxOpModel(std::initializer_list<int> input_shape,TensorType input_type,int axis_value,TensorType output_type,const NnApi * nnapi,bool disallow_nnapi_cpu)251   ArgMaxOpModel(std::initializer_list<int> input_shape, TensorType input_type,
252                 int axis_value, TensorType output_type, const NnApi* nnapi,
253                 bool disallow_nnapi_cpu)
254       : SingleOpModel(), AcceleratedModel(nnapi, disallow_nnapi_cpu) {
255     Init(input_shape, input_type, axis_value, output_type);
256   }
257 
input() const258   int input() const { return input_; }
259 
260  protected:
261   int input_;
262   int axis_;
263   int output_;
264 
Init(std::initializer_list<int> input_shape,TensorType input_type,int axis_value,TensorType output_type)265   void Init(std::initializer_list<int> input_shape, TensorType input_type,
266             int axis_value, TensorType output_type) {
267     SetDelegate(GetDelegate());
268     input_ = AddInput(input_type);
269     axis_ = AddConstInput(TensorType_INT32, {axis_value}, {1});
270     output_ = AddOutput(output_type);
271 
272     SetBuiltinOp(BuiltinOperator_ARG_MAX, BuiltinOptions_ArgMaxOptions,
273                  CreateArgMaxOptions(builder_, output_type).Union());
274     BuildInterpreter({input_shape, {1}});
275   }
276 };
277 
TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldUseDeviceFeatureLevelWhenSpecifyingTargetDevice) {
  nnapi_mock_->SetAndroidSdkVersion(29);
  nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/28);
  // Report the op as supported so that, in the first case, the delegate is
  // rejected only because the device feature level is too low — not because
  // the operation is unsupported by the device.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 1, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                  TensorType_INT32, nnapi_mock_->GetNnApi(), "test-device");
  m.PopulateTensor<float>(m.input(), {0.1, 0.9, 0.7, 0.3});
  m.Invoke();

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1)
      << "Expected Max not to be delegates since it not supported before NNAPI "
         "1.2 and device declares to support only NNAPI 1.1.";

  nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/29);

  ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                   TensorType_INT32, nnapi_mock_->GetNnApi(), "test-device");
  // Fix: populate m1 through its own input index (was m.input(), which only
  // worked because both models happen to assign the same index).
  m1.PopulateTensor<float>(m1.input(), {0.1, 0.9, 0.7, 0.3});
  m1.Invoke();

  EXPECT_EQ(m1.CountOpsExecutedByCpuKernel(), 0)
      << "Expected Max op to be delegated since it is supported in NNAPI 1.2.";
}
312 
TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldUseDeviceFeatureLevelWhenDisablingCPU) {
  nnapi_mock_->SetAndroidSdkVersion(29);
  nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/28);
  // Report the op as supported so that, in the first case, the delegate is
  // rejected only because the device feature level is too low — not because
  // the operation is unsupported by the device.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 1, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                  TensorType_INT32, nnapi_mock_->GetNnApi(),
                  /*disallow_nnapi_cpu=*/true);
  m.PopulateTensor<float>(m.input(), {0.1, 0.9, 0.7, 0.3});
  m.Invoke();

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1)
      << "Expected Max not to be delegates since it not supported before NNAPI "
         "1.2 and device declares to support only NNAPI 1.1.";

  ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                   TensorType_INT32, nnapi_mock_->GetNnApi(),
                   /*disallow_nnapi_cpu=*/false);
  // Fix: populate each model through its own input index (was m.input(),
  // which only worked because the models assign identical indices).
  m1.PopulateTensor<float>(m1.input(), {0.1, 0.9, 0.7, 0.3});
  m1.Invoke();

  EXPECT_EQ(m1.CountOpsExecutedByCpuKernel(), 0)
      << "Expected Max op to be delegated since we enabled NNAPI CPU "
         "implementation.";

  nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/29);

  ArgMaxOpModel m2({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                   TensorType_INT32, nnapi_mock_->GetNnApi(),
                   /*disallow_nnapi_cpu=*/true);
  m2.PopulateTensor<float>(m2.input(), {0.1, 0.9, 0.7, 0.3});
  m2.Invoke();

  EXPECT_EQ(m2.CountOpsExecutedByCpuKernel(), 0)
      << "Expected Max op to be delegated since it is supported in NNAPI 1.2.";
}
359 
360 // This is a model with two ops:
361 //
362 //  input1 ---->
363 //                ADD --
364 //  input2   -->        |
365 //                       -->
366 //                          SUB --> output
367 //  input3 ---------------->
368 //
369 class AddSubOpsAcceleratedModel : public MultiOpModel, public AcceleratedModel {
370  public:
AddSubOpsAcceleratedModel(const TensorData & input1,const TensorData & input2,const TensorData & input3,const TensorData & output,ActivationFunctionType activation_type,const NnApi * nnapi,const std::string & accelerator_name,bool allow_fp32_relax_to_fp16=false)371   AddSubOpsAcceleratedModel(const TensorData& input1, const TensorData& input2,
372                             const TensorData& input3, const TensorData& output,
373                             ActivationFunctionType activation_type,
374                             const NnApi* nnapi,
375                             const std::string& accelerator_name,
376                             bool allow_fp32_relax_to_fp16 = false)
377       : MultiOpModel(), AcceleratedModel(nnapi, accelerator_name) {
378     SetDelegate(GetDelegate());
379     Init(input1, input2, input3, output, activation_type,
380          allow_fp32_relax_to_fp16);
381   }
382 
input1()383   int input1() { return input1_; }
input2()384   int input2() { return input2_; }
input3()385   int input3() { return input3_; }
386 
GetOutput()387   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
388 
389  protected:
390   int input1_;
391   int input2_;
392   int input3_;
393   int output_;
394 
395  private:
396   // Performs initialization logic shared across all constructors.
Init(const TensorData & input1,const TensorData & input2,const TensorData & input3,const TensorData & output,ActivationFunctionType activation_type,bool allow_fp32_relax_to_fp16=false)397   void Init(const TensorData& input1, const TensorData& input2,
398             const TensorData& input3, const TensorData& output,
399             ActivationFunctionType activation_type,
400             bool allow_fp32_relax_to_fp16 = false) {
401     input1_ = AddInput(input1);
402     input2_ = AddInput(input2);
403     input3_ = AddInput(input3);
404     const int add_output = AddInnerTensor<float>(output);
405     output_ = AddOutput(output);
406     AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
407                  CreateAddOptions(builder_, activation_type).Union(),
408                  {input1_, input2_}, {add_output});
409     AddBuiltinOp(BuiltinOperator_SUB, BuiltinOptions_SubOptions,
410                  CreateSubOptions(builder_, activation_type).Union(),
411                  {add_output, input3_}, {output_});
412     BuildInterpreter({GetShape(input1_), GetShape(input2_), GetShape(input3_)},
413                      /*num_threads=*/-1, allow_fp32_relax_to_fp16,
414                      /*apply_delegate=*/true);
415   }
416 };
417 
// Namespace-scope counters so the capture-less stub lambdas below (which must
// convert to plain function pointers) can record how often they run.
int should_build_model_with_sup_ops_compilation_model_create_count = 0;
int should_build_model_with_sup_ops_add_operation_count = 0;
TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldBuildModelWithOnlyDeviceSupportedOps) {
  nnapi_mock_->SetNnapiSupportedDevice("test-device");

  // Only the first of the two ops is reported as device-supported; this also
  // exercises the "model with unsupported ops must not be cached" assertion.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        supportedOps[0] = true;
        supportedOps[1] = false;
        return ANEURALNETWORKS_NO_ERROR;
      });

  nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int {
    ++should_build_model_with_sup_ops_compilation_model_create_count;
    *model = reinterpret_cast<ANeuralNetworksModel*>(1);
    return ANEURALNETWORKS_NO_ERROR;
  });

  nnapi_mock_->StubAddOperationWith(
      [](ANeuralNetworksModel* model, ANeuralNetworksOperationType type,
         uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
         const uint32_t* outputs) -> int {
        ++should_build_model_with_sup_ops_add_operation_count;
        return ANEURALNETWORKS_NO_ERROR;
      });

  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  m.Invoke();

  // One op (SUB) falls back to the CPU kernel.
  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
  // Two NNAPI models are created: the full probe model plus the reduced one.
  ASSERT_EQ(should_build_model_with_sup_ops_compilation_model_create_count, 2)
      << "Model with unsupported operations has been cached";
  // 2 ops in the probe model + 1 supported op in the reduced model.
  EXPECT_EQ(should_build_model_with_sup_ops_add_operation_count, 3)
      << "The second model should contain only one operation";
}
467 
TEST_F(UnsupportedOperationOnDeviceTest, ShouldRunOnCpuIfDeviceSupportsNoOps) {
  nnapi_mock_->SetNnapiSupportedDevice("test-device");

  // The device rejects both operations in the model.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 2, false);
        return ANEURALNETWORKS_NO_ERROR;
      });

  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  m.Invoke();

  // Everything stays on the TFLite CPU kernels.
  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 2);
}
493 
// Namespace-scope counter so the capture-less stub lambda below (which must
// convert to a plain function pointer) can record model creations.
int should_cache_model_compilation_model_create_count = 0;
TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) {
  nnapi_mock_->SetNnapiSupportedDevice("test-device");

  // Every op is supported, so the fully-delegated model can be cached.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps, supportedOps + 2, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int {
    ++should_cache_model_compilation_model_create_count;
    *model = reinterpret_cast<ANeuralNetworksModel*>(1);
    return ANEURALNETWORKS_NO_ERROR;
  });

  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  m.Invoke();

  // Fully delegated, and the NNAPI model was built exactly once.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
  EXPECT_EQ(should_cache_model_compilation_model_create_count, 1);
}
527 
TEST_F(UnsupportedOperationOnDeviceTest,
       ShouldNotApplySupportedOperationsFilterBeforeAndroidSdk29) {
  // On SDK 28 the getSupportedOperationsForDevices API does not exist, so no
  // per-op filter can be applied.
  nnapi_mock_->SetAndroidSdkVersion(28, /*set_unsupported_ops_to_null=*/true);
  nnapi_mock_->ModelCreateReturns<0>();
  AddSubOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
      ActivationFunctionType_NONE, nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.PopulateTensor<float>(m.input3(), input2);
  m.Invoke();

  // Delegation succeeded without failures and all nodes have been delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
}
547 
548 // This is a model with two ops:
549 //
550 //  input1 ----> HARD_SWISH ---->
551 //                                ADD --> output
552 //  input2 ---------------------->
553 //
554 class HardSwishAddOpsAcceleratedModel : public MultiOpModel,
555                                         public AcceleratedModel {
556  public:
HardSwishAddOpsAcceleratedModel(const TensorData & input1,const TensorData & input2,const TensorData & output,ActivationFunctionType activation_type,const NnApi * nnapi,const std::string & accelerator_name,bool allow_fp32_relax_to_fp16=false)557   HardSwishAddOpsAcceleratedModel(const TensorData& input1,
558                                   const TensorData& input2,
559                                   const TensorData& output,
560                                   ActivationFunctionType activation_type,
561                                   const NnApi* nnapi,
562                                   const std::string& accelerator_name,
563                                   bool allow_fp32_relax_to_fp16 = false)
564       : MultiOpModel(), AcceleratedModel(nnapi, accelerator_name) {
565     SetDelegate(GetDelegate());
566     Init(input1, input2, output, activation_type, allow_fp32_relax_to_fp16);
567   }
568 
input1()569   int input1() { return input1_; }
input2()570   int input2() { return input2_; }
571 
GetOutput()572   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
573 
574  protected:
575   int input1_;
576   int input2_;
577   int output_;
578 
579  private:
580   // Performs initialization logic shared across all constructors.
Init(const TensorData & input1,const TensorData & input2,const TensorData & output,ActivationFunctionType activation_type,bool allow_fp32_relax_to_fp16=false)581   void Init(const TensorData& input1, const TensorData& input2,
582             const TensorData& output, ActivationFunctionType activation_type,
583             bool allow_fp32_relax_to_fp16 = false) {
584     input1_ = AddInput(input1);
585     input2_ = AddInput(input2);
586     const int hard_swish_output = AddInnerTensor<float>(output);
587     output_ = AddOutput(output);
588     AddBuiltinOp(BuiltinOperator_HARD_SWISH, BuiltinOptions_HardSwishOptions,
589                  CreateHardSwishOptions(builder_).Union(), {input1_},
590                  {hard_swish_output});
591     AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
592                  CreateAddOptions(builder_, activation_type).Union(),
593                  {input1_, hard_swish_output}, {output_});
594     BuildInterpreter({GetShape(input1_), GetShape(input2_)}, /*num_threads=*/-1,
595                      allow_fp32_relax_to_fp16, /*apply_delegate=*/true);
596   }
597 };
598 
599 struct TfLiteOpMappedToMultipleNnApiOps
600     : ::tflite::delegate::nnapi::NnApiDelegateMockTest {};
601 
TEST_F(TfLiteOpMappedToMultipleNnApiOps, AllCostituentOpsNotSupported) {
  nnapi_mock_->ModelCreateReturns<0>();

  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // HardSwish is mapped to 4 NNAPI ops, none of which is supported;
        // the fifth op (ADD) is supported.
        std::fill(supportedOps, supportedOps + 4, false);
        supportedOps[4] = true;
        return ANEURALNETWORKS_NO_ERROR;
      });

  HardSwishAddOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE,
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.Invoke();

  // Delegation succeeded without failures: HardSwish stays on the CPU while
  // Add has been correctly delegated.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}
631 
TEST_F(TfLiteOpMappedToMultipleNnApiOps, NotAllConstitutentOpsSupported) {
  nnapi_mock_->ModelCreateReturns<0>();
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // HardSwish expands into the first 4 NNAPI ops (5 total with ADD);
        // mark everything supported except one of HardSwish's constituents.
        std::fill(supportedOps, supportedOps + 5, true);
        supportedOps[2] = false;
        return ANEURALNETWORKS_NO_ERROR;
      });

  HardSwishAddOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE,
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.Invoke();

  // A partially-supported HardSwish must not be delegated; Add still is.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}
661 
TEST_F(TfLiteOpMappedToMultipleNnApiOps, AllConstitutentOpsSupported) {
  nnapi_mock_->ModelCreateReturns<0>();
  // Pretend the accelerator supports every op in the NNAPI model, including
  // all the ops HardSwish decomposes into.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        // HardSwish is mapped to 4 NNAPI ops (the first 4 ones), so we have 5
        // ops in the NNAPI model.
        // All ops are supported by the accelerator.
        std::fill(supportedOps, supportedOps + 5, true);
        return ANEURALNETWORKS_NO_ERROR;
      });

  HardSwishAddOpsAcceleratedModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE,
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device");
  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
  m.PopulateTensor<float>(m.input1(), input1);
  m.PopulateTensor<float>(m.input2(), input2);
  m.Invoke();

  // Delegation succeeded without failures and all nodes have been delegated;
  // nothing runs on the CPU.
  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
}
689 
690 class QuantizedWeightsConvolutionOpModel : public SingleOpModel,
691                                            public AcceleratedModel {
692  public:
QuantizedWeightsConvolutionOpModel(const NnApi * nnapi,std::string accelerator_name,const TensorData & input,const TensorData & filter,const TensorData & output,int stride_width=2,int stride_height=2,enum Padding padding=Padding_VALID,enum ActivationFunctionType activation=ActivationFunctionType_NONE,int dilation_width_factor=1,int dilation_height_factor=1,int num_threads=-1,std::initializer_list<uint8_t> filter_data={})693   QuantizedWeightsConvolutionOpModel(
694       const NnApi* nnapi, std::string accelerator_name, const TensorData& input,
695       const TensorData& filter, const TensorData& output, int stride_width = 2,
696       int stride_height = 2, enum Padding padding = Padding_VALID,
697       enum ActivationFunctionType activation = ActivationFunctionType_NONE,
698       int dilation_width_factor = 1, int dilation_height_factor = 1,
699       int num_threads = -1, std::initializer_list<uint8_t> filter_data = {})
700       : SingleOpModel(), AcceleratedModel(nnapi, accelerator_name) {
701     SetDelegate(GetDelegate());
702 
703     input_ = AddInput(input);
704 
705     if (filter_data.size()) {
706       filter_ = AddConstInput(filter, filter_data);
707     } else {
708       filter_ = AddInput(filter);
709     }
710 
711     int bias_size = GetShape(filter_)[0];
712 
713     bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
714 
715     output_ = AddOutput(output);
716 
717     SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions,
718                  CreateConv2DOptions(
719                      builder_, padding, stride_width, stride_height, activation,
720                      dilation_width_factor, dilation_height_factor)
721                      .Union());
722 
723     BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)},
724                      num_threads, /*allow_fp32_relax_to_fp16=*/false,
725                      /*apply_delegate=*/true);
726   }
727 
SetInput(std::initializer_list<float> data)728   void SetInput(std::initializer_list<float> data) {
729     PopulateTensor(input_, data);
730   }
731 
SetFilter(std::initializer_list<float> data)732   void SetFilter(std::initializer_list<float> data) {
733     QuantizeAndPopulate<uint8_t>(filter_, data);
734   }
735 
SetBias(std::initializer_list<float> data)736   void SetBias(std::initializer_list<float> data) {
737     PopulateTensor(input_, data);
738   }
739 
GetOutput()740   std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); }
GetDequantizedOutput()741   std::vector<float> GetDequantizedOutput() {
742     return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
743                                GetScale(output_), GetZeroPoint(output_));
744   }
745 
746  protected:
747   int input_;
748   int filter_;
749   int bias_;
750   int output_;
751 };
752 
// Counts the operations added to the NNAPI model. Kept at file scope so the
// capture-less stub lambdas below (used as C function pointers) can update
// and read it.
int quantized_conv2d_model_added_nnapi_ops_count = 0;
TEST_F(TfLiteOpMappedToMultipleNnApiOps,
       AddedDequantizationsAreAccountedInModelOps) {
  nnapi_mock_->ModelCreateReturns<0>();
  // Report as supported exactly as many ops as were added to the NNAPI
  // model, so the whole model can be delegated.
  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
      [](const ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         bool* supportedOps) -> int {
        std::fill(supportedOps,
                  supportedOps + quantized_conv2d_model_added_nnapi_ops_count,
                  true);
        return ANEURALNETWORKS_NO_ERROR;
      });
  // Count every operation the delegate adds to the NNAPI model.
  nnapi_mock_->StubAddOperationWith(
      [](ANeuralNetworksModel* model, ANeuralNetworksOperationType type,
         uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
         const uint32_t* outputs) -> int {
        ++quantized_conv2d_model_added_nnapi_ops_count;
        return ANEURALNETWORKS_NO_ERROR;
      });

  // Delegation is applied during model construction, so the counters below
  // can be checked without calling Invoke().
  QuantizedWeightsConvolutionOpModel m(
      nnapi_mock_->GetNnApi(),
      /*accelerator_name=*/"test-device", {TensorType_FLOAT32, {2, 2, 4, 1}},
      {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64}, {TensorType_FLOAT32, {}});
  m.SetInput({
      // First batch
      1, 1, 1, 1,  // row = 1
      2, 2, 2, 2,  // row = 2
      // Second batch
      1, 2, 3, 4,  // row = 1
      1, 2, 3, 4,  // row = 2
  });
  m.SetFilter({
      1, 2, 3, 4,    // first 2x2 filter
      -1, 1, -1, 1,  // second 2x2 filter
      -1, -1, 1, 1,  // third 2x2 filter
  });
  m.SetBias({1, 2, 3});

  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
  // When delegating quantized Conv2D, for each quantized inputs a
  // dequantize operation is added to the model.
  // In our case 1 Dequantize op for the weights is expected generating
  // a 2 ops model.
  EXPECT_EQ(quantized_conv2d_model_added_nnapi_ops_count, 2);
}
800 
// Model with a chain of no-op (add with zero operations)
// interleaved with no-op custom nodes.
class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
 public:
  // Builds the chain targeting a specific accelerator by name.
  LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
                    const std::unordered_set<int>& custom_nodes_indexes,
                    const NnApi* nnapi, const std::string& accelerator_name,
                    int max_nnapi_partitions)
      : MultiOpModel(),
        AcceleratedModel(nnapi, accelerator_name, max_nnapi_partitions) {
    Init(input_shape, graph_size, custom_nodes_indexes);
  }

  // Builds the chain without specifying an accelerator name.
  // NOTE(review): `false` presumably selects an AcceleratedModel overload
  // taking a bool option rather than a name — confirm against
  // AcceleratedModel's constructors.
  LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
                    const std::unordered_set<int>& custom_nodes_indexes,
                    const NnApi* nnapi, int max_nnapi_partitions)
      : MultiOpModel(), AcceleratedModel(nnapi, false, max_nnapi_partitions) {
    Init(input_shape, graph_size, custom_nodes_indexes);
  }

  void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }

  // Counts the execution-plan nodes handled by a delegate kernel. Each
  // delegated partition is replaced by a single node with a non-null
  // `delegate` field, so this equals the number of NNAPI partitions.
  int CountNnApiPartitions() {
    return std::count_if(
        std::begin(interpreter_->execution_plan()),
        std::end(interpreter_->execution_plan()), [this](const int node_index) {
          return interpreter_->node_and_registration(node_index)
                     ->first.delegate != nullptr;
        });
  }

 private:
  // Builds a graph_size-node chain: one ADD from the inputs, graph_size - 2
  // middle nodes (ADD, or a no-op custom node for indexes listed in
  // custom_nodes_indexes), and a final ADD producing the output. Every ADD
  // adds the zero tensor, so the whole chain is an identity.
  void Init(const std::vector<int>& input_shape, int graph_size,
            const std::unordered_set<int>& custom_nodes_indexes) {
    SetDelegate(GetDelegate());

    const TensorData tensor_data{TensorType_FLOAT32, input_shape};

    input_ = AddInput(tensor_data);
    zero_input_ = AddInput(tensor_data);

    // One intermediate tensor between each pair of consecutive nodes.
    std::vector<int> intermediate_outputs(graph_size - 1);
    std::generate(
        std::begin(intermediate_outputs), std::end(intermediate_outputs),
        [this, &tensor_data]() { return AddInnerTensor<float>(tensor_data); });

    output_ = AddOutput(tensor_data);

    AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                 CreateAddOptions(builder_).Union(), {input_, zero_input_},
                 {intermediate_outputs[0]});

    // Node index i + 1 is a custom no-op if requested, else another ADD.
    for (int i = 0; i < intermediate_outputs.size() - 1; i++) {
      if (custom_nodes_indexes.count(i + 1) == 1) {
        AddCustomOp("custom_no_op", {}, [this]() { return CustomNoOpNode(); },
                    {intermediate_outputs[i]}, {intermediate_outputs[i + 1]});
      } else {
        AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                     CreateAddOptions(builder_).Union(),
                     {intermediate_outputs[i], zero_input_},
                     {intermediate_outputs[i + 1]});
      }
    }

    AddBuiltinOp(
        BuiltinOperator_ADD, BuiltinOptions_AddOptions,
        CreateAddOptions(builder_).Union(),
        {intermediate_outputs[intermediate_outputs.size() - 1], zero_input_},
        {output_});

    BuildInterpreter({GetShape(input_), GetShape(zero_input_)});

    // Pre-populate the zero operand so every ADD is an identity.
    std::vector<float> zero(GetTensorSize(input_), 0.0);
    PopulateTensor(zero_input_, zero);
  }

  // Return the registration of a custom node simply copying input to output.
  TfLiteRegistration* CustomNoOpNode() {
    static TfLiteRegistration no_op = {
        .init = [](TfLiteContext* context, const char* buffer,
                   size_t length) -> void* { return nullptr; },

        .free = [](TfLiteContext* context, void* buffer) -> void {},

        // Accepts only single-input / single-output nodes.
        .prepare = [](TfLiteContext* context,
                      TfLiteNode* node) -> TfLiteStatus {
          if (node->inputs->size != 1 || node->outputs->size != 1) {
            return kTfLiteError;
          }

          return kTfLiteOk;
        },

        // Raw byte-copy of the input tensor into the output tensor.
        .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
          auto input_tensor = context->tensors[node->inputs->data[0]];
          auto output_tensor = context->tensors[node->outputs->data[0]];

          std::copy(input_tensor.data.raw,
                    input_tensor.data.raw + input_tensor.bytes,
                    output_tensor.data.raw);

          return kTfLiteOk;
        },

        .profiling_string = nullptr,
        // NOTE(review): builtin_code is kTfLiteBuiltinDelegate although this
        // is a custom op — presumably intentional so the node is never
        // treated as a delegable builtin; confirm.
        .builtin_code = kTfLiteBuiltinDelegate,
        .custom_name = "NoOpTestDelegate",
        .version = 1,
    };

    return &no_op;
  }
  // Tensor indexes within the interpreter.
  int input_;
  int zero_input_;
  int output_;
};
917 
// Decides, node by node, which operations the mocked accelerator reports as
// supported. Held in a file-scope singleton (see below) because the NNAPI
// stub must be a capture-less lambda.
class NodeFilter {
 public:
  // Registers the graph size and the node indexes that SetNodeSupport()
  // must report as unsupported.
  void ConfigureSupportedNodes(
      int graph_size, const std::unordered_set<int>& unsupported_indexes) {
    graph_size_ = graph_size;
    unsupported_indexes_ = unsupported_indexes;
  }

  // Fills supported_ops (at least graph_size_ entries) marking every node
  // supported except the configured unsupported ones.
  void SetNodeSupport(bool* supported_ops) {
    for (int i = 0; i < graph_size_; i++) {
      supported_ops[i] = (unsupported_indexes_.count(i) == 0);
    }
  }

 private:
  // Fix: was uninitialized; reading it in SetNodeSupport() before
  // ConfigureSupportedNodes() was undefined behavior. Zero-initialized so an
  // unconfigured filter is simply a no-op.
  int graph_size_ = 0;
  std::unordered_set<int> unsupported_indexes_;
};
936 
937 // Using the same node filter for all DelegatePartitionLimitTests
938 // because StubGetSupportedOperationsForDevicesWith wants a C function.
DelegatePartitionLimitTestNodeFilter()939 NodeFilter* DelegatePartitionLimitTestNodeFilter() {
940   static NodeFilter* node_filter = new NodeFilter();
941   return node_filter;
942 }
943 
class DelegatePartitionLimitTest
    : public ::tflite::delegate::nnapi::NnApiDelegateMockTest {
 protected:
  // Configure the underlying graph to generate a set of nnapi partition
  // with the sizes specified in nnapi_partition_sizes and the given
  // input_shape.
  void Init(int max_nnapi_partitions,
            const std::vector<int>& nnapi_partition_sizes,
            const std::vector<int>& input_shape,
            bool specify_accelerator = true) {
    // The graph will have as number of nodes the sum of nodes in the NNAPI
    // partitions plus nnapi_partition_sizes.size() - 1 separator nodes that
    // will be not supported by NNAPI and will cause the graph to be split
    // into the requested partitions.
    graph_size_ = std::accumulate(std::begin(nnapi_partition_sizes),
                                  std::end(nnapi_partition_sizes),
                                  nnapi_partition_sizes.size() - 1);

    // Mark as unsupported the node immediately after each partition
    // except the last one.
    std::unordered_set<int> unsupported_ops_idxs;
    int partition_node_idx = -1;
    for (int i = 0; i < nnapi_partition_sizes.size() - 1; i++) {
      partition_node_idx += nnapi_partition_sizes[i] + 1;
      unsupported_ops_idxs.insert(partition_node_idx);
    }

    if (specify_accelerator) {
      // Building a model that will contain initially a single partition
      // and will get then partitioned by checking the operations supported
      // by the target accelerator.
      // This because I am not able to know the size of each partition in my
      // stubbed GetSupportedOperationsForDevices API.
      DelegatePartitionLimitTestNodeFilter()->ConfigureSupportedNodes(
          graph_size_, unsupported_ops_idxs);

      nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
          [](const ANeuralNetworksModel* model,
             const ANeuralNetworksDevice* const* devices, uint32_t num_devices,
             bool* supported_ops) -> int {
            DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(
                supported_ops);
            return ANEURALNETWORKS_NO_ERROR;
          });

      model_ = std::make_unique<LongIdentityModel>(
          input_shape, graph_size_,
          /*custom_nodes_indexes=*/std::unordered_set<int>(),
          nnapi_mock_->GetNnApi(),
          /*accelerator_name=*/"test-device", max_nnapi_partitions);
    } else {
      // Building a model containing custom nodes that won't be supported
      // by the delegate and generate the partitions.
      model_ = std::make_unique<LongIdentityModel>(
          input_shape, graph_size_, unsupported_ops_idxs,
          nnapi_mock_->GetNnApi(), max_nnapi_partitions);
    }
  }

  std::unique_ptr<LongIdentityModel> model_;

  // Total node count of the graph built by Init().
  int OriginalGraphSize() { return graph_size_; }

 private:
  int graph_size_;
};
1007 
TEST_F(DelegatePartitionLimitTest, ShouldDelegateOnePartitionOnly) {
  // Two delegable partitions exist (sizes 3 and 2), but the limit of 1
  // allows only one of them to be delegated.
  Init(/*max_nnapi_partitions=*/1,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 1);
}
1015 
TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfLimitIsZero) {
  // A limit of zero disables the cap: both partitions are delegated.
  Init(/*max_nnapi_partitions=*/0,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}
1024 
TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfLimitIsNegative) {
  // A negative limit, like zero, should disable the partition cap, so both
  // partitions are delegated. Fixed: this test passed 0, which made it a
  // byte-for-byte duplicate of ...IfLimitIsZero instead of exercising the
  // negative-limit path its name promises.
  Init(/*max_nnapi_partitions=*/-1,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}
1033 
TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfBelowLimit) {
  // The limit (3) exceeds the number of delegable partitions (2), so both
  // partitions are delegated.
  Init(/*max_nnapi_partitions=*/3,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}
1042 
TEST_F(DelegatePartitionLimitTest, ShouldDelegatePartitionWithHigherNodeCount) {
  // Size of the biggest partition; made constexpr and used directly in the
  // partition list so the constant and the literal can never drift apart.
  constexpr int kLargestModelSize = 3;
  Init(/*max_nnapi_partitions=*/1,
       /*nnapi_partition_sizes=*/{kLargestModelSize, 2},
       /*input_shape=*/{1, 2, 2, 1});

  // With a limit of 1, only the largest partition is delegated and all the
  // remaining nodes run on the CPU.
  EXPECT_EQ(model_->CountNnApiPartitions(), 1);
  EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(),
            OriginalGraphSize() - kLargestModelSize);
}
1053 
TEST_F(DelegatePartitionLimitTest,
       ShouldDelegatePartitionsWithHigherNodeCount) {
  constexpr int kLargestModelSize = 5;
  constexpr int kSecondLargestModelSize = 4;
  Init(/*max_nnapi_partitions=*/2,
       /*nnapi_partition_sizes=*/
       {1, kLargestModelSize, 2, kSecondLargestModelSize},
       /*input_shape=*/{1, 2, 2, 1});

  // The two biggest partitions are delegated; everything else stays on CPU.
  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
  // Was a magic `9`; spelled out as the sum of the two delegated partition
  // sizes, consistent with the accelerator-less variant of this test.
  EXPECT_EQ(
      model_->CountOpsExecutedByCpuKernel(),
      OriginalGraphSize() - (kLargestModelSize + kSecondLargestModelSize));
}
1066 
TEST_F(DelegatePartitionLimitTest,
       ShouldLimitPartitionsEvenWithoutAcceleratorNameSpecified) {
  // Same scenario as the previous test, but the partitions are produced by
  // unsupported custom nodes (specify_accelerator=false) rather than by the
  // accelerator's reported capabilities.
  int kLargestModelSize = 5;
  int kSecondLargestModelSize = 4;
  Init(/*max_nnapi_partitions=*/2,
       /*nnapi_partition_sizes=*/
       {1, kLargestModelSize, 2, kSecondLargestModelSize},
       /*input_shape=*/{1, 2, 2, 1}, /*specify_accelerator=*/false);

  // Only the two largest partitions are delegated; the rest runs on CPU.
  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
  EXPECT_EQ(
      model_->CountOpsExecutedByCpuKernel(),
      OriginalGraphSize() - (kLargestModelSize + kSecondLargestModelSize));
}
1081 
1082 }  // namespace
1083 }  // namespace tflite
1084