• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include <stddef.h>
16 
17 #include <cstdint>
18 #include <initializer_list>
19 #include <map>
20 #include <memory>
21 #include <string>
22 #include <vector>
23 
24 #include <gmock/gmock.h>
25 #include <gtest/gtest.h>
26 #include "absl/memory/memory.h"
27 #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
28 #include "tensorflow/lite/core/api/op_resolver.h"
29 #include "tensorflow/lite/interpreter.h"
30 #include "tensorflow/lite/kernels/internal/test_util.h"
31 #include "tensorflow/lite/kernels/test_util.h"
32 #include "tensorflow/lite/schema/schema_generated.h"
33 #include "tensorflow/lite/string_type.h"
34 
35 namespace tflite {
36 
37 namespace ops {
38 namespace builtin {
39 
40 TfLiteRegistration* Register_DEPTHWISE_CONV_2D_UINT8();
41 TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_REF();
42 TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT();
43 TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT();
44 
45 }  // namespace builtin
46 }  // namespace ops
47 
48 namespace {
49 
50 using ::testing::ElementsAreArray;
51 
52 class BaseDepthwiseConvolutionOpModel : public SingleOpModel {
53  public:
BaseDepthwiseConvolutionOpModel(TfLiteRegistration * registration,const TensorData & input,const TensorData & filter,const TensorData & output,Padding padding_type,int dilation_factor=1,int stride_width=1,int stride_height=1,ActivationFunctionType fused_activation_function=ActivationFunctionType_NONE)54   BaseDepthwiseConvolutionOpModel(
55       TfLiteRegistration* registration, const TensorData& input,
56       const TensorData& filter, const TensorData& output, Padding padding_type,
57       int dilation_factor = 1, int stride_width = 1, int stride_height = 1,
58       ActivationFunctionType fused_activation_function =
59           ActivationFunctionType_NONE) {
60     input_ = AddInput(input);
61     filter_ = AddInput(filter);
62 
63     int bias_size = GetShape(filter_)[3];
64     if (input.type == TensorType_FLOAT32) {
65       bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
66     } else {
67       // This is a quantized version. The scale of 'bias' depends on the scales
68       // of input and filter. Supposedly this is correctly set during quantized
69       // training.
70       if (filter.per_channel_quantization) {
71         // per channel quantization.
72         std::vector<float> bias_scale(
73             filter.per_channel_quantization_scales.size());
74         std::vector<int64_t> bias_zero_points(
75             filter.per_channel_quantization_scales.size());
76         for (size_t i = 0; i < filter.per_channel_quantization_scales.size();
77              ++i) {
78           bias_scale[i] =
79               input.scale * filter.per_channel_quantization_scales[i];
80           bias_zero_points[i] = 0;
81         }
82         tflite::TensorType bias_type = TensorType_INT32;
83         if (input.type == TensorType_INT16) {
84           bias_type = TensorType_INT64;
85         }
86         TensorData bias{bias_type,
87                         {bias_size},
88                         /*min=*/0,
89                         /*max=*/0,
90                         /*scale=*/0,
91                         /*zero_point=*/0,
92                         true,
93                         /*per_channel_quantization_scales=*/bias_scale,
94                         /*per_channel_quantization_offsets=*/bias_zero_points,
95                         /*channel_index==*/0};
96         bias_ = AddInput(bias);
97       } else {
98         // per tensor quantization.
99         auto bias_scale = GetScale(input_) * GetScale(filter_);
100         TensorData bias{TensorType_INT32, {bias_size}, 0, 0, bias_scale};
101         bias_ = AddInput(bias);
102       }
103     }
104 
105     output_ = AddOutput(output);
106     // The CPU kernel now ignores `depthwise_multiplier`. However delegates
107     // like NNAPI still relies on the attribute.
108     int input_depth = GetShape(input_)[3];
109     int output_depth = GetShape(filter_)[3];
110     int depth_mul = output_depth / input_depth;
111 
112     SetBuiltinOp(
113         BuiltinOperator_DEPTHWISE_CONV_2D,
114         BuiltinOptions_DepthwiseConv2DOptions,
115         CreateDepthwiseConv2DOptions(
116             builder_, padding_type, stride_width, stride_height, depth_mul,
117             fused_activation_function, dilation_factor, dilation_factor)
118             .Union());
119 
120     resolver_ = std::make_unique<SingleOpResolver>(
121         BuiltinOperator_DEPTHWISE_CONV_2D, registration);
122 
123     BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)});
124   }
125 
126  protected:
127   int input_;
128   int filter_;
129   int bias_;
130   int output_;
131 };
132 
133 class DepthwiseConvolutionOpModel : public BaseDepthwiseConvolutionOpModel {
134  public:
135   using BaseDepthwiseConvolutionOpModel::BaseDepthwiseConvolutionOpModel;
136 
SetFilter(std::initializer_list<float> f)137   void SetFilter(std::initializer_list<float> f) { PopulateTensor(filter_, f); }
138 
SetBias(std::initializer_list<float> f)139   void SetBias(std::initializer_list<float> f) { PopulateTensor(bias_, f); }
140 
SetInput(std::initializer_list<float> data)141   void SetInput(std::initializer_list<float> data) {
142     PopulateTensor(input_, data);
143   }
144 
GetOutput()145   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
146 };
147 
148 const auto kKernelMap = new std::map<string, TfLiteRegistration*>({
149     {"Reference", ops::builtin::Register_DEPTHWISE_CONVOLUTION_REF()},
150     {"GenericOptimized",
151      ops::builtin::Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT()},
152     {"NeonOptimized", ops::builtin::Register_DEPTHWISE_CONVOLUTION_NEON_OPT()},
153 });
154 
155 class DepthwiseConvolutionOpTest : public SingleOpTest {
156  protected:
GetKernelMap()157   const std::map<string, TfLiteRegistration*>& GetKernelMap() override {
158     return *kKernelMap;
159   }
160 };
161 
// Float depthwise conv with a fused RELU activation: the expected output has
// 0 wherever the unactivated result would be negative.
TEST_P(DepthwiseConvolutionOpTest, ActivationReluTest) {
  const TensorData input_spec{TensorType_FLOAT32, {1, 3, 2, 2}};
  const TensorData filter_spec{TensorType_FLOAT32, {1, 2, 2, 4}};
  const TensorData output_spec{TensorType_FLOAT32, {}};

  DepthwiseConvolutionOpModel op(GetRegistration(), input_spec, filter_spec,
                                 output_spec, Padding_VALID,
                                 /*dilation_factor=*/1,
                                 /*stride_width=*/1,
                                 /*stride_height=*/1,
                                 ActivationFunctionType_RELU);

  op.SetInput({
      1, 2, 7, 8,    // column 1
      3, 4, 9, 10,   // column 2
      5, 6, 11, 12,  // column 3
  });
  op.SetFilter({
      1, 2, 3, 4,        //
      -9, 10, -11, 12,   //
      5, 6, 7, 8,        //
      13, -14, 15, -16,  //
  });
  op.SetBias({1, 2, 3, 4});

  ASSERT_EQ(op.Invoke(), kTfLiteOk);

  EXPECT_THAT(op.GetOutput(), ElementsAreArray({
                                  71, 0, 99, 0,   //
                                  91, 0, 127, 0,  //
                              }));
}
192 
// Float depthwise conv with a fused RELU_N1_TO_1 activation: every expected
// output value is either -1 or 1.
TEST_P(DepthwiseConvolutionOpTest, ActivationReluN1Test) {
  const TensorData input_spec{TensorType_FLOAT32, {1, 3, 2, 2}};
  const TensorData filter_spec{TensorType_FLOAT32, {1, 2, 2, 4}};
  const TensorData output_spec{TensorType_FLOAT32, {}};

  DepthwiseConvolutionOpModel op(GetRegistration(), input_spec, filter_spec,
                                 output_spec, Padding_VALID,
                                 /*dilation_factor=*/1,
                                 /*stride_width=*/1,
                                 /*stride_height=*/1,
                                 ActivationFunctionType_RELU_N1_TO_1);

  op.SetInput({
      1, 2, 7, 8,    // column 1
      3, 4, 9, 10,   // column 2
      5, 6, 11, 12,  // column 3
  });
  op.SetFilter({
      1, 2, 3, 4,        //
      -9, 10, -11, 12,   //
      5, 6, 7, 8,        //
      13, -14, 15, -16,  //
  });
  op.SetBias({1, 2, 3, 4});

  ASSERT_EQ(op.Invoke(), kTfLiteOk);

  EXPECT_THAT(op.GetOutput(), ElementsAreArray({
                                  1, -1, 1, -1,  //
                                  1, -1, 1, -1,  //
                              }));
}
223 
// Float depthwise conv with a fused RELU6 activation: every expected output
// value is either 0 or 6.
TEST_P(DepthwiseConvolutionOpTest, ActivationRelu6Test) {
  const TensorData input_spec{TensorType_FLOAT32, {1, 3, 2, 2}};
  const TensorData filter_spec{TensorType_FLOAT32, {1, 2, 2, 4}};
  const TensorData output_spec{TensorType_FLOAT32, {}};

  DepthwiseConvolutionOpModel op(GetRegistration(), input_spec, filter_spec,
                                 output_spec, Padding_VALID,
                                 /*dilation_factor=*/1,
                                 /*stride_width=*/1,
                                 /*stride_height=*/1,
                                 ActivationFunctionType_RELU6);

  op.SetInput({
      1, 2, 7, 8,    // column 1
      3, 4, 9, 10,   // column 2
      5, 6, 11, 12,  // column 3
  });
  op.SetFilter({
      1, 2, 3, 4,        //
      -9, 10, -11, 12,   //
      5, 6, 7, 8,        //
      13, -14, 15, -16,  //
  });
  op.SetBias({1, 2, 3, 4});

  ASSERT_EQ(op.Invoke(), kTfLiteOk);

  EXPECT_THAT(op.GetOutput(), ElementsAreArray({
                                  6, 0, 6, 0,  //
                                  6, 0, 6, 0,  //
                              }));
}
254 
StrideTest(TfLiteRegistration * registration,int num_thread)255 void StrideTest(TfLiteRegistration* registration, int num_thread) {
256   DepthwiseConvolutionOpModel m(
257       registration, {TensorType_FLOAT32, {1, 3, 2, 2}},
258       {TensorType_FLOAT32, {1, 2, 2, 4}}, {TensorType_FLOAT32, {}},
259       Padding_VALID,
260       /*dilation_factor*/ 1,
261       /*stride_width*/ 2,
262       /*stride_height*/ 2,
263       /*ActivationFunctionType*/ ActivationFunctionType_NONE);
264 
265   m.SetInput({
266       1, 2, 7, 8,    // column 1
267       3, 4, 9, 10,   // column 2
268       5, 6, 11, 12,  // column 3
269   });
270   m.SetFilter({
271       1, 2, 3, 4,        //
272       -9, 10, -11, 12,   //
273       5, 6, 7, 8,        //
274       13, -14, 15, -16,  //
275   });
276   m.SetBias({1, 2, 3, 4});
277 
278   ASSERT_EQ(m.Invoke(), kTfLiteOk);
279 
280   EXPECT_THAT(m.GetOutput(), ElementsAreArray({
281                                  71, -34, 99, -20,  //
282                              }));
283 }
284 
TEST_P(DepthwiseConvolutionOpTest,StrideTest)285 TEST_P(DepthwiseConvolutionOpTest, StrideTest) {
286   StrideTest(GetRegistration(), /*num_thread=*/1);
287 }
288 
TEST_P(DepthwiseConvolutionOpTest,MultithreadStrideTest)289 TEST_P(DepthwiseConvolutionOpTest, MultithreadStrideTest) {
290   StrideTest(GetRegistration(), /*num_thread=*/4);
291 }
292 
PaddingTest(TfLiteRegistration * registration,int num_thread)293 void PaddingTest(TfLiteRegistration* registration, int num_thread) {
294   DepthwiseConvolutionOpModel m(
295       registration, {TensorType_FLOAT32, {1, 3, 2, 2}},
296       {TensorType_FLOAT32, {1, 2, 2, 4}}, {TensorType_FLOAT32, {}},
297       Padding_SAME,
298       /*dilation_factor*/ 1,
299       /*stride_width*/ 2,
300       /*stride_height*/ 2,
301       /*ActivationFunctionType*/ ActivationFunctionType_NONE);
302 
303   m.SetInput({
304       1, 2, 7, 8,    // column 1
305       3, 4, 9, 10,   // column 2
306       5, 6, 11, 12,  // column 3
307   });
308   m.SetFilter({
309       1, 2, 3, 4,        //
310       -9, 10, -11, 12,   //
311       5, 6, 7, 8,        //
312       13, -14, 15, -16,  //
313   });
314   m.SetBias({1, 2, 3, 4});
315 
316   ASSERT_EQ(m.Invoke(), kTfLiteOk);
317 
318   EXPECT_THAT(m.GetOutput(), ElementsAreArray({
319                                  71, -34, 99, -20,     //
320                                  -93, 122, -111, 172,  //
321                              }));
322 }
323 
TEST_P(DepthwiseConvolutionOpTest,PaddingTest)324 TEST_P(DepthwiseConvolutionOpTest, PaddingTest) {
325   PaddingTest(GetRegistration(), /*num_thread=*/1);
326 }
327 
TEST_P(DepthwiseConvolutionOpTest,MultithreadPaddingTest)328 TEST_P(DepthwiseConvolutionOpTest, MultithreadPaddingTest) {
329   PaddingTest(GetRegistration(), /*num_thread=*/4);
330 }
331 
SimpleTest(TfLiteRegistration * registration,int num_thread)332 void SimpleTest(TfLiteRegistration* registration, int num_thread) {
333   DepthwiseConvolutionOpModel m(registration,
334                                 {TensorType_FLOAT32, {1, 3, 2, 2}},
335                                 {TensorType_FLOAT32, {1, 2, 2, 4}},
336                                 {TensorType_FLOAT32, {}}, Padding_VALID);
337 
338   m.SetInput({
339       1, 2, 7, 8,    // column 1
340       3, 4, 9, 10,   // column 2
341       5, 6, 11, 12,  // column 3
342   });
343   m.SetFilter({
344       1, 2, 3, 4,        //
345       -9, 10, -11, 12,   //
346       5, 6, 7, 8,        //
347       13, -14, 15, -16,  //
348   });
349   m.SetBias({1, 2, 3, 4});
350 
351   ASSERT_EQ(m.Invoke(), kTfLiteOk);
352 
353   EXPECT_THAT(m.GetOutput(), ElementsAreArray({
354                                  71, -34, 99, -20,  //
355                                  91, -26, 127, -4,  //
356                              }));
357 }
358 
TEST_P(DepthwiseConvolutionOpTest,SimpleTest)359 TEST_P(DepthwiseConvolutionOpTest, SimpleTest) {
360   SimpleTest(GetRegistration(), /*num_thread=*/1);
361 }
362 
TEST_P(DepthwiseConvolutionOpTest,MultithreadSimpleTest)363 TEST_P(DepthwiseConvolutionOpTest, MultithreadSimpleTest) {
364   SimpleTest(GetRegistration(), /*num_thread=*/4);
365 }
366 
SimpleDilatedTestPaddingValid(TfLiteRegistration * registration,int num_thread)367 void SimpleDilatedTestPaddingValid(TfLiteRegistration* registration,
368                                    int num_thread) {
369   const int depth = 1;
370   const int image_width = 9;
371   const int image_height = 9;
372   const int image_batch_count = 1;
373   const int filter_size = 3;
374   const int filter_count = 1;
375   const int dilation_factor = 3;
376   DepthwiseConvolutionOpModel m(
377       registration,
378       {TensorType_FLOAT32,
379        {image_batch_count, image_height, image_width, depth}},
380       {TensorType_FLOAT32, {depth, filter_size, filter_size, filter_count}},
381       {TensorType_FLOAT32, {}}, Padding_VALID, dilation_factor);
382 
383   // The image matrix is:
384   // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
385   // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
386   // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
387   // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
388   // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
389   // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
390   // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
391   // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
392   // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
393   // clang-format off
394   m.SetInput({0, 0, 0, 0, 0, 0, 0, 0, 0,
395               0, 0, 0, 0, 0, 0, 0, 0, 0,
396               0, 0, 0, 0, 0, 0, 0, 0, 0,
397               0, 0, 0, 1, 1, 1, 0, 0, 0,
398               0, 0, 0, 1, 1, 1, 0, 0, 0,
399               0, 0, 0, 1, 1, 1, 0, 0, 0,
400               0, 0, 0, 0, 0, 0, 0, 0, 0,
401               0, 0, 0, 0, 0, 0, 0, 0, 0,
402               0, 0, 0, 0, 0, 0, 0, 0, 0});
403   // clang-format on
404   // The filter matrix is:
405   // | 1 | 2 | 3 |
406   // | 4 | 5 | 6 |
407   // | 7 | 8 | 9 |
408   m.SetFilter({1, 2, 3, 4, 5, 6, 7, 8, 9});
409   // No bias for this test.
410   m.SetBias({0});
411   ASSERT_EQ(m.Invoke(), kTfLiteOk);
412 
413   // Since the dilation rate is 3 this will reduce the size of the output from
414   // 10x10 to 3x3 of all 5s. Specifically:
415   // | 5 | 5 | 5 |
416   // | 5 | 5 | 5 |
417   // | 5 | 5 | 5 |
418   EXPECT_THAT(m.GetOutput(), ElementsAreArray({5, 5, 5, 5, 5, 5, 5, 5, 5}));
419 }
420 
TEST_P(DepthwiseConvolutionOpTest,SimpleDilatedTestPaddingValid)421 TEST_P(DepthwiseConvolutionOpTest, SimpleDilatedTestPaddingValid) {
422   SimpleDilatedTestPaddingValid(GetRegistration(), /*num_thread=*/1);
423 }
424 
TEST_P(DepthwiseConvolutionOpTest,MultithreadSimpleDilatedTestPaddingValid)425 TEST_P(DepthwiseConvolutionOpTest, MultithreadSimpleDilatedTestPaddingValid) {
426   SimpleDilatedTestPaddingValid(GetRegistration(), /*num_thread=*/4);
427 }
428 
SimpleDilatedTestPaddingSame(TfLiteRegistration * registration,int num_thread)429 void SimpleDilatedTestPaddingSame(TfLiteRegistration* registration,
430                                   int num_thread) {
431   const int depth = 1;
432   const int image_width = 3;
433   const int image_height = 3;
434   const int image_batch_count = 1;
435   const int filter_size = 2;
436   const int filter_count = 1;
437   const int dilation_factor = 2;
438   DepthwiseConvolutionOpModel m(
439       registration,
440       {TensorType_FLOAT32,
441        {image_batch_count, image_height, image_width, depth}},
442       {TensorType_FLOAT32, {depth, filter_size, filter_size, filter_count}},
443       {TensorType_FLOAT32, {}}, Padding_SAME, dilation_factor);
444 
445   // The image matrix is:
446   // | 1 | 1 | 1 |
447   // | 1 | 1 | 1 |
448   // | 1 | 1 | 1 |
449   m.SetInput({1, 1, 1, 1, 1, 1, 1, 1, 1});
450   // The filter matrix is:
451   // | 1 | 2 |
452   // | 3 | 4 |
453   m.SetFilter({1, 2, 3, 4});
454   // No bias for this test.
455   m.SetBias({0});
456   m.SetNumThreads(num_thread);
457   ASSERT_EQ(m.Invoke(), kTfLiteOk);
458 
459   // Output:
460   // | 4 | 7 | 3 |
461   // | 6 |10 | 4 |
462   // | 2 | 3 | 1 |
463   EXPECT_THAT(m.GetOutput(), ElementsAreArray({4, 7, 3, 6, 10, 4, 2, 3, 1}));
464 }
465 
TEST_P(DepthwiseConvolutionOpTest,SimpleDilatedTestPaddingSame)466 TEST_P(DepthwiseConvolutionOpTest, SimpleDilatedTestPaddingSame) {
467   SimpleDilatedTestPaddingSame(GetRegistration(), /*num_thread=*/1);
468 }
469 
TEST_P(DepthwiseConvolutionOpTest,MultithreadSimpleDilatedTestPaddingSame)470 TEST_P(DepthwiseConvolutionOpTest, MultithreadSimpleDilatedTestPaddingSame) {
471   SimpleDilatedTestPaddingSame(GetRegistration(), /*num_thread=*/4);
472 }
473 
BatchPaddingValidTest(TfLiteRegistration * registration,int num_thread)474 void BatchPaddingValidTest(TfLiteRegistration* registration, int num_thread) {
475   const int input_batch = 2;
476   const int input_width = 3;
477   const int input_height = 3;
478   const int input_depth = 4;
479   const int filter_batch = 1;
480   const int filter_size = 3;
481   const int filter_depth = 4;
482   DepthwiseConvolutionOpModel m(
483       registration,
484       {TensorType_FLOAT32,
485        {input_batch, input_height, input_width, input_depth}},
486       {TensorType_FLOAT32,
487        {filter_batch, filter_size, filter_size, filter_depth}},
488       {TensorType_FLOAT32, {}}, Padding_VALID);
489 
490   // clang-format off
491   m.SetInput({
492       // array of 3 x 24 => [2, 3, 3, 4]
493       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
494       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
495       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0
496   });
497 
498   m.SetFilter({
499       // array of 9 x 4 => [1, 3, 3, 4]
500       1, 2, 3, 4,
501       1, 2, 3, 4,
502       1, 2, 3, 4,
503       1, 2, 3, 4,
504       1, 2, 3, 4,
505       1, 2, 3, 4,
506       1, 2, 3, 4,
507       1, 2, 3, 4,
508       1, 2, 3, 4
509   });
510   // clang-format on
511 
512   // No bias for this test.
513   m.SetBias({0, 0, 0, 0});
514   m.SetNumThreads(num_thread);
515   ASSERT_EQ(m.Invoke(), kTfLiteOk);
516 
517   // clang-format off
518   EXPECT_THAT(
519       m.GetOutput(),
520       ElementsAreArray({
521         9, 18, 0, 0,
522         9, 18, 0, 0
523       }));
524   // clang-format on
525 }
526 
TEST_P(DepthwiseConvolutionOpTest,BatchPaddingValidTest)527 TEST_P(DepthwiseConvolutionOpTest, BatchPaddingValidTest) {
528   BatchPaddingValidTest(GetRegistration(), /*num_thread=*/1);
529 }
530 
TEST_P(DepthwiseConvolutionOpTest,MultithreadBatchPaddingValidTest)531 TEST_P(DepthwiseConvolutionOpTest, MultithreadBatchPaddingValidTest) {
532   BatchPaddingValidTest(GetRegistration(), /*num_thread=*/4);
533 }
534 
BatchPaddingSameTest(TfLiteRegistration * registration,int num_thread)535 void BatchPaddingSameTest(TfLiteRegistration* registration, int num_thread) {
536   const int input_batch = 4;
537   const int input_width = 2;
538   const int input_height = 2;
539   const int input_depth = 1;
540   const int filter_batch = 1;
541   const int filter_size = 3;
542   const int filter_depth = 1;
543   DepthwiseConvolutionOpModel m(
544       registration,
545       {TensorType_FLOAT32,
546        {input_batch, input_height, input_width, input_depth}},
547       {TensorType_FLOAT32,
548        {filter_batch, filter_size, filter_size, filter_depth}},
549       {TensorType_FLOAT32, {}}, Padding_SAME);
550 
551   // clang-format off
552   m.SetInput({
553       // array of 4 x 4 => [4, 2, 2, 1]
554       1, 1, 1, 1,
555       0, 0, 0, 0,
556       1, 1, 2, 2,
557       2, 2, 2, 2
558   });
559 
560   m.SetFilter({
561       // array of 3 x 3 => [1, 3, 3, 1]
562       1, 1, 1,
563       0, 2, 0,
564       1, 1, 1
565   });
566   // clang-format on
567 
568   // No bias for this test.
569   m.SetBias({0});
570   m.SetNumThreads(num_thread);
571   ASSERT_EQ(m.Invoke(), kTfLiteOk);
572 
573   // clang-format off
574   EXPECT_THAT(
575       m.GetOutput(),
576       ElementsAreArray({
577         4, 4, 4, 4,
578         0, 0, 0, 0,
579         6, 6, 6, 6,
580         8, 8, 8, 8
581       }));
582   // clang-format on
583 }
584 
TEST_P(DepthwiseConvolutionOpTest,BatchPaddingSameTest)585 TEST_P(DepthwiseConvolutionOpTest, BatchPaddingSameTest) {
586   BatchPaddingSameTest(GetRegistration(), /*num_thread=*/1);
587 }
588 
TEST_P(DepthwiseConvolutionOpTest,MultithreadBatchPaddingSameTest)589 TEST_P(DepthwiseConvolutionOpTest, MultithreadBatchPaddingSameTest) {
590   BatchPaddingSameTest(GetRegistration(), /*num_thread=*/4);
591 }
592 
593 class QuantizedDepthwiseConvolutionOpModel
594     : public BaseDepthwiseConvolutionOpModel {
595  public:
596   using BaseDepthwiseConvolutionOpModel::BaseDepthwiseConvolutionOpModel;
597 
SetInput(std::initializer_list<float> data)598   void SetInput(std::initializer_list<float> data) {
599     QuantizeAndPopulate<uint8_t>(input_, data);
600   }
SetInput(const std::vector<float> & data)601   void SetInput(const std::vector<float>& data) {
602     QuantizeAndPopulate<uint8_t>(input_, data);
603   }
604 
SetFilter(std::initializer_list<float> data)605   void SetFilter(std::initializer_list<float> data) {
606     QuantizeAndPopulate<uint8_t>(filter_, data);
607   }
SetFilter(const std::vector<float> & data)608   void SetFilter(const std::vector<float>& data) {
609     QuantizeAndPopulate<uint8_t>(filter_, data);
610   }
611 
SetBias(std::initializer_list<float> data)612   void SetBias(std::initializer_list<float> data) {
613     QuantizeAndPopulate<int32_t>(bias_, data);
614   }
SetBias(const std::vector<float> & data)615   void SetBias(const std::vector<float>& data) {
616     QuantizeAndPopulate<int32_t>(bias_, data);
617   }
618 
GetOutput()619   std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); }
GetDequantizedOutput()620   std::vector<float> GetDequantizedOutput() {
621     return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
622                                GetScale(output_), GetZeroPoint(output_));
623   }
624 };
625 
626 const auto kQuantizedKernelMap = new std::map<string, TfLiteRegistration*>({
627     {"Reference", ops::builtin::Register_DEPTHWISE_CONVOLUTION_REF()},
628     {"GenericOptimized",
629      ops::builtin::Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT()},
630     {"NeonOptimized", ops::builtin::Register_DEPTHWISE_CONVOLUTION_NEON_OPT()},
631     {"Uint8", ops::builtin::Register_DEPTHWISE_CONV_2D_UINT8()},
632 });
633 
634 class QuantizedDepthwiseConvolutionOpTest : public SingleOpTest {
635  protected:
GetKernelMap()636   const std::map<string, TfLiteRegistration*>& GetKernelMap() override {
637     return *kQuantizedKernelMap;
638   }
639 };
640 
641 // Only enable this test for neon.
642 #ifdef USE_NEON
// Runs the same uint8 depthwise conv (2400 channels) through the reference
// kernel and the generic-optimized kernel and requires identical raw output.
TEST_F(QuantizedDepthwiseConvolutionOpTest, LargeOutputChannelTest) {
  const TensorData input({TensorType_UINT8, {1, 4, 4, 2400}, -63.5, 64});
  const TensorData filter({TensorType_UINT8, {1, 3, 3, 2400}, -63.5, 64});
  const TensorData output({TensorType_UINT8, {}, -127, 128});
  const Padding padding = Padding_VALID;

  // Populate input, filter & bias data.
  // NOTE(review): UniformRandomFloat(-1, -1) has equal bounds, so this looks
  // like it yields constant data rather than random data; (-1, 1) may have
  // been intended. Left unchanged pending confirmation that the two kernels
  // are expected to match exactly on arbitrary inputs.
  const int input_size = 1 * 4 * 4 * 2400;
  const int filter_size = 1 * 3 * 3 * 2400;
  const int bias_size = 2400;
  std::vector<float> input_data(input_size);
  std::vector<float> filter_data(filter_size);
  std::vector<float> bias_data(bias_size);
  for (int i = 0; i < input_size; ++i) {
    input_data[i] = UniformRandomFloat(-1, -1);
  }
  for (int i = 0; i < filter_size; ++i) {
    filter_data[i] = UniformRandomFloat(-1, -1);
  }
  for (int i = 0; i < bias_size; ++i) {
    bias_data[i] = UniformRandomFloat(-1, -1);
  }

  // Make sure reference impl & optimized impl produce the same result.
  QuantizedDepthwiseConvolutionOpModel reference_impl(
      ops::builtin::Register_DEPTHWISE_CONVOLUTION_REF(), input, filter, output,
      padding);
  reference_impl.SetInput(input_data);
  reference_impl.SetFilter(filter_data);
  reference_impl.SetBias(bias_data);
  // A failed invoke must fail the test instead of silently comparing
  // whatever happens to be in the output buffers.
  ASSERT_EQ(reference_impl.Invoke(), kTfLiteOk);

  QuantizedDepthwiseConvolutionOpModel optimized_impl(
      ops::builtin::Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT(), input, filter,
      output, padding);
  optimized_impl.SetInput(input_data);
  optimized_impl.SetFilter(filter_data);
  optimized_impl.SetBias(bias_data);
  ASSERT_EQ(optimized_impl.Invoke(), kTfLiteOk);

  EXPECT_THAT(reference_impl.GetOutput(), optimized_impl.GetOutput());
}
685 #endif
686 
687 // In this test we set the input and output scales so that the results match
688 // exactly the 'non-quantized' version.
// uint8 depthwise conv with VALID padding; checks both the dequantized and the
// raw quantized output.
TEST_P(QuantizedDepthwiseConvolutionOpTest, SimpleTestQuantized) {
  const TensorData input_spec{TensorType_UINT8, {1, 3, 2, 2}, -63.5, 64};
  const TensorData filter_spec{TensorType_UINT8, {1, 2, 2, 4}, -63.5, 64};
  const TensorData output_spec{TensorType_UINT8, {}, -127, 128};
  QuantizedDepthwiseConvolutionOpModel op(GetRegistration(), input_spec,
                                          filter_spec, output_spec,
                                          Padding_VALID);

  op.SetInput({
      1, 2, 7, 8,    // column 1
      3, 4, 9, 10,   // column 2
      5, 6, 11, 12,  // column 3
  });
  op.SetFilter({
      1, 2, 3, 4,        //
      -9, 10, -11, 12,   //
      5, 6, 7, 8,        //
      13, -14, 15, -16,  //
  });
  op.SetBias({1, 2, 3, 4});

  ASSERT_EQ(op.Invoke(), kTfLiteOk);

  // Dequantized values, within a 1e-5 tolerance.
  EXPECT_THAT(op.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear(
                                             {
                                                 71, -34, 99, -20,  //
                                                 91, -26, 127, -4,  //
                                             },
                                             1e-5)));
  // For good measure, also verify the raw quantized values.
  EXPECT_THAT(op.GetOutput(), ElementsAreArray({
                                  198, 93, 226, 107,   //
                                  218, 101, 254, 123,  //
                              }));
}
722 
// Runs the same data through a uint8 model (filter range [-128.5, 128]) and a
// float model, and requires the dequantized result to be within 1 of the float
// result.
TEST_P(DepthwiseConvolutionOpTest,
       SimpleTestQuantizedFilterMultiplierGreaterThan1) {
  const TensorData quant_input_spec{
      TensorType_UINT8, {1, 3, 2, 2}, -63.5, 64};
  const TensorData quant_filter_spec{
      TensorType_UINT8, {1, 2, 2, 4}, -128.5, 128};
  const TensorData quant_output_spec{TensorType_UINT8, {}, -127, 128};
  QuantizedDepthwiseConvolutionOpModel quant_op(
      GetRegistration(), quant_input_spec, quant_filter_spec,
      quant_output_spec, Padding_VALID);

  const TensorData float_input_spec{TensorType_FLOAT32, {1, 3, 2, 2}};
  const TensorData float_filter_spec{TensorType_FLOAT32, {1, 2, 2, 4}};
  const TensorData float_output_spec{TensorType_FLOAT32, {}};
  DepthwiseConvolutionOpModel float_op(GetRegistration(), float_input_spec,
                                       float_filter_spec, float_output_spec,
                                       Padding_VALID);

  std::initializer_list<float> input_values = {
      1, 2, 7,  8,   // column 1
      3, 4, 9,  10,  // column 2
      5, 6, 11, 12,  // column 3
  };
  std::initializer_list<float> filter_values = {
      1,  2,   3,   4,    //
      -9, 10,  -11, 12,   //
      5,  6,   7,   8,    //
      13, -14, 15,  -16,  //
  };
  std::initializer_list<float> bias_values = {1, 2, 3, 4};

  // Quantized run first, then the float run, each with the same data.
  quant_op.SetInput(input_values);
  quant_op.SetFilter(filter_values);
  quant_op.SetBias(bias_values);
  ASSERT_EQ(quant_op.Invoke(), kTfLiteOk);

  float_op.SetInput(input_values);
  float_op.SetFilter(filter_values);
  float_op.SetBias(bias_values);
  ASSERT_EQ(float_op.Invoke(), kTfLiteOk);

  EXPECT_THAT(quant_op.GetDequantizedOutput(),
              ElementsAreArray(ArrayFloatNear(float_op.GetOutput(), 1)));
}
760 
// Runs the same data through a uint8 model (input and filter ranges both
// [-128.5, 128]) and a float model, and requires the dequantized result to be
// within 1 of the float result.
TEST_P(DepthwiseConvolutionOpTest,
       SimpleTestQuantizedOutputMultiplierGreaterThan1) {
  const TensorData quant_input_spec{
      TensorType_UINT8, {1, 3, 2, 2}, -128.5, 128};
  const TensorData quant_filter_spec{
      TensorType_UINT8, {1, 2, 2, 4}, -128.5, 128};
  const TensorData quant_output_spec{TensorType_UINT8, {}, -127, 128};
  QuantizedDepthwiseConvolutionOpModel quant_op(
      GetRegistration(), quant_input_spec, quant_filter_spec,
      quant_output_spec, Padding_VALID);

  const TensorData float_input_spec{TensorType_FLOAT32, {1, 3, 2, 2}};
  const TensorData float_filter_spec{TensorType_FLOAT32, {1, 2, 2, 4}};
  const TensorData float_output_spec{TensorType_FLOAT32, {}};
  DepthwiseConvolutionOpModel float_op(GetRegistration(), float_input_spec,
                                       float_filter_spec, float_output_spec,
                                       Padding_VALID);

  std::initializer_list<float> input_values = {
      1, 2, 7,  8,   // column 1
      3, 4, 9,  10,  // column 2
      5, 6, 11, 12,  // column 3
  };
  std::initializer_list<float> filter_values = {
      1,  2,   3,   4,    //
      -9, 10,  -11, 12,   //
      5,  6,   7,   8,    //
      13, -14, 15,  -16,  //
  };
  std::initializer_list<float> bias_values = {1, 2, 3, 4};

  // Quantized run first, then the float run, each with the same data.
  quant_op.SetInput(input_values);
  quant_op.SetFilter(filter_values);
  quant_op.SetBias(bias_values);
  ASSERT_EQ(quant_op.Invoke(), kTfLiteOk);

  float_op.SetInput(input_values);
  float_op.SetFilter(filter_values);
  float_op.SetBias(bias_values);
  ASSERT_EQ(float_op.Invoke(), kTfLiteOk);

  EXPECT_THAT(quant_op.GetDequantizedOutput(),
              ElementsAreArray(ArrayFloatNear(float_op.GetOutput(), 1)));
}
798 
// uint8 depthwise conv of a 9x9 single-channel image with a 3x3 filter dilated
// by 3 and VALID padding. All tensors use the [0, 255] range.
TEST_P(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTestPaddingValid) {
  const int depth = 1;
  const int image_width = 9;
  const int image_height = 9;
  const int image_batch_count = 1;
  const int filter_size = 3;
  const int filter_count = 1;
  const int dilation_factor = 3;

  const TensorData image_spec{
      TensorType_UINT8,
      {image_batch_count, image_height, image_width, depth},
      0,
      255};
  const TensorData filter_spec{
      TensorType_UINT8,
      {depth, filter_size, filter_size, filter_count},
      0,
      255};
  const TensorData output_spec{TensorType_UINT8, {}, 0, 255};
  QuantizedDepthwiseConvolutionOpModel op(GetRegistration(), image_spec,
                                          filter_spec, output_spec,
                                          Padding_VALID, dilation_factor);

  // Image: a 3x3 block of ones centred in a 9x9 field of zeros.
  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
  // clang-format off
  op.SetInput({0, 0, 0, 0, 0, 0, 0, 0, 0,
               0, 0, 0, 0, 0, 0, 0, 0, 0,
               0, 0, 0, 0, 0, 0, 0, 0, 0,
               0, 0, 0, 1, 1, 1, 0, 0, 0,
               0, 0, 0, 1, 1, 1, 0, 0, 0,
               0, 0, 0, 1, 1, 1, 0, 0, 0,
               0, 0, 0, 0, 0, 0, 0, 0, 0,
               0, 0, 0, 0, 0, 0, 0, 0, 0,
               0, 0, 0, 0, 0, 0, 0, 0, 0});
  // clang-format on
  // Filter:
  // | 1 | 2 | 3 |
  // | 4 | 5 | 6 |
  // | 7 | 8 | 9 |
  op.SetFilter({1, 2, 3, 4, 5, 6, 7, 8, 9});
  // Zero bias, so it does not contribute to the result.
  op.SetBias({0});
  ASSERT_EQ(op.Invoke(), kTfLiteOk);

  // With a dilation rate of 3 the 9x9 image yields a 3x3 output of all 5s:
  // | 5 | 5 | 5 |
  // | 5 | 5 | 5 |
  // | 5 | 5 | 5 |
  EXPECT_THAT(op.GetDequantizedOutput(),
              ElementsAreArray({5, 5, 5, 5, 5, 5, 5, 5, 5}));
}
857 
// Dilated 2x2 filter over a 3x3 single-channel image with SAME padding.
TEST_P(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTestPaddingSame) {
  constexpr int kBatches = 1;
  constexpr int kImageHeight = 3;
  constexpr int kImageWidth = 3;
  constexpr int kDepth = 1;
  constexpr int kFilterSize = 2;
  constexpr int kFilterCount = 1;
  constexpr int kDilation = 2;

  // All three tensors are uint8 with a [0, 255] float range.
  const TensorData input_tensor{
      TensorType_UINT8, {kBatches, kImageHeight, kImageWidth, kDepth}, 0, 255};
  const TensorData filter_tensor{
      TensorType_UINT8,
      {kDepth, kFilterSize, kFilterSize, kFilterCount},
      0,
      255};
  const TensorData output_tensor{TensorType_UINT8, {}, 0, 255};

  QuantizedDepthwiseConvolutionOpModel m(GetRegistration(), input_tensor,
                                         filter_tensor, output_tensor,
                                         Padding_SAME, kDilation);

  // Image (all ones):
  // | 1 | 1 | 1 |
  // | 1 | 1 | 1 |
  // | 1 | 1 | 1 |
  m.SetInput({1, 1, 1, 1, 1, 1, 1, 1, 1});

  // Filter:
  // | 1 | 2 |
  // | 3 | 4 |
  m.SetFilter({1, 2, 3, 4});
  // A single zero bias, i.e. effectively no bias.
  m.SetBias({0});
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  // Expected 3x3 output (same spatial size as the input):
  // | 4 | 7 | 3 |
  // | 6 |10 | 4 |
  // | 2 | 3 | 1 |
  EXPECT_THAT(m.GetDequantizedOutput(),
              ElementsAreArray({4, 7, 3, 6, 10, 4, 2, 3, 1}));
}
898 
TEST_P(QuantizedDepthwiseConvolutionOpTest,MultithreadOnRowUint8GeneralTest)899 TEST_P(QuantizedDepthwiseConvolutionOpTest, MultithreadOnRowUint8GeneralTest) {
900   const int depth = 1;
901   const int image_width = 4;
902   const int image_height = 28;
903   const int image_batch_count = 3;
904   const int filter_size = 3;
905   const int filter_count = 1;
906 
907   QuantizedDepthwiseConvolutionOpModel m(
908       GetRegistration(),
909       {TensorType_UINT8,
910        {image_batch_count, image_height, image_width, depth},
911        0,
912        255},
913       {TensorType_UINT8,
914        {depth, filter_size, filter_size, filter_count},
915        0,
916        255},
917       {TensorType_UINT8, {}, 0, 255}, Padding_VALID);
918 
919   // clang-format off
920   m.SetInput({
921       0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
922       0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
923       1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
924       1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
925       1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,
926       0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
927       0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
928 
929       0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
930       0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
931       2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
932       2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
933       2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,
934       0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
935       0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
936 
937       0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
938       0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
939       3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,
940       3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,
941       3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,
942       0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
943       0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0
944   });
945   // clang-format on
946 
947   // The filter matrix is:
948   // | 1 | 2 | 3 |
949   // | 4 | 5 | 6 |
950   // | 7 | 8 | 9 |
951   m.SetFilter({1, 2, 3, 4, 5, 6, 7, 8, 9});
952   // No bias for this test.
953   m.SetBias({0});
954   m.SetNumThreads(4);
955   ASSERT_EQ(m.Invoke(), kTfLiteOk);
956 
957   // clang-format off
958   EXPECT_THAT(
959       m.GetDequantizedOutput(),
960       ElementsAreArray({
961           0, 0,    0, 0,    0, 0,    0, 0,
962           0, 0,    0, 0,    24, 24,  39, 39,
963           45, 45,  45, 45,  45, 45,  45, 45,
964           45, 45,  45, 45,  45, 45,  45, 45,
965           45, 45,  45, 45,  21, 21,  6, 6,
966           0, 0,    0, 0,    0, 0,    0, 0,
967           0, 0,    0, 0,
968 
969           0, 0,    0, 0,    0, 0,    0, 0,
970           0, 0,    0, 0,    48, 48,  78, 78,
971           90, 90,  90, 90,  90, 90,  90, 90,
972           90, 90,  90, 90,  90, 90,  90, 90,
973           90, 90,  90, 90,  42, 42,  12, 12,
974           0, 0,    0, 0,    0, 0,    0, 0,
975           0, 0,    0, 0,
976 
977           0, 0,      0, 0,      0, 0,      0, 0,
978           0, 0,      0, 0,      72, 72,    117, 117,
979           135, 135,  135, 135,  135, 135,  135, 135,
980           135, 135,  135, 135,  135, 135,  135, 135,
981           135, 135,  135, 135,  63, 63,    18, 18,
982           0, 0,      0, 0,      0, 0,      0, 0,
983           0, 0,      0, 0,
984       }));
985   // clang-format on
986 }
987 
TEST_P(QuantizedDepthwiseConvolutionOpTest,MultithreadOnBatchUint8GeneralTest)988 TEST_P(QuantizedDepthwiseConvolutionOpTest,
989        MultithreadOnBatchUint8GeneralTest) {
990   const int depth = 1;
991   const int image_width = 8;
992   const int image_height = 4;
993   const int image_batch_count = 6;
994   const int filter_size = 3;
995   const int filter_count = 1;
996 
997   QuantizedDepthwiseConvolutionOpModel m(
998       GetRegistration(),
999       {TensorType_UINT8,
1000        {image_batch_count, image_height, image_width, depth},
1001        0,
1002        255},
1003       {TensorType_UINT8,
1004        {depth, filter_size, filter_size, filter_count},
1005        0,
1006        255},
1007       {TensorType_UINT8, {}, 0, 255}, Padding_VALID);
1008 
1009   // clang-format off
1010   m.SetInput({
1011       0, 0, 0, 0,  0, 0, 0, 0,  1, 1, 1, 1,  1, 1, 1, 1,
1012       1, 1, 1, 1,  1, 1, 1, 1,  0, 0, 0, 0,  0, 0, 0, 0,
1013 
1014       0, 0, 0, 0,  0, 0, 0, 0,  1, 1, 1, 1,  1, 1, 1, 1,
1015       1, 1, 1, 1,  1, 1, 1, 1,  0, 0, 0, 0,  0, 0, 0, 0,
1016 
1017       0, 0, 0, 0,  0, 0, 0, 0,  1, 1, 1, 1,  1, 1, 1, 1,
1018       1, 1, 1, 1,  1, 1, 1, 1,  0, 0, 0, 0,  0, 0, 0, 0,
1019 
1020       0, 0, 0, 0,  0, 0, 0, 0,  1, 1, 1, 1,  1, 1, 1, 1,
1021       1, 1, 1, 1,  1, 1, 1, 1,  0, 0, 0, 0,  0, 0, 0, 0,
1022 
1023       0, 0, 0, 0,  0, 0, 0, 0,  1, 1, 1, 1,  1, 1, 1, 1,
1024       1, 1, 1, 1,  1, 1, 1, 1,  0, 0, 0, 0,  0, 0, 0, 0,
1025 
1026       0, 0, 0, 0,  0, 0, 0, 0,  1, 1, 1, 1,  1, 1, 1, 1,
1027       1, 1, 1, 1,  1, 1, 1, 1,  0, 0, 0, 0,  0, 0, 0, 0
1028   });
1029   // clang-format on
1030 
1031   // The filter matrix is:
1032   // | 1 | 2 | 3 |
1033   // | 4 | 5 | 6 |
1034   // | 7 | 8 | 9 |
1035   m.SetFilter({1, 2, 3, 4, 5, 6, 7, 8, 9});
1036   // No bias for this test.
1037   m.SetBias({0});
1038   m.SetNumThreads(4);
1039   ASSERT_EQ(m.Invoke(), kTfLiteOk);
1040 
1041   // clang-format off
1042   EXPECT_THAT(
1043       m.GetDequantizedOutput(),
1044       ElementsAreArray({
1045           39, 39, 39, 39, 39, 39,
1046           21, 21, 21, 21, 21, 21,
1047 
1048           39, 39, 39, 39, 39, 39,
1049           21, 21, 21, 21, 21, 21,
1050 
1051           39, 39, 39, 39, 39, 39,
1052           21, 21, 21, 21, 21, 21,
1053 
1054           39, 39, 39, 39, 39, 39,
1055           21, 21, 21, 21, 21, 21,
1056 
1057           39, 39, 39, 39, 39, 39,
1058           21, 21, 21, 21, 21, 21,
1059 
1060           39, 39, 39, 39, 39, 39,
1061           21, 21, 21, 21, 21, 21
1062       }));
1063   // clang-format on
1064 }
1065 
TEST_P(QuantizedDepthwiseConvolutionOpTest,MultithreadOnRowValidPaddingTest)1066 TEST_P(QuantizedDepthwiseConvolutionOpTest, MultithreadOnRowValidPaddingTest) {
1067   // This test runs through DepthwiseConv3x3Filter with __aarch64__, and runs
1068   // through DepthwiseConvGeneral with other configs.
1069   const int input_batch = 1;
1070   const int input_width = 3;
1071   const int input_height = 5;
1072   const int input_depth = 8;
1073   const int filter_batch = 1;
1074   const int filter_size = 3;
1075   const int filter_depth = 8;
1076 
1077   QuantizedDepthwiseConvolutionOpModel m(
1078       GetRegistration(),
1079       {TensorType_UINT8,
1080        {input_batch, input_height, input_width, input_depth},
1081        0,
1082        128},
1083       {TensorType_UINT8,
1084        {filter_batch, filter_size, filter_size, filter_depth},
1085        0,
1086        128},
1087       {TensorType_UINT8, {}, 0, 255}, Padding_VALID);
1088 
1089   // clang-format off
1090   m.SetInput({
1091     // array of 15 x 8 => [1, 5, 3, 8]
1092       1, 1, 0, 0,  1, 1, 0, 0,
1093       1, 1, 0, 0,  1, 1, 0, 0,
1094       1, 1, 0, 0,  1, 1, 0, 0,
1095       1, 1, 0, 0,  1, 1, 0, 0,
1096       1, 1, 0, 0,  1, 1, 0, 0,
1097       1, 1, 0, 0,  1, 1, 0, 0,
1098       1, 1, 0, 0,  1, 1, 0, 0,
1099       1, 1, 0, 0,  1, 1, 0, 0,
1100       1, 1, 0, 0,  1, 1, 0, 0,
1101       1, 1, 0, 0,  1, 1, 0, 0,
1102       1, 1, 0, 0,  1, 1, 0, 0,
1103       1, 1, 0, 0,  1, 1, 0, 0,
1104       1, 1, 0, 0,  1, 1, 0, 0,
1105       1, 1, 0, 0,  1, 1, 0, 0,
1106       1, 1, 0, 0,  1, 1, 0, 0
1107   });
1108 
1109   m.SetFilter({
1110     // array of 9 x 8 => [1, 3, 3, 8]
1111       1, 2, 3, 4, 5, 6, 7, 8,
1112       1, 2, 3, 4, 5, 6, 7, 8,
1113       1, 2, 3, 4, 5, 6, 7, 8,
1114       1, 2, 3, 4, 5, 6, 7, 8,
1115       1, 2, 3, 4, 5, 6, 7, 8,
1116       1, 2, 3, 4, 5, 6, 7, 8,
1117       1, 2, 3, 4, 5, 6, 7, 8,
1118       1, 2, 3, 4, 5, 6, 7, 8,
1119       1, 2, 3, 4, 5, 6, 7, 8
1120   });
1121   // clang-format on
1122 
1123   // No bias for this test.
1124   m.SetBias({0, 0, 0, 0, 0, 0, 0, 0});
1125   m.SetNumThreads(4);
1126   ASSERT_EQ(m.Invoke(), kTfLiteOk);
1127 
1128   // clang-format off
1129   EXPECT_THAT(
1130       m.GetDequantizedOutput(),
1131       ElementsAreArray({
1132         9, 18, 0, 0, 46, 55, 0, 0,
1133         9, 18, 0, 0, 46, 55, 0, 0,
1134         9, 18, 0, 0, 46, 55, 0, 0
1135       }));
1136   // clang-format on
1137 }
1138 
TEST_P(QuantizedDepthwiseConvolutionOpTest,MultithreadOnRowSamePaddingTest)1139 TEST_P(QuantizedDepthwiseConvolutionOpTest, MultithreadOnRowSamePaddingTest) {
1140   // This test runs through DepthwiseConv3x3Filter with __aarch64__, and runs
1141   // through DepthwiseConvGeneral with other configs.
1142   const int input_batch = 1;
1143   const int input_width = 3;
1144   const int input_height = 3;
1145   const int input_depth = 8;
1146   const int filter_batch = 1;
1147   const int filter_size = 3;
1148   const int filter_depth = 8;
1149 
1150   QuantizedDepthwiseConvolutionOpModel m(
1151       GetRegistration(),
1152       {TensorType_UINT8,
1153        {input_batch, input_height, input_width, input_depth},
1154        0,
1155        128},
1156       {TensorType_UINT8,
1157        {filter_batch, filter_size, filter_size, filter_depth},
1158        0,
1159        128},
1160       {TensorType_UINT8, {}, 0, 255}, Padding_SAME);
1161 
1162   // clang-format off
1163   m.SetInput({
1164       // array of 9 x 8 => [1, 3, 3, 8]
1165       1, 1, 0, 0,  1, 1, 0, 0,
1166       1, 1, 0, 0,  1, 1, 0, 0,
1167       1, 1, 0, 0,  1, 1, 0, 0,
1168       1, 1, 0, 0,  1, 1, 0, 0,
1169       1, 1, 0, 0,  1, 1, 0, 0,
1170       1, 1, 0, 0,  1, 1, 0, 0,
1171       1, 1, 0, 0,  1, 1, 0, 0,
1172       1, 1, 0, 0,  1, 1, 0, 0,
1173       1, 1, 0, 0,  1, 1, 0, 0
1174   });
1175 
1176   m.SetFilter({
1177       // array of 9 x 8 => [1, 3, 3, 8]
1178       1, 2, 3, 4, 5, 6, 7, 8,
1179       1, 2, 3, 4, 5, 6, 7, 8,
1180       1, 2, 3, 4, 5, 6, 7, 8,
1181       1, 2, 3, 4, 5, 6, 7, 8,
1182       1, 2, 3, 4, 5, 6, 7, 8,
1183       1, 2, 3, 4, 5, 6, 7, 8,
1184       1, 2, 3, 4, 5, 6, 7, 8,
1185       1, 2, 3, 4, 5, 6, 7, 8,
1186       1, 2, 3, 4, 5, 6, 7, 8
1187   });
1188   // clang-format on
1189 
1190   // No bias for this test.
1191   m.SetBias({0, 0, 0, 0, 0, 0, 0, 0});
1192   m.SetNumThreads(3);
1193   ASSERT_EQ(m.Invoke(), kTfLiteOk);
1194 
1195   // clang-format off
1196   EXPECT_THAT(
1197       m.GetDequantizedOutput(),
1198       ElementsAreArray({
1199         // array of 9 x 8 => [1, 3, 3, 8]
1200         4, 8, 0, 0, 20, 24, 0, 0,
1201         6, 12, 0, 0, 30, 37, 0, 0,
1202         4, 8, 0, 0, 20, 24, 0, 0,
1203         6, 12, 0, 0, 30, 37, 0, 0,
1204         9, 18, 0, 0, 46, 55, 0, 0,
1205         6, 12, 0, 0, 30, 37, 0, 0,
1206         4, 8, 0, 0, 20, 24, 0, 0,
1207         6, 12, 0, 0, 30, 37, 0, 0,
1208         4, 8, 0, 0, 20, 24, 0, 0,
1209       }));
1210   // clang-format on
1211 }
1212 
TEST_P(QuantizedDepthwiseConvolutionOpTest,MultithreadOnBatchValidPaddingTest)1213 TEST_P(QuantizedDepthwiseConvolutionOpTest,
1214        MultithreadOnBatchValidPaddingTest) {
1215   // This test runs through DepthwiseConv3x3Filter with __aarch64__, and runs
1216   // through DepthwiseConvGeneral with other configs.
1217   const int input_batch = 2;
1218   const int input_width = 3;
1219   const int input_height = 3;
1220   const int input_depth = 8;
1221   const int filter_batch = 1;
1222   const int filter_size = 3;
1223   const int filter_depth = 8;
1224 
1225   QuantizedDepthwiseConvolutionOpModel m(
1226       GetRegistration(),
1227       {TensorType_UINT8,
1228        {input_batch, input_height, input_width, input_depth},
1229        0,
1230        128},
1231       {TensorType_UINT8,
1232        {filter_batch, filter_size, filter_size, filter_depth},
1233        0,
1234        128},
1235       {TensorType_UINT8, {}, 0, 255}, Padding_VALID);
1236 
1237   // clang-format off
1238   m.SetInput({
1239       // array of 2 x 3 x 24 => [2, 3, 3, 8]
1240       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1241       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1242       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1243 
1244       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1245       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1246       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0
1247   });
1248 
1249   m.SetFilter({
1250       // array of 9 x 8 => [1, 3, 3, 8]
1251       1, 2, 3, 4, 5, 6, 7, 8,
1252       1, 2, 3, 4, 5, 6, 7, 8,
1253       1, 2, 3, 4, 5, 6, 7, 8,
1254       1, 2, 3, 4, 5, 6, 7, 8,
1255       1, 2, 3, 4, 5, 6, 7, 8,
1256       1, 2, 3, 4, 5, 6, 7, 8,
1257       1, 2, 3, 4, 5, 6, 7, 8,
1258       1, 2, 3, 4, 5, 6, 7, 8,
1259       1, 2, 3, 4, 5, 6, 7, 8
1260   });
1261   // clang-format on
1262 
1263   // No bias for this test.
1264   m.SetBias({0, 0, 0, 0, 0, 0, 0, 0});
1265   m.SetNumThreads(2);
1266   ASSERT_EQ(m.Invoke(), kTfLiteOk);
1267 
1268   // clang-format off
1269   EXPECT_THAT(
1270       m.GetDequantizedOutput(),
1271       ElementsAreArray({
1272         9, 18, 0, 0, 46, 55, 0, 0,
1273         9, 18, 0, 0, 46, 55, 0, 0
1274       }));
1275   // clang-format on
1276 }
1277 
TEST_P(QuantizedDepthwiseConvolutionOpTest,MultithreadOnBatchSamePaddingTest)1278 TEST_P(QuantizedDepthwiseConvolutionOpTest, MultithreadOnBatchSamePaddingTest) {
1279   // This test runs through DepthwiseConv3x3Filter with __aarch64__, and runs
1280   // through DepthwiseConvGeneral with other configs.
1281   const int input_batch = 2;
1282   const int input_width = 3;
1283   const int input_height = 3;
1284   const int input_depth = 8;
1285   const int filter_batch = 1;
1286   const int filter_size = 3;
1287   const int filter_depth = 8;
1288 
1289   QuantizedDepthwiseConvolutionOpModel m(
1290       GetRegistration(),
1291       {TensorType_UINT8,
1292        {input_batch, input_height, input_width, input_depth},
1293        0,
1294        128},
1295       {TensorType_UINT8,
1296        {filter_batch, filter_size, filter_size, filter_depth},
1297        0,
1298        128},
1299       {TensorType_UINT8, {}, 0, 255}, Padding_SAME);
1300 
1301   // clang-format off
1302   m.SetInput({
1303       // array of 2 x 3 x 24 => [2, 3, 3, 8]
1304       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1305       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1306       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1307 
1308       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1309       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1310       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0
1311   });
1312 
1313   m.SetFilter({
1314       // array of 9 x 8 => [1, 3, 3, 8]
1315       1, 2, 3, 4, 5, 6, 7, 8,
1316       1, 2, 3, 4, 5, 6, 7, 8,
1317       1, 2, 3, 4, 5, 6, 7, 8,
1318       1, 2, 3, 4, 5, 6, 7, 8,
1319       1, 2, 3, 4, 5, 6, 7, 8,
1320       1, 2, 3, 4, 5, 6, 7, 8,
1321       1, 2, 3, 4, 5, 6, 7, 8,
1322       1, 2, 3, 4, 5, 6, 7, 8,
1323       1, 2, 3, 4, 5, 6, 7, 8
1324   });
1325   // clang-format on
1326 
1327   // No bias for this test.
1328   m.SetBias({0, 0, 0, 0, 0, 0, 0, 0});
1329   m.SetNumThreads(3);
1330   ASSERT_EQ(m.Invoke(), kTfLiteOk);
1331 
1332   // clang-format off
1333   EXPECT_THAT(
1334       m.GetDequantizedOutput(),
1335       ElementsAreArray({
1336         // array of 9 x 16 => [2, 3, 3, 8]
1337         4, 8,  0, 0, 20, 24, 0, 0,   6, 12, 0, 0, 30, 37, 0, 0,
1338         4, 8,  0, 0, 20, 24, 0, 0,   6, 12, 0, 0, 30, 37, 0, 0,
1339         9, 18, 0, 0, 46, 55, 0, 0,   6, 12, 0, 0, 30, 37, 0, 0,
1340         4, 8,  0, 0, 20, 24, 0, 0,   6, 12, 0, 0, 30, 37, 0, 0,
1341         4, 8,  0, 0, 20, 24, 0, 0,   4, 8,  0, 0, 20, 24, 0, 0,
1342         6, 12, 0, 0, 30, 37, 0, 0,   4, 8,  0, 0, 20, 24, 0, 0,
1343         6, 12, 0, 0, 30, 37, 0, 0,   9, 18, 0, 0, 46, 55, 0, 0,
1344         6, 12, 0, 0, 30, 37, 0, 0,   4, 8,  0, 0, 20, 24, 0, 0,
1345         6, 12, 0, 0, 30, 37, 0, 0,   4, 8,  0, 0, 20, 24, 0, 0,
1346       }));
1347   // clang-format on
1348 }
1349 
TEST_P(QuantizedDepthwiseConvolutionOpTest,MultithreadOnRowSamePaddingStrideTest)1350 TEST_P(QuantizedDepthwiseConvolutionOpTest,
1351        MultithreadOnRowSamePaddingStrideTest) {
1352   // This test runs through DepthwiseConv3x3Filter with __aarch64__, and runs
1353   // through DepthwiseConvGeneral with other configs.
1354   const int input_batch = 1;
1355   const int input_width = 3;
1356   const int input_height = 3;
1357   const int input_depth = 8;
1358   const int filter_batch = 1;
1359   const int filter_size = 3;
1360   const int filter_depth = 8;
1361 
1362   QuantizedDepthwiseConvolutionOpModel m(
1363       GetRegistration(),
1364       {TensorType_UINT8,
1365        {input_batch, input_height, input_width, input_depth},
1366        0,
1367        128},
1368       {TensorType_UINT8,
1369        {filter_batch, filter_size, filter_size, filter_depth},
1370        0,
1371        128},
1372       {TensorType_UINT8, {}, 0, 255}, Padding_SAME,
1373       /* dilation_factor = */ 1,
1374       /* stride_width = */ 2,
1375       /* stride_height = */ 2);
1376 
1377   // clang-format off
1378   m.SetInput({
1379       // array of 3 x 24 => [1, 3, 3, 8]
1380       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1381       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1382       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0
1383   });
1384 
1385   m.SetFilter({
1386       // array of 9 x 8 => [1, 3, 3, 8]
1387       1, 2, 3, 4, 5, 6, 7, 8,
1388       1, 2, 3, 4, 5, 6, 7, 8,
1389       1, 2, 3, 4, 5, 6, 7, 8,
1390       1, 2, 3, 4, 5, 6, 7, 8,
1391       1, 2, 3, 4, 5, 6, 7, 8,
1392       1, 2, 3, 4, 5, 6, 7, 8,
1393       1, 2, 3, 4, 5, 6, 7, 8,
1394       1, 2, 3, 4, 5, 6, 7, 8,
1395       1, 2, 3, 4, 5, 6, 7, 8
1396   });
1397   // clang-format on
1398 
1399   // No bias for this test.
1400   m.SetBias({0, 0, 0, 0, 0, 0, 0, 0});
1401   m.SetNumThreads(4);
1402   ASSERT_EQ(m.Invoke(), kTfLiteOk);
1403 
1404   // clang-format off
1405   EXPECT_THAT(
1406       m.GetDequantizedOutput(),
1407       ElementsAreArray({
1408         4, 8, 0, 0, 20, 24, 0, 0,
1409         4, 8, 0, 0, 20, 24, 0, 0,
1410         4, 8, 0, 0, 20, 24, 0, 0,
1411         4, 8, 0, 0, 20, 24, 0, 0,
1412       }));
1413   // clang-format on
1414 }
1415 
TEST_P(QuantizedDepthwiseConvolutionOpTest,MultithreadOnRowValidPaddingStrideTest)1416 TEST_P(QuantizedDepthwiseConvolutionOpTest,
1417        MultithreadOnRowValidPaddingStrideTest) {
1418   const int input_batch = 1;
1419   const int input_width = 5;
1420   const int input_height = 5;
1421   const int input_depth = 8;
1422   const int filter_batch = 1;
1423   const int filter_size = 3;
1424   const int filter_depth = 8;
1425 
1426   QuantizedDepthwiseConvolutionOpModel m(
1427       GetRegistration(),
1428       {TensorType_UINT8,
1429        {input_batch, input_height, input_width, input_depth},
1430        0,
1431        128},
1432       {TensorType_UINT8,
1433        {filter_batch, filter_size, filter_size, filter_depth},
1434        0,
1435        128},
1436       {TensorType_UINT8, {}, 0, 255}, Padding_VALID,
1437       /* dilation_factor = */ 1,
1438       /* stride_width = */ 2,
1439       /* stride_height = */ 2);
1440 
1441   // clang-format off
1442   m.SetInput({
1443     // array of 8 x 24 + 8 => [1, 5, 5, 8]
1444       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1445       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1446       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1447       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1448       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1449       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1450       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1451       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1452       1, 1, 0, 0, 1, 1, 0, 0
1453   });
1454 
1455   m.SetFilter({
1456       // array of 9 x 8 => [1, 3, 3, 8]
1457       1, 2, 3, 4, 5, 6, 7, 8,
1458       1, 2, 3, 4, 5, 6, 7, 8,
1459       1, 2, 3, 4, 5, 6, 7, 8,
1460       1, 2, 3, 4, 5, 6, 7, 8,
1461       1, 2, 3, 4, 5, 6, 7, 8,
1462       1, 2, 3, 4, 5, 6, 7, 8,
1463       1, 2, 3, 4, 5, 6, 7, 8,
1464       1, 2, 3, 4, 5, 6, 7, 8,
1465       1, 2, 3, 4, 5, 6, 7, 8
1466   });
1467   // clang-format on
1468 
1469   // No bias for this test.
1470   m.SetBias({0, 0, 0, 0, 0, 0, 0, 0});
1471   m.SetNumThreads(4);
1472   ASSERT_EQ(m.Invoke(), kTfLiteOk);
1473 
1474   // clang-format off
1475   EXPECT_THAT(
1476       m.GetDequantizedOutput(),
1477       ElementsAreArray({
1478         9, 18, 0, 0, 46, 55, 0, 0,
1479         9, 18, 0, 0, 46, 55, 0, 0,
1480         9, 18, 0, 0, 46, 55, 0, 0,
1481         9, 18, 0, 0, 46, 55, 0, 0
1482       }));
1483   // clang-format on
1484 }
1485 
// 8 input channels feeding 16 filter channels, VALID padding, four threads.
TEST_P(QuantizedDepthwiseConvolutionOpTest,
       MultithreadOnRowDepthMultiplierTest) {
  constexpr int kInputBatch = 1;
  constexpr int kInputHeight = 3;
  constexpr int kInputWidth = 3;
  constexpr int kInputDepth = 8;
  constexpr int kFilterBatch = 1;
  constexpr int kFilterSize = 3;
  constexpr int kFilterDepth = 16;

  // Input and filter use a [0, 128] float range; the output uses [0, 255].
  const TensorData input_tensor{
      TensorType_UINT8,
      {kInputBatch, kInputHeight, kInputWidth, kInputDepth},
      0,
      128};
  const TensorData filter_tensor{
      TensorType_UINT8,
      {kFilterBatch, kFilterSize, kFilterSize, kFilterDepth},
      0,
      128};
  const TensorData output_tensor{TensorType_UINT8, {}, 0, 255};

  QuantizedDepthwiseConvolutionOpModel m(GetRegistration(), input_tensor,
                                         filter_tensor, output_tensor,
                                         Padding_VALID);

  // clang-format off
  m.SetInput({
      // 3 rows x 24 values => [1, 3, 3, 8]
      1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
      1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
      1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0
  });

  m.SetFilter({
      // 9 taps x 16 channels => [1, 3, 3, 16]
      1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8,
      1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8,
      1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8,
      1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8,
      1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8,
      1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8,
      1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8,
      1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8,
      1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8,
  });
  // clang-format on

  // All-zero bias, one entry per output channel (16 of them).
  m.SetBias({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0});
  m.SetNumThreads(4);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  // A single output pixel with 16 channels, shown as two lines of 8.
  // clang-format off
  EXPECT_THAT(
      m.GetDequantizedOutput(),
      ElementsAreArray({
        9, 18, 27, 37, 0, 0, 0, 0,
        9, 18, 27, 37, 0, 0, 0, 0
      }));
  // clang-format on
}
1544 
// Non-square [1, 5, 4, 2] input, 3x3 filter, stride 2, SAME padding, four
// threads.
TEST_P(QuantizedDepthwiseConvolutionOpTest, MultithreadDifferentPaddingTest) {
  constexpr int kInputBatch = 1;
  constexpr int kInputHeight = 5;
  constexpr int kInputWidth = 4;
  constexpr int kInputDepth = 2;
  constexpr int kFilterBatch = 1;
  constexpr int kFilterSize = 3;
  constexpr int kFilterDepth = 2;

  // Input and filter use a [0, 128] float range; the output uses [0, 255].
  const TensorData input_tensor{
      TensorType_UINT8,
      {kInputBatch, kInputHeight, kInputWidth, kInputDepth},
      0,
      128};
  const TensorData filter_tensor{
      TensorType_UINT8,
      {kFilterBatch, kFilterSize, kFilterSize, kFilterDepth},
      0,
      128};
  const TensorData output_tensor{TensorType_UINT8, {}, 0, 255};

  QuantizedDepthwiseConvolutionOpModel m(GetRegistration(), input_tensor,
                                         filter_tensor, output_tensor,
                                         Padding_SAME,
                                         /* dilation_factor = */ 1,
                                         /* stride_width = */ 2,
                                         /* stride_height = */ 2);

  // clang-format off
  m.SetInput({
      // 2 lines x 20 values => [1, 5, 4, 2]
      1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
      1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0
  });

  m.SetFilter({
      // 2 lines x 8 values + 2 values => [1, 3, 3, 2]
      1, 2, 1, 2, 1, 2, 1, 2,
      1, 2, 1, 2, 1, 2, 1, 2,
      1, 2
  });
  // clang-format on

  // All-zero bias, one entry per output channel.
  m.SetBias({0, 0});
  m.SetNumThreads(4);
  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  // Three output rows of two pixels, two channels per pixel.
  // clang-format off
  EXPECT_THAT(
      m.GetDequantizedOutput(),
      ElementsAreArray({
        6, 0, 4, 0,
        9, 0, 6, 0,
        6, 0, 4, 0
      }));
  // clang-format on
}
1599 
1600 class PerChannelQuantizedDepthwiseConvolutionOpModel
1601     : public BaseDepthwiseConvolutionOpModel {
1602  public:
1603   using BaseDepthwiseConvolutionOpModel::BaseDepthwiseConvolutionOpModel;
1604 
SetInput(std::initializer_list<float> data)1605   void SetInput(std::initializer_list<float> data) {
1606     QuantizeAndPopulate<int8_t>(input_, data);
1607   }
1608 
SetFilter(std::initializer_list<float> data)1609   void SetFilter(std::initializer_list<float> data) {
1610     PerChannelSymmetricQuantizeAndPopulate(filter_, data);
1611   }
1612 
SetBias(std::initializer_list<float> data)1613   void SetBias(std::initializer_list<float> data) {
1614     PerChannelQuantizeBias(bias_, data);
1615   }
1616 
GetOutput()1617   std::vector<int8_t> GetOutput() { return ExtractVector<int8_t>(output_); }
GetDequantizedOutput()1618   std::vector<float> GetDequantizedOutput() {
1619     return Dequantize<int8_t>(ExtractVector<int8_t>(output_), GetScale(output_),
1620                               GetZeroPoint(output_));
1621   }
1622 };
1623 
1624 class PerChannelQuantizedDepthwiseConvolutionOpTest : public SingleOpTest {
1625  protected:
GetKernelMap()1626   const std::map<string, TfLiteRegistration*>& GetKernelMap() override {
1627     return *kKernelMap;
1628   }
1629 };
1630 
// int8 depthwise conv where the "per-channel" filter uses a single scale.
TEST_P(PerChannelQuantizedDepthwiseConvolutionOpTest, SimplePerTensorTest) {
  // Input and output share the same quantization: scale 0.5, zero point -1.
  const TensorData input_tensor{
      TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1};
  const TensorData filter_tensor{
      TensorType_INT8,
      // [1 * 2 * 2 * 4] as [input_channel, y, x, output_channel]
      {1, 2, 2, 4},
      0,
      0,
      0,
      0,
      /*per_channel_quantization=*/true,
      /*per_channel_quantization_scales=*/{1},
      /*per_channel_quantization_offsets=*/{0},
      /*channel_index=*/3};
  const TensorData output_tensor{TensorType_INT8, {}, -63.5, 64, 0.5, -1};

  PerChannelQuantizedDepthwiseConvolutionOpModel m(
      GetRegistration(), input_tensor, filter_tensor, output_tensor,
      Padding_VALID);

  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });
  m.SetFilter(
      /*filter data*/
      {
          // [1 * 2 * 2 * 4] as [input_channel, y, x, output_channel]
          // depth multiplier = 2
          1, 2, 3, 4,  // y = 0, x = 0
          3, 4, 5, 6,  // y = 0, x = 1
          7, 8, 5, 6,  // y = 1, x = 0
          3, 4, 1, 2,  // y = 1, x = 1
      });
  m.SetBias({3, -2, 4, 6});

  // Run the op, then check both the dequantized and the raw int8 output.
  // The output has dimension [1 * 1 * 2 * 4] as [batch, y, x, output_channel].
  ASSERT_EQ(m.Invoke(), kTfLiteOk);
  EXPECT_THAT(
      m.GetDequantizedOutput(),
      ElementsAreArray(ArrayFloatNear({43, 48, 18, 22, 3, -4, -28, -36})));
  EXPECT_THAT(m.GetOutput(),
              ElementsAreArray({85, 95, 35, 43, 5, -9, -57, -73}));
}
1676 
// int8 depthwise conv with a distinct filter scale per output channel.
TEST_P(PerChannelQuantizedDepthwiseConvolutionOpTest, SimplePerAxisTest) {
  // Input and output share the same quantization: scale 0.5, zero point -1.
  const TensorData input_tensor{
      TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1};
  const TensorData filter_tensor{
      TensorType_INT8,
      // [1 * 2 * 2 * 4] as [input_channel, y, x, output_channel]
      {1, 2, 2, 4},
      0,
      0,
      0,
      0,
      /*per_channel_quantization=*/true,
      /*per_channel_quantization_scales=*/{1, 2, 3, 4},
      /*per_channel_quantization_offsets=*/{0, 0, 0, 0},
      /*channel_index=*/3};
  const TensorData output_tensor{TensorType_INT8, {}, -63.5, 64, 0.5, -1};

  PerChannelQuantizedDepthwiseConvolutionOpModel m(
      GetRegistration(), input_tensor, filter_tensor, output_tensor,
      Padding_VALID);

  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });
  m.SetFilter(
      /*filter data*/
      {
          // [1 * 2 * 2 * 4] as [input_channel, y, x, output_channel]
          // depth multiplier = 2
          1, 2, 3, 4,  // y = 0, x = 0
          3, 4, 5, 6,  // y = 0, x = 1
          7, 8, 5, 6,  // y = 1, x = 0
          3, 4, 1, 2,  // y = 1, x = 1
      });
  m.SetBias({3, -2, 4, 6});

  // Run the op, then check both the dequantized and the raw int8 output.
  // The output has dimension [1 * 1 * 2 * 4] as [batch, y, x, output_channel].
  ASSERT_EQ(m.Invoke(), kTfLiteOk);
  EXPECT_THAT(
      m.GetDequantizedOutput(),
      ElementsAreArray(ArrayFloatNear({43, 48, 21, 22, 3, -4, -30, -54})));
  EXPECT_THAT(m.GetOutput(),
              ElementsAreArray({85, 95, 41, 43, 5, -9, -61, -109}));
}
1722 
1723 // Same as previous test, except the shift will be negative for the outputs.
TEST_P(PerChannelQuantizedDepthwiseConvolutionOpTest,SimpleTestNegativeOutputShift)1724 TEST_P(PerChannelQuantizedDepthwiseConvolutionOpTest,
1725        SimpleTestNegativeOutputShift) {
1726   PerChannelQuantizedDepthwiseConvolutionOpModel m(
1727       GetRegistration(), {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
1728       {TensorType_INT8,
1729        // [1 * 2 * 2 * 4] as [input_channel, y, x, output_channel]
1730        {1, 2, 2, 4},
1731        0,
1732        0,
1733        0,
1734        0,
1735        /*per_channel_quantization=*/true,
1736        /*per_channel_quantization_scales=*/{0.1, 0.2, 0.3, 0.4},
1737        /*per_channel_quantization_offsets=*/{0, 0, 0, 0},
1738        /*channel_index=*/3},
1739       {TensorType_INT8, {}, -63.5, 64, 0.5, -1}, Padding_VALID);
1740   m.SetInput({
1741       // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
1742       3, 2,    // batch = 0, y = 0, x = 0
1743       1, -1,   // batch = 0, y = 0, x = 1
1744       -2, -3,  // batch = 0, y = 0, x = 2
1745       4, 3,    // batch = 0, y = 1, x = 0
1746       2, -2,   // batch = 0, y = 1, x = 1
1747       -3, -4,  // batch = 0, y = 1, x = 2
1748   });
1749   m.SetFilter(
1750       /*filter data*/
1751       {
1752           // [1 * 2 * 2 * 4] as [input_channel, y, x, output_channel]
1753           // depth multiplier = 2
1754           1, 2, 3, 4,  // y = 0, x = 0
1755           3, 4, 5, 6,  // y = 0, x = 1
1756           7, 8, 5, 6,  // y = 1, x = 0
1757           3, 4, 1, 2,  // y = 1, x = 1
1758       });
1759   m.SetBias({3, -2, 4, 6});
1760 
1761   // Invoke and verify output.
1762   // output has dimension [1 * 1 * 2 * 4] as [batch, y, x, output_channel]
1763   ASSERT_EQ(m.Invoke(), kTfLiteOk);
1764   EXPECT_THAT(
1765       m.GetDequantizedOutput(),
1766       ElementsAreArray(ArrayFloatNear({43, 48, 18.5, 22, 3, -4, -28.5, -36})));
1767   EXPECT_THAT(m.GetOutput(),
1768               ElementsAreArray({85, 95, 36, 43, 5, -9, -58, -73}));
1769 }
1770 
1771 // Same as previous test, except the shift will be mixed for the outputs.
TEST_P(PerChannelQuantizedDepthwiseConvolutionOpTest,SimpleTestMixedOutputShift)1772 TEST_P(PerChannelQuantizedDepthwiseConvolutionOpTest,
1773        SimpleTestMixedOutputShift) {
1774   PerChannelQuantizedDepthwiseConvolutionOpModel m(
1775       GetRegistration(), {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
1776       {TensorType_INT8,
1777        // [1 * 2 * 2 * 4] as [input_channel, y, x, output_channel]
1778        {1, 2, 2, 4},
1779        0,
1780        0,
1781        0,
1782        0,
1783        /*per_channel_quantization=*/true,
1784        /*per_channel_quantization_scales=*/{0.1, 2, 3, 0.4},
1785        /*per_channel_quantization_offsets=*/{0, 0, 0, 0},
1786        /*channel_index=*/3},
1787       {TensorType_INT8, {}, -63.5, 64, 0.5, -1}, Padding_VALID);
1788   m.SetInput({
1789       // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
1790       3, 2,    // batch = 0, y = 0, x = 0
1791       1, -1,   // batch = 0, y = 0, x = 1
1792       -2, -3,  // batch = 0, y = 0, x = 2
1793       4, 3,    // batch = 0, y = 1, x = 0
1794       2, -2,   // batch = 0, y = 1, x = 1
1795       -3, -4,  // batch = 0, y = 1, x = 2
1796   });
1797   m.SetFilter(
1798       /*filter data*/
1799       {
1800           // [1 * 2 * 2 * 4] as [input_channel, y, x, output_channel]
1801           // depth multiplier = 2
1802           1, 2, 3, 4,  // y = 0, x = 0
1803           3, 4, 5, 6,  // y = 0, x = 1
1804           7, 8, 5, 6,  // y = 1, x = 0
1805           3, 4, 1, 2,  // y = 1, x = 1
1806       });
1807   m.SetBias({3, -2, 4, 6});
1808 
1809   // Invoke and verify output.
1810   // output has dimension [1 * 1 * 2 * 4] as [batch, y, x, output_channel]
1811   ASSERT_EQ(m.Invoke(), kTfLiteOk);
1812   EXPECT_THAT(
1813       m.GetDequantizedOutput(),
1814       ElementsAreArray(ArrayFloatNear({43, 48, 21, 22, 3, -4, -30, -36})));
1815   EXPECT_THAT(m.GetOutput(),
1816               ElementsAreArray({85, 95, 41, 43, 5, -9, -61, -73}));
1817 }
1818 
// 3x3 per-channel quantized depthwise convolution with VALID padding over a
// [1, 3, 3, 8] int8 input; only the dequantized output is checked.
TEST_P(PerChannelQuantizedDepthwiseConvolutionOpTest, Simple3x3FilterTest) {
  // Tensor descriptors, spelled out before the model is built.
  const TensorData input_tensor = {
      TensorType_INT8, {1, 3, 3, 8}, -63.5, 64, 0.5, -1};
  const TensorData filter_tensor = {
      TensorType_INT8,
      {1, 3, 3, 8},  // [input_channel, y, x, output_channel]
      0,
      0,
      0,
      0,
      /*per_channel_quantization=*/true,
      /*per_channel_quantization_scales=*/
      {0.1, 0.2, 0.3, 0.4, 0.4, 0.3, 0.2, 0.1},
      /*per_channel_quantization_offsets=*/{0, 0, 0, 0, 0, 0, 0, 0},
      /*channel_index=*/3};
  const TensorData output_tensor = {TensorType_INT8, {}, -63.5, 64, 0.5, -1};

  PerChannelQuantizedDepthwiseConvolutionOpModel m(
      GetRegistration(), input_tensor, filter_tensor, output_tensor,
      Padding_VALID);

  // 9 groups of 8 values, flattened [1, 3, 3, 8].
  m.SetInput({
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
  });
  // 9 groups of 8 values, flattened [1, 3, 3, 8].
  m.SetFilter({
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
  });
  m.SetBias({0, 0, 0, 0, 0, 0, 0, 0});

  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  const std::vector<float> expected_dequantized = {9, 18, 0, 0, 47, 54, 0, 0};
  EXPECT_THAT(m.GetDequantizedOutput(),
              ElementsAreArray(ArrayFloatNear(expected_dequantized)));
}
1853 
// Same tensors and data as Simple3x3FilterTest, but run with SAME padding,
// so 72 dequantized output values are checked instead of 8.
TEST_P(PerChannelQuantizedDepthwiseConvolutionOpTest,
       Simple3x3FilterPaddingSameTest) {
  // Tensor descriptors, spelled out before the model is built.
  const TensorData input_tensor = {
      TensorType_INT8, {1, 3, 3, 8}, -63.5, 64, 0.5, -1};
  const TensorData filter_tensor = {
      TensorType_INT8,
      {1, 3, 3, 8},  // [input_channel, y, x, output_channel]
      0,
      0,
      0,
      0,
      /*per_channel_quantization=*/true,
      /*per_channel_quantization_scales=*/
      {0.1, 0.2, 0.3, 0.4, 0.4, 0.3, 0.2, 0.1},
      /*per_channel_quantization_offsets=*/{0, 0, 0, 0, 0, 0, 0, 0},
      /*channel_index=*/3};
  const TensorData output_tensor = {TensorType_INT8, {}, -63.5, 64, 0.5, -1};

  PerChannelQuantizedDepthwiseConvolutionOpModel m(
      GetRegistration(), input_tensor, filter_tensor, output_tensor,
      Padding_SAME);

  // 9 groups of 8 values, flattened [1, 3, 3, 8].
  m.SetInput({
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
      1, 1, 0, 0, 1, 1, 0, 0,  //
  });
  // 9 groups of 8 values, flattened [1, 3, 3, 8].
  m.SetFilter({
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
      1, 2, 3, 4, 5, 6, 7, 8,  //
  });
  m.SetBias({0, 0, 0, 0, 0, 0, 0, 0});

  ASSERT_EQ(m.Invoke(), kTfLiteOk);

  // 9 groups of 8 values, flattened [1, 3, 3, 8].
  const std::vector<float> expected_dequantized = {
      4, 8,  0, 0, 21,   24, 0, 0,  //
      6, 12, 0, 0, 31.5, 36, 0, 0,  //
      4, 8,  0, 0, 21,   24, 0, 0,  //
      6, 12, 0, 0, 31.5, 36, 0, 0,  //
      9, 18, 0, 0, 47,   54, 0, 0,  //
      6, 12, 0, 0, 31.5, 36, 0, 0,  //
      4, 8,  0, 0, 21,   24, 0, 0,  //
      6, 12, 0, 0, 31.5, 36, 0, 0,  //
      4, 8,  0, 0, 21,   24, 0, 0,  //
  };
  EXPECT_THAT(m.GetDequantizedOutput(),
              ElementsAreArray(ArrayFloatNear(expected_dequantized)));
}
1896 
1897 INSTANTIATE_TEST_SUITE_P(
1898     DepthwiseConvolutionOpTest, DepthwiseConvolutionOpTest,
1899     ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap)));
1900 
1901 INSTANTIATE_TEST_SUITE_P(
1902     QuantizedDepthwiseConvolutionOpTest, QuantizedDepthwiseConvolutionOpTest,
1903     ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kQuantizedKernelMap)));
1904 
1905 INSTANTIATE_TEST_SUITE_P(
1906     PerChannelQuantizedDepthwiseConvolutionOpTest,
1907     PerChannelQuantizedDepthwiseConvolutionOpTest,
1908     ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap)));
1909 
1910 }  // namespace
1911 }  // namespace tflite
1912