// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <xnnpack.h>
#include <xnnpack/subgraph.h>

#include <limits>
#include <vector>

#include "runtime-tester.h"
#include <gtest/gtest.h>

namespace xnnpack {

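// Each *_THEN_CLAMP test below builds a two-node subgraph (an operator
// followed by a Clamp), runs it once without fusion and once with fusion,
// and checks that the optimizer folds the Clamp into the preceding node's
// activation (output_min/output_max), rewires the fused node to the graph
// output, marks the consumed node's slot as xnn_compute_type_invalid, and
// leaves the numerical results unchanged.
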
TEST(ADD_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(4);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input1_id = 0;
  uint32_t input2_id = 1;
  uint32_t intermediate_id = 2;
  uint32_t output_id = 3;
  tester
    .AddInputTensorF32({1, 2, 2, 3}, input1_id)
    .AddInputTensorF32({1, 2, 2, 3}, input2_id)
    .AddDynamicTensorF32({1, 2, 2, 3}, intermediate_id)
    .AddOutputTensorF32({1, 2, 2, 3}, output_id)
    .AddAddition(input1_id, input2_id, intermediate_id)
    .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(AVERAGE_POOLING_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(3);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t output_id = 2;
  tester
    .AddInputTensorF32({1, 10, 10, 3}, input_id)
    .AddDynamicTensorF32({1, 9, 9, 3}, intermediate_id)
    .AddOutputTensorF32({1, 9, 9, 3}, output_id)
    .AddAveragePooling2D(0, 0, 0, 0, 2, 2, 1, 1, input_id, intermediate_id)
    .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(CLAMP_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(3);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t output_id = 2;
  tester
    .AddInputTensorF32({1, 10, 10, 3}, input_id)
    .AddDynamicTensorF32({1, 10, 10, 3}, intermediate_id)
    .AddOutputTensorF32({1, 10, 10, 3}, output_id)
    .AddClamp(
        -std::numeric_limits<float>::infinity(),
        std::numeric_limits<float>::infinity(),
        input_id,
        intermediate_id)
    .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(CONVOLUTION_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t filter_id = 1;
  uint32_t bias_id = 2;
  uint32_t intermediate_id = 3;
  uint32_t output_id = 4;
  tester
    .AddInputTensorF32({1, 256, 256, 3}, input_id)
    .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
    .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
    .AddDynamicTensorF32({1, 128, 128, 32}, intermediate_id)
    .AddOutputTensorF32({1, 128, 128, 32}, output_id)
    .AddConvolution2D(
        ConvolutionParams{
          Padding{1, 1, 1, 1},
          Kernel{3, 3},
          Subsampling{2, 2},
          Dilation{1, 1},
          /*groups=*/ 1,
          /*group_input_channels=*/ 3,
          /*group_output_channels=*/ 32,
        }, input_id, filter_id, bias_id, intermediate_id)
    .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(DIVIDE_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(4);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input1_id = 0;
  uint32_t input2_id = 1;
  uint32_t intermediate_id = 2;
  uint32_t output_id = 3;
  tester
    .AddInputTensorF32({1, 2, 2, 3}, input1_id)
    .AddInputTensorF32({1, 2, 2, 3}, input2_id)
    .AddDynamicTensorF32({1, 2, 2, 3}, intermediate_id)
    .AddOutputTensorF32({1, 2, 2, 3}, output_id)
    .AddDivide(input1_id, input2_id, intermediate_id)
    .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(DECONVOLUTION_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t filter_id = 1;
  uint32_t bias_id = 2;
  uint32_t intermediate_id = 3;
  uint32_t output_id = 4;
  tester
    .AddInputTensorF32({1, 128, 128, 3}, input_id)
    .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
    .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
    .AddDynamicTensorF32({1, 255, 255, 32}, intermediate_id)
    .AddOutputTensorF32({1, 255, 255, 32}, output_id)
    .AddDeconvolution2D(
        DeconvolutionParams{
          Padding{1, 1, 1, 1},
          Adjustment{0, 0},
          Kernel{3, 3},
          Upsampling{2, 2},
          Dilation{1, 1},
          /*groups=*/ 1,
          /*group_input_channels=*/ 3,
          /*group_output_channels=*/ 32,
        }, input_id, filter_id, bias_id, intermediate_id)
    .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(DEPTHWISE_CONVOLUTION_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t filter_id = 1;
  uint32_t bias_id = 2;
  uint32_t intermediate_id = 3;
  uint32_t output_id = 4;
  tester
    .AddInputTensorF32({1, 128, 128, 4}, input_id)
    .AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, filter_id)
    .AddStaticTensorF32({4}, TensorType::kDense, bias_id)
    .AddDynamicTensorF32({1, 128, 128, 4}, intermediate_id)
    .AddOutputTensorF32({1, 128, 128, 4}, output_id)
    .AddDepthwiseConvolution2D(
        DepthwiseConvolutionParams{
          Padding{1, 1, 1, 1},
          Kernel{3, 3},
          Subsampling{1, 1},
          Dilation{1, 1},
          /*depth_multiplier=*/ 1,
          /*input_channels=*/ 4
        }, input_id, filter_id, bias_id, intermediate_id)
    .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(FULLY_CONNECTED_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t filter_id = 1;
  uint32_t bias_id = 2;
  uint32_t intermediate_id = 3;
  uint32_t output_id = 4;
  tester
    .AddInputTensorF32({5, 3}, input_id)
    .AddStaticTensorF32({7, 3}, TensorType::kDense, filter_id)
    .AddStaticTensorF32({7}, TensorType::kDense, bias_id)
    .AddDynamicTensorF32({5, 7}, intermediate_id)
    .AddOutputTensorF32({5, 7}, output_id)
    .AddFullyConnected(input_id, filter_id, bias_id, intermediate_id)
    .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(MULTIPLY_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(4);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input1_id = 0;
  uint32_t input2_id = 1;
  uint32_t intermediate_id = 2;
  uint32_t output_id = 3;
  tester
    .AddInputTensorF32({1, 2, 2, 3}, input1_id)
    .AddInputTensorF32({1, 2, 2, 3}, input2_id)
    .AddDynamicTensorF32({1, 2, 2, 3}, intermediate_id)
    .AddOutputTensorF32({1, 2, 2, 3}, output_id)
    .AddMultiply(input1_id, input2_id, intermediate_id)
    .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(MAX_POOLING_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(3);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t output_id = 2;
  tester
    .AddInputTensorF32({1, 10, 10, 3}, input_id)
    .AddDynamicTensorF32({1, 9, 9, 3}, intermediate_id)
    .AddOutputTensorF32({1, 9, 9, 3}, output_id)
    .AddMaxPooling2D(0, 0, 0, 0, 2, 2, 1, 1, 1, 1, input_id, intermediate_id)
    .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(SUBTRACT_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(4);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input1_id = 0;
  uint32_t input2_id = 1;
  uint32_t intermediate_id = 2;
  uint32_t output_id = 3;
  tester
    .AddInputTensorF32({1, 2, 2, 3}, input1_id)
    .AddInputTensorF32({1, 2, 2, 3}, input2_id)
    .AddDynamicTensorF32({1, 2, 2, 3}, intermediate_id)
    .AddOutputTensorF32({1, 2, 2, 3}, output_id)
    .AddSubtract(input1_id, input2_id, intermediate_id)
    .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(unoptimized_output, optimized_output);
}

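// The CONSTANT_PAD_THEN_* tests below check that a zero-valued Constant Pad
// that only pads the H and W dimensions is folded into the following
// (depthwise) convolution's input padding: with NHWC paddings,
// pre_paddings[1]/pre_paddings[2] become input_padding_top/input_padding_left,
// and post_paddings[1]/post_paddings[2] become
// input_padding_bottom/input_padding_right. The Pad node's slot is marked
// xnn_compute_type_invalid and the convolution writes the graph output.
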
TEST(CONSTANT_PAD_THEN_CONVOLUTION, fusion) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 0.0f;

  tester
    .AddInputTensorF32({1, 254, 254, 3}, input_id)
    .AddDynamicTensorF32({1, 262, 266, 3}, intermediate_id)
    .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
    .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
    .AddOutputTensorF32({1, 131, 133, 32}, output_id)
    .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, intermediate_id)
    .AddConvolution2D(
        ConvolutionParams{
          Padding{0, 0, 0, 0},
          Kernel{3, 3},
          Subsampling{2, 2},
          Dilation{1, 1},
          /*groups=*/ 1,
          /*group_input_channels=*/ 3,
          /*group_output_channels=*/ 32,
        }, intermediate_id, filter_id, bias_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_top, 2);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_left, 4);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_right, 8);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_bottom, 6);
  ASSERT_EQ(tester.Node(1)->outputs[0], output_id);

  ASSERT_EQ(unoptimized_output, optimized_output);
}

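// The not_fused_* variants keep both operators: padding in the N or C
// dimension and a non-zero padding value cannot be expressed as convolution
// input padding, so the optimizer leaves the Constant Pad in place.
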
TEST(CONSTANT_PAD_THEN_CONVOLUTION, not_fused_due_to_non_zero_padding_in_n_dimension) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  // Non-zero pre-padding in the N or C dimension.
  size_t pre_paddings[4] = {1, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 0.0f;

  tester
    .AddInputTensorF32({1, 254, 254, 3}, input_id)
    .AddDynamicTensorF32({2, 262, 266, 3}, intermediate_id)
    .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
    .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
    .AddOutputTensorF32({2, 131, 133, 32}, output_id)
    .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, intermediate_id)
    .AddConvolution2D(
        ConvolutionParams{
          Padding{0, 0, 0, 0},
          Kernel{3, 3},
          Subsampling{2, 2},
          Dilation{1, 1},
          /*groups=*/ 1,
          /*group_input_channels=*/ 3,
          /*group_output_channels=*/ 32,
        }, intermediate_id, filter_id, bias_id, output_id)
    .Optimize();
  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
}

TEST(CONSTANT_PAD_THEN_CONVOLUTION, not_fused_due_to_padding_value_not_zero) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 1.0f;

  tester
    .AddInputTensorF32({1, 254, 254, 3}, input_id)
    .AddDynamicTensorF32({2, 262, 266, 3}, intermediate_id)
    .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
    .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
    .AddOutputTensorF32({2, 131, 133, 32}, output_id)
    .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, intermediate_id)
    .AddConvolution2D(
        ConvolutionParams{
          Padding{0, 0, 0, 0},
          Kernel{3, 3},
          Subsampling{2, 2},
          Dilation{1, 1},
          /*groups=*/ 1,
          /*group_input_channels=*/ 3,
          /*group_output_channels=*/ 32,
        }, intermediate_id, filter_id, bias_id, output_id)
    .Optimize();
  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
}

TEST(CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION, fusion) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 0.0f;
  tester
    .AddInputTensorF32({1, 128, 128, 4}, input_id)
    .AddDynamicTensorF32({1, 136, 140, 4}, intermediate_id)
    .AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, filter_id)
    .AddStaticTensorF32({4}, TensorType::kDense, bias_id)
    .AddOutputTensorF32({1, 134, 140, 4}, output_id)
    .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, intermediate_id)
    .AddDepthwiseConvolution2D(
        DepthwiseConvolutionParams{
          Padding{0, 0, 0, 0},
          Kernel{3, 3},
          Subsampling{1, 1},
          Dilation{1, 1},
          /*depth_multiplier=*/ 1,
          /*input_channels=*/ 4
        }, intermediate_id, filter_id, bias_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_top, 2);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_left, 4);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_right, 8);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_bottom, 6);
  ASSERT_EQ(tester.Node(1)->outputs[0], output_id);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION, not_fused_due_to_non_zero_padding_in_n_dimension) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  // Non-zero pre-padding in the N or C dimension.
  size_t pre_paddings[4] = {1, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 0.0f;
  tester
    .AddInputTensorF32({1, 128, 128, 4}, input_id)
    .AddDynamicTensorF32({2, 136, 140, 4}, intermediate_id)
    .AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, filter_id)
    .AddStaticTensorF32({4}, TensorType::kDense, bias_id)
    .AddOutputTensorF32({2, 134, 140, 4}, output_id)
    .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, intermediate_id)
    .AddDepthwiseConvolution2D(
        DepthwiseConvolutionParams{
          Padding{0, 0, 0, 0},
          Kernel{3, 3},
          Subsampling{1, 1},
          Dilation{1, 1},
          /*depth_multiplier=*/ 1,
          /*input_channels=*/ 4
        }, intermediate_id, filter_id, bias_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION, not_fused_due_to_padding_value_not_zero) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 1.0f;
  tester
    .AddInputTensorF32({1, 128, 128, 4}, input_id)
    .AddDynamicTensorF32({1, 136, 140, 4}, intermediate_id)
    .AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, filter_id)
    .AddStaticTensorF32({4}, TensorType::kDense, bias_id)
    .AddOutputTensorF32({1, 134, 140, 4}, output_id)
    .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, intermediate_id)
    .AddDepthwiseConvolution2D(
        DepthwiseConvolutionParams{
          Padding{0, 0, 0, 0},
          Kernel{3, 3},
          Subsampling{1, 1},
          Dilation{1, 1},
          /*depth_multiplier=*/ 1,
          /*input_channels=*/ 4
        }, intermediate_id, filter_id, bias_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

}  // namespace xnnpack