1 // Copyright 2022 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
#include <xnnpack.h>
#include <xnnpack/subgraph.h>

#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>

#include "runtime-tester.h"
#include <gtest/gtest.h>
11
12 namespace xnnpack {
13
// A Clamp consuming the output of an Add must be folded into the Add's
// output activation, leaving a single fused operator with identical results.
TEST(ADD_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(4);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  const uint32_t lhs_id = 0;
  const uint32_t rhs_id = 1;
  const uint32_t sum_id = 2;
  const uint32_t out_id = 3;
  tester.AddInputTensorF32({1, 2, 2, 3}, lhs_id);
  tester.AddInputTensorF32({1, 2, 2, 3}, rhs_id);
  tester.AddDynamicTensorF32({1, 2, 2, 3}, sum_id);
  tester.AddOutputTensorF32({1, 2, 2, 3}, out_id);
  tester.AddAddition(lhs_id, rhs_id, sum_id);
  tester.AddClamp(clamp_min, clamp_max, sum_id, out_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> fused = tester.RunWithFusion<float>();

  // Only the Add remains, carrying the clamp bounds as its activation.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], out_id);
  // The fused-away Clamp node is marked invalid rather than removed.
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(reference, fused);
}
43
// A Clamp following an Average Pooling must be fused into the pooling
// operator's output activation without changing the computed values.
TEST(AVERAGE_POOLING_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(3);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  const uint32_t src_id = 0;
  const uint32_t pooled_id = 1;
  const uint32_t dst_id = 2;
  tester.AddInputTensorF32({1, 10, 10, 3}, src_id);
  tester.AddDynamicTensorF32({1, 9, 9, 3}, pooled_id);
  tester.AddOutputTensorF32({1, 9, 9, 3}, dst_id);
  tester.AddAveragePooling2D(0, 0, 0, 0, 2, 2, 1, 1, src_id, pooled_id);
  tester.AddClamp(clamp_min, clamp_max, pooled_id, dst_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> fused = tester.RunWithFusion<float>();

  // Only the pooling operator remains, carrying the clamp bounds.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], dst_id);
  // The fused-away Clamp node is marked invalid rather than removed.
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(reference, fused);
}
71
// Two back-to-back Clamps must collapse into one; an unbounded first Clamp
// followed by a bounded one yields a single Clamp with the tighter bounds.
TEST(CLAMP_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(3);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  const uint32_t src_id = 0;
  const uint32_t passthrough_id = 1;
  const uint32_t dst_id = 2;
  // First clamp is a no-op (infinite bounds); second applies the real range.
  const float neg_inf = -std::numeric_limits<float>::infinity();
  const float pos_inf = std::numeric_limits<float>::infinity();
  tester.AddInputTensorF32({1, 10, 10, 3}, src_id);
  tester.AddDynamicTensorF32({1, 10, 10, 3}, passthrough_id);
  tester.AddOutputTensorF32({1, 10, 10, 3}, dst_id);
  tester.AddClamp(neg_inf, pos_inf, src_id, passthrough_id);
  tester.AddClamp(clamp_min, clamp_max, passthrough_id, dst_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> fused = tester.RunWithFusion<float>();

  // One Clamp remains, carrying the bounded range.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], dst_id);
  // The fused-away second Clamp node is marked invalid rather than removed.
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(reference, fused);
}
103
// A Clamp consuming a Convolution's output must be fused into the
// convolution's output activation, leaving one operator with equal results.
TEST(CONVOLUTION_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  const uint32_t src_id = 0;
  const uint32_t weights_id = 1;
  const uint32_t bias_tensor_id = 2;
  const uint32_t conv_out_id = 3;
  const uint32_t dst_id = 4;
  tester.AddInputTensorF32({1, 256, 256, 3}, src_id);
  tester.AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, weights_id);
  tester.AddStaticTensorF32({32}, TensorType::kDense, bias_tensor_id);
  tester.AddDynamicTensorF32({1, 128, 128, 32}, conv_out_id);
  tester.AddOutputTensorF32({1, 128, 128, 32}, dst_id);
  tester.AddConvolution2D(
      ConvolutionParams{
          Padding{1, 1, 1, 1},
          Kernel{3, 3},
          Subsampling{2, 2},
          Dilation{1, 1},
          /*groups=*/ 1,
          /*group_input_channels=*/ 3,
          /*group_output_channels=*/ 32,
      }, src_id, weights_id, bias_tensor_id, conv_out_id);
  tester.AddClamp(clamp_min, clamp_max, conv_out_id, dst_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> fused = tester.RunWithFusion<float>();

  // Only the convolution remains, carrying the clamp bounds.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], dst_id);
  // The fused-away Clamp node is marked invalid rather than removed.
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(reference, fused);
}
144
// A Clamp following a Divide must be fused into the Divide's output
// activation, leaving a single operator with identical numerics.
TEST(DIVIDE_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(4);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  const uint32_t numerator_id = 0;
  const uint32_t denominator_id = 1;
  const uint32_t quotient_id = 2;
  const uint32_t dst_id = 3;
  tester.AddInputTensorF32({1, 2, 2, 3}, numerator_id);
  tester.AddInputTensorF32({1, 2, 2, 3}, denominator_id);
  tester.AddDynamicTensorF32({1, 2, 2, 3}, quotient_id);
  tester.AddOutputTensorF32({1, 2, 2, 3}, dst_id);
  tester.AddDivide(numerator_id, denominator_id, quotient_id);
  tester.AddClamp(clamp_min, clamp_max, quotient_id, dst_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> fused = tester.RunWithFusion<float>();

  // Only the Divide remains, carrying the clamp bounds.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], dst_id);
  // The fused-away Clamp node is marked invalid rather than removed.
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(reference, fused);
}
174
// A Clamp consuming the output of a Deconvolution should be fused into the
// deconvolution's output activation (output_min/output_max), reducing the
// graph to a single operator without changing the computed values.
TEST(DECONVOLUTION_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t filter_id = 1;
  uint32_t bias_id = 2;
  uint32_t intermediate_id = 3;
  uint32_t output_id = 4;
  tester
      .AddInputTensorF32({1, 128, 128, 3}, input_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddDynamicTensorF32({1, 255, 255, 32}, intermediate_id)
      .AddOutputTensorF32({1, 255, 255, 32}, output_id)
      .AddDeconvolution2D(
          DeconvolutionParams{
              Padding{1, 1, 1, 1},
              Adjustment{0, 0},
              Kernel{3, 3},
              Upsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32
          }, input_id, filter_id, bias_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();

  // After fusion only the deconvolution remains, carrying the clamp bounds.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  // The fused-away Clamp node is marked invalid rather than removed.
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(unoptimized_output, optimized_output);
}
216
// A Clamp consuming a Depthwise Convolution's output must be fused into the
// convolution's output activation, leaving one operator with equal results.
TEST(DEPTHWISE_CONVOLUTION_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  const uint32_t src_id = 0;
  const uint32_t weights_id = 1;
  const uint32_t bias_tensor_id = 2;
  const uint32_t conv_out_id = 3;
  const uint32_t dst_id = 4;
  tester.AddInputTensorF32({1, 128, 128, 4}, src_id);
  tester.AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, weights_id);
  tester.AddStaticTensorF32({4}, TensorType::kDense, bias_tensor_id);
  tester.AddDynamicTensorF32({1, 128, 128, 4}, conv_out_id);
  tester.AddOutputTensorF32({1, 128, 128, 4}, dst_id);
  tester.AddDepthwiseConvolution2D(
      DepthwiseConvolutionParams{
          Padding{1, 1, 1, 1},
          Kernel{3, 3},
          Subsampling{1, 1},
          Dilation{1, 1},
          /*depth_multiplier=*/ 1,
          /*input_channels=*/ 4
      }, src_id, weights_id, bias_tensor_id, conv_out_id);
  tester.AddClamp(clamp_min, clamp_max, conv_out_id, dst_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> fused = tester.RunWithFusion<float>();

  // Only the depthwise convolution remains, carrying the clamp bounds.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], dst_id);
  // The fused-away Clamp node is marked invalid rather than removed.
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(reference, fused);
}
256
// A Clamp following a Fully Connected operator must be fused into its output
// activation, leaving a single operator with identical numerics.
TEST(FULLY_CONNECTED_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  const uint32_t src_id = 0;
  const uint32_t weights_id = 1;
  const uint32_t bias_tensor_id = 2;
  const uint32_t fc_out_id = 3;
  const uint32_t dst_id = 4;
  tester.AddInputTensorF32({5, 3}, src_id);
  tester.AddStaticTensorF32({7, 3}, TensorType::kDense, weights_id);
  tester.AddStaticTensorF32({7}, TensorType::kDense, bias_tensor_id);
  tester.AddDynamicTensorF32({5, 7}, fc_out_id);
  tester.AddOutputTensorF32({5, 7}, dst_id);
  tester.AddFullyConnected(src_id, weights_id, bias_tensor_id, fc_out_id);
  tester.AddClamp(clamp_min, clamp_max, fc_out_id, dst_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> fused = tester.RunWithFusion<float>();

  // Only the fully-connected operator remains, carrying the clamp bounds.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], dst_id);
  // The fused-away Clamp node is marked invalid rather than removed.
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(reference, fused);
}
288
// A Clamp following a Multiply must be fused into the Multiply's output
// activation, leaving a single operator with identical numerics.
TEST(MULTIPLY_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(4);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  const uint32_t lhs_id = 0;
  const uint32_t rhs_id = 1;
  const uint32_t product_id = 2;
  const uint32_t dst_id = 3;
  tester.AddInputTensorF32({1, 2, 2, 3}, lhs_id);
  tester.AddInputTensorF32({1, 2, 2, 3}, rhs_id);
  tester.AddDynamicTensorF32({1, 2, 2, 3}, product_id);
  tester.AddOutputTensorF32({1, 2, 2, 3}, dst_id);
  tester.AddMultiply(lhs_id, rhs_id, product_id);
  tester.AddClamp(clamp_min, clamp_max, product_id, dst_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> fused = tester.RunWithFusion<float>();

  // Only the Multiply remains, carrying the clamp bounds.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], dst_id);
  // The fused-away Clamp node is marked invalid rather than removed.
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(reference, fused);
}
318
// A Clamp following a Max Pooling must be fused into the pooling operator's
// output activation without changing the computed values.
TEST(MAX_POOLING_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(3);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  const uint32_t src_id = 0;
  const uint32_t pooled_id = 1;
  const uint32_t dst_id = 2;
  tester.AddInputTensorF32({1, 10, 10, 3}, src_id);
  tester.AddDynamicTensorF32({1, 9, 9, 3}, pooled_id);
  tester.AddOutputTensorF32({1, 9, 9, 3}, dst_id);
  tester.AddMaxPooling2D(0, 0, 0, 0, 2, 2, 1, 1, 1, 1, src_id, pooled_id);
  tester.AddClamp(clamp_min, clamp_max, pooled_id, dst_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> fused = tester.RunWithFusion<float>();

  // Only the pooling operator remains, carrying the clamp bounds.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], dst_id);
  // The fused-away Clamp node is marked invalid rather than removed.
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(reference, fused);
}
346
// A Clamp following a Subtract must be fused into the Subtract's output
// activation, leaving a single operator with identical numerics.
TEST(SUBTRACT_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(4);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  const uint32_t minuend_id = 0;
  const uint32_t subtrahend_id = 1;
  const uint32_t difference_id = 2;
  const uint32_t dst_id = 3;
  tester.AddInputTensorF32({1, 2, 2, 3}, minuend_id);
  tester.AddInputTensorF32({1, 2, 2, 3}, subtrahend_id);
  tester.AddDynamicTensorF32({1, 2, 2, 3}, difference_id);
  tester.AddOutputTensorF32({1, 2, 2, 3}, dst_id);
  tester.AddSubtract(minuend_id, subtrahend_id, difference_id);
  tester.AddClamp(clamp_min, clamp_max, difference_id, dst_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> fused = tester.RunWithFusion<float>();

  // Only the Subtract remains, carrying the clamp bounds.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], dst_id);
  // The fused-away Clamp node is marked invalid rather than removed.
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);

  ASSERT_EQ(reference, fused);
}
376
// A zero-valued Constant Pad on the spatial (H/W) dimensions feeding a
// Convolution must be folded into the convolution's input padding.
TEST(CONSTANT_PAD_THEN_CONVOLUTION, fusion) {
  auto tester = RuntimeTester(5);
  const uint32_t src_id = 0;
  const uint32_t padded_id = 1;
  const uint32_t weights_id = 2;
  const uint32_t bias_tensor_id = 3;
  const uint32_t dst_id = 4;
  // Padding only in H (2/6) and W (4/8); N and C stay unpadded.
  size_t pre_pad[4] = {0, 2, 4, 0};
  size_t post_pad[4] = {0, 6, 8, 0};
  const float pad_value = 0.0f;

  tester.AddInputTensorF32({1, 254, 254, 3}, src_id);
  tester.AddDynamicTensorF32({1, 262, 266, 3}, padded_id);
  tester.AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, weights_id);
  tester.AddStaticTensorF32({32}, TensorType::kDense, bias_tensor_id);
  tester.AddOutputTensorF32({1, 131, 133, 32}, dst_id);
  tester.AddConstantPad(pre_pad, post_pad, pad_value, src_id, padded_id);
  tester.AddConvolution2D(
      ConvolutionParams{
          Padding{0, 0, 0, 0},
          Kernel{3, 3},
          Subsampling{2, 2},
          Dilation{1, 1},
          /*groups=*/ 1,
          /*group_input_channels=*/ 3,
          /*group_output_channels=*/ 32,
      }, padded_id, weights_id, bias_tensor_id, dst_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> fused = tester.RunWithFusion<float>();

  // The pad node is invalidated; its amounts move into the convolution.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_top, 2);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_left, 4);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_right, 8);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_bottom, 6);
  ASSERT_EQ(tester.Node(1)->outputs[0], dst_id);

  ASSERT_EQ(reference, fused);
}
421
// A Constant Pad that pads the batch (N) dimension cannot be expressed as
// convolution input padding, so it must NOT be fused into the Convolution.
TEST(CONSTANT_PAD_THEN_CONVOLUTION, not_fused_due_to_non_zero_padding_in_n_dimension) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  // Non-zero pre-padding in the N or C dimension.
  size_t pre_paddings[4] = {1, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 0.0f;

  tester
      .AddInputTensorF32({1, 254, 254, 3}, input_id)
      .AddDynamicTensorF32({2, 262, 266, 3}, intermediate_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({2, 131, 133, 32}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, intermediate_id)
      .AddConvolution2D(
          ConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32,
          }, intermediate_id, filter_id, bias_id, output_id);

  // Mirror the depthwise variants of this test: verify the operator count is
  // unchanged by optimization and that the numerics match the unfused run.
  // (RunWithFusion performs the optimization itself, so no explicit Optimize()
  // call is needed.)
  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
455
// Convolution input padding is implicitly zero-valued, so a Constant Pad
// with a non-zero padding value must NOT be fused into the Convolution.
TEST(CONSTANT_PAD_THEN_CONVOLUTION, not_fused_due_to_padding_value_not_zero) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  // Non-zero padding value prevents fusion.
  float padding_value = 1.0f;

  tester
      .AddInputTensorF32({1, 254, 254, 3}, input_id)
      .AddDynamicTensorF32({2, 262, 266, 3}, intermediate_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({2, 131, 133, 32}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, intermediate_id)
      .AddConvolution2D(
          ConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32,
          }, intermediate_id, filter_id, bias_id, output_id);

  // Mirror the depthwise variants of this test: verify the operator count is
  // unchanged by optimization and that the numerics match the unfused run.
  // (RunWithFusion performs the optimization itself, so no explicit Optimize()
  // call is needed.)
  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
488
// A zero-valued Constant Pad on the spatial (H/W) dimensions feeding a
// Depthwise Convolution must be folded into the convolution's input padding.
TEST(CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION, fusion) {
  auto tester = RuntimeTester(5);
  const uint32_t src_id = 0;
  const uint32_t padded_id = 1;
  const uint32_t weights_id = 2;
  const uint32_t bias_tensor_id = 3;
  const uint32_t dst_id = 4;
  // Padding only in H (2/6) and W (4/8); N and C stay unpadded.
  size_t pre_pad[4] = {0, 2, 4, 0};
  size_t post_pad[4] = {0, 6, 8, 0};
  const float pad_value = 0.0f;
  tester.AddInputTensorF32({1, 128, 128, 4}, src_id);
  tester.AddDynamicTensorF32({1, 136, 140, 4}, padded_id);
  tester.AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, weights_id);
  tester.AddStaticTensorF32({4}, TensorType::kDense, bias_tensor_id);
  tester.AddOutputTensorF32({1, 134, 140, 4}, dst_id);
  tester.AddConstantPad(pre_pad, post_pad, pad_value, src_id, padded_id);
  tester.AddDepthwiseConvolution2D(
      DepthwiseConvolutionParams{
          Padding{0, 0, 0, 0},
          Kernel{3, 3},
          Subsampling{1, 1},
          Dilation{1, 1},
          /*depth_multiplier=*/ 1,
          /*input_channels=*/ 4
      }, padded_id, weights_id, bias_tensor_id, dst_id);

  // Reference result from the unfused two-operator graph.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> fused = tester.RunWithFusion<float>();

  // The pad node is invalidated; its amounts move into the convolution.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_top, 2);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_left, 4);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_right, 8);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_bottom, 6);
  ASSERT_EQ(tester.Node(1)->outputs[0], dst_id);
  ASSERT_EQ(reference, fused);
}
530
// A Constant Pad that pads the batch (N) dimension cannot be expressed as
// depthwise-convolution input padding, so it must NOT be fused.
TEST(CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION, not_fused_due_to_non_zero_padding_in_n_dimension) {
  auto tester = RuntimeTester(5);
  const uint32_t src_id = 0;
  const uint32_t padded_id = 1;
  const uint32_t weights_id = 2;
  const uint32_t bias_tensor_id = 3;
  const uint32_t dst_id = 4;
  // Non-zero pre-padding in the N or C dimension.
  size_t pre_pad[4] = {1, 2, 4, 0};
  size_t post_pad[4] = {0, 6, 8, 0};
  const float pad_value = 0.0f;
  tester.AddInputTensorF32({1, 128, 128, 4}, src_id);
  tester.AddDynamicTensorF32({2, 136, 140, 4}, padded_id);
  tester.AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, weights_id);
  tester.AddStaticTensorF32({4}, TensorType::kDense, bias_tensor_id);
  tester.AddOutputTensorF32({2, 134, 140, 4}, dst_id);
  tester.AddConstantPad(pre_pad, post_pad, pad_value, src_id, padded_id);
  tester.AddDepthwiseConvolution2D(
      DepthwiseConvolutionParams{
          Padding{0, 0, 0, 0},
          Kernel{3, 3},
          Subsampling{1, 1},
          Dilation{1, 1},
          /*depth_multiplier=*/ 1,
          /*input_channels=*/ 4
      }, padded_id, weights_id, bias_tensor_id, dst_id);

  // Both runs keep two operators; fusion must not fire, and the
  // optimized graph must still compute identical results.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> fused = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  ASSERT_EQ(reference, fused);
}
565
// Depthwise-convolution input padding is implicitly zero-valued, so a
// Constant Pad with a non-zero value must NOT be fused into it.
TEST(CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION, not_fused_due_to_padding_value_not_zero) {
  auto tester = RuntimeTester(5);
  const uint32_t src_id = 0;
  const uint32_t padded_id = 1;
  const uint32_t weights_id = 2;
  const uint32_t bias_tensor_id = 3;
  const uint32_t dst_id = 4;
  size_t pre_pad[4] = {0, 2, 4, 0};
  size_t post_pad[4] = {0, 6, 8, 0};
  // Non-zero padding value prevents fusion.
  const float pad_value = 1.0f;
  tester.AddInputTensorF32({1, 128, 128, 4}, src_id);
  tester.AddDynamicTensorF32({1, 136, 140, 4}, padded_id);
  tester.AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, weights_id);
  tester.AddStaticTensorF32({4}, TensorType::kDense, bias_tensor_id);
  tester.AddOutputTensorF32({1, 134, 140, 4}, dst_id);
  tester.AddConstantPad(pre_pad, post_pad, pad_value, src_id, padded_id);
  tester.AddDepthwiseConvolution2D(
      DepthwiseConvolutionParams{
          Padding{0, 0, 0, 0},
          Kernel{3, 3},
          Subsampling{1, 1},
          Dilation{1, 1},
          /*depth_multiplier=*/ 1,
          /*input_channels=*/ 4
      }, padded_id, weights_id, bias_tensor_id, dst_id);

  // Both runs keep two operators; fusion must not fire, and the
  // optimized graph must still compute identical results.
  std::vector<float> reference = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> fused = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  ASSERT_EQ(reference, fused);
}
599
600 } // namespace xnnpack
601