// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

9 #pragma once
10 
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 
15 #include <xnnpack.h>
16 #include <xnnpack/common.h>
17 
// Parameter block for F16 micro-kernels without a fused activation.
// Carries no data: the distinct tag only differentiates pointer types.
struct xnn_f16_default_params {
  char _;  // Dummy member variable to comply with the C standard.
};

// Parameter block that differentiates pointer types for F16 micro-kernels with
// a different fused activation (ReLU, per the type name). Carries no data.
struct xnn_f16_relu_params {
  char _;  // Dummy member variable to comply with the C standard.
};

// scaleminmax is used for gemm/igemm ukernels.
// All three fields are stored as raw 16-bit values.
// NOTE(review): presumably IEEE half-precision bit patterns - confirm.
struct xnn_f16_scaleminmax_params {
  uint16_t scale;
  uint16_t min;
  uint16_t max;
};

// Min/max pair for F16 micro-kernels, stored as raw 16-bit values.
// NOTE(review): presumably the output clamping bounds - confirm with callers.
struct xnn_f16_minmax_params {
  uint16_t min;
  uint16_t max;
};

// Parameter block for F32 micro-kernels without a fused activation.
// Carries no data: the distinct tag only differentiates pointer types.
union xnn_f32_default_params {
  char _;  // Dummy member variable to comply with the C standard.
};

// Parameter block that differentiates pointer types for F32 micro-kernels with
// a different fused activation (ReLU, per the type name). Carries no data.
union xnn_f32_relu_params {
  char _;  // Dummy member variable to comply with the C standard.
};

// Min/max parameters for F32 micro-kernels. Each union member is an
// alternative layout of the same two values; the `sse` layout exists only on
// x86/x86-64 builds.
union xnn_f32_minmax_params {
  // Portable layout: one float per bound.
  struct {
    float min;
    float max;
  } scalar;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // 16-byte-aligned 4-element arrays per bound.
  // NOTE(review): presumably the bound replicated across SIMD lanes - confirm.
  struct {
    XNN_ALIGN(16) float min[4];
    XNN_ALIGN(16) float max[4];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// Parameters for F32 abs micro-kernels (per the type name). Only the x86 and
// WAsm SIMD builds carry data; elsewhere the union holds just the dummy byte.
union xnn_f32_abs_params {
  char _;  // Dummy member variable to comply with the C standard.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float nonsign_mask[4];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD
  struct {
    float nonsign_mask;
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD
};

// Parameters for F32 negation micro-kernels (per the type name). Same shape as
// the abs parameters, but the stored mask is named `sign_mask`.
union xnn_f32_neg_params {
  char _;  // Dummy member variable to comply with the C standard.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float sign_mask[4];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD
  struct {
    float sign_mask;
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD
};

// Parameters for F32 rounding micro-kernels (per the type name). Only x86
// builds carry data: a sign mask and a constant named `one`.
union xnn_f32_rnd_params {
  char _;  // Dummy member variable to comply with the C standard.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float sign_mask[4];
    XNN_ALIGN(16) float one[4];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// Parameters for F32 ELU micro-kernels (per the type name): three floats named
// prescale, alpha and beta. The `sse` layout (x86 only) stores each one as a
// 16-byte-aligned 4-element array.
union xnn_f32_elu_params {
  struct {
    float prescale;
    float alpha;
    float beta;
  } scalar;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float prescale[4];
    XNN_ALIGN(16) float alpha[4];
    XNN_ALIGN(16) float beta[4];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// Parameters for F32 leaky-ReLU micro-kernels (per the type name): a single
// slope value, stored as a 4-element aligned array in the x86 `sse` layout.
union xnn_f32_lrelu_params {
  struct {
    float slope;
  } scalar;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float slope[4];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// Parameters for F32 square-root micro-kernels (per the type name). Only x86
// builds carry data: a single float named `half` in the `fma` layout.
union xnn_f32_sqrt_params {
  char _;  // Dummy member variable to comply with the C standard.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    float half;
  } fma;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

136 union xnn_f32_chw_params {
137   struct {
138     XNN_ALIGN(16) int32_t mask_even[4]; // used by stride 2 kernels
139     XNN_ALIGN(16) int32_t mask_odd[4];  // used by stride 2 kernels
140     XNN_ALIGN(16) int32_t mask[4]; // used by stride 1 kernels
141     float min;
142     float max;
143   } scalar;
144 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
145   struct {
146     float min;
147     float max;
148     XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels
149     XNN_ALIGN(16) uint32_t mask_odd[4];  // used by stride 2 kernels
150     XNN_ALIGN(16) uint32_t mask[4]; // used by stride 1 kernels
151   } neon;
152 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
153 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
154   struct {
155     XNN_ALIGN(16) float min[4];
156     XNN_ALIGN(16) float max[4];
157     XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels
158     XNN_ALIGN(16) uint32_t mask_odd[4];  // used by stride 2 kernels
159     XNN_ALIGN(16) uint32_t mask[4]; // used by stride 1 kernels
160   } sse;
161 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
162 };
163 
// Min/max parameters for U8 micro-kernels. The portable layout widens both
// bounds to int32_t; the NEON layout keeps them as single bytes and the SSE2
// layout stores 16-byte-aligned 16-element byte arrays.
union xnn_u8_minmax_params {
  struct {
    int32_t min;
    int32_t max;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    uint8_t min;
    uint8_t max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) uint8_t min[16];
    XNN_ALIGN(16) uint8_t max[16];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// Scale plus min/max parameters for F32 micro-kernels. The x86 `sse2` layout
// stores each value as a 16-byte-aligned 4-element array.
union xnn_f32_scaleminmax_params {
  struct {
    float scale;
    float min;
    float max;
  } scalar;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float scale[4];
    XNN_ALIGN(16) float min[4];
    XNN_ALIGN(16) float max[4];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

198 union xnn_f32_gavgpool_params {
199   struct {
200     XNN_ALIGN(16) int32_t mask[4];
201     float multiplier;
202     float output_min;
203     float output_max;
204   } scalar;
205 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
206   struct {
207     XNN_ALIGN(16) float multiplier[4];
208     XNN_ALIGN(16) float output_min[4];
209     XNN_ALIGN(16) float output_max[4];
210     XNN_ALIGN(16) uint32_t mask[4];
211   } sse;
212 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
213 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
214   struct {
215     XNN_ALIGN(16) float multiplier;
216     XNN_ALIGN(16) float output_min;
217     XNN_ALIGN(16) float output_max;
218     XNN_ALIGN(16) uint32_t mask[4];
219   } neon;
220 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64 */
221 };
222 
// Constants for F16 hard-swish micro-kernels (per the type name), stored as
// raw 16-bit values.
// NOTE(review): presumably IEEE half-precision bit patterns - confirm.
struct xnn_f16_hswish_params {
  uint16_t sixth;
  uint16_t three;
  uint16_t six;
};

// Constants for F32 hard-swish micro-kernels (per the type name).
// NOTE(review): the two layouts name different constants - `scalar` holds
// sixth/three/six while `sse` holds sixth/half/one. Confirm that the matching
// initializer and kernels agree on which formulation each layout uses.
union xnn_f32_hswish_params {
  struct {
    float sixth;
    float three;
    float six;
  } scalar;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float sixth[4];
    XNN_ALIGN(16) float half[4];
    XNN_ALIGN(16) float one[4];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// Parameters for QU8 (unsigned 8-bit quantized) GEMM micro-kernels. Each
// member is an alternative layout. The field sets are not identical: `scalar`
// and `sse2` carry remainder_mask/remainder_threshold and a shift, `neon`
// carries only right_shift, and `sse2` alone carries `rounding`.
union xnn_qu8_gemm_params {
  struct {
    int32_t kernel_zero_point;
    int32_t multiplier;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    uint32_t shift;
    int32_t output_min_less_zero_point;
    int32_t output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    int32_t kernel_zero_point;
    int32_t multiplier;
    int32_t right_shift;
    int16_t output_zero_point;
    uint8_t output_min;
    uint8_t output_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) int16_t kernel_zero_point[8];
    XNN_ALIGN(16) uint32_t multiplier[4];
    XNN_ALIGN(16) uint64_t rounding[2];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    XNN_ALIGN(16) uint64_t shift[2];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) uint8_t output_min[16];
    XNN_ALIGN(16) uint8_t output_max[16];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// Parameters for QS8 (signed 8-bit quantized) GEMM micro-kernels. Each member
// is an alternative layout. Output bounds are int32 "less zero point" values
// in `scalar`, int8 in `neon`/`wasmsimd`, and int16 arrays in `sse2`.
union xnn_qs8_gemm_params {
  struct {
    int32_t multiplier;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    uint32_t shift;
    int32_t output_min_less_zero_point;
    int32_t output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    int32_t multiplier;
    int32_t right_shift;
    int16_t output_zero_point;
    int8_t output_min;
    int8_t output_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) uint32_t multiplier[4];
    XNN_ALIGN(16) uint64_t rounding[2];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    XNN_ALIGN(16) uint64_t shift[2];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int16_t output_min[8];
    XNN_ALIGN(16) int16_t output_max[8];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD
  struct {
    XNN_ALIGN(16) int64_t multiplier[2];
    XNN_ALIGN(16) int64_t rounding[2];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    int32_t shift;  // the only field in this layout without XNN_ALIGN
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int8_t output_min[16];
    XNN_ALIGN(16) int8_t output_max[16];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD
};

// Parameters for the QS8 GEMM "xw" micro-kernel variant. Every layout is
// declared field-for-field like union xnn_qs8_gemm_params; only the union tag
// differs, which keeps the two kernel families' parameter pointers distinct.
// NOTE(review): the meaning of "xw" is not visible in this header.
union xnn_qs8_gemm_xw_params {
  struct {
    int32_t multiplier;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    uint32_t shift;
    int32_t output_min_less_zero_point;
    int32_t output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    int32_t multiplier;
    int32_t right_shift;
    int16_t output_zero_point;
    int8_t output_min;
    int8_t output_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) uint32_t multiplier[4];
    XNN_ALIGN(16) uint64_t rounding[2];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    XNN_ALIGN(16) uint64_t shift[2];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int16_t output_min[8];
    XNN_ALIGN(16) int16_t output_max[8];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD
  struct {
    XNN_ALIGN(16) int64_t multiplier[2];
    XNN_ALIGN(16) int64_t rounding[2];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    int32_t shift;  // the only field in this layout without XNN_ALIGN
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int8_t output_min[16];
    XNN_ALIGN(16) int8_t output_max[16];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD
};

// Parameters for QU8 addition micro-kernels. Operands are named a/b and the
// result y. The `sse2` layout stores the multipliers twice: as lo/hi 16-bit
// halves in aligned arrays and again as plain 32-bit scalars at the end.
union xnn_qu8_add_params {
  struct {
    int32_t zero_point_product;
    uint32_t a_multiplier;
    uint32_t b_multiplier;
    uint32_t shift;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    int32_t y_zero_point;
    int32_t y_min;
    int32_t y_max;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    uint8_t a_zero_point;
    uint8_t b_zero_point;
    int16_t y_zero_point;
    int32_t a_multiplier;
    int32_t b_multiplier;
    int32_t right_shift;
    uint8_t y_min;
    uint8_t y_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) int32_t zero_point_product[4];
    XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
    XNN_ALIGN(16) uint16_t a_multiplier_hi[8];
    XNN_ALIGN(16) uint16_t b_multiplier_lo[8];
    XNN_ALIGN(16) uint16_t b_multiplier_hi[8];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    XNN_ALIGN(16) int16_t y_zero_point[8];
    XNN_ALIGN(16) uint8_t y_min[16];
    XNN_ALIGN(16) uint8_t y_max[16];
    uint32_t shift;
    uint32_t a_multiplier;
    uint32_t b_multiplier;
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// Parameters for QS8 addition micro-kernels. Operands are named x/y and the
// result "output" (the QU8 counterpart uses a/b/y). The `sse2` layout stores
// the multipliers both as lo/hi 16-bit halves and as 32-bit arrays.
union xnn_qs8_add_params {
  struct {
    int32_t zero_point_product;
    int32_t x_multiplier;
    int32_t y_multiplier;
    uint32_t shift;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    int32_t output_zero_point;
    int32_t output_min;
    int32_t output_max;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    int8_t x_zero_point;
    int8_t y_zero_point;
    int16_t output_zero_point;
    int32_t x_multiplier;
    int32_t y_multiplier;
    int32_t right_shift;
    int8_t output_min;
    int8_t output_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) int32_t zero_point_product[4];
    XNN_ALIGN(16) uint16_t x_multiplier_lo[8];
    XNN_ALIGN(16) uint16_t x_multiplier_hi[8];
    XNN_ALIGN(16) uint16_t y_multiplier_lo[8];
    XNN_ALIGN(16) uint16_t y_multiplier_hi[8];
    XNN_ALIGN(16) int32_t x_multiplier[4];
    XNN_ALIGN(16) int32_t y_multiplier[4];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    uint32_t shift;  // unaligned scalar between aligned arrays
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int16_t output_min[8];
    XNN_ALIGN(16) int16_t output_max[8];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD
  struct {
    XNN_ALIGN(16) int32_t zero_point_product[4];
    XNN_ALIGN(16) int32_t x_multiplier[4];
    XNN_ALIGN(16) int32_t y_multiplier[4];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    int32_t shift;  // unaligned scalar between aligned arrays
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int8_t output_min[16];
    XNN_ALIGN(16) int8_t output_max[16];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD
};

// Parameters for QU8 average-pooling micro-kernels. Note the shift naming:
// `scalar` and `sse2` store a right_shift, whereas `neon` stores a 64-bit
// left_shift.
// NOTE(review): presumably the NEON value is the negated right shift - confirm
// against the initializer.
union xnn_qu8_avgpool_params {
  struct {
    int32_t bias;
    int32_t multiplier;
    int64_t rounding;
    uint32_t right_shift;
    int32_t output_min_less_zero_point;
    int32_t output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    int32_t bias;
    int32_t multiplier;
    int64_t left_shift;
    int16_t output_zero_point;
    uint8_t output_min;
    uint8_t output_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) int32_t bias[4];
    XNN_ALIGN(16) uint32_t multiplier[4];
    XNN_ALIGN(16) uint64_t rounding[2];
    XNN_ALIGN(16) uint64_t right_shift[2];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) uint8_t output_min[16];
    XNN_ALIGN(16) uint8_t output_max[16];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// Parameters for QS8 average-pooling micro-kernels. The shift is named `shift`
// in `scalar`, `sse2` and `wasmsimd`, and `left_shift` (64-bit) in `neon`.
// NOTE(review): presumably the NEON value is the negated right shift - confirm
// against the initializer.
union xnn_qs8_avgpool_params {
  struct {
    int32_t bias;
    int32_t multiplier;
    int64_t rounding;
    uint32_t shift;
    int32_t output_min_less_zero_point;
    int32_t output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    int32_t bias;
    int32_t multiplier;
    int64_t left_shift;
    int16_t output_zero_point;
    int8_t output_min;
    int8_t output_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) int32_t bias[4];
    XNN_ALIGN(16) uint32_t multiplier[4];
    XNN_ALIGN(16) uint64_t rounding[2];
    XNN_ALIGN(16) uint64_t shift[2];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int16_t output_min[8];
    XNN_ALIGN(16) int16_t output_max[8];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD
  struct {
    XNN_ALIGN(16) int32_t bias[4];
    XNN_ALIGN(16) int64_t multiplier[2];
    XNN_ALIGN(16) int64_t rounding[2];
    int32_t shift;  // unaligned scalar between aligned arrays
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int8_t output_min[16];
    XNN_ALIGN(16) int8_t output_max[16];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD
};

// Requantization parameters for QU8. There is a single layout, `q31`, with no
// architecture-specific alternatives.
// NOTE(review): "q31" presumably refers to a Q31 fixed-point multiplier -
// confirm against the code that fills this in.
union xnn_qu8_requantization_params {
  struct {
    int32_t multiplier;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    uint32_t shift;
    int32_t min_less_zero_point;
    int32_t max_less_zero_point;
    int32_t zero_point;
  } q31;
};

// Requantization parameters for QS8. Declared field-for-field like the QU8
// union; only the tag differs.
union xnn_qs8_requantization_params {
  struct {
    int32_t multiplier;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    uint32_t shift;
    int32_t min_less_zero_point;
    int32_t max_less_zero_point;
    int32_t zero_point;
  } q31;
};

// PPMM micro-kernel signatures. The three typedefs share one parameter order
// (mr, nc, kc, a, w, c, cm_stride, cn_stride, params) and differ only in
// pointer types.

// Type-erased variant: all data and parameter pointers are void.
typedef void (*xnn_ppmm_ukernel_function)(
    size_t mr,
    size_t nc,
    size_t kc,
    const void* a,
    const void* w,
    void* c,
    size_t cm_stride,
    size_t cn_stride,
    const void* params);

// F32 variant taking min/max parameters.
typedef void (*xnn_f32_ppmm_minmax_ukernel_function)(
    size_t mr,
    size_t nc,
    size_t kc,
    const float* a,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_f32_minmax_params* params);

// F16 variant: data stays behind void pointers; takes scale/min/max parameters.
typedef void (*xnn_f16_ppmm_ukernel_function)(
    size_t mr,
    size_t nc,
    size_t kc,
    const void* a,
    const void* w,
    void* c,
    size_t cm_stride,
    size_t cn_stride,
    const struct xnn_f16_scaleminmax_params* params);

// GEMM micro-kernel signatures (generic, F32 and F16). Common parameter order:
// mr, nr, k, a, a_stride, w, c, cm_stride, cn_stride, params. The typed
// variants differ in element pointer types and in the parameter-block type.

// Type-erased variant: all data and parameter pointers are void.
typedef void (*xnn_gemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const void* a,
    size_t a_stride,
    const void* w,
    void* c,
    size_t cm_stride,
    size_t cn_stride,
    const void* params);

// F32 variant taking the empty "default" parameter block.
typedef void (*xnn_f32_gemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const float* a,
    size_t a_stride,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_f32_default_params* params);

// F32 variant taking the empty ReLU parameter block.
typedef void (*xnn_f32_gemm_relu_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const float* a,
    size_t a_stride,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_f32_relu_params* params);

// F32 variant taking min/max parameters.
typedef void (*xnn_f32_gemm_minmax_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const float* a,
    size_t a_stride,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_f32_minmax_params* params);

// F32 "gemminc" variant: same as the min/max signature plus an extra `acc`
// input pointer placed before `params`.
typedef void (*xnn_f32_gemminc_minmax_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const float* a,
    size_t a_stride,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    const float* acc,
    const union xnn_f32_minmax_params* params);

// F16 variant: data stays behind void pointers; takes scale/min/max parameters.
typedef void (*xnn_f16_gemm_minmax_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const void* a,
    size_t a_stride,
    const void* w,
    void* c,
    size_t cm_stride,
    size_t cn_stride,
    const struct xnn_f16_scaleminmax_params* params);

// F16 IGEMM micro-kernel signature: `a` is an array of pointers, and the
// signature adds ks, a_offset and a `zero` pointer compared with GEMM.
// Data stays behind void pointers; takes scale/min/max parameters.
typedef void (*xnn_f16_igemm_minmax_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const void** a,
    const void* w,
    void* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const void* zero,
    const struct xnn_f16_scaleminmax_params* params);

// QU8 GEMM micro-kernel signature: uint8_t input/output, untyped weights.
typedef void (*xnn_qu8_gemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const uint8_t* a,
    size_t a_stride,
    const void* w,
    uint8_t* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_qu8_gemm_params* params);

// QS8 GEMM micro-kernel signature: int8_t input/output, untyped weights.
typedef void (*xnn_qs8_gemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const int8_t* a,
    size_t a_stride,
    const void* w,
    int8_t* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_qs8_gemm_params* params);

// QS8 GEMM "xw" micro-kernel signature: identical to the QS8 GEMM signature
// except that it takes union xnn_qs8_gemm_xw_params.
typedef void (*xnn_qs8_gemm_xw_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const int8_t* a,
    size_t a_stride,
    const void* w,
    int8_t* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_qs8_gemm_xw_params* params);

// IGEMM micro-kernel signatures. Common parameter order: mr, nr, kc, ks, a, w,
// c, cm_stride, cn_stride, a_offset, zero, params. Unlike GEMM, `a` is an
// array of pointers and there is no a_stride.

// Type-erased variant: all data and parameter pointers are void.
typedef void (*xnn_igemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const void** a,
    const void* w,
    void* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const void* zero,
    const void* params);

// F32 variant taking the empty "default" parameter block.
typedef void (*xnn_f32_igemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const float** a,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const float* zero,
    const union xnn_f32_default_params* params);

// F32 variant taking the empty ReLU parameter block.
typedef void (*xnn_f32_igemm_relu_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const float** a,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const float* zero,
    const union xnn_f32_relu_params* params);

// F32 variant taking min/max parameters.
typedef void (*xnn_f32_igemm_minmax_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const float** a,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const float* zero,
    const union xnn_f32_minmax_params* params);

// QU8 variant: uint8_t input/output, untyped weights, QU8 GEMM parameters.
typedef void (*xnn_qu8_igemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const uint8_t** a,
    const void* w,
    uint8_t* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const uint8_t* zero,
    const union xnn_qu8_gemm_params* params);

// QS8 variant: int8_t input/output, untyped weights, QS8 GEMM parameters.
typedef void (*xnn_qs8_igemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const int8_t** a,
    const void* w,
    int8_t* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const int8_t* zero,
    const union xnn_qs8_gemm_params* params);

// Direct-convolution micro-kernel signatures. The "hwc" and "hwc2chw" families
// differ only in their last stride parameter: output_width_stride for hwc,
// output_channel_stride for hwc2chw.

// Type-erased HWC variant.
typedef void (*xnn_conv_hwc_ukernel_function)(
    size_t input_height,
    size_t input_width,
    size_t output_y_start,
    size_t output_y_end,
    const void* input,
    const void* zero,
    const void* weights,
    void* output,
    size_t input_padding_top,
    size_t output_channels,
    size_t output_height_stride,
    size_t output_width_stride,
    const void* params);

// F32 HWC variant taking min/max parameters.
typedef void (*xnn_f32_conv_hwc_ukernel_function)(
    size_t input_height,
    size_t input_width,
    size_t output_y_start,
    size_t output_y_end,
    const float* input,
    const float* zero,
    const float* weights,
    float* output,
    size_t input_padding_top,
    size_t output_channels,
    size_t output_height_stride,
    size_t output_width_stride,
    const union xnn_f32_minmax_params* params);

// Type-erased HWC-to-CHW variant.
typedef void (*xnn_conv_hwc2chw_ukernel_function)(
    size_t input_height,
    size_t input_width,
    size_t output_y_start,
    size_t output_y_end,
    const void* input,
    const void* zero,
    const void* weights,
    void* output,
    size_t input_padding_top,
    size_t output_channels,
    size_t output_height_stride,
    size_t output_channel_stride,
    const void* params);

// F32 HWC-to-CHW variant taking min/max parameters.
typedef void (*xnn_f32_conv_hwc2chw_ukernel_function)(
    size_t input_height,
    size_t input_width,
    size_t output_y_start,
    size_t output_y_end,
    const float* input,
    const float* zero,
    const float* weights,
    float* output,
    size_t input_padding_top,
    size_t output_channels,
    size_t output_height_stride,
    size_t output_channel_stride,
    const union xnn_f32_minmax_params* params);

// SPMM micro-kernel signatures. Besides input and weights, each kernel takes
// two index arrays: a signed 32-bit `widx_dmap` and an unsigned 32-bit
// `nidx_nnzmap`.
// NOTE(review): presumably a sparse-weights encoding - confirm with the
// packing code.

// Type-erased variant.
typedef void (*xnn_spmm_ukernel_function)(
    size_t batch_size,
    size_t output_channels,
    const void* input,
    const void* weights,
    const int32_t* widx_dmap,
    const uint32_t* nidx_nnzmap,
    void* output,
    size_t output_stride,
    const void* params);

// F16 variant: data stays behind void pointers; takes scale/min/max parameters.
typedef void (*xnn_f16_spmm_minmax_ukernel_function)(
    size_t batch_size,
    size_t output_channels,
    const void* input,
    const void* weights,
    const int32_t* widx_dmap,
    const uint32_t* nidx_nnzmap,
    void* output,
    size_t output_stride,
    const struct xnn_f16_scaleminmax_params* params);

// F32 variant taking min/max parameters.
typedef void (*xnn_f32_spmm_minmax_ukernel_function)(
    size_t batch_size,
    size_t output_channels,
    const float* input,
    const float* weights,
    const int32_t* widx_dmap,
    const uint32_t* nidx_nnzmap,
    float* output,
    size_t output_stride,
    const union xnn_f32_minmax_params* params);

// PACKX micro-kernel signatures: read m-by-k data from `x` (with x_stride) and
// write to `y`. NOTE(review): the exact packed layout is defined by the
// kernels, not by this header.
typedef void (*xnn_packx_ukernel_function)(
    size_t m,
    size_t k,
    const void* x,
    size_t x_stride,
    void* y);

// 32-bit-element variant of the PACKX signature.
typedef void (*xnn_x32_packx_ukernel_function)(
    size_t m,
    size_t k,
    const uint32_t* x,
    size_t x_stride,
    uint32_t* y);

// FILL micro-kernel signatures: write to `output` using the value behind
// `fill_value`; the kernel receives no input tensor.
typedef void (*xnn_fill_ukernel_function)(
    size_t rows,
    size_t channels,
    void* output,
    size_t output_stride,
    const void* fill_value);

// 32-bit-element variant of the FILL signature.
typedef void (*xnn_x32_fill_ukernel_function)(
    size_t rows,
    size_t channels,
    uint32_t* output,
    size_t output_stride,
    const uint32_t* fill_value);

// Depth-to-space (CHW input, HWC output, per the type name) micro-kernel
// signatures. The last parameter is named output_channel_stride in both
// typedefs so the generic and x32 declarations read the same way.
typedef void (*xnn_depthtospace2d_chw2hwc_ukernel_function)(
    size_t output_channels,
    size_t input_height,
    size_t input_width,
    size_t block_size,
    const void* input,
    void* output,
    size_t output_channel_stride);

// 32-bit-element variant of the depth-to-space signature.
typedef void (*xnn_x32_depthtospace2d_chw2hwc_ukernel_function)(
    size_t output_channels,
    size_t input_height,
    size_t input_width,
    size_t block_size,
    const uint32_t* input,
    uint32_t* output,
    size_t output_channel_stride);

// PAD micro-kernel signatures: take separate pre- and post-padding amounts, a
// pointer to the fill value, and strided input and output.
// NOTE(review): the units of the padding amounts and strides (bytes vs
// elements) are not stated in this header.
typedef void (*xnn_pad_ukernel_function)(
    size_t rows,
    size_t channels,
    size_t pre_padding,
    size_t post_padding,
    const void* fill_value,
    const void* input,
    size_t input_stride,
    void* output,
    size_t output_stride);

// 32-bit-element variant of the PAD signature.
typedef void (*xnn_x32_pad_ukernel_function)(
    size_t rows,
    size_t channels,
    size_t pre_padding,
    size_t post_padding,
    const uint32_t* fill_value,
    const uint32_t* input,
    size_t input_stride,
    uint32_t* output,
    size_t output_stride);

// UNPOOL micro-kernel signatures: `f` is passed by value as a 32-bit word,
// `index` is a uint32_t array and `output` is an array of output pointers.
// NOTE(review): p, c and f are presumably pooling size, channel count and fill
// value - confirm with the kernels.
typedef void (*xnn_unpool_ukernel_function)(
    size_t p,
    size_t c,
    uint32_t f,
    const void* input,
    const uint32_t* index,
    void** output);

// 32-bit-element variant of the UNPOOL signature.
typedef void (*xnn_x32_unpool_ukernel_function)(
    size_t p,
    size_t c,
    uint32_t f,
    const uint32_t* input,
    const uint32_t* index,
    uint32_t** output);

// ZIP micro-kernel signatures. "zipc" kernels take only a size `n`; "zipv"
// kernels take an additional size `m`.
// NOTE(review): presumably zipc has a compile-time channel count and zipv a
// run-time one - confirm with the kernels.

// Type-erased zipc variant.
typedef void (*xnn_zipc_ukernel_function)(
    size_t n,
    const void* x,
    void* y);

// 8-bit-element zipc variant.
typedef void (*xnn_x8_zipc_ukernel_function)(
    size_t n,
    const uint8_t* x,
    uint8_t* y);

// 32-bit-element zipc variant.
typedef void (*xnn_x32_zipc_ukernel_function)(
    size_t n,
    const uint32_t* x,
    uint32_t* y);

// Type-erased zipv variant.
typedef void (*xnn_zipv_ukernel_function)(
    size_t n,
    size_t m,
    const void* x,
    void* y);

// 8-bit-element zipv variant.
typedef void (*xnn_x8_zipv_ukernel_function)(
    size_t n,
    size_t m,
    const uint8_t* x,
    uint8_t* y);

// 32-bit-element zipv variant.
typedef void (*xnn_x32_zipv_ukernel_function)(
    size_t n,
    size_t m,
    const uint32_t* x,
    uint32_t* y);

// 8-bit lookup-table micro-kernel signature: input `x`, table `t`, output `y`.
typedef void (*xnn_x8_lut_ukernel_function)(
    size_t n,
    const uint8_t* x,
    const uint8_t* t,
    uint8_t* y);

// Depthwise 2D convolution (CHW) micro-kernel signatures. Note that
// padding_top is a uint32_t while the image dimensions are size_t.

// Type-erased variant.
typedef void (*xnn_dwconv2d_chw_ukernel_function)(
    size_t input_height,
    size_t input_width,
    const void* input,
    const void* weights,
    const void* zero,
    void* output,
    uint32_t padding_top,
    const void* params);

// F32 variant taking union xnn_f32_chw_params.
typedef void (*xnn_f32_dwconv2d_chw_ukernel_function)(
    size_t input_height,
    size_t input_width,
    const float* input,
    const float* weights,
    const float* zero,
    float* output,
    uint32_t padding_top,
    const union xnn_f32_chw_params* params);

// Single-pass ("unipass") depthwise-convolution micro-kernel signatures.
// This first form is type-erased: all data and parameter pointers are void pointers.
typedef void (*xnn_dwconv_unipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const void** input,
    const void* weights,
    void* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const void* zero,
    const void* params);

// F32 variant taking default (no fused activation) parameters.
typedef void (*xnn_f32_dwconv_unipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const float** input,
    const float* weights,
    float* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const float* zero,
    const union xnn_f32_default_params* params);

// F32 variant taking min/max parameters.
typedef void (*xnn_f32_dwconv_minmax_unipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const float** input,
    const float* weights,
    float* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const float* zero,
    const union xnn_f32_minmax_params* params);

// F16 variant taking min/max parameters; data pointers stay void pointers.
typedef void (*xnn_f16_dwconv_minmax_unipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const void** input,
    const void* weights,
    void* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const void* zero,
    const struct xnn_f16_minmax_params* params);

// QU8 variant: uint8_t input/output/zero, untyped weights, QU8 GEMM parameters.
typedef void (*xnn_qu8_dwconv_minmax_unipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const uint8_t** input,
    const void* weights,
    uint8_t* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const uint8_t* zero,
    const union xnn_qu8_gemm_params* params);

// QS8 variant: int8_t input/output/zero, untyped weights, QS8 GEMM parameters.
typedef void (*xnn_qs8_dwconv_minmax_unipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const int8_t** input,
    const void* weights,
    int8_t* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const int8_t* zero,
    const union xnn_qs8_gemm_params* params);
1117 
// Type-erased signature of multi-pass depthwise-convolution micro-kernels.
// Compared with the unipass signature it takes an additional `buffer` pointer
// ahead of `output`.
typedef void (*xnn_dwconv_multipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const void** input,
    const void* weights,
    void* buffer,
    void* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const void* zero,
    const void* params);
1130 
// Bilinear-interpolation micro-kernel signatures.
// F32 variant; the last argument is an output increment.
typedef void (*xnn_f32_ibilinear_ukernel_function)(
    size_t output_pixels,
    size_t channels,
    const float** input,
    size_t input_offset,
    const float* weights,
    float* output,
    size_t output_increment);

// F32 CHW variant; the last argument is an input increment instead.
typedef void (*xnn_f32_ibilinear_chw_ukernel_function)(
    size_t output_pixels,
    size_t channels,
    const float** input,
    size_t input_offset,
    const float* weights,
    float* output,
    size_t input_increment);

// Type-erased counterpart of xnn_f32_ibilinear_ukernel_function.
typedef void (*xnn_ibilinear_ukernel_function)(
    size_t output_pixels,
    size_t channels,
    const void** input,
    size_t input_offset,
    const void* weights,
    void* output,
    size_t output_increment);

// Type-erased counterpart of xnn_f32_ibilinear_chw_ukernel_function.
typedef void (*xnn_ibilinear_chw_ukernel_function)(
    size_t output_pixels,
    size_t channels,
    const void** input,
    size_t input_offset,
    const void* weights,
    void* output,
    size_t input_increment);
1166 
// Single-pass global-average-pooling micro-kernel signatures.
// Type-erased form: data and parameters are passed as void pointers.
typedef void (*xnn_gavgpool_unipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const void* input,
    size_t input_stride,
    const void* zero,
    void* output,
    const void* params);

// F16 variant taking scale/min/max parameters; data pointers stay void pointers.
typedef void (*xnn_f16_gavgpool_minmax_unipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const void* input,
    size_t input_stride,
    const void* zero,
    void* output,
    const struct xnn_f16_scaleminmax_params* params);

// F32 variant taking scale/min/max parameters.
typedef void (*xnn_f32_gavgpool_minmax_unipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const float* input,
    size_t input_stride,
    const float* zero,
    float* output,
    const union xnn_f32_scaleminmax_params* params);

// QU8 variant taking QU8 average-pooling parameters.
typedef void (*xnn_qu8_gavgpool_minmax_unipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const uint8_t* input,
    size_t input_stride,
    const uint8_t* zero,
    uint8_t* output,
    const union xnn_qu8_avgpool_params* params);

// QS8 variant taking QS8 average-pooling parameters.
typedef void (*xnn_qs8_gavgpool_minmax_unipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const int8_t* input,
    size_t input_stride,
    const int8_t* zero,
    int8_t* output,
    const union xnn_qs8_avgpool_params* params);
1211 
// Multi-pass global-average-pooling micro-kernel signatures. Each takes an
// additional `buffer` pointer ahead of `output`.
// Type-erased form: data and parameters are passed as void pointers.
typedef void (*xnn_gavgpool_multipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const void* input,
    size_t input_stride,
    const void* zero,
    void* buffer,
    void* output,
    const void* params);

// F16 variant taking scale/min/max parameters; data pointers stay void pointers.
typedef void (*xnn_f16_gavgpool_minmax_multipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const void* input,
    size_t input_stride,
    const void* zero,
    void* buffer,
    void* output,
    const struct xnn_f16_scaleminmax_params* params);

// F32 variant; the buffer holds float elements.
typedef void (*xnn_f32_gavgpool_minmax_multipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const float* input,
    size_t input_stride,
    const float* zero,
    float* buffer,
    float* output,
    const union xnn_f32_scaleminmax_params* params);

// QU8 variant; the buffer holds int32_t elements.
typedef void (*xnn_qu8_gavgpool_minmax_multipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const uint8_t* input,
    size_t input_stride,
    const uint8_t* zero,
    int32_t* buffer,
    uint8_t* output,
    const union xnn_qu8_avgpool_params* params);

// QS8 variant; the buffer holds int32_t elements.
typedef void (*xnn_qs8_gavgpool_minmax_multipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const int8_t* input,
    size_t input_stride,
    const int8_t* zero,
    int32_t* buffer,
    int8_t* output,
    const union xnn_qs8_avgpool_params* params);
1261 
// Global-average-pooling micro-kernel signatures for CW layout (per the type name).
// Generic form: only `params` is type-erased; input and output are float pointers.
typedef void (*xnn_gavgpool_cw_ukernel_function)(
    size_t elements,
    size_t channels,
    const float* input,
    float* output,
    const void* params);

// F32 variant taking typed global-average-pooling parameters.
typedef void (*xnn_f32_gavgpool_cw_ukernel_function)(
    size_t elements,
    size_t channels,
    const float* input,
    float* output,
    const union xnn_f32_gavgpool_params* params);
1275 
// Single-pass average-pooling micro-kernel signatures.
// Type-erased form: data and parameters are passed as void pointers.
typedef void (*xnn_avgpool_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    const void* zero,
    void* output,
    size_t input_increment,
    size_t output_increment,
    const void* params);

// F32 variant taking scale/min/max parameters.
typedef void (*xnn_f32_avgpool_minmax_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    const float* zero,
    float* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_f32_scaleminmax_params* params);

// QU8 variant taking QU8 average-pooling parameters.
typedef void (*xnn_qu8_avgpool_minmax_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const uint8_t** input,
    size_t input_offset,
    const uint8_t* zero,
    uint8_t* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_qu8_avgpool_params* params);
1311 
// Multi-pass average-pooling micro-kernel signatures. Each takes an additional
// `buffer` pointer ahead of `output`.
// Type-erased form: data and parameters are passed as void pointers.
typedef void (*xnn_avgpool_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    const void* zero,
    void* buffer,
    void* output,
    size_t input_increment,
    size_t output_increment,
    const void* params);

// F32 variant; the buffer holds float elements.
typedef void (*xnn_f32_avgpool_minmax_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    const float* zero,
    float* buffer,
    float* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_f32_scaleminmax_params* params);

// QU8 variant; the buffer holds int32_t elements.
typedef void (*xnn_qu8_avgpool_minmax_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const uint8_t** input,
    size_t input_offset,
    const uint8_t* zero,
    int32_t* buffer,
    uint8_t* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_qu8_avgpool_params* params);
1350 
// Single-pass "pavgpool" micro-kernel signatures. They match the average-pooling
// unipass signatures except for an extra `multiplier` pointer after `zero`.
// Type-erased form: data and parameters are passed as void pointers.
typedef void (*xnn_pavgpool_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    const void* zero,
    const void* multiplier,
    void* output,
    size_t input_increment,
    size_t output_increment,
    const void* params);

// F32 variant taking min/max parameters.
typedef void (*xnn_f32_pavgpool_minmax_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    const float* zero,
    const float* multiplier,
    float* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_f32_minmax_params* params);
1376 
// Multi-pass "pavgpool" micro-kernel signatures: take both a `multiplier`
// pointer and an intermediate `buffer` pointer.
// Type-erased form: data and parameters are passed as void pointers.
typedef void (*xnn_pavgpool_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    const void* zero,
    const void* multiplier,
    void* buffer,
    void* output,
    size_t input_increment,
    size_t output_increment,
    const void* params);

// F32 variant taking min/max parameters.
typedef void (*xnn_f32_pavgpool_minmax_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    const float* zero,
    const float* multiplier,
    float* buffer,
    float* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_f32_minmax_params* params);
1404 
// Max-pooling micro-kernel signatures.
// Type-erased form: data and parameters are passed as void pointers.
typedef void (*xnn_maxpool_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    void* output,
    size_t input_increment,
    size_t output_increment,
    const void* params);

// F32 variant taking min/max parameters.
typedef void (*xnn_f32_maxpool_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    float* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_f32_minmax_params* params);

// U8 variant taking min/max parameters.
typedef void (*xnn_u8_maxpool_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const uint8_t** input,
    size_t input_offset,
    uint8_t* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_u8_minmax_params* params);
1437 
// Single-pass argmax-pooling micro-kernel signatures. Besides `output` they
// take a uint32_t `index` pointer, and they take no params argument.
// Type-erased form: input and output are void pointers.
typedef void (*xnn_argmaxpool_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    void* output,
    uint32_t* index,
    size_t input_increment,
    size_t output_increment);

// F32 variant.
typedef void (*xnn_f32_argmaxpool_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    float* output,
    uint32_t* index,
    size_t input_increment,
    size_t output_increment);
1459 
// Multi-pass argmax-pooling micro-kernel signatures. In addition to the
// unipass arguments they take an accumulation buffer and an index buffer.
// Type-erased form: input, accumulation buffer and output are void pointers.
typedef void (*xnn_argmaxpool_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    void* accumulation_buffer,
    uint32_t* index_buffer,
    void* output,
    uint32_t* index,
    size_t input_increment,
    size_t output_increment);

// F32 variant.
typedef void (*xnn_f32_argmaxpool_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    float* accumulation_buffer,
    uint32_t* index_buffer,
    float* output,
    uint32_t* index,
    size_t input_increment,
    size_t output_increment);
1485 
// Type-erased signature of micro-kernels that take one input vector `x`,
// one output vector `y`, a size `n`, and an opaque params pointer.
typedef void (*xnn_univector_ukernel_function)(
    size_t n,
    const void* x,
    void* y,
    const void* params);
1491 
// Clamp micro-kernel signatures; each takes min/max parameters for its data type.
// F16 variant: data pointers are void pointers.
typedef void (*xnn_f16_clamp_ukernel_function)(
    size_t n,
    const void* x,
    void* y,
    const struct xnn_f16_minmax_params* params);

// F32 variant.
typedef void (*xnn_f32_clamp_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    const union xnn_f32_minmax_params* params);

// U8 variant.
typedef void (*xnn_u8_clamp_ukernel_function)(
    size_t n,
    const uint8_t* x,
    uint8_t* y,
    const union xnn_u8_minmax_params* params);
1509 
// ReLU micro-kernel signatures.
// F16 variant: data pointers are void pointers.
typedef void (*xnn_f16_relu_ukernel_function)(
    size_t n,
    const void* x,
    void* y,
    const struct xnn_f16_relu_params* params);

// F32 variant.
typedef void (*xnn_f32_relu_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    const union xnn_f32_relu_params* params);
1521 
// HSwish micro-kernel signatures.
// F16 variant: data pointers are void pointers.
typedef void (*xnn_f16_hswish_ukernel_function)(
    size_t n,
    const void* x,
    void* y,
    const struct xnn_f16_hswish_params* params);

// F32 variant.
typedef void (*xnn_f32_hswish_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    const union xnn_f32_hswish_params* params);
1533 
// "rmax" micro-kernel signatures: input `x` of size `n`, output `y`, no params.
// Type-erased form.
typedef void (*xnn_rmax_ukernel_function)(
    size_t n,
    const void* x,
    void* y);

// U8 variant.
typedef void (*xnn_u8_rmax_ukernel_function)(
    size_t n,
    const uint8_t* x,
    uint8_t* y);

// F32 variant.
typedef void (*xnn_f32_rmax_ukernel_function)(
    size_t n,
    const float* x,
    float* y);
1548 
// Signature of U8 "lut32norm" micro-kernels: uint8_t input `x` of size `n`,
// a table `t` of uint32_t entries, and uint8_t output `y`.
typedef void (*xnn_u8_lut32norm_ukernel_function)(
    size_t n,
    const uint8_t* x,
    const uint32_t* t,
    uint8_t* y);
1554 
// Vector-addition micro-kernel signatures: two inputs, one output, and params.
// Type-erased form.
typedef void (*xnn_vadd_ukernel_function)(
    size_t n,
    const void* a,
    const void* b,
    void* y,
    const void* params);

// QU8 variant taking QU8 addition parameters.
typedef void (*xnn_qu8_vadd_minmax_ukernel_function)(
    size_t n,
    const uint8_t* input_x,
    const uint8_t* input_y,
    uint8_t* output,
    const union xnn_qu8_add_params* params);

// QS8 variant taking QS8 addition parameters.
typedef void (*xnn_qs8_vadd_minmax_ukernel_function)(
    size_t n,
    const int8_t* input_x,
    const int8_t* input_y,
    int8_t* output,
    const union xnn_qs8_add_params* params);
1575 
// Signature of F32 ELU micro-kernels: float input `x` of size `n`, float
// output `y`, and ELU parameters.
typedef void (*xnn_f32_velu_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    const union xnn_f32_elu_params* params);
1581 
// Signature of F32 square-root micro-kernels: float input `x` of size `n`,
// float output `y`, and sqrt parameters.
typedef void (*xnn_f32_vsqrt_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    const union xnn_f32_sqrt_params* params);
1587 
// Binary element-wise micro-kernel signatures: inputs `a` and `b`, output `y`,
// size `n`, and params. The typed variants differ in element type and in the
// params type they accept.
// Type-erased form.
typedef void (*xnn_vbinary_ukernel_function)(
    size_t n,
    const void* a,
    const void* b,
    void* y,
    const void* params);

// F16 variant with default (no fused activation) parameters.
typedef void (*xnn_f16_vbinary_ukernel_function)(
    size_t n,
    const void* a,
    const void* b,
    void* y,
    const struct xnn_f16_default_params* params);

// F16 variant with min/max parameters.
typedef void (*xnn_f16_vbinary_minmax_ukernel_function)(
    size_t n,
    const void* a,
    const void* b,
    void* y,
    const struct xnn_f16_minmax_params* params);

// F32 variant with default (no fused activation) parameters.
typedef void (*xnn_f32_vbinary_ukernel_function)(
    size_t n,
    const float* a,
    const float* b,
    float* y,
    const union xnn_f32_default_params* params);

// F32 variant with min/max parameters.
typedef void (*xnn_f32_vbinary_minmax_ukernel_function)(
    size_t n,
    const float* a,
    const float* b,
    float* y,
    const union xnn_f32_minmax_params* params);

// F32 variant with ReLU parameters.
typedef void (*xnn_f32_vbinary_relu_ukernel_function)(
    size_t n,
    const float* a,
    const float* b,
    float* y,
    const union xnn_f32_relu_params* params);
1629 
// Unary element-wise micro-kernel signatures: input `x`, output `y`, size `n`, params.
// Type-erased form.
typedef void (*xnn_vunary_ukernel_function)(
    size_t n,
    const void* x,
    void* y,
    const void* params);

// F32 variant; note that `params` remains an untyped pointer here.
typedef void (*xnn_f32_vunary_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    const void* params);
1641 
// "vmulcaddc" micro-kernel signatures: sizes `m` and `c`, input `x` with its
// stride, weights `w`, output `y` with its stride, and params.
// Type-erased form.
typedef void (*xnn_vmulcaddc_ukernel_function)(
    size_t m,
    size_t c,
    const void* x,
    size_t x_stride,
    const void* w,
    void* y,
    size_t y_stride,
    const void* params);

// F16 variant taking min/max parameters; data pointers stay void pointers.
typedef void (*xnn_f16_vmulcaddc_ukernel_function)(
    size_t m,
    size_t c,
    const void* x,
    size_t x_stride,
    const void* w,
    void* y,
    size_t y_stride,
    const struct xnn_f16_minmax_params* params);

// F32 variant taking min/max parameters.
typedef void (*xnn_f32_vmulcaddc_ukernel_function)(
    size_t m,
    size_t c,
    const float* x,
    size_t x_stride,
    const float* w,
    float* y,
    size_t y_stride,
    const union xnn_f32_minmax_params* params);
1671 
// PReLU micro-kernel signatures: sizes `mr` and `n`, input `x` with its stride,
// weights `w`, and output `y` with its stride. No params argument.
// Type-erased form.
typedef void (*xnn_prelu_ukernel_function)(
    size_t mr,
    size_t n,
    const void* x,
    size_t x_stride,
    const void* w,
    void* y,
    size_t y_stride);

// F16 variant; its signature is identical to the type-erased form.
typedef void (*xnn_f16_prelu_ukernel_function)(
    size_t mr,
    size_t n,
    const void* x,
    size_t x_stride,
    const void* w,
    void* y,
    size_t y_stride);

// F32 variant.
typedef void (*xnn_f32_prelu_ukernel_function)(
    size_t mr,
    size_t n,
    const float* x,
    size_t x_stride,
    const float* w,
    float* y,
    size_t y_stride);
1698 
// F32 micro-kernel signatures that take a `max` value alongside the input
// (names suggest exp(x - max) computations — confirm against the kernels).
// Takes input of size `n` and writes through `sum`; has no output vector.
typedef void (*xnn_f32_raddexpminusmax_ukernel_function)(
    size_t n,
    const float* input,
    float* sum,
    float max);

// As above, but also takes an `output` vector.
typedef void (*xnn_f32_raddstoreexpminusmax_ukernel_function)(
    size_t n,
    const float* input,
    float* output,
    float* sum,
    float max);

// Takes an `output` vector plus `max` and `scale` values; no `sum` pointer.
typedef void (*xnn_f32_vscaleexpminusmax_ukernel_function)(
    size_t n,
    const float* input,
    float* output,
    float max,
    float scale);
1718 
// Signature of F32 "vscale" micro-kernels: float input `x` of size `n`,
// float output `y`, and a scalar `c` passed by value.
typedef void (*xnn_f32_vscale_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    float c);
1724 
// Reduce-Add Extended ("mantissa" + "exponent") Exponentials.
// Takes float input of size `n` and writes through the `sum` pointer.
typedef void (*xnn_f32_raddextexp_ukernel_function)(
    size_t n,
    const float* input,
    float* sum);

// Vector Scale Extended ("mantissa" + "exponent") Exponentials.
// The scale is supplied as two separate values: a mantissa and an exponent.
typedef void (*xnn_f32_vscaleextexp_ukernel_function)(
    size_t n,
    const float* input,
    float* output,
    float scale_mantissa,
    float scale_exponent);
1738 
1739 struct xnn_hmp_gemm_ukernel {
1740   xnn_gemm_ukernel_function function[XNN_MAX_UARCH_TYPES];
1741 };
1742 
// Builds a GEMM micro-kernel table in which every micro-architecture slot
// holds the same `function`.
static inline struct xnn_hmp_gemm_ukernel xnn_init_hmp_gemm_ukernel(xnn_gemm_ukernel_function function) {
  // The initializer fills slot 0; the loop fills the remaining slots.
  struct xnn_hmp_gemm_ukernel ukernel = { function };
  for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
    ukernel.function[i] = function;
  }
  return ukernel;
}
1750 
xnn_is_hmp_gemm_ukernel(struct xnn_hmp_gemm_ukernel ukernel)1751 static inline bool xnn_is_hmp_gemm_ukernel(struct xnn_hmp_gemm_ukernel ukernel) {
1752 #if XNN_MAX_UARCH_TYPES == 1
1753   return false;
1754 #else
1755   uintptr_t default_function = (uintptr_t) ukernel.function[XNN_UARCH_DEFAULT];
1756   uintptr_t difference = 0;
1757   for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
1758     difference |= (default_function ^ (uintptr_t) ukernel.function[i]);
1759   }
1760   return difference != 0;
1761 #endif
1762 }
1763 
1764 struct xnn_hmp_igemm_ukernel {
1765   xnn_igemm_ukernel_function function[XNN_MAX_UARCH_TYPES];
1766 };
1767 
// Builds an IGEMM micro-kernel table in which every micro-architecture slot
// holds the same `function`.
static inline struct xnn_hmp_igemm_ukernel xnn_init_hmp_igemm_ukernel(xnn_igemm_ukernel_function function) {
  // The initializer fills slot 0; the loop fills the remaining slots.
  struct xnn_hmp_igemm_ukernel ukernel = { function };
  for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
    ukernel.function[i] = function;
  }
  return ukernel;
}
1775 
xnn_is_hmp_igemm_ukernel(struct xnn_hmp_igemm_ukernel ukernel)1776 static inline bool xnn_is_hmp_igemm_ukernel(struct xnn_hmp_igemm_ukernel ukernel) {
1777 #if XNN_MAX_UARCH_TYPES == 1
1778   return false;
1779 #else
1780   uintptr_t default_function = (uintptr_t) ukernel.function[XNN_UARCH_DEFAULT];
1781   uintptr_t difference = 0;
1782   for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
1783     difference |= (default_function ^ (uintptr_t) ukernel.function[i]);
1784   }
1785   return difference != 0;
1786 #endif
1787 }
1788 
1789 struct gemm_fused_ukernels {
1790   struct xnn_hmp_gemm_ukernel gemm;
1791   struct xnn_hmp_igemm_ukernel igemm;
1792   // Optional GEMM and IGEMM micro-kernels with MR=1 and the same NR and KR parameters.
1793   struct xnn_hmp_gemm_ukernel gemm1;
1794   struct xnn_hmp_igemm_ukernel igemm1;
1795 };
1796 
1797 struct gemm_parameters {
1798   struct gemm_fused_ukernels minmax;
1799   struct gemm_fused_ukernels relu;
1800   struct gemm_fused_ukernels linear;
1801   uint8_t mr;
1802   uint8_t nr;
1803   uint8_t log2_kr;
1804   uint8_t log2_sr;
1805 };
1806 
1807 struct vbinary_fused_ukernels {
1808   xnn_vbinary_ukernel_function op_ukernel;
1809   xnn_vbinary_ukernel_function opc_ukernel;
1810   xnn_vbinary_ukernel_function ropc_ukernel;
1811 };
1812 
1813 struct vbinary_parameters {
1814   struct vbinary_fused_ukernels minmax;
1815   struct vbinary_fused_ukernels linear;
1816   // Number of elements in a tile.
1817   // For best efficiency, micro-kernel must process a multiple of this number of elements in each call.
1818   uint8_t element_tile;
1819 };
1820 
1821 struct spmm_parameters {
1822   xnn_spmm_ukernel_function ukernel;
1823   // Number of M-dimension elements in a tile.
1824   // Corresponds to a block of pixels in 1x1 Convolution and a block of batch size in Fully Connected operator.
1825   uint8_t mr;
1826   // Number of N-dimension elements in a tile.
1827   // Corresponds to a block of output channels/features in 1x1 Convolution and Fully Connected operator.
1828   uint8_t nr;
1829 };
1830 
1831 struct conv_hwc2chw_parameters {
1832   xnn_conv_hwc2chw_ukernel_function ukernel_with_symm_padding;
1833   // Number of output channels in a tile.
1834   // This parameter must be passed as is to weight packing function.
1835   uint8_t output_channel_tile;
1836   // Number of output height pixels in a tile.
1837   // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
1838   uint8_t output_height_tile;
1839   // Number of output width pixes in a tile.
1840   uint8_t output_width_tile;
1841 };
1842 
1843 struct dwconv2d_chw_parameters {
1844   xnn_dwconv2d_chw_ukernel_function ukernel;
1845   // Number of output width pixels in a tile.
1846   uint8_t output_width_tile;
1847   // Number of output height pixels in a tile.
1848   // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
1849   uint8_t output_height_tile;
1850 };
1851 
1852 struct gavgpool_cw_parameters {
1853   xnn_gavgpool_cw_ukernel_function ukernel;
1854   // Number of channels in a tile.
1855   // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
1856   uint8_t channel_tile;
1857 };
1858 
1859 union dwconv_fused_ukernels {
1860   xnn_dwconv_unipass_ukernel_function unipass;
1861   xnn_dwconv_multipass_ukernel_function multipass;
1862 };
1863 
1864 struct dwconv_parameters {
1865   union dwconv_fused_ukernels minmax;
1866   union dwconv_fused_ukernels linear;
1867   uint8_t channel_tile;
1868   uint8_t primary_tile;
1869   uint8_t incremental_tile;
1870 };
1871 
1872 struct depthtospace2d_chw2hwc_parameters {
1873   xnn_depthtospace2d_chw2hwc_ukernel_function ukernel;
1874   // Number of output pixels in a tile.
1875   // For best efficiency, micro-kernel must produce a multiple of this number of pixels in each call.
1876   uint8_t pixel_tile;
1877   // Number of channels in a tile.
1878   // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
1879   uint8_t channel_tile;
1880 };
1881 
1882 struct gavgpool_parameters {
1883   xnn_gavgpool_unipass_ukernel_function up;
1884   xnn_gavgpool_multipass_ukernel_function mp;
1885   uint8_t mr;
1886 };
1887 
1888 struct avgpool_parameters {
1889   xnn_avgpool_unipass_ukernel_function up;
1890   xnn_avgpool_multipass_ukernel_function mp;
1891   uint8_t mr;
1892   uint8_t qr;
1893 };
1894 
1895 struct pavgpool_parameters {
1896   xnn_pavgpool_unipass_ukernel_function up;
1897   xnn_pavgpool_multipass_ukernel_function mp;
1898   uint8_t mr;
1899   uint8_t qr;
1900 };
1901 
1902 struct argmaxpool_parameters {
1903   union {
1904     xnn_argmaxpool_unipass_ukernel_function up;
1905     xnn_argmaxpool_multipass_ukernel_function mp;
1906   };
1907   uint8_t mr;
1908   uint8_t qr;
1909 };
1910 
1911 struct maxpool_parameters {
1912   xnn_maxpool_ukernel_function ukernel;
1913   uint8_t mr;
1914   uint8_t qr;
1915 };
1916 
1917 struct ibilinear_parameters {
1918   xnn_ibilinear_ukernel_function ukernel;
1919   // Number of output pixels in a tile.
1920   // For best efficiency, micro-kernel must produce a multiple of this number of pixels in each call.
1921   uint8_t pixel_tile;
1922   // Number of channels in a tile.
1923   // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
1924   uint8_t channel_tile;
1925 };
1926 
1927 struct ibilinear_chw_parameters {
1928   xnn_ibilinear_chw_ukernel_function ukernel;
1929   // Number of output pixels in a tile.
1930   // For best efficiency, micro-kernel must produce a multiple of this number of pixels in each call.
1931   uint8_t pixel_tile;
1932   // Number of channels in a tile.
1933   // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
1934   uint8_t channel_tile;
1935 };
1936 
1937 struct zip_parameters {
1938   xnn_zipc_ukernel_function x2;
1939   xnn_zipc_ukernel_function x3;
1940   xnn_zipc_ukernel_function x4;
1941   xnn_zipv_ukernel_function xm;
1942 };
1943 
1944 struct prelu_parameters {
1945   xnn_prelu_ukernel_function ukernel;
1946   uint16_t row_tile;
1947   uint16_t channel_tile;
1948 };
1949 
1950 struct fill_parameters {
1951   xnn_fill_ukernel_function ukernel;
1952   // Number of rows of inputs processed in one tile.
1953   // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
1954   uint8_t row_tile;
1955 };
1956 
1957 struct pad_parameters {
1958   xnn_pad_ukernel_function ukernel;
1959   // Number of rows of inputs processed in one tile.
1960   // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
1961   uint8_t row_tile;
1962 };
1963 
1964 struct vmulcaddc_parameters {
1965   xnn_vmulcaddc_ukernel_function ukernel;
1966   uint8_t channel_tile;
1967   uint8_t row_tile;
1968 };
1969 
// Capacities of the per-data-type micro-kernel arrays in struct xnn_parameters.
#define XNN_MAX_QS8_DWCONV_UKERNELS 1
#define XNN_MAX_QU8_DWCONV_UKERNELS 1
#define XNN_MAX_F16_DWCONV_UKERNELS 3
#define XNN_MAX_F32_DWCONV_UKERNELS 3
#define XNN_MAX_F32_ARGMAXPOOL_UKERNELS 3
1975 
// Initialization flags. Each flag occupies a distinct bit so that flags can be
// combined with bitwise OR.

// Indicates that XNNPACK as a whole has initialized.
// This does not guarantee that any particular microkernels are available.
#define XNN_INIT_FLAG_XNNPACK 0x00000001
// Indicates that F32 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_F32     0x00000002
// Indicates that X32 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_X32     0x00000004
// Indicates that F16 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_F16     0x00000008
// Indicates that X16 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_X16     0x00000010
// Indicates that QS8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_QS8     0x00000020
// Indicates that QU8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_QU8     0x00000040
// Indicates that U8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_U8      0x00000080
// Indicates that X8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_X8      0x00000100
// Indicates that XX XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_XX      0x00000200
// Indicates that CHW XNNPACK microkernels are optimized for the host platform.
#define XNN_INIT_FLAG_CHW_OPT 0x00000400
1999 
2000 struct xnn_parameters {
2001   // Bitwise combination of XNN_INIT_FLAG_* flags
2002   uint32_t init_flags;
2003   struct xnn_allocator allocator;
2004   struct {
2005     xnn_univector_ukernel_function copy;
2006   } xx;
2007   struct {
2008     struct gemm_parameters gemm;
2009     struct dwconv_parameters dwconv[XNN_MAX_QS8_DWCONV_UKERNELS];
2010     struct gavgpool_parameters gavgpool;
2011     struct vbinary_parameters vadd;
2012   } qs8;
2013   struct {
2014     struct gemm_parameters gemm;
2015     struct dwconv_parameters dwconv[XNN_MAX_QU8_DWCONV_UKERNELS];
2016     struct avgpool_parameters avgpool;
2017     struct gavgpool_parameters gavgpool;
2018     xnn_vadd_ukernel_function vadd;
2019   } qu8;
2020   struct {
2021     struct maxpool_parameters maxpool;
2022     xnn_univector_ukernel_function clamp;
2023     xnn_u8_lut32norm_ukernel_function lut32norm;
2024     xnn_u8_rmax_ukernel_function rmax;
2025   } u8;
2026   struct {
2027     xnn_x8_lut_ukernel_function lut;
2028     struct zip_parameters zip;
2029   } x8;
2030   struct {
2031     struct gavgpool_parameters gavgpool;
2032     struct gemm_parameters gemm;
2033     struct gemm_parameters gemm2;
2034     struct dwconv_parameters dwconv[XNN_MAX_F16_DWCONV_UKERNELS];
2035     xnn_univector_ukernel_function hswish;
2036     struct vbinary_parameters vadd;
2037     struct vbinary_parameters vmul;
2038     struct vmulcaddc_parameters vmulcaddc;
2039   } f16;
2040   struct {  // Entries grouped under the `f32` member of the enclosing parameters struct.
2041     struct gemm_parameters gemm;
2042     struct gemm_parameters gemm2;  // Second slot of the same type as `gemm`; NOTE(review): how the two differ is not visible here - confirm at the initialization site.
2043     struct dwconv_parameters dwconv[XNN_MAX_F32_DWCONV_UKERNELS];  // Fixed-capacity array; element count comes from XNN_MAX_F32_DWCONV_UKERNELS.
2044     struct avgpool_parameters avgpool;
2045     struct pavgpool_parameters pavgpool;
2046     struct gavgpool_parameters gavgpool;
2047     struct maxpool_parameters maxpool;
2048     struct argmaxpool_parameters argmaxpool[XNN_MAX_F32_ARGMAXPOOL_UKERNELS];  // Fixed-capacity array; element count comes from XNN_MAX_F32_ARGMAXPOOL_UKERNELS.
2049     // Bilinear interpolation (2D).
2050     struct ibilinear_parameters ibilinear;
2051     xnn_univector_ukernel_function abs;  // `abs` through `sqrt` below: one entry of the same univector type per operation name.
2052     xnn_univector_ukernel_function clamp;
2053     xnn_univector_ukernel_function elu;
2054     xnn_univector_ukernel_function hswish;
2055     xnn_univector_ukernel_function lrelu;
2056     xnn_univector_ukernel_function neg;
2057     xnn_univector_ukernel_function relu;
2058     xnn_univector_ukernel_function rndne;
2059     xnn_univector_ukernel_function rndz;
2060     xnn_univector_ukernel_function rndu;
2061     xnn_univector_ukernel_function rndd;
2062     xnn_univector_ukernel_function sigmoid;
2063     xnn_univector_ukernel_function sqr;
2064     xnn_univector_ukernel_function sqrt;
2065     struct prelu_parameters prelu;
2066     struct vbinary_parameters vadd;  // `vadd` through `vsqrdiff` below: one vbinary_parameters struct per operation name.
2067     struct vbinary_parameters vdiv;
2068     struct vbinary_parameters vmax;
2069     struct vbinary_parameters vmin;
2070     struct vbinary_parameters vmul;
2071     struct vbinary_parameters vsub;
2072     struct vbinary_parameters vsqrdiff;
2073     struct vmulcaddc_parameters vmulcaddc;
2074     xnn_f32_raddstoreexpminusmax_ukernel_function raddstoreexpminusmax;
2075     xnn_f32_rmax_ukernel_function rmax;
2076     // Sparse Matrix-Dense Matrix Multiplication (NR=1 block).
2077     struct spmm_parameters spmm;
2078     // Sparse Matrix-Dense Matrix Multiplication (NR=2 block).
2079     struct spmm_parameters spmm2;
2080     // Sparse Matrix-Dense Matrix Multiplication (NR=4 block).
2081     struct spmm_parameters spmm4;
2082     // Direct 3x3 stride-2 Convolution with 3 input channels and HWC->CHW layout conversion.
2083     struct conv_hwc2chw_parameters conv_hwc2chw_3x3c3s2;
2084     // Direct 3x3 stride-1 Convolution with padding 1 on left and right in CHW layout.
2085     struct dwconv2d_chw_parameters dwconv2d_chw_3x3;
2086     // Direct 3x3 stride-2 Convolution with padding 1 on left and right in CHW layout.
2087     struct dwconv2d_chw_parameters dwconv2d_chw_3x3s2;
2088     // Direct 5x5 stride-1 Convolution with padding 2 on left and right in CHW layout.
2089     struct dwconv2d_chw_parameters dwconv2d_chw_5x5;
2090     // Direct 5x5 stride-2 Convolution with padding 2 on left and right in CHW layout.
2091     struct dwconv2d_chw_parameters dwconv2d_chw_5x5s2;
2092     // Global Average Pooling in CW layout.
2093     struct gavgpool_cw_parameters gavgpool_cw;
2094     // Bilinear interpolation (2D) in CHW layout.
2095     struct ibilinear_chw_parameters ibilinear_chw;
2096   } f32;
2097   struct {  // Entries grouped under the `x32` member of the enclosing parameters struct.
2098     struct pad_parameters pad;
2099     struct fill_parameters fill;
2100     xnn_unpool_ukernel_function unpool;  // Typedef declared outside this view; presumably a micro-kernel function pointer - TODO confirm.
2101     struct zip_parameters zip;  // Same struct type as the `zip` entry of the `x8` group.
2102     // Depth To Space 2D with CHW->HWC layout conversion.
2103     struct depthtospace2d_chw2hwc_parameters depthtospace2d_chw2hwc;
2104   } x32;
2105 };
2106 
2107 #ifdef __cplusplus  // Declares the global `xnn_params` object; both branches name the same variable.
2108 extern "C" XNN_INTERNAL struct xnn_parameters xnn_params;  // C++ includers: request C language linkage for the declaration.
2109 #else
2110 extern XNN_INTERNAL struct xnn_parameters xnn_params;  // C includers: plain extern declaration (no initializer, so no definition here).
2111 #endif  // __cplusplus
2112