1# Copyright 2019 Google LLC
2#
3# This source code is licensed under the BSD-style license found in the
4# LICENSE file in the root directory of this source tree.
5
6# AArch32 assembly
# Every entry in this group uses `init: xnn_init_f32_minmax_scalar_params`
# and sets `assembly: true`.
# - The vfp_ld64, neon_cortex_a7 and neon_ld64 entries use `k-block: 2` and
#   carry no `pipelined` key.
# - The cortex_a53, cortex_a55, cortex_a75, prfm_cortex_a53 and
#   prfm_cortex_a75 entries use `k-block: 4` together with `pipelined: true`.
# NOTE(review): the meaning of `k-block` and `pipelined` is defined by the
# tool that consumes this list, which is not visible here.
7- name: xnn_f32_gemm_minmax_ukernel_4x4__aarch32_vfp_ld64
8  init: xnn_init_f32_minmax_scalar_params
9  k-block: 2
10  assembly: true
11- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7
12  init: xnn_init_f32_minmax_scalar_params
13  k-block: 2
14  assembly: true
15- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53
16  init: xnn_init_f32_minmax_scalar_params
17  k-block: 4
18  assembly: true
19  pipelined: true
20- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55
21  init: xnn_init_f32_minmax_scalar_params
22  k-block: 4
23  assembly: true
24  pipelined: true
25- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75
26  init: xnn_init_f32_minmax_scalar_params
27  k-block: 4
28  assembly: true
29  pipelined: true
30- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_ld64
31  init: xnn_init_f32_minmax_scalar_params
32  k-block: 2
33  assembly: true
34- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53
35  init: xnn_init_f32_minmax_scalar_params
36  k-block: 4
37  assembly: true
38  pipelined: true
39- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75
40  init: xnn_init_f32_minmax_scalar_params
41  k-block: 4
42  assembly: true
43  pipelined: true
44# AArch64 assembly
# Every entry in this group uses `init: xnn_init_f32_minmax_scalar_params`
# and sets `assembly: true`.
# - `*_ld64` entries use `k-block: 2` and `*_ld128` entries use `k-block: 4`;
#   neither kind carries a `pipelined` key.
# - `cortex_a73` / `cortex_a75` entries (with or without `prfm`) use
#   `k-block: 8`.
# - `cortex_a53` / `cortex_a55` entries use `k-block: 4`, except the two 1x8
#   cortex_a53 entries (plain and prfm), which use `k-block: 8`.
# - All `cortex_*` entries set `pipelined: true` except the two 4x2
#   cortex_a75 entries (plain and prfm), which omit the key.
45- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53
46  init: xnn_init_f32_minmax_scalar_params
47  k-block: 8
48  assembly: true
49  pipelined: true
50- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75
51  init: xnn_init_f32_minmax_scalar_params
52  k-block: 8
53  assembly: true
54  pipelined: true
55- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64
56  init: xnn_init_f32_minmax_scalar_params
57  k-block: 2
58  assembly: true
59- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53
60  init: xnn_init_f32_minmax_scalar_params
61  k-block: 8
62  assembly: true
63  pipelined: true
64- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75
65  init: xnn_init_f32_minmax_scalar_params
66  k-block: 8
67  assembly: true
68  pipelined: true
69- name: xnn_f32_gemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53
70  init: xnn_init_f32_minmax_scalar_params
71  k-block: 4
72  assembly: true
73  pipelined: true
74- name: xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75
75  init: xnn_init_f32_minmax_scalar_params
76  k-block: 8
77  assembly: true
78- name: xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_ld64
79  init: xnn_init_f32_minmax_scalar_params
80  k-block: 2
81  assembly: true
82- name: xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75
83  init: xnn_init_f32_minmax_scalar_params
84  k-block: 8
85  assembly: true
86- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53
87  init: xnn_init_f32_minmax_scalar_params
88  k-block: 4
89  assembly: true
90  pipelined: true
91- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55
92  init: xnn_init_f32_minmax_scalar_params
93  k-block: 4
94  assembly: true
95  pipelined: true
96- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75
97  init: xnn_init_f32_minmax_scalar_params
98  k-block: 8
99  assembly: true
100  pipelined: true
101- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld64
102  init: xnn_init_f32_minmax_scalar_params
103  k-block: 2
104  assembly: true
105- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld128
106  init: xnn_init_f32_minmax_scalar_params
107  k-block: 4
108  assembly: true
109- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53
110  init: xnn_init_f32_minmax_scalar_params
111  k-block: 4
112  assembly: true
113  pipelined: true
114- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75
115  init: xnn_init_f32_minmax_scalar_params
116  k-block: 8
117  assembly: true
118  pipelined: true
119- name: xnn_f32_gemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53
120  init: xnn_init_f32_minmax_scalar_params
121  k-block: 4
122  assembly: true
123  pipelined: true
124- name: xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75
125  init: xnn_init_f32_minmax_scalar_params
126  k-block: 8
127  assembly: true
128  pipelined: true
129- name: xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75
130  init: xnn_init_f32_minmax_scalar_params
131  k-block: 8
132  assembly: true
133  pipelined: true
134- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53
135  init: xnn_init_f32_minmax_scalar_params
136  k-block: 4
137  assembly: true
138  pipelined: true
139- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55
140  init: xnn_init_f32_minmax_scalar_params
141  k-block: 4
142  assembly: true
143  pipelined: true
144- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73
145  init: xnn_init_f32_minmax_scalar_params
146  k-block: 8
147  assembly: true
148  pipelined: true
149- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75
150  init: xnn_init_f32_minmax_scalar_params
151  k-block: 8
152  assembly: true
153  pipelined: true
154- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64
155  init: xnn_init_f32_minmax_scalar_params
156  k-block: 2
157  assembly: true
158- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128
159  init: xnn_init_f32_minmax_scalar_params
160  k-block: 4
161  assembly: true
162- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53
163  init: xnn_init_f32_minmax_scalar_params
164  k-block: 4
165  assembly: true
166  pipelined: true
167- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75
168  init: xnn_init_f32_minmax_scalar_params
169  k-block: 8
170  assembly: true
171  pipelined: true
172# ARM NEON
# Every entry in this group uses `init: xnn_init_f32_minmax_scalar_params`;
# none sets `assembly` or `pipelined`.
# - `*_ld64` entries use `k-block: 2`; `*_ld128` and `*s4__*` entries use
#   `k-block: 4`.
# - Every `neonfma_lane_*` entry carries an `arch` list containing only
#   `aarch64`; no other entry in the group has an `arch` key.
# NOTE(review): the `arch` list presumably restricts where the entry applies;
# confirm against the consumer of this file.
173- name: xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64
174  init: xnn_init_f32_minmax_scalar_params
175  k-block: 2
176- name: xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64
177  init: xnn_init_f32_minmax_scalar_params
178  k-block: 2
179- name: xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64
180  init: xnn_init_f32_minmax_scalar_params
181  k-block: 2
182- name: xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64
183  init: xnn_init_f32_minmax_scalar_params
184  k-block: 2
185  arch:
186    - aarch64
187- name: xnn_f32_gemm_minmax_ukernel_1x8s4__neon
188  init: xnn_init_f32_minmax_scalar_params
189  k-block: 4
190- name: xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma
191  init: xnn_init_f32_minmax_scalar_params
192  k-block: 4
193- name: xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64
194  init: xnn_init_f32_minmax_scalar_params
195  k-block: 2
196- name: xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64
197  init: xnn_init_f32_minmax_scalar_params
198  k-block: 2
199  arch:
200    - aarch64
201- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld64
202  init: xnn_init_f32_minmax_scalar_params
203  k-block: 2
204- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128
205  init: xnn_init_f32_minmax_scalar_params
206  k-block: 4
207- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64
208  init: xnn_init_f32_minmax_scalar_params
209  k-block: 2
210- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128
211  init: xnn_init_f32_minmax_scalar_params
212  k-block: 4
213- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64
214  init: xnn_init_f32_minmax_scalar_params
215  k-block: 2
216- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld128
217  init: xnn_init_f32_minmax_scalar_params
218  k-block: 4
219- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64
220  init: xnn_init_f32_minmax_scalar_params
221  k-block: 2
222  arch:
223    - aarch64
224- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128
225  init: xnn_init_f32_minmax_scalar_params
226  k-block: 4
227  arch:
228    - aarch64
229- name: xnn_f32_gemm_minmax_ukernel_4x8s4__neon
230  init: xnn_init_f32_minmax_scalar_params
231  k-block: 4
232- name: xnn_f32_gemm_minmax_ukernel_4x8s4__neonfma
233  init: xnn_init_f32_minmax_scalar_params
234  k-block: 4
235- name: xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64
236  init: xnn_init_f32_minmax_scalar_params
237  k-block: 2
238- name: xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64
239  init: xnn_init_f32_minmax_scalar_params
240  k-block: 2
241  arch:
242    - aarch64
243- name: xnn_f32_gemm_minmax_ukernel_6x2__neon_lane_ld64
244  init: xnn_init_f32_minmax_scalar_params
245  k-block: 2
246- name: xnn_f32_gemm_minmax_ukernel_6x2__neonfma_lane_ld64
247  init: xnn_init_f32_minmax_scalar_params
248  k-block: 2
249  arch:
250    - aarch64
251- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64
252  init: xnn_init_f32_minmax_scalar_params
253  k-block: 2
254- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128
255  init: xnn_init_f32_minmax_scalar_params
256  k-block: 4
257- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64
258  init: xnn_init_f32_minmax_scalar_params
259  k-block: 2
260- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128
261  init: xnn_init_f32_minmax_scalar_params
262  k-block: 4
263- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64
264  init: xnn_init_f32_minmax_scalar_params
265  k-block: 2
266- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld128
267  init: xnn_init_f32_minmax_scalar_params
268  k-block: 4
269- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64
270  init: xnn_init_f32_minmax_scalar_params
271  k-block: 2
272  arch:
273    - aarch64
274- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128
275  init: xnn_init_f32_minmax_scalar_params
276  k-block: 4
277  arch:
278    - aarch64
279- name: xnn_f32_gemm_minmax_ukernel_6x8s4__neon
280  init: xnn_init_f32_minmax_scalar_params
281  k-block: 4
282- name: xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma
283  init: xnn_init_f32_minmax_scalar_params
284  k-block: 4
285- name: xnn_f32_gemm_minmax_ukernel_8x8s4__neon
286  init: xnn_init_f32_minmax_scalar_params
287  k-block: 4
288- name: xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma
289  init: xnn_init_f32_minmax_scalar_params
290  k-block: 4
291# x86 SSE
# Every entry in this group uses `init: xnn_init_f32_minmax_sse_params` and
# has only the `name`, `init` and `k-block` keys.
# - `*_load1` entries use `k-block: 1`.
# - All other entries (`sse_dup`, `sse2_dup`, `*s4__sse`, `4x2c4__sse`) use
#   `k-block: 4`.
292- name: xnn_f32_gemm_minmax_ukernel_1x8__sse_dup
293  init: xnn_init_f32_minmax_sse_params
294  k-block: 4
295- name: xnn_f32_gemm_minmax_ukernel_1x8__sse_load1
296  init: xnn_init_f32_minmax_sse_params
297  k-block: 1
298- name: xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup
299  init: xnn_init_f32_minmax_sse_params
300  k-block: 4
301- name: xnn_f32_gemm_minmax_ukernel_1x8s4__sse
302  init: xnn_init_f32_minmax_sse_params
303  k-block: 4
304- name: xnn_f32_gemm_minmax_ukernel_3x8__sse_dup
305  init: xnn_init_f32_minmax_sse_params
306  k-block: 4
307- name: xnn_f32_gemm_minmax_ukernel_3x8__sse_load1
308  init: xnn_init_f32_minmax_sse_params
309  k-block: 1
310- name: xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup
311  init: xnn_init_f32_minmax_sse_params
312  k-block: 4
313- name: xnn_f32_gemm_minmax_ukernel_3x8s4__sse
314  init: xnn_init_f32_minmax_sse_params
315  k-block: 4
316- name: xnn_f32_gemm_minmax_ukernel_4x2c4__sse
317  init: xnn_init_f32_minmax_sse_params
318  k-block: 4
319- name: xnn_f32_gemm_minmax_ukernel_4x8__sse_dup
320  init: xnn_init_f32_minmax_sse_params
321  k-block: 4
322- name: xnn_f32_gemm_minmax_ukernel_4x8__sse_load1
323  init: xnn_init_f32_minmax_sse_params
324  k-block: 1
325- name: xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup
326  init: xnn_init_f32_minmax_sse_params
327  k-block: 4
328- name: xnn_f32_gemm_minmax_ukernel_4x8s4__sse
329  init: xnn_init_f32_minmax_sse_params
330  k-block: 4
331- name: xnn_f32_gemm_minmax_ukernel_5x8__sse_dup
332  init: xnn_init_f32_minmax_sse_params
333  k-block: 4
334- name: xnn_f32_gemm_minmax_ukernel_5x8__sse_load1
335  init: xnn_init_f32_minmax_sse_params
336  k-block: 1
337- name: xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup
338  init: xnn_init_f32_minmax_sse_params
339  k-block: 4
340- name: xnn_f32_gemm_minmax_ukernel_5x8s4__sse
341  init: xnn_init_f32_minmax_sse_params
342  k-block: 4
343# x86 AVX
# Every entry in this group is an `avx_broadcast` variant with
# `init: xnn_init_f32_minmax_avx_params` and `k-block: 1`; no other keys
# are set.
344- name: xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast
345  init: xnn_init_f32_minmax_avx_params
346  k-block: 1
347- name: xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast
348  init: xnn_init_f32_minmax_avx_params
349  k-block: 1
350- name: xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast
351  init: xnn_init_f32_minmax_avx_params
352  k-block: 1
353- name: xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast
354  init: xnn_init_f32_minmax_avx_params
355  k-block: 1
356- name: xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast
357  init: xnn_init_f32_minmax_avx_params
358  k-block: 1
359- name: xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast
360  init: xnn_init_f32_minmax_avx_params
361  k-block: 1
362- name: xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast
363  init: xnn_init_f32_minmax_avx_params
364  k-block: 1
365- name: xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast
366  init: xnn_init_f32_minmax_avx_params
367  k-block: 1
368- name: xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast
369  init: xnn_init_f32_minmax_avx_params
370  k-block: 1
371# x86 FMA3
# Every entry in this group is an `fma3_broadcast` variant and uses
# `init: xnn_init_f32_minmax_avx_params` (the same init value as the AVX
# entries in this file).
# - `*s4__fma3_broadcast` entries use `k-block: 4`.
# - All other entries use `k-block: 1`.
372- name: xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast
373  init: xnn_init_f32_minmax_avx_params
374  k-block: 1
375- name: xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast
376  init: xnn_init_f32_minmax_avx_params
377  k-block: 1
378- name: xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast
379  init: xnn_init_f32_minmax_avx_params
380  k-block: 4
381- name: xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast
382  init: xnn_init_f32_minmax_avx_params
383  k-block: 1
384- name: xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast
385  init: xnn_init_f32_minmax_avx_params
386  k-block: 4
387- name: xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast
388  init: xnn_init_f32_minmax_avx_params
389  k-block: 1
390- name: xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast
391  init: xnn_init_f32_minmax_avx_params
392  k-block: 1
393- name: xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast
394  init: xnn_init_f32_minmax_avx_params
395  k-block: 4
396- name: xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast
397  init: xnn_init_f32_minmax_avx_params
398  k-block: 1
399- name: xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast
400  init: xnn_init_f32_minmax_avx_params
401  k-block: 1
402- name: xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast
403  init: xnn_init_f32_minmax_avx_params
404  k-block: 4
405- name: xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast
406  init: xnn_init_f32_minmax_avx_params
407  k-block: 1
408- name: xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast
409  init: xnn_init_f32_minmax_avx_params
410  k-block: 1
411- name: xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast
412  init: xnn_init_f32_minmax_avx_params
413  k-block: 1
414# x86 AVX512
# Every entry in this group is an `Nx16__avx512f_broadcast` variant with
# `k-block: 1`.
# Unlike the AVX and FMA3 groups in this file, these entries use
# `init: xnn_init_f32_minmax_scalar_params`.
415- name: xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast
416  init: xnn_init_f32_minmax_scalar_params
417  k-block: 1
418- name: xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast
419  init: xnn_init_f32_minmax_scalar_params
420  k-block: 1
421- name: xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast
422  init: xnn_init_f32_minmax_scalar_params
423  k-block: 1
424- name: xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast
425  init: xnn_init_f32_minmax_scalar_params
426  k-block: 1
427- name: xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast
428  init: xnn_init_f32_minmax_scalar_params
429  k-block: 1
430- name: xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast
431  init: xnn_init_f32_minmax_scalar_params
432  k-block: 1
433# WAsm SIMD
# Every entry in this group uses `init: xnn_init_f32_minmax_wasmsimd_params`
# and has only the `name`, `init` and `k-block` keys.
# - `*_loadsplat` entries use `k-block: 1`.
# - `*_splat`, `*s4__*` and `4x2c4__*` entries use `k-block: 4`.
# - Each variant is listed in a `wasmsimd_arm` and a `wasmsimd_x86` flavour.
434- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat
435  init: xnn_init_f32_minmax_wasmsimd_params
436  k-block: 1
437- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat
438  init: xnn_init_f32_minmax_wasmsimd_params
439  k-block: 4
440- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat
441  init: xnn_init_f32_minmax_wasmsimd_params
442  k-block: 1
443- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat
444  init: xnn_init_f32_minmax_wasmsimd_params
445  k-block: 4
446- name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm
447  init: xnn_init_f32_minmax_wasmsimd_params
448  k-block: 4
449- name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86
450  init: xnn_init_f32_minmax_wasmsimd_params
451  k-block: 4
452- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat
453  init: xnn_init_f32_minmax_wasmsimd_params
454  k-block: 1
455- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat
456  init: xnn_init_f32_minmax_wasmsimd_params
457  k-block: 4
458- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat
459  init: xnn_init_f32_minmax_wasmsimd_params
460  k-block: 1
461- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat
462  init: xnn_init_f32_minmax_wasmsimd_params
463  k-block: 4
464- name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm
465  init: xnn_init_f32_minmax_wasmsimd_params
466  k-block: 4
467- name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86
468  init: xnn_init_f32_minmax_wasmsimd_params
469  k-block: 4
470- name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm
471  init: xnn_init_f32_minmax_wasmsimd_params
472  k-block: 4
473- name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86
474  init: xnn_init_f32_minmax_wasmsimd_params
475  k-block: 4
476- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat
477  init: xnn_init_f32_minmax_wasmsimd_params
478  k-block: 1
479- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_splat
480  init: xnn_init_f32_minmax_wasmsimd_params
481  k-block: 4
482- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat
483  init: xnn_init_f32_minmax_wasmsimd_params
484  k-block: 1
485- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_splat
486  init: xnn_init_f32_minmax_wasmsimd_params
487  k-block: 4
488- name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_arm
489  init: xnn_init_f32_minmax_wasmsimd_params
490  k-block: 4
491- name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_x86
492  init: xnn_init_f32_minmax_wasmsimd_params
493  k-block: 4
494- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat
495  init: xnn_init_f32_minmax_wasmsimd_params
496  k-block: 1
497- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat
498  init: xnn_init_f32_minmax_wasmsimd_params
499  k-block: 4
500- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat
501  init: xnn_init_f32_minmax_wasmsimd_params
502  k-block: 1
503- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat
504  init: xnn_init_f32_minmax_wasmsimd_params
505  k-block: 4
506- name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm
507  init: xnn_init_f32_minmax_wasmsimd_params
508  k-block: 4
509- name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86
510  init: xnn_init_f32_minmax_wasmsimd_params
511  k-block: 4
512- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat
513  init: xnn_init_f32_minmax_wasmsimd_params
514  k-block: 1
515- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat
516  init: xnn_init_f32_minmax_wasmsimd_params
517  k-block: 4
518- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat
519  init: xnn_init_f32_minmax_wasmsimd_params
520  k-block: 1
521- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat
522  init: xnn_init_f32_minmax_wasmsimd_params
523  k-block: 4
524- name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm
525  init: xnn_init_f32_minmax_wasmsimd_params
526  k-block: 4
527- name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86
528  init: xnn_init_f32_minmax_wasmsimd_params
529  k-block: 4
530# WAsm Relaxed SIMD
# Every entry in this group uses `init: xnn_init_f32_minmax_wasmsimd_params`
# (the same init value as the WAsm SIMD entries in this file) and has only
# the `name`, `init` and `k-block` keys.
# - `*_loadsplat` entries use `k-block: 1`.
# - `*_splat`, `*s4__*` and `4x2c4__*` entries use `k-block: 4`.
# - Each variant is listed in a plain `wasmrelaxedsimd` and a
#   `wasmrelaxedsimd_fma` flavour.
531- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat
532  init: xnn_init_f32_minmax_wasmsimd_params
533  k-block: 1
534- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat
535  init: xnn_init_f32_minmax_wasmsimd_params
536  k-block: 4
537- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat
538  init: xnn_init_f32_minmax_wasmsimd_params
539  k-block: 1
540- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat
541  init: xnn_init_f32_minmax_wasmsimd_params
542  k-block: 4
543- name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd
544  init: xnn_init_f32_minmax_wasmsimd_params
545  k-block: 4
546- name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma
547  init: xnn_init_f32_minmax_wasmsimd_params
548  k-block: 4
549- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat
550  init: xnn_init_f32_minmax_wasmsimd_params
551  k-block: 1
552- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat
553  init: xnn_init_f32_minmax_wasmsimd_params
554  k-block: 4
555- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat
556  init: xnn_init_f32_minmax_wasmsimd_params
557  k-block: 1
558- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat
559  init: xnn_init_f32_minmax_wasmsimd_params
560  k-block: 4
561- name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmrelaxedsimd
562  init: xnn_init_f32_minmax_wasmsimd_params
563  k-block: 4
564- name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma
565  init: xnn_init_f32_minmax_wasmsimd_params
566  k-block: 4
567- name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmrelaxedsimd
568  init: xnn_init_f32_minmax_wasmsimd_params
569  k-block: 4
570- name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma
571  init: xnn_init_f32_minmax_wasmsimd_params
572  k-block: 4
573- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat
574  init: xnn_init_f32_minmax_wasmsimd_params
575  k-block: 1
576- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat
577  init: xnn_init_f32_minmax_wasmsimd_params
578  k-block: 4
579- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat
580  init: xnn_init_f32_minmax_wasmsimd_params
581  k-block: 1
582- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat
583  init: xnn_init_f32_minmax_wasmsimd_params
584  k-block: 4
585- name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmrelaxedsimd
586  init: xnn_init_f32_minmax_wasmsimd_params
587  k-block: 4
588- name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma
589  init: xnn_init_f32_minmax_wasmsimd_params
590  k-block: 4
591- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat
592  init: xnn_init_f32_minmax_wasmsimd_params
593  k-block: 1
594- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat
595  init: xnn_init_f32_minmax_wasmsimd_params
596  k-block: 4
597- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat
598  init: xnn_init_f32_minmax_wasmsimd_params
599  k-block: 1
600- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat
601  init: xnn_init_f32_minmax_wasmsimd_params
602  k-block: 4
603- name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd
604  init: xnn_init_f32_minmax_wasmsimd_params
605  k-block: 4
606- name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma
607  init: xnn_init_f32_minmax_wasmsimd_params
608  k-block: 4
609- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat
610  init: xnn_init_f32_minmax_wasmsimd_params
611  k-block: 1
612- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat
613  init: xnn_init_f32_minmax_wasmsimd_params
614  k-block: 4
615- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat
616  init: xnn_init_f32_minmax_wasmsimd_params
617  k-block: 1
618- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat
619  init: xnn_init_f32_minmax_wasmsimd_params
620  k-block: 4
621- name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd
622  init: xnn_init_f32_minmax_wasmsimd_params
623  k-block: 4
624- name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma
625  init: xnn_init_f32_minmax_wasmsimd_params
626  k-block: 4
627# WAsm
# Every entry in this group is a `__wasm` variant with
# `init: xnn_init_f32_minmax_scalar_params` and `k-block: 1`.
# The tile sizes listed (1x4, 2x4, 4x2, 4x4) are the same as in the Scalar
# group of this file.
628- name: xnn_f32_gemm_minmax_ukernel_1x4__wasm
629  init: xnn_init_f32_minmax_scalar_params
630  k-block: 1
631- name: xnn_f32_gemm_minmax_ukernel_2x4__wasm
632  init: xnn_init_f32_minmax_scalar_params
633  k-block: 1
634- name: xnn_f32_gemm_minmax_ukernel_4x2__wasm
635  init: xnn_init_f32_minmax_scalar_params
636  k-block: 1
637- name: xnn_f32_gemm_minmax_ukernel_4x4__wasm
638  init: xnn_init_f32_minmax_scalar_params
639  k-block: 1
640# Scalar
# Every entry in this group is a `__scalar` variant with
# `init: xnn_init_f32_minmax_scalar_params` and `k-block: 1`; no other keys
# are set.
641- name: xnn_f32_gemm_minmax_ukernel_1x4__scalar
642  init: xnn_init_f32_minmax_scalar_params
643  k-block: 1
644- name: xnn_f32_gemm_minmax_ukernel_2x4__scalar
645  init: xnn_init_f32_minmax_scalar_params
646  k-block: 1
647- name: xnn_f32_gemm_minmax_ukernel_4x2__scalar
648  init: xnn_init_f32_minmax_scalar_params
649  k-block: 1
650- name: xnn_f32_gemm_minmax_ukernel_4x4__scalar
651  init: xnn_init_f32_minmax_scalar_params
652  k-block: 1
653# AArch32 JIT assembly
# Entries in this group are named `xnn_generate_f32_gemm_ukernel_*` rather
# than `xnn_f32_gemm_minmax_ukernel_*`; all use
# `init: xnn_init_f32_minmax_scalar_params`.
# - cortex_a7 and ld64 use `k-block: 2` with no `pipelined` key.
# - cortex_a53, cortex_a55, cortex_a75 and prfm_cortex_a75 use `k-block: 4`
#   with `pipelined: true`.
654- name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7
655  init: xnn_init_f32_minmax_scalar_params
656  k-block: 2
657  assembly: true
658- name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a53
659  init: xnn_init_f32_minmax_scalar_params
660  k-block: 4
661  pipelined: true
662  assembly: true
# NOTE(review): the cortex_a55 entry below is the only one in this group
# without `assembly: true` — confirm whether the omission is intentional.
663- name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55
664  init: xnn_init_f32_minmax_scalar_params
665  k-block: 4
666  pipelined: true
667- name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75
668  init: xnn_init_f32_minmax_scalar_params
669  k-block: 4
670  pipelined: true
671  assembly: true
672- name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_ld64
673  init: xnn_init_f32_minmax_scalar_params
674  k-block: 2
675  assembly: true
676- name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75
677  init: xnn_init_f32_minmax_scalar_params
678  k-block: 4
679  pipelined: true
680  assembly: true
681# AArch64 JIT assembly
# Entries in this group are named `xnn_generate_f32_gemm_ukernel_*`; all use
# `init: xnn_init_f32_minmax_scalar_params`.
# - The cortex_a75 entries (plain and prfm; upto6x8, 1x8, 4x8) use
#   `k-block: 8` with `pipelined: true`.
# - The 6x8 ld128 entry uses `k-block: 4` and has no `pipelined` key.
# NOTE(review): no entry in this group sets `assembly: true`, whereas most
# AArch32 JIT entries in this file do — confirm this difference is intended.
682- name: xnn_generate_f32_gemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75
683  init: xnn_init_f32_minmax_scalar_params
684  k-block: 8
685  pipelined: true
686- name: xnn_generate_f32_gemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75
687  init: xnn_init_f32_minmax_scalar_params
688  k-block: 8
689  pipelined: true
690- name: xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75
691  init: xnn_init_f32_minmax_scalar_params
692  k-block: 8
693  pipelined: true
694- name: xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75
695  init: xnn_init_f32_minmax_scalar_params
696  k-block: 8
697  pipelined: true
698- name: xnn_generate_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75
699  init: xnn_init_f32_minmax_scalar_params
700  k-block: 8
701  pipelined: true
702- name: xnn_generate_f32_gemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75
703  init: xnn_init_f32_minmax_scalar_params
704  k-block: 8
705  pipelined: true
706- name: xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_ld128
707  init: xnn_init_f32_minmax_scalar_params
708  k-block: 4
709