# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# Kernels tagged `aarch64_neonfma` in their names; every entry is flagged
# `assembly: true`. The Cortex-specific variants also set `pipelined: true`,
# the `ld64` variants do not.
# NOTE(review): `k-block` / `pipelined` semantics are defined by whatever
# consumes this file - confirm there before changing values.
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a57
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a57
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a57
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a57
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld64
  k-block: 2
  assembly: true
# Kernels tagged `aarch32_neon` / `aarch32_vfp` in their names; every entry is
# flagged `assembly: true`. Only the a53/a55/a75 variants set
# `pipelined: true`.
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_pld_cortex_a75
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_ld64
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x4__aarch32_vfp_ld64
  k-block: 2
  assembly: true
# Further `aarch64_neonfma` assembly kernels (`ld64` / `ld128` variants); none
# of these set `pipelined`.
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld128
  k-block: 4
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128
  k-block: 4
  assembly: true
# Kernels tagged `neon_lane`; in this group `ld64` entries use k-block 2 and
# `ld128` entries use k-block 4.
- name: xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128
  k-block: 4
# Kernels tagged `neonfma_lane`; each entry carries an `arch` list containing
# only `aarch64`.
- name: xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128
  k-block: 4
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128
  k-block: 4
  arch:
    - aarch64
# Kernels tagged `neon_dup` followed by their `neonfma_dup` counterparts; no
# `arch` restriction is listed for either set.
- name: xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld128
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld128
  k-block: 4
# `8s4` kernels for `neon`, then `neonfma`; all use k-block 4.
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__neon
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__neon
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__neon
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_8x8s4__neon
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__neonfma
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma
  k-block: 4
# Kernels tagged `sse` / `sse2`. The `load1` variants use k-block 1; the
# `dup`, `s4` and `c4` variants use k-block 4.
- name: xnn_f32_gemm_minmax_ukernel_1x8__sse_load1
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x8__sse_load1
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__sse_load1
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__sse_load1
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x8__sse_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__sse_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__sse_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__sse_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__sse
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8s4__sse
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__sse
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8s4__sse
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x2c4__sse
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup
  k-block: 4
# Kernels tagged `avx_broadcast` (x8 and x16 tile widths); all use k-block 1.
- name: xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast
  k-block: 1
# Kernels tagged `fma3_broadcast`. Plain x8 / x16 entries use k-block 1; the
# `16s4` entries use k-block 4.
- name: xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast
  k-block: 4
# Kernels tagged `avx512f_broadcast` (x16 tile width only); all use k-block 1.
- name: xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast
  k-block: 1
# Kernels tagged `wasmsimd`, each family listed in an `arm` and an `x86`
# flavour. `loadsplat` entries use k-block 1; `splat`, `s4` and `c4` entries
# use k-block 4.
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_arm
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_x86
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86
  k-block: 4
# Kernels tagged `wasm` (no SIMD tag in the name); all use k-block 1.
- name: xnn_f32_gemm_minmax_ukernel_1x4__wasm
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_2x4__wasm
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x4__wasm
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x2__wasm
  k-block: 1
# Kernels tagged `scalar`; all use k-block 1.
- name: xnn_f32_gemm_minmax_ukernel_1x4__scalar
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_2x4__scalar
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x4__scalar
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x2__scalar
  k-block: 1