# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# One entry per f32 GEMM min/max micro-kernel. Keys used in this file:
#   name      - micro-kernel symbol (always present).
#   k-block   - integer (1, 2, 4 or 8); always present.
#               NOTE(review): presumably the K-dimension step the kernel
#               consumes per iteration - confirm against the consumer.
#   pipelined - optional; `true` on the cortex_* assembly entries.
#   assembly  - optional; `true` on the aarch64/aarch32 entries.
#   arch      - optional list; only `aarch64` appears, on the neonfma_lane
#               entries. NOTE(review): presumably restricts the target
#               architectures - confirm against the consumer.
# Entry order is preserved from the original file.

# Entries flagged `assembly: true` (aarch64 / aarch32 names).
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a57
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a57
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a57
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a57
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75
  k-block: 8
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld64
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_pld_cortex_a75
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_ld64
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x4__aarch32_vfp_ld64
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld128
  k-block: 4
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128
  k-block: 4
  assembly: true

# `neon` / `neonfma` names without the `assembly` flag.
- name: xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128
  k-block: 4
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128
  k-block: 4
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld128
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld128
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__neon
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__neon
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__neon
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_8x8s4__neon
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__neonfma
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma
  k-block: 4

# `sse` / `sse2` names.
- name: xnn_f32_gemm_minmax_ukernel_1x8__sse_load1
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x8__sse_load1
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__sse_load1
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__sse_load1
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x8__sse_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__sse_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__sse_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__sse_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__sse
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8s4__sse
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__sse
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8s4__sse
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x2c4__sse
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup
  k-block: 4

# `avx` / `fma3` / `avx512f` names.
- name: xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast
  k-block: 1

# `wasmsimd` names (arm / x86 variants).
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_arm
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_x86
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86
  k-block: 4

# `wasm` and `scalar` names.
- name: xnn_f32_gemm_minmax_ukernel_1x4__wasm
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_2x4__wasm
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x4__wasm
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x2__wasm
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x4__scalar
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_2x4__scalar
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x4__scalar
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x2__scalar
  k-block: 1