# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Each list entry describes one f32 GEMM min/max microkernel. Keys used below:
#   name      - symbol of the microkernel (or of its JIT generator)
#   init      - symbol of the parameter-initialization function
#   k-block   - integer K block size (presumably the K-loop step; confirm
#               against the tool that consumes this file)
#   assembly  - optional; `true` on the assembly entries
#   pipelined - optional; `true` on some entries
#   arch      - optional list; only `aarch64` is used in this file

# AArch32 assembly
- name: xnn_f32_gemm_minmax_ukernel_4x4__aarch32_vfp_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
# AArch64 assembly
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld128
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  assembly: true
  pipelined: true
- name: xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  assembly: true
  pipelined: true
# ARM NEON
- name: xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__neon
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld128
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__neon
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__neonfma
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_6x2__neon_lane_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_6x2__neonfma_lane_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld128
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  arch:
    - aarch64
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__neon
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_8x8s4__neon
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
# x86 SSE
- name: xnn_f32_gemm_minmax_ukernel_1x8__sse_dup
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8__sse_load1
  init: xnn_init_f32_minmax_sse_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__sse
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__sse_dup
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__sse_load1
  init: xnn_init_f32_minmax_sse_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8s4__sse
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x2c4__sse
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__sse_dup
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__sse_load1
  init: xnn_init_f32_minmax_sse_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__sse
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__sse_dup
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__sse_load1
  init: xnn_init_f32_minmax_sse_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8s4__sse
  init: xnn_init_f32_minmax_sse_params
  k-block: 4
# x86 AVX
- name: xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
# x86 FMA3
- name: xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast
  init: xnn_init_f32_minmax_avx_params
  k-block: 1
# x86 AVX512
- name: xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
# WAsm SIMD
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_arm
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_x86
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
# WAsm Relaxed SIMD
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmrelaxedsimd
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmrelaxedsimd
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmrelaxedsimd
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
- name: xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma
  init: xnn_init_f32_minmax_wasmsimd_params
  k-block: 4
# WAsm
- name: xnn_f32_gemm_minmax_ukernel_1x4__wasm
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_2x4__wasm
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x2__wasm
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x4__wasm
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
# Scalar
- name: xnn_f32_gemm_minmax_ukernel_1x4__scalar
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_2x4__scalar
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x2__scalar
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
- name: xnn_f32_gemm_minmax_ukernel_4x4__scalar
  init: xnn_init_f32_minmax_scalar_params
  k-block: 1
# AArch32 JIT assembly
- name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  assembly: true
- name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a53
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  pipelined: true
  assembly: true
# NOTE(review): this is the only AArch32 JIT entry without `assembly: true`;
# confirm whether the omission is intentional before relying on it.
- name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  pipelined: true
- name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  pipelined: true
  assembly: true
- name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_ld64
  init: xnn_init_f32_minmax_scalar_params
  k-block: 2
  assembly: true
- name: xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4
  pipelined: true
  assembly: true
# AArch64 JIT assembly
# NOTE(review): none of the AArch64 JIT entries below set `assembly: true`,
# unlike most AArch32 JIT entries above; confirm this is intended.
- name: xnn_generate_f32_gemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  pipelined: true
- name: xnn_generate_f32_gemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  pipelined: true
- name: xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  pipelined: true
- name: xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  pipelined: true
- name: xnn_generate_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  pipelined: true
- name: xnn_generate_f32_gemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75
  init: xnn_init_f32_minmax_scalar_params
  k-block: 8
  pipelined: true
- name: xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_ld128
  init: xnn_init_f32_minmax_scalar_params
  k-block: 4