/external/XNNPACK/src/f32-spmm/gen/ |
D | 32x1-minmax-wasmsimd-arm-pipelined-x2.c | 48 v128_t viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() local 70 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 81 viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 89 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 100 viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 112 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 124 viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
|
D | 32x1-minmax-wasmsimd-x86-pipelined-x2.c | 48 v128_t viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() local 70 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 81 viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 89 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 100 viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 112 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 124 viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
|
D | 32x1-minmax-wasmsimd-arm-pipelined.c | 48 v128_t viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined() local 72 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined() 84 viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined()
|
D | 32x1-minmax-neon-pipelined.c | 48 float32x4_t viSTUV = vld1q_f32(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined() local 70 vaccSTUV = vmlaq_f32(vaccSTUV, viSTUV, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined() 84 viSTUV = vld1q_f32(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined()
|
D | 32x1-minmax-neonfma-pipelined.c | 48 float32x4_t viSTUV = vld1q_f32(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined() local 70 vaccSTUV = vfmaq_f32(vaccSTUV, viSTUV, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined() 84 viSTUV = vld1q_f32(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined()
|
D | 32x1-minmax-wasmsimd-x86-pipelined.c | 48 v128_t viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() local 72 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() 84 viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
|
D | 32x4-minmax-neonfma.c | 84 const float32x4_t viSTUV = vld1q_f32(input + 28); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local 97 vaccSTUVn0 = vfmaq_laneq_f32(vaccSTUVn0, viSTUV, vw, 0); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 105 vaccSTUVn1 = vfmaq_laneq_f32(vaccSTUVn1, viSTUV, vw, 1); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 113 vaccSTUVn2 = vfmaq_laneq_f32(vaccSTUVn2, viSTUV, vw, 2); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 121 vaccSTUVn3 = vfmaq_laneq_f32(vaccSTUVn3, viSTUV, vw, 3); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 251 const float32x4_t viSTUV = vld1q_f32(input + 28); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local 264 vaccSTUV = vfmaq_f32(vaccSTUV, viSTUV, vw); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
|
D | 32x2-minmax-neonfma.c | 68 const float32x4_t viSTUV = vld1q_f32(input + 28); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local 81 vaccSTUVn0 = vfmaq_lane_f32(vaccSTUVn0, viSTUV, vw, 0); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 89 vaccSTUVn1 = vfmaq_lane_f32(vaccSTUVn1, viSTUV, vw, 1); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 169 const float32x4_t viSTUV = vld1q_f32(input + 28); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local 182 vaccSTUV = vfmaq_f32(vaccSTUV, viSTUV, vw); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma()
|
D | 32x1-minmax-neon.c | 60 const float32x4_t viSTUV = vld1q_f32(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__neon() local 73 vaccSTUV = vmlaq_f32(vaccSTUV, viSTUV, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
|
D | 32x1-minmax-neonfma.c | 60 const float32x4_t viSTUV = vld1q_f32(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() local 73 vaccSTUV = vfmaq_f32(vaccSTUV, viSTUV, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
|
D | 32x1-minmax-wasmsimd-arm.c | 60 const v128_t viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm() local 70 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm()
|
D | 32x1-minmax-sse.c | 60 const __m128 viSTUV = _mm_loadu_ps(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__sse() local 70 vaccSTUV = _mm_add_ps(vaccSTUV, _mm_mul_ps(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__sse()
|
D | 32x1-minmax-wasmsimd-x86.c | 60 const v128_t viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() local 70 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
|
D | 32x1-minmax-neon-x2.c | 131 const float32x4_t viSTUV = vld1q_f32(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2() local 144 vaccSTUV = vmlaq_f32(vaccSTUV, viSTUV, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2()
|
D | 32x1-minmax-neonfma-x2.c | 131 const float32x4_t viSTUV = vld1q_f32(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2() local 144 vaccSTUV = vfmaq_f32(vaccSTUV, viSTUV, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2()
|
D | 32x1-minmax-wasmsimd-arm-x2.c | 129 const v128_t viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2() local 139 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2()
|
D | 32x1-minmax-wasmsimd-x86-x2.c | 129 const v128_t viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2() local 139 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2()
|
D | 32x1-minmax-wasmsimd-arm-x4.c | 201 const v128_t viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4() local 211 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4()
|
D | 32x1-minmax-wasmsimd-x86-x4.c | 201 const v128_t viSTUV = wasm_v128_load(input + 28); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4() local 211 vaccSTUV = wasm_f32x4_add(vaccSTUV, wasm_f32x4_mul(viSTUV, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4()
|