/external/XNNPACK/src/f32-spmm/gen/ |
D | 32x1-minmax-neon.c | 79 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neon() local 87 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__neon() 95 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__neon() 137 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neon() local 141 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__neon() 145 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
|
D | 32x1-minmax-neonfma.c | 79 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() local 87 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() 95 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() 137 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() local 141 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() 145 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
|
D | 32x1-minmax-wasmsimd-arm.c | 76 v128_t voutCDEF = wasm_f32x4_min(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm() local 84 voutCDEF = wasm_f32x4_max(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm() 92 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm() 134 v128_t voutCDEF = wasm_f32x4_min(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm() local 138 voutCDEF = wasm_f32x4_max(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm() 143 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm()
|
D | 32x1-minmax-wasmsimd-x86-pipelined.c | 90 v128_t voutCDEF = wasm_f32x4_pmin(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() local 98 voutCDEF = wasm_f32x4_pmax(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() 106 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() 148 v128_t voutCDEF = wasm_f32x4_pmin(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() local 152 voutCDEF = wasm_f32x4_pmax(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() 157 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
|
D | 32x1-minmax-neon-pipelined.c | 90 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined() local 98 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined() 106 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined() 151 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined() local 155 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined() 159 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined()
|
D | 32x1-minmax-wasmsimd-arm-pipelined.c | 90 v128_t voutCDEF = wasm_f32x4_min(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined() local 98 voutCDEF = wasm_f32x4_max(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined() 106 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined() 148 v128_t voutCDEF = wasm_f32x4_min(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined() local 152 voutCDEF = wasm_f32x4_max(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined() 157 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined()
|
D | 32x1-minmax-neonfma-pipelined.c | 90 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined() local 98 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined() 106 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined() 151 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined() local 155 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined() 159 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined()
|
D | 32x1-minmax-sse.c | 76 __m128 voutCDEF = _mm_min_ps(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__sse() local 84 voutCDEF = _mm_max_ps(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__sse() 92 _mm_storeu_ps(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__sse() 134 __m128 voutCDEF = _mm_min_ps(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__sse() local 138 voutCDEF = _mm_max_ps(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__sse() 142 _mm_storeu_ps(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__sse()
|
D | 32x1-minmax-wasmsimd-x86.c | 76 v128_t voutCDEF = wasm_f32x4_pmin(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() local 84 voutCDEF = wasm_f32x4_pmax(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() 92 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() 134 v128_t voutCDEF = wasm_f32x4_pmin(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() local 138 voutCDEF = wasm_f32x4_pmax(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() 143 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
|
D | 32x1-minmax-wasmsimd-arm-pipelined-x2.c | 130 v128_t voutCDEF = wasm_f32x4_min(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() local 138 voutCDEF = wasm_f32x4_max(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 146 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 188 v128_t voutCDEF = wasm_f32x4_min(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() local 192 voutCDEF = wasm_f32x4_max(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 197 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
|
D | 32x1-minmax-wasmsimd-x86-pipelined-x2.c | 130 v128_t voutCDEF = wasm_f32x4_pmin(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() local 138 voutCDEF = wasm_f32x4_pmax(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 146 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 188 v128_t voutCDEF = wasm_f32x4_pmin(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() local 192 voutCDEF = wasm_f32x4_pmax(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 197 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
|
D | 32x1-minmax-neon-x2.c | 150 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2() local 158 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2() 166 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2() 208 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2() local 212 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2() 216 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2()
|
D | 32x1-minmax-wasmsimd-x86-x2.c | 144 v128_t voutCDEF = wasm_f32x4_pmin(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2() local 152 voutCDEF = wasm_f32x4_pmax(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2() 160 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2() 202 v128_t voutCDEF = wasm_f32x4_pmin(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2() local 206 voutCDEF = wasm_f32x4_pmax(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2() 211 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2()
|
D | 32x1-minmax-neonfma-x2.c | 150 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2() local 158 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2() 166 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2() 208 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2() local 212 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2() 216 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2()
|
D | 32x1-minmax-wasmsimd-arm-x2.c | 144 v128_t voutCDEF = wasm_f32x4_min(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2() local 152 voutCDEF = wasm_f32x4_max(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2() 160 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2() 202 v128_t voutCDEF = wasm_f32x4_min(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2() local 206 voutCDEF = wasm_f32x4_max(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2() 211 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2()
|
D | 32x1-minmax-wasmsimd-arm-x4.c | 216 v128_t voutCDEF = wasm_f32x4_min(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4() local 224 voutCDEF = wasm_f32x4_max(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4() 232 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4() 274 v128_t voutCDEF = wasm_f32x4_min(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4() local 278 voutCDEF = wasm_f32x4_max(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4() 283 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4()
|
D | 32x1-minmax-wasmsimd-x86-x4.c | 216 v128_t voutCDEF = wasm_f32x4_pmin(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4() local 224 voutCDEF = wasm_f32x4_pmax(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4() 232 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4() 274 v128_t voutCDEF = wasm_f32x4_pmin(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4() local 278 voutCDEF = wasm_f32x4_pmax(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4() 283 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4()
|
D | 16x1-minmax-wasmsimd-arm.c | 64 v128_t voutCDEF = wasm_f32x4_min(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm() local 68 voutCDEF = wasm_f32x4_max(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm() 72 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm()
|
D | 16x1-minmax-wasmsimd-x86.c | 64 v128_t voutCDEF = wasm_f32x4_pmin(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86() local 68 voutCDEF = wasm_f32x4_pmax(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86() 72 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86()
|
D | 16x1-minmax-neon.c | 66 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_16x1__neon() local 70 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_16x1__neon() 74 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__neon()
|
D | 16x1-minmax-neonfma.c | 66 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma() local 70 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma() 74 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma()
|
D | 16x1-minmax-sse.c | 64 __m128 voutCDEF = _mm_min_ps(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_16x1__sse() local 68 voutCDEF = _mm_max_ps(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_16x1__sse() 72 _mm_storeu_ps(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__sse()
|
D | 16x1-minmax-wasmsimd-arm-pipelined.c | 74 v128_t voutCDEF = wasm_f32x4_min(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() local 78 voutCDEF = wasm_f32x4_max(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() 82 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
|
D | 16x1-minmax-neon-pipelined.c | 73 float32x4_t voutCDEF = vminq_f32(vaccCDEF, vmax); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() local 77 voutCDEF = vmaxq_f32(voutCDEF, vmin); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() 81 vst1q_f32(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
|
D | 16x1-minmax-wasmsimd-x86-pipelined.c | 74 v128_t voutCDEF = wasm_f32x4_pmin(vmax, vaccCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined() local 78 voutCDEF = wasm_f32x4_pmax(vmin, voutCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined() 82 wasm_v128_store(output + 12, voutCDEF); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined()
|