
Searched refs:vmask_odd (Results 1 – 25 of 136) sorted by relevance


/external/XNNPACK/src/f32-dwconv2d-chw/gen/
3x3s2p1-minmax-neonfma-4x4.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() local
218 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
220 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
222 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
224 …const float32x4_t vi3x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
226 …const float32x4_t vi4x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
228 …const float32x4_t vi5x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi5… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
230 …const float32x4_t vi6x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi6… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
232 …const float32x4_t vi7x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi7… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
234 …const float32x4_t vi8x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi8… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
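Note: every match above is the same right-edge idiom of the 3x3-stride-2 ("3x3s2p1") depthwise-convolution kernels. vmask_odd is loaded once from the kernel parameters (line 35, a local), then ANDed with each row's odd-indexed lanes; by the kernels' naming convention, vi0x9BDF holds elements 9, B, D, F of input row 0, so the truncated lines all complete as an AND of the mask with a reinterpreted float vector, zeroing lanes past the end of the row before the multiply-accumulates. A minimal self-contained sketch of the NEON form (the helper name and its arguments are illustrative, not taken from the kernels):

    #include <arm_neon.h>

    // Sketch of the vmask_odd idiom from the NEON 3x3s2p1 kernels: keep the
    // odd-indexed lanes that are inside the row, zero the out-of-bounds tail.
    // mask_odd stands in for params->neon.mask_odd.
    static float32x4_t mask_odd_lanes(const uint32_t mask_odd[4], float32x4_t vi_9BDF) {
      const uint32x4_t vmask_odd = vld1q_u32(mask_odd);
      // NEON's bitwise AND is defined on integer vectors, so round-trip the
      // floats through uint32x4_t; an all-ones lane passes the float through
      // bit-for-bit, an all-zeros lane clears it to +0.0f.
      return vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi_9BDF)));
    }

The float-to-uint32 round trip exists only because the AND operates on integer lanes; the bit patterns of the surviving floats are untouched.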
3x3s2p1-minmax-neon-4x4.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() local
218 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
220 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
222 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
224 …const float32x4_t vi3x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
226 …const float32x4_t vi4x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
228 …const float32x4_t vi5x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi5… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
230 …const float32x4_t vi6x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi6… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
232 …const float32x4_t vi7x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi7… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
234 …const float32x4_t vi8x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi8… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
3x3s2p1-minmax-neon-3x4.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local
184 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
186 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
188 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
190 …const float32x4_t vi3x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
192 …const float32x4_t vi4x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
194 …const float32x4_t vi5x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi5… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
196 …const float32x4_t vi6x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi6… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
3x3s2p1-minmax-neonfma-3x4.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local
184 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
186 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
188 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
190 …const float32x4_t vi3x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
192 …const float32x4_t vi4x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
194 …const float32x4_t vi5x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi5… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
196 …const float32x4_t vi6x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi6… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
3x3s2p1-minmax-neonfma-2x4.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() local
150 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
152 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
154 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
156 …const float32x4_t vi3x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
158 …const float32x4_t vi4x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
3x3s2p1-minmax-neon-2x4-acc2.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2() local
152 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2()
154 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2()
156 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2()
158 …const float32x4_t vi3x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2()
160 …const float32x4_t vi4x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4_acc2()
3x3s2p1-minmax-neon-2x4.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() local
150 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
152 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
154 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
156 …const float32x4_t vi3x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
158 …const float32x4_t vi4x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
3x3s2p1-minmax-neonfma-2x4-acc2.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() local
152 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
154 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
156 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
158 …const float32x4_t vi3x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
160 …const float32x4_t vi4x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
3x3s2p1-minmax-wasmsimd-arm-loadsplat-4x4.c
36 const v128_t vmask_odd = wasm_v128_load(params->scalar.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4() local
276 …const v128_t vi0x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
278 …const v128_t vi1x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
280 …const v128_t vi2x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
282 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
284 …const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
286 …const v128_t vi5x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
288 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
290 …const v128_t vi7x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi7x89AB, vi7xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
292 …const v128_t vi8x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi8x89AB, vi8xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
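The WasmSIMD variants fold the odd/even deinterleave into the same step: wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 1, 3, 5, 7) gathers lanes 1, 3, 5, 7 of the concatenated pair (elements 9, B, D, F), and the truncated lines complete as an AND of that shuffle with vmask_odd. A sketch under the same naming assumptions as before:

    #include <wasm_simd128.h>

    // Same idiom as in the WasmSIMD kernels: the shuffle gathers lanes 1, 3, 5, 7
    // of the concatenated pair (the odd-indexed elements 9, B, D, F), and the AND
    // with mask_odd zeroes those that lie past the end of the row.
    // mask_odd stands in for params->scalar.mask_odd.
    static v128_t mask_odd_lanes(const uint32_t mask_odd[4], v128_t vi_89AB, v128_t vi_CDEF) {
      const v128_t vmask_odd = wasm_v128_load(mask_odd);
      return wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi_89AB, vi_CDEF, 1, 3, 5, 7));
    }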
3x3s2p1-minmax-neon-1x4-acc3.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() local
117 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
119 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
121 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
3x3s2p1-minmax-neon-1x4-acc2.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() local
116 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
118 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
120 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
3x3s2p1-minmax-neonfma-1x4-acc2.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() local
116 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
118 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
120 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
3x3s2p1-minmax-neon-1x4.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() local
115 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
117 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
119 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
3x3s2p1-minmax-neon-1x4-acc4.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() local
118 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
120 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
122 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
3x3s2p1-minmax-neonfma-1x4-acc3.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() local
117 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
119 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
121 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
3x3s2p1-minmax-neonfma-1x4.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() local
115 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
117 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
119 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
3x3s2p1-minmax-neonfma-1x4-acc4.c
35 const uint32x4_t vmask_odd = vld1q_u32(params->neon.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() local
118 …const float32x4_t vi0x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
120 …const float32x4_t vi1x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
122 …const float32x4_t vi2x9BDF = vreinterpretq_f32_u32(vandq_u32(vmask_odd, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
3x3s2p1-minmax-wasmsimd-arm-splat-3x4.c
36 const v128_t vmask_odd = wasm_v128_load(params->scalar.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
222 …const v128_t vi0x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
224 …const v128_t vi1x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
226 …const v128_t vi2x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
228 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
230 …const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
232 …const v128_t vi5x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
234 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
3x3s2p1-minmax-wasmsimd-arm-loadsplat-3x4.c
36 const v128_t vmask_odd = wasm_v128_load(params->scalar.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
232 …const v128_t vi0x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
234 …const v128_t vi1x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
236 …const v128_t vi2x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
238 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
240 …const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
242 …const v128_t vi5x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
244 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
3x3s2p1-minmax-wasmsimd-x86-loadsplat-3x4.c
36 const v128_t vmask_odd = wasm_v128_load(params->scalar.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
232 …const v128_t vi0x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
234 …const v128_t vi1x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
236 …const v128_t vi2x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
238 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
240 …const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
242 …const v128_t vi5x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
244 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
3x3s2p1-minmax-sse-6x4.c
35 const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4() local
360 …const __m128 vi0x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi0x89AB, vi0xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
362 …const __m128 vi1x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi1x89AB, vi1xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
364 …const __m128 vi2x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi2x89AB, vi2xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
366 …const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
368 …const __m128 vi4x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
370 …const __m128 vi5x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi5x89AB, vi5xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
372 …const __m128 vi6x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
374 …const __m128 vi7x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi7x89AB, vi7xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
376 …const __m128 vi8x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi8x89AB, vi8xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
[all …]
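The SSE kernels express the same gather with _mm_shuffle_ps: _MM_SHUFFLE(3, 1, 3, 1) picks lanes 1 and 3 of each operand, and because the mask is stored as float data in params->sse.mask_odd, the AND is the float-typed _mm_and_ps with no reinterpret step. Again a hedged sketch, helper name illustrative:

    #include <xmmintrin.h>

    // SSE form of the idiom: _MM_SHUFFLE(3, 1, 3, 1) selects lanes 1 and 3 of each
    // operand, i.e. the odd-indexed elements 9, B, D, F, and _mm_and_ps applies
    // the mask directly on float-typed data. mask_odd stands in for
    // params->sse.mask_odd and must be 16-byte aligned for _mm_load_ps.
    static __m128 mask_odd_lanes(const float mask_odd[4], __m128 vi_89AB, __m128 vi_CDEF) {
      const __m128 vmask_odd = _mm_load_ps(mask_odd);
      return _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi_89AB, vi_CDEF, _MM_SHUFFLE(3, 1, 3, 1)));
    }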
3x3s2p1-minmax-wasmsimd-arm-splat-4x4.c
36 const v128_t vmask_odd = wasm_v128_load(params->scalar.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4() local
266 …const v128_t vi0x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
268 …const v128_t vi1x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
270 …const v128_t vi2x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
272 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
274 …const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
276 …const v128_t vi5x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
278 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
280 …const v128_t vi7x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi7x89AB, vi7xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
282 …const v128_t vi8x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi8x89AB, vi8xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
3x3s2p1-minmax-sse-4x4.c
35 const __m128 vmask_odd = _mm_load_ps((const float*) params->sse.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4() local
270 …const __m128 vi0x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi0x89AB, vi0xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
272 …const __m128 vi1x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi1x89AB, vi1xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
274 …const __m128 vi2x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi2x89AB, vi2xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
276 …const __m128 vi3x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
278 …const __m128 vi4x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
280 …const __m128 vi5x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi5x89AB, vi5xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
282 …const __m128 vi6x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
284 …const __m128 vi7x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi7x89AB, vi7xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
286 …const __m128 vi8x9BDF = _mm_and_ps(vmask_odd, _mm_shuffle_ps(vi8x89AB, vi8xCDEF, _MM_SHUFFLE(3, 1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
3x3s2p1-minmax-wasmsimd-x86-splat-4x4.c
36 const v128_t vmask_odd = wasm_v128_load(params->scalar.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4() local
266 …const v128_t vi0x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
268 …const v128_t vi1x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
270 …const v128_t vi2x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
272 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
274 …const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
276 …const v128_t vi5x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
278 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
280 …const v128_t vi7x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi7x89AB, vi7xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
282 …const v128_t vi8x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi8x89AB, vi8xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
3x3s2p1-minmax-wasmsimd-x86-loadsplat-4x4.c
36 const v128_t vmask_odd = wasm_v128_load(params->scalar.mask_odd); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4() local
276 …const v128_t vi0x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
278 …const v128_t vi1x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
280 …const v128_t vi2x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
282 …const v128_t vi3x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
284 …const v128_t vi4x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
286 …const v128_t vi5x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
288 …const v128_t vi6x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
290 …const v128_t vi7x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi7x89AB, vi7xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
292 …const v128_t vi8x9BDF = wasm_v128_and(vmask_odd, wasm_v32x4_shuffle(vi8x89AB, vi8xCDEF, 1, 3, 5, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
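All three parameter layouts seen above (params->neon.mask_odd, params->scalar.mask_odd, params->sse.mask_odd) hold the same four 32-bit words: all-ones for odd-indexed lanes inside the row, all-zeros for the tail, with the count of set words following from the row width. The helper below is hypothetical (not an XNNPACK API) and only illustrates that rule:

    #include <stddef.h>
    #include <stdint.h>

    // Hypothetical helper, not an XNNPACK API: build the odd-element mask for
    // the last vector of a row. remainder_odd is how many odd-indexed elements
    // of the final block are valid (0..4); valid lanes get all-ones so the AND
    // passes them through, the rest get all-zeros.
    static void init_mask_odd(uint32_t mask_odd[4], size_t remainder_odd) {
      for (size_t i = 0; i < 4; i++) {
        mask_odd[i] = (i < remainder_odd) ? UINT32_MAX : 0;
      }
    }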
