/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-scalar-2x1-acc3.c | 127 float vo1p2 = vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 226 float vo1p2 = vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 309 float vo1p2 = vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
|
D | 5x5s2p2-minmax-scalar-2x1-acc3.c | 151 float vo1p2 = vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local 253 float vo1p2 = vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local 311 float vo1p2 = vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
|
D | 5x5p2-minmax-wasmsimd-x86-splat-2x4-acc3.c | 103 v128_t vo1p2 = wasm_f32x4_mul(vi2x4567, wasm_v32x4_shuffle(vw89AB, vw89AB, 0, 0, 0, 0)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3() local 250 v128_t vo1p2 = wasm_f32x4_mul(vi2x4567, wasm_v32x4_shuffle(vw89AB, vw89AB, 0, 0, 0, 0)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3() local 393 v128_t vo1p2 = wasm_f32x4_mul(vi2x4567, wasm_v32x4_shuffle(vw89AB, vw89AB, 0, 0, 0, 0)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3() local
|
D | 5x5p2-minmax-wasmsimd-arm-splat-2x4-acc3.c | 103 v128_t vo1p2 = wasm_f32x4_mul(vi2x4567, wasm_v32x4_shuffle(vw89AB, vw89AB, 0, 0, 0, 0)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3() local 250 v128_t vo1p2 = wasm_f32x4_mul(vi2x4567, wasm_v32x4_shuffle(vw89AB, vw89AB, 0, 0, 0, 0)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3() local 393 v128_t vo1p2 = wasm_f32x4_mul(vi2x4567, wasm_v32x4_shuffle(vw89AB, vw89AB, 0, 0, 0, 0)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3() local
|
D | 5x5p2-minmax-neon-2x4-acc3.c | 100 float32x4_t vo1p2 = vmulq_lane_f32(vi2x4567, vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3() local 248 float32x4_t vo1p2 = vmulq_lane_f32(vi2x4567, vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3() local 392 float32x4_t vo1p2 = vmulq_lane_f32(vi2x4567, vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc3() local
|
D | 5x5p2-minmax-neonfma-2x4-acc3.c | 100 float32x4_t vo1p2 = vmulq_lane_f32(vi2x4567, vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3() local 248 float32x4_t vo1p2 = vmulq_lane_f32(vi2x4567, vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3() local 392 float32x4_t vo1p2 = vmulq_lane_f32(vi2x4567, vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc3() local
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-2x4-acc3.c | 129 v128_t vo1p2 = wasm_f32x4_mul(vi2x4567, vk12); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3() local 276 v128_t vo1p2 = wasm_f32x4_mul(vi2x4567, vk12); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3() local 419 v128_t vo1p2 = wasm_f32x4_mul(vi2x4567, vk12); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_2x4_acc3() local
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-2x4-acc3.c | 129 v128_t vo1p2 = wasm_f32x4_mul(vi2x4567, vk12); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3() local 276 v128_t vo1p2 = wasm_f32x4_mul(vi2x4567, vk12); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3() local 419 v128_t vo1p2 = wasm_f32x4_mul(vi2x4567, vk12); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_2x4_acc3() local
|
D | 5x5p2-minmax-sse-2x4-acc3.c | 116 __m128 vo1p2 = _mm_mul_ps(vi3x4567, vk22); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() local 256 __m128 vo1p2 = _mm_mul_ps(vi3x4567, vk22); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() local 406 __m128 vo1p2 = _mm_mul_ps(vi3x4567, vk22); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() local
|
D | 5x5s2p2-minmax-neonfma-2x4-acc3.c | 116 float32x4_t vo1p2 = vmulq_lane_f32(vi3x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() local 281 float32x4_t vo1p2 = vmulq_lane_f32(vi3x8ACE, vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() local
|
D | 5x5s2p2-minmax-neon-2x4-acc3.c | 116 float32x4_t vo1p2 = vmulq_lane_f32(vi3x8ACE9BDF.val[0], vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() local 281 float32x4_t vo1p2 = vmulq_lane_f32(vi3x8ACE, vget_low_f32(vw89AB), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() local
|
D | 5x5s2p2-minmax-wasmsimd-x86-splat-2x4-acc3.c | 148 v128_t vo1p2 = wasm_f32x4_mul(vi3x8ACE, wasm_v32x4_shuffle(vw89AB, vw89AB, 0, 0, 0, 0)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4_acc3() local 348 v128_t vo1p2 = wasm_f32x4_mul(vi3x8ACE, wasm_v32x4_shuffle(vw89AB, vw89AB, 0, 0, 0, 0)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4_acc3() local
|
D | 5x5s2p2-minmax-wasmsimd-arm-splat-2x4-acc3.c | 148 v128_t vo1p2 = wasm_f32x4_mul(vi3x8ACE, wasm_v32x4_shuffle(vw89AB, vw89AB, 0, 0, 0, 0)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4_acc3() local 348 v128_t vo1p2 = wasm_f32x4_mul(vi3x8ACE, wasm_v32x4_shuffle(vw89AB, vw89AB, 0, 0, 0, 0)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4_acc3() local
|
D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-2x4-acc3.c | 168 v128_t vo1p2 = wasm_f32x4_mul(vi3x8ACE, vk12); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc3() local 368 v128_t vo1p2 = wasm_f32x4_mul(vi3x8ACE, vk12); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc3() local
|
D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-2x4-acc3.c | 168 v128_t vo1p2 = wasm_f32x4_mul(vi3x8ACE, vk12); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc3() local 368 v128_t vo1p2 = wasm_f32x4_mul(vi3x8ACE, vk12); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc3() local
|
D | 5x5s2p2-minmax-sse-2x4-acc3.c | 162 __m128 vo1p2 = _mm_mul_ps(vi4x8ACE, vk22); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3() local 365 __m128 vo1p2 = _mm_mul_ps(vi4x8ACE, vk22); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4_acc3() local
|