/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-neon-1x4-acc3.c | 77 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3() local 81 vo0p0 = vmlaq_lane_f32(vo0p0, vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3() 130 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3() local 134 vo0p0 = vmlaq_lane_f32(vo0p0, vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3()
|
D | 3x3p1-minmax-neonfma-1x4-acc3.c | 77 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3() local 81 vo0p0 = vfmaq_lane_f32(vo0p0, vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3() 130 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3() local 134 vo0p0 = vfmaq_lane_f32(vo0p0, vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3()
|
D | 3x3p1-minmax-neonfma-1x4-acc2.c | 77 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2() local 81 vo0p1 = vfmaq_lane_f32(vo0p1, vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2() 129 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2() local 133 vo0p1 = vfmaq_lane_f32(vo0p1, vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2()
|
D | 3x3p1-minmax-neonfma-1x4.c | 77 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4() local 81 vo0p0 = vfmaq_lane_f32(vo0p0, vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4() 128 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4() local 132 vo0p0 = vfmaq_lane_f32(vo0p0, vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4()
|
D | 3x3p1-minmax-neon-1x4.c | 77 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4() local 81 vo0p0 = vmlaq_lane_f32(vo0p0, vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4() 128 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4() local 132 vo0p0 = vmlaq_lane_f32(vo0p0, vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4()
|
D | 3x3p1-minmax-neon-1x4-acc2.c | 77 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2() local 81 vo0p1 = vmlaq_lane_f32(vo0p1, vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2() 129 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2() local 133 vo0p1 = vmlaq_lane_f32(vo0p1, vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2()
|
D | 3x3p1-minmax-wasmsimd-arm-splat-1x4-acc3.c | 78 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() local 82 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() 130 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() local 134 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
|
D | 3x3p1-minmax-wasmsimd-arm-splat-1x4-acc4.c | 78 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() local 82 v128_t vo0p3 = wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() 131 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() local 135 v128_t vo0p3 = wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c | 91 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() local 95 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() 136 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() local 140 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c | 91 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() local 95 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() 135 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() local 139 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-1x4.c | 91 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() local 95 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() 134 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() local 138 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
|
D | 3x3p1-minmax-wasmsimd-x86-splat-1x4-acc4.c | 78 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() local 82 v128_t vo0p3 = wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() 131 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() local 135 v128_t vo0p3 = wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
|
D | 3x3p1-minmax-wasmsimd-arm-splat-1x4.c | 78 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() local 82 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() 128 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() local 132 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c | 91 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() local 95 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() 135 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() local 139 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
|
D | 3x3p1-minmax-wasmsimd-x86-splat-1x4-acc2.c | 78 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() local 82 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() 129 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() local 133 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
|
D | 3x3p1-minmax-wasmsimd-arm-splat-1x4-acc2.c | 78 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() local 82 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() 129 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() local 133 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
|
D | 3x3p1-minmax-wasmsimd-x86-splat-1x4-acc3.c | 78 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() local 82 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() 130 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() local 134 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
|
D | 3x3p1-minmax-ssse3-1x4.c | 86 …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() local 90 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 130 …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() local 134 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
|
D | 3x3p1-minmax-neonfma-1x4-acc4.c | 77 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4() local 81 float32x4_t vo0p3 = vmulq_lane_f32(vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4() 131 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4() local 135 float32x4_t vo0p3 = vmulq_lane_f32(vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc4()
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-1x4-acc3.c | 91 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() local 95 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() 136 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() local 140 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-1x4.c | 91 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() local 95 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() 134 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() local 138 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
|
D | 3x3p1-minmax-neon-1x4-acc4.c | 77 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4() local 81 float32x4_t vo0p3 = vmulq_lane_f32(vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4() 131 const float32x4_t vi0x3456 = vextq_f32(vi0x0123, vi0x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4() local 135 float32x4_t vo0p3 = vmulq_lane_f32(vi0x3456, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc4()
|
D | 3x3p1-minmax-wasmsimd-x86-splat-1x4.c | 78 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() local 82 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() 128 const v128_t vi0x3456 = wasm_v32x4_shuffle(vi0x0123, vi0x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() local 132 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x3456, wasm_v32x4_shuffle(vw0123, vw0123, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
|
D | 3x3p1-minmax-ssse3-1x4-acc3.c | 86 …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() local 90 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() 132 …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() local 136 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
|
D | 3x3p1-minmax-ssse3-1x4-acc2.c | 86 …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() local 90 vo0p1 = _mm_add_ps(vo0p1, _mm_mul_ps(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() 131 …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() local 135 vo0p1 = _mm_add_ps(vo0p1, _mm_mul_ps(vi0x3456, vk00)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
|