Home
last modified time | relevance | path

Searched refs:vi1x5678 (Results 1 – 25 of 186) sorted by relevance

12345678

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3p1-minmax-neon-2x4.c110 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vi1x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4() local
115 vo1p0 = vmlaq_lane_f32(vo1p0, vi1x5678, vget_high_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4()
117 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4()
175 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4() local
180 vo1p0 = vmlaq_lane_f32(vo1p0, vi1x5678, vget_high_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4()
182 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4()
D3x3p1-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c123 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vi1x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4_acc2() local
128 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4_acc2()
129 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4_acc2()
182 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4_acc2() local
187 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4_acc2()
188 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4_acc2()
D3x3p1-minmax-wasmsimd-arm-loadsplat-2x4.c123 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vi1x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4() local
128 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4()
129 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4()
180 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4() local
185 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4()
186 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4()
D3x3p1-minmax-neonfma-2x4-acc2.c110 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vi1x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2() local
115 vo1p0 = vfmaq_lane_f32(vo1p0, vi1x5678, vget_high_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2()
117 vo0p1 = vfmaq_lane_f32(vo0p1, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2()
177 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2() local
182 vo1p0 = vfmaq_lane_f32(vo1p0, vi1x5678, vget_high_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2()
184 vo0p1 = vfmaq_lane_f32(vo0p1, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4_acc2()
D3x3p1-minmax-neon-2x4-acc2.c110 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vi1x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2() local
115 vo1p0 = vmlaq_lane_f32(vo1p0, vi1x5678, vget_high_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2()
117 vo0p1 = vmlaq_lane_f32(vo0p1, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2()
177 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2() local
182 vo1p0 = vmlaq_lane_f32(vo1p0, vi1x5678, vget_high_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2()
184 vo0p1 = vmlaq_lane_f32(vo0p1, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4_acc2()
D3x3p1-minmax-neonfma-2x4.c110 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vi1x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4() local
115 vo1p0 = vfmaq_lane_f32(vo1p0, vi1x5678, vget_high_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4()
117 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4()
175 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4() local
180 vo1p0 = vfmaq_lane_f32(vo1p0, vi1x5678, vget_high_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4()
182 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_2x4()
D3x3p1-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c123 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vi1x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4_acc2() local
128 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4_acc2()
129 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4_acc2()
182 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4_acc2() local
187 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4_acc2()
188 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4_acc2()
D3x3p1-minmax-wasmsimd-x86-loadsplat-2x4.c123 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vi1x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4() local
128 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4()
129 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4()
180 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4() local
185 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4()
186 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4()
D3x3p1-minmax-wasmsimd-arm-splat-2x4-acc2.c111 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vi1x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4_acc2() local
116 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4_acc2()
118 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4_acc2()
177 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4_acc2() local
182 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4_acc2()
184 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4_acc2()
D3x3p1-minmax-ssse3-2x4.c118 …const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() local
123 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
124 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
176 … const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi1x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() local
181 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
182 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
D3x3p1-minmax-wasmsimd-arm-splat-2x4.c111 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vi1x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4() local
116 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4()
118 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4()
175 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4() local
180 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4()
182 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_2x4()
D3x3p1-minmax-wasmsimd-x86-splat-2x4-acc2.c111 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vi1x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4_acc2() local
116 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4_acc2()
118 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4_acc2()
177 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4_acc2() local
182 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4_acc2()
184 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4_acc2()
D3x3p1-minmax-ssse3-2x4-acc2.c118 …const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2() local
123 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
124 vo0p1 = _mm_add_ps(vo0p1, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
178 … const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi1x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2() local
183 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
184 vo0p1 = _mm_add_ps(vo0p1, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
D3x3p1-minmax-sse-2x4.c150 const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4() local
157 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4()
158 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4()
234 const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4() local
241 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4()
242 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4()
D3x3p1-minmax-wasmsimd-x86-splat-2x4.c111 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vi1x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4() local
116 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4()
118 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4()
175 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4() local
180 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4()
182 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_2x4()
D3x3p1-minmax-neon-1x4-acc3.c92 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vi1x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3() local
97 vo0p1 = vmlaq_lane_f32(vo0p1, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3()
142 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3() local
147 vo0p1 = vmlaq_lane_f32(vo0p1, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc3()
D3x3p1-minmax-neonfma-1x4-acc3.c92 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vi1x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3() local
97 vo0p1 = vfmaq_lane_f32(vo0p1, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3()
142 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3() local
147 vo0p1 = vfmaq_lane_f32(vo0p1, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc3()
D3x3p1-minmax-neonfma-1x4-acc2.c92 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vi1x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2() local
97 vo0p1 = vfmaq_lane_f32(vo0p1, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2()
141 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2() local
146 vo0p1 = vfmaq_lane_f32(vo0p1, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4_acc2()
D3x3p1-minmax-neonfma-1x4.c92 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vi1x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4() local
97 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4()
140 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4() local
145 vo0p0 = vfmaq_lane_f32(vo0p0, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_1x4()
D3x3p1-minmax-neon-1x4.c92 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vi1x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4() local
97 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4()
140 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4() local
145 vo0p0 = vmlaq_lane_f32(vo0p0, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4()
D3x3p1-minmax-neon-1x4-acc2.c92 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vi1x89AB, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2() local
97 vo0p1 = vmlaq_lane_f32(vo0p1, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2()
141 const float32x4_t vi1x5678 = vextq_f32(vi1x4567, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2() local
146 vo0p1 = vmlaq_lane_f32(vo0p1, vi1x5678, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_1x4_acc2()
D3x3p1-minmax-sse-2x4-acc2.c150 const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2() local
157 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2()
158 vo0p1 = _mm_add_ps(vo0p1, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2()
236 const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2() local
243 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi1x5678, vk02)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2()
244 vo0p1 = _mm_add_ps(vo0p1, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2()
D3x3p1-minmax-wasmsimd-arm-splat-1x4-acc3.c93 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vi1x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() local
98 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
142 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() local
147 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
D3x3p1-minmax-wasmsimd-arm-splat-1x4-acc4.c93 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vi1x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() local
98 …vo0p3 = wasm_f32x4_add(vo0p3, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
143 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() local
148 …vo0p3 = wasm_f32x4_add(vo0p3, wasm_f32x4_mul(vi1x5678, wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
D3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c104 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vi1x89AB, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() local
108 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
146 const v128_t vi1x5678 = wasm_v32x4_shuffle(vi1x4567, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() local
150 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()

12345678