Home
last modified time | relevance | path

Searched refs:vi5x79BD (Results 1 – 25 of 35) sorted by relevance

12

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5s2p2-minmax-neon-3x4.c218 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() local
243 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x79BD, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
250 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
412 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() local
423 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x79BD, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
430 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
D5x5s2p2-minmax-neonfma-3x4-acc2.c218 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() local
243 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x79BD, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
250 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
415 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() local
426 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x79BD, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
433 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
D5x5s2p2-minmax-neonfma-3x4.c218 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() local
243 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x79BD, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
250 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
412 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() local
423 vo2p0 = vfmaq_lane_f32(vo2p0, vi5x79BD, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
430 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
D5x5s2p2-minmax-neon-3x4-acc2.c218 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() local
243 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x79BD, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
250 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
415 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() local
426 vo2p0 = vmlaq_lane_f32(vo2p0, vi5x79BD, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
433 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
D5x5s2p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c278 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() local
340 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
347 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
520 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() local
531 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
538 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
D5x5s2p2-minmax-wasmsimd-x86-loadsplat-3x4.c278 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() local
340 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
347 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
517 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() local
528 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
535 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
D5x5s2p2-minmax-wasmsimd-arm-loadsplat-3x4.c278 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4() local
340 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4()
347 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4()
517 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4() local
528 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4()
535 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4()
D5x5s2p2-minmax-wasmsimd-x86-loadsplat-3x4-acc2.c278 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() local
340 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
347 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
520 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() local
531 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
538 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
D5x5s2p2-minmax-neon-2x4-acc2.c182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() local
205 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
333 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() local
346 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
D5x5s2p2-minmax-neon-2x4.c182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() local
205 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
331 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() local
344 vo1p0 = vmlaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
D5x5s2p2-minmax-neonfma-2x4-acc3.c182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() local
205 vo1p2 = vfmaq_lane_f32(vo1p2, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
335 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() local
348 vo1p2 = vfmaq_lane_f32(vo1p2, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
D5x5s2p2-minmax-neon-2x4-acc3.c182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() local
205 vo1p2 = vmlaq_lane_f32(vo1p2, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
335 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() local
348 vo1p2 = vmlaq_lane_f32(vo1p2, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
D5x5s2p2-minmax-neonfma-2x4-acc2.c182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() local
205 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
333 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() local
346 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
D5x5s2p2-minmax-neonfma-2x4.c182 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() local
205 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
331 const float32x4_t vi5x79BD = vextq_f32(vi5x1357, vi5x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() local
344 vo1p0 = vfmaq_lane_f32(vo1p0, vi5x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
D5x5s2p2-minmax-wasmsimd-x86-splat-3x4-acc2.c258 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2() local
320 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
327 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
500 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2() local
511 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
518 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
D5x5s2p2-minmax-sse-3x4.c306 const __m128 vi5x79BD = _mm_move_ss(vi5xF9BD, vi5x7135); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4() local
348 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi5x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
353 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
526 const __m128 vi5x79BD = _mm_move_ss(vi5xF9BD, vi5x7135); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4() local
536 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi5x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
541 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
D5x5s2p2-minmax-wasmsimd-arm-splat-3x4.c258 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4() local
320 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
327 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
497 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4() local
508 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
515 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
D5x5s2p2-minmax-wasmsimd-x86-splat-3x4.c258 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4() local
320 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
327 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
497 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4() local
508 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
515 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
D5x5s2p2-minmax-wasmsimd-arm-splat-3x4-acc2.c258 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2() local
320 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
327 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
500 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2() local
511 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
518 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
D5x5s2p2-minmax-sse-3x4-acc2.c306 const __m128 vi5x79BD = _mm_move_ss(vi5xF9BD, vi5x7135); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2() local
348 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi5x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
353 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
529 const __m128 vi5x79BD = _mm_move_ss(vi5xF9BD, vi5x7135); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2() local
539 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi5x79BD, vk11)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
544 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
D5x5s2p2-minmax-wasmsimd-arm-loadsplat-2x4-acc3.c234 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc3() local
286 vo1p2 = wasm_f32x4_add(vo1p2, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc3()
422 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc3() local
435 vo1p2 = wasm_f32x4_add(vo1p2, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc3()
D5x5s2p2-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c234 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc2() local
286 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc2()
420 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc2() local
433 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc2()
D5x5s2p2-minmax-wasmsimd-x86-loadsplat-2x4-acc3.c234 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc3() local
286 vo1p2 = wasm_f32x4_add(vo1p2, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc3()
422 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc3() local
435 vo1p2 = wasm_f32x4_add(vo1p2, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc3()
D5x5s2p2-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c234 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc2() local
286 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc2()
420 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc2() local
433 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc2()
D5x5s2p2-minmax-wasmsimd-x86-loadsplat-2x4.c234 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4() local
286 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4()
418 const v128_t vi5x79BD = wasm_v32x4_shuffle(vi5x1357, vi5x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4() local
431 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4()

12