Home
last modified time | relevance | path

Searched refs:vo1c01 (Results 1 – 19 of 19) sorted by relevance

/external/XNNPACK/src/f32-conv-hwc/gen/
D3x3s2p0p1c3x4-neon-2x1.c267 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local
269 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
270 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
275 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
403 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local
405 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
406 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
411 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
D3x3s2p0p1c3x4-neonfma-2x1.c267 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local
269 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
270 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
275 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
401 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local
403 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
404 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
409 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
D3x3s2p1c3x4-neonfma-2x1.c267 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local
269 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
270 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
275 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
401 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local
403 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
404 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
409 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
D3x3s2p1c3x4-neon-2x1.c267 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local
269 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
270 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
275 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
403 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local
405 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
406 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
411 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
D3x3s2p0p1c3x8-neonfma-2x1.c362 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local
364 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
365 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
370 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
564 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local
566 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
567 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
572 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
D3x3s2p1c3x8-neon-2x1.c362 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local
364 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
365 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
370 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
566 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local
568 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
569 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
574 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
D3x3s2p0p1c3x8-neon-2x1.c362 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local
364 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
365 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
370 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
566 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local
568 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
569 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
574 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
D3x3s2p1c3x8-neonfma-2x1.c362 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local
364 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
365 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
370 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
564 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local
566 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
567 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
572 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
D3x3s2p1c3x4-neonfma-2x2.c570 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local
572 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
573 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
578 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
703 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local
705 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
706 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
711 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
D3x3s2p1c3x4-neon-2x2.c572 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local
574 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
575 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
580 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
707 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local
709 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
710 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
715 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
D3x3s2p0p1c3x4-neon-2x2.c572 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local
574 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
575 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
580 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
707 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local
709 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
710 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
715 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
D3x3s2p0p1c3x4-neonfma-2x2.c570 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local
572 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
573 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
578 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
703 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local
705 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
706 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
711 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
D3x3s2p1c3x8-neonfma-2x2.c827 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local
829 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
830 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
835 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
1028 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local
1030 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
1031 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
1036 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
D3x3s2p1c3x8-neon-2x2.c829 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local
831 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
832 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
837 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
1032 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local
1034 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
1035 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
1040 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
D3x3s2p0p1c3x8-neonfma-2x2.c827 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local
829 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
830 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
835 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
1028 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local
1030 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
1031 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
1036 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
D3x3s2p0p1c3x8-neon-2x2.c829 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local
831 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
832 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
837 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
1032 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local
1034 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
1035 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
1040 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
/external/XNNPACK/src/f32-conv-hwc2chw/
D3x3s2p1c3x4-wasmsimd-2x2.c338 const v128_t vo1c01 = wasm_v32x4_shuffle(vo1x0, vo1x1, 0, 4 + 0, 1, 4 + 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
342 *((double*) o1c0) = wasm_f64x2_extract_lane(vo1c01, 0); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
343 …*((double*) o1c1) = wasm_f64x2_extract_lane(wasm_v32x4_shuffle(vo1c01, vo1c01, 2, 3, 2, 3), 0); o1… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
598 const v128_t vo1c01 = wasm_v32x4_shuffle(vo1x0, vo1x1, 0, 4 + 0, 1, 4 + 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
601 *((double*) o1c0) = wasm_f64x2_extract_lane(vo1c01, 0); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
602 …*((double*) o1c1) = wasm_f64x2_extract_lane(wasm_v32x4_shuffle(vo1c01, vo1c01, 2, 3, 2, 3), 0); o1… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
D3x3s2p1c3x4-neonfma-2x2.c337 const float32x4_t vo1c01 = vzip1q_f32(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
341 vst1_f32(o1c0, vget_low_f32(vo1c01)); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
342 vst1_f32(o1c1, vget_high_f32(vo1c01)); o1c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
598 const float32x4_t vo1c01 = vzip1q_f32(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
601 vst1_f32(o1c0, vget_low_f32(vo1c01)); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
602 vst1_f32(o1c1, vget_high_f32(vo1c01)); o1c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
D3x3s2p1c3x4-sse-2x2.c337 const __m128 vo1c01 = _mm_unpacklo_ps(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
341 _mm_storel_pi((__m64 *)o1c0, vo1c01); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
342 … _mm_storel_pi((__m64 *)o1c1, _mm_shuffle_ps(vo1c01, vo1c01, _MM_SHUFFLE(3, 2, 3, 2))); o1c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
598 const __m128 vo1c01 = _mm_unpacklo_ps(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
601 _mm_storel_pi((__m64 *)o1c0, vo1c01); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
602 … _mm_storel_pi((__m64 *)o1c1, _mm_shuffle_ps(vo1c01, vo1c01, _MM_SHUFFLE(3, 2, 3, 2))); o1c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()