/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p0p1c3x4-neon-2x1.c | 267 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 269 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 270 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 275 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 403 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 405 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 406 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 411 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
|
D | 3x3s2p0p1c3x4-neonfma-2x1.c | 267 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 269 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 270 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 275 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 401 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 403 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 404 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 409 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neonfma-2x1.c | 267 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 269 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 270 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 275 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 401 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 403 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 404 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 409 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neon-2x1.c | 267 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 269 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 270 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 275 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 403 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 405 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 406 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 411 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
|
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 362 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 364 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 365 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 370 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 564 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 566 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 567 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 572 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
|
D | 3x3s2p1c3x8-neon-2x1.c | 362 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 364 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 365 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 370 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 566 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 568 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 569 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 574 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 362 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 364 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 365 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 370 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 566 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 568 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 569 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 574 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x1.c | 362 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 364 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 365 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 370 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 564 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 566 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 567 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 572 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 570 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 572 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 573 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 578 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 703 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 705 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 706 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 711 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
|
D | 3x3s2p1c3x4-neon-2x2.c | 572 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 574 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 575 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 580 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 707 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 709 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 710 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 715 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
|
D | 3x3s2p0p1c3x4-neon-2x2.c | 572 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 574 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 575 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 580 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 707 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 709 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 710 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 715 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
|
D | 3x3s2p0p1c3x4-neonfma-2x2.c | 570 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 572 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 573 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 578 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 703 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 705 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 706 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 711 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 827 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 829 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 830 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 835 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 1028 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 1030 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 1031 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 1036 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
|
D | 3x3s2p1c3x8-neon-2x2.c | 829 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 831 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 832 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 837 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 1032 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 1034 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 1035 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 1040 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 827 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 829 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 830 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 835 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 1028 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 1030 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 1031 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 1036 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 829 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 831 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 832 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 837 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 1032 float32x2_t vo1c01 = vget_low_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 1034 vst1_f32(o1_tmp, vo1c01); o1_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 1035 vo1c01 = vget_high_f32(vo1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 1040 vst1_lane_f32(o1_tmp, vo1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
|
/external/XNNPACK/src/f32-conv-hwc2chw/ |
D | 3x3s2p1c3x4-wasmsimd-2x2.c | 338 const v128_t vo1c01 = wasm_v32x4_shuffle(vo1x0, vo1x1, 0, 4 + 0, 1, 4 + 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local 342 *((double*) o1c0) = wasm_f64x2_extract_lane(vo1c01, 0); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 343 …*((double*) o1c1) = wasm_f64x2_extract_lane(wasm_v32x4_shuffle(vo1c01, vo1c01, 2, 3, 2, 3), 0); o1… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 598 const v128_t vo1c01 = wasm_v32x4_shuffle(vo1x0, vo1x1, 0, 4 + 0, 1, 4 + 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local 601 *((double*) o1c0) = wasm_f64x2_extract_lane(vo1c01, 0); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 602 …*((double*) o1c1) = wasm_f64x2_extract_lane(wasm_v32x4_shuffle(vo1c01, vo1c01, 2, 3, 2, 3), 0); o1… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 337 const float32x4_t vo1c01 = vzip1q_f32(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local 341 vst1_f32(o1c0, vget_low_f32(vo1c01)); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 342 vst1_f32(o1c1, vget_high_f32(vo1c01)); o1c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 598 const float32x4_t vo1c01 = vzip1q_f32(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local 601 vst1_f32(o1c0, vget_low_f32(vo1c01)); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 602 vst1_f32(o1c1, vget_high_f32(vo1c01)); o1c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
|
D | 3x3s2p1c3x4-sse-2x2.c | 337 const __m128 vo1c01 = _mm_unpacklo_ps(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local 341 _mm_storel_pi((__m64 *)o1c0, vo1c01); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 342 … _mm_storel_pi((__m64 *)o1c1, _mm_shuffle_ps(vo1c01, vo1c01, _MM_SHUFFLE(3, 2, 3, 2))); o1c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 598 const __m128 vo1c01 = _mm_unpacklo_ps(vo1x0, vo1x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local 601 _mm_storel_pi((__m64 *)o1c0, vo1c01); o1c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 602 … _mm_storel_pi((__m64 *)o1c1, _mm_shuffle_ps(vo1c01, vo1c01, _MM_SHUFFLE(3, 2, 3, 2))); o1c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
|