/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p0p1c3x4-neon-2x1.c | 266 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 271 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 272 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 276 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 402 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 407 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 408 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 412 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
|
D | 3x3s2p0p1c3x4-neonfma-2x1.c | 266 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 271 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 272 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 276 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 400 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 405 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 406 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 410 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neonfma-2x1.c | 266 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 271 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 272 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 276 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 400 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 405 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 406 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 410 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neon-2x1.c | 266 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 271 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 272 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 276 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 402 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 407 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 408 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 412 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
|
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 361 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 366 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 367 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 371 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 563 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 568 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 569 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 573 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
|
D | 3x3s2p1c3x8-neon-2x1.c | 361 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 366 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 367 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 371 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 565 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 570 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 571 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 575 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 361 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 366 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 367 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 371 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 565 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 570 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 571 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 575 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x1.c | 361 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 366 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 367 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 371 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 563 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 568 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 569 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 573 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 569 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 574 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 575 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 579 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 702 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 707 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 708 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 712 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
|
D | 3x3s2p1c3x4-neon-2x2.c | 571 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 576 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 577 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 581 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 706 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 711 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 712 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 716 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
|
D | 3x3s2p0p1c3x4-neon-2x2.c | 571 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 576 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 577 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 581 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 706 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 711 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 712 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 716 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
|
D | 3x3s2p0p1c3x4-neonfma-2x2.c | 569 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 574 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 575 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 579 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 702 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 707 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 708 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 712 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 826 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 831 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 832 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 836 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 1027 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 1032 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 1033 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 1037 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
|
D | 3x3s2p1c3x8-neon-2x2.c | 828 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 833 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 834 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 838 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 1031 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 1036 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 1037 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 1041 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 826 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 831 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 832 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 836 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 1027 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 1032 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 1033 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 1037 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 828 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 833 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 834 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 838 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 1031 float32x2_t vo0c01 = vget_low_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 1036 vst1_f32(o0_tmp, vo0c01); o0_tmp += 2; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 1037 vo0c01 = vget_high_f32(vo0c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 1041 vst1_lane_f32(o0_tmp, vo0c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
|
/external/XNNPACK/src/f32-conv-hwc2chw/ |
D | 3x3s2p1c3x4-wasmsimd-2x2.c | 336 const v128_t vo0c01 = wasm_v32x4_shuffle(vo0x0, vo0x1, 0, 4 + 0, 1, 4 + 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local 346 *((double*) o0c0) = wasm_f64x2_extract_lane(vo0c01, 0); o0c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 347 …*((double*) o0c1) = wasm_f64x2_extract_lane(wasm_v32x4_shuffle(vo0c01, vo0c01, 2, 3, 2, 3), 0); o0… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 596 const v128_t vo0c01 = wasm_v32x4_shuffle(vo0x0, vo0x1, 0, 4 + 0, 1, 4 + 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local 606 *((double*) o0c0) = wasm_f64x2_extract_lane(vo0c01, 0); o0c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 607 …*((double*) o0c1) = wasm_f64x2_extract_lane(wasm_v32x4_shuffle(vo0c01, vo0c01, 2, 3, 2, 3), 0); o0… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 335 const float32x4_t vo0c01 = vzip1q_f32(vo0x0, vo0x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local 346 vst1_f32(o0c0, vget_low_f32(vo0c01)); o0c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 347 vst1_f32(o0c1, vget_high_f32(vo0c01)); o0c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 596 const float32x4_t vo0c01 = vzip1q_f32(vo0x0, vo0x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local 606 vst1_f32(o0c0, vget_low_f32(vo0c01)); o0c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 607 vst1_f32(o0c1, vget_high_f32(vo0c01)); o0c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
|
D | 3x3s2p1c3x4-sse-2x2.c | 335 const __m128 vo0c01 = _mm_unpacklo_ps(vo0x0, vo0x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local 346 _mm_storel_pi((__m64 *)o0c0, vo0c01); o0c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 347 … _mm_storel_pi((__m64 *)o0c1, _mm_shuffle_ps(vo0c01, vo0c01, _MM_SHUFFLE(3, 2, 3, 2))); o0c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 596 const __m128 vo0c01 = _mm_unpacklo_ps(vo0x0, vo0x1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local 606 _mm_storel_pi((__m64 *)o0c0, vo0c01); o0c0 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 607 … _mm_storel_pi((__m64 *)o0c1, _mm_shuffle_ps(vo0c01, vo0c01, _MM_SHUFFLE(3, 2, 3, 2))); o0c1 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
|