/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p1c3x4-neonfma-2x2.c | 113 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 115 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 116 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 118 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 119 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 404 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 406 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 407 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 595 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 597 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x4-neon-2x2.c | 111 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 113 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 114 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 116 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 117 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 404 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 406 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 407 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 597 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 599 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x2.c | 111 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 113 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 114 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 116 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 117 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 404 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 406 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 407 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 597 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 599 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neonfma-2x2.c | 113 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 115 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 116 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 118 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 119 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 404 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 406 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 407 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 595 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 597 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x1.c | 99 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 101 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 102 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 293 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 295 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 296 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
|
D | 3x3s2p0p1c3x4-neonfma-2x1.c | 101 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 103 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 104 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 293 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 295 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 296 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neonfma-2x1.c | 101 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 103 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 104 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 293 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 295 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 296 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neon-2x1.c | 99 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 101 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 102 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 293 const float32x4_t vk10c0x0123 = vld1q_f32(w + 8); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 295 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 296 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 122 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 125 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 126 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 130 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 131 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 571 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 574 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 575 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 857 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 860 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x8-neon-2x2.c | 120 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 123 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 124 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 128 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 129 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 571 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 574 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 575 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 859 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 862 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 122 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 125 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 126 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 130 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 131 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 571 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 574 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 575 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 857 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 860 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 120 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 123 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 124 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 128 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c0x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 129 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c0x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 571 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 574 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 575 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 859 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 862 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 106 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 109 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 110 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 393 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 396 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 397 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
|
D | 3x3s2p1c3x8-neon-2x1.c | 104 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 107 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 108 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 393 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 396 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 397 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 104 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 107 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 108 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 393 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 396 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 397 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x1.c | 106 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 109 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 110 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 393 const float32x4_t vk10c0x0123 = vld1q_f32(w + 16); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 396 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c0x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 397 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
|
/external/XNNPACK/src/f32-conv-hwc2chw/ |
D | 3x3s2p1c3x4-sse-1x1.c | 93 const __m128 vk10c0x0123 = _mm_load_ps(w + 8); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() local 94 voc0123 = _mm_add_ps(voc0123, _mm_mul_ps(vk10c0x0123, vi10c0)); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() 220 const __m128 vk10c0x0123 = _mm_load_ps(w + 8); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() local 221 voc0123 = _mm_add_ps(voc0123, _mm_mul_ps(vk10c0x0123, vi10c0)); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1()
|