/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p1c3x4-neonfma-2x2.c | 144 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 146 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 147 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 149 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 150 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 419 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 421 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 422 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 610 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 612 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x4-neon-2x2.c | 142 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 144 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 145 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 147 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 148 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 419 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 421 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 422 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 612 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 614 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x2.c | 135 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 137 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 138 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 140 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 141 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 419 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 421 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 422 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 612 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 614 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neonfma-2x2.c | 137 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 139 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 140 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 142 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 143 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 419 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 421 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 422 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 610 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 612 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x1.c | 114 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 116 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 117 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 308 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 310 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 311 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
|
D | 3x3s2p0p1c3x4-neonfma-2x1.c | 116 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 118 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 119 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 308 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 310 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 311 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neonfma-2x1.c | 116 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 118 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 119 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 308 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 310 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 311 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neon-2x1.c | 114 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 116 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 117 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 308 const float32x4_t vk10c1x0123 = vld1q_f32(w + 20); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 310 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 311 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 168 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 171 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 172 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 176 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 177 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 595 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 598 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 599 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 881 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 884 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x8-neon-2x2.c | 166 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 169 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 170 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 174 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 175 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_low_f32(vi3x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 595 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 598 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 599 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 883 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 886 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 161 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 164 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 165 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 169 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 170 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 595 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 598 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 599 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 881 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 884 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 159 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 162 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 163 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 167 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk10c1x0123, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 168 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk10c1x0123, vget_high_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 595 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 598 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 599 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 883 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 886 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 130 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 133 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 134 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 417 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 420 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 421 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
|
D | 3x3s2p1c3x8-neon-2x1.c | 128 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 131 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 132 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 417 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 420 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 421 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 128 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 131 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 132 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 417 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 420 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk10c1x0123, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 421 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x1.c | 130 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 133 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 134 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 417 const float32x4_t vk10c1x0123 = vld1q_f32(w + 40); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 420 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk10c1x0123, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 421 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
|
/external/XNNPACK/src/f32-conv-hwc2chw/ |
D | 3x3s2p1c3x4-sse-1x1.c | 102 const __m128 vk10c1x0123 = _mm_load_ps(w + 20); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() local 103 voc0123 = _mm_add_ps(voc0123, _mm_mul_ps(vk10c1x0123, vi10c1)); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() 229 const __m128 vk10c1x0123 = _mm_load_ps(w + 20); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() local 230 voc0123 = _mm_add_ps(voc0123, _mm_mul_ps(vk10c1x0123, vi10c1)); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1()
|