/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p1c3x8-neonfma-2x2.c | 98 float32x4_t vo0x1c4567 = vo0x0c4567; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 119 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk00c0x4567, vget_high_f32(vi0x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 132 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk10c0x4567, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 145 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk20c0x4567, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 165 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk00c1x4567, vget_low_f32(vi0x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 178 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk10c1x4567, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 191 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk20c1x4567, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 204 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk00c2x4567, vget_low_f32(vi0x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 217 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk10c2x4567, vget_low_f32(vi1x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 230 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk20c2x4567, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x8-neon-2x2.c | 96 float32x4_t vo0x1c4567 = vo0x0c4567; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 117 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk00c0x4567, vget_high_f32(vi0x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 130 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk10c0x4567, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 143 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk20c0x4567, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 163 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk00c1x4567, vget_low_f32(vi0x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 176 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk10c1x4567, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 189 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk20c1x4567, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 202 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk00c2x4567, vget_low_f32(vi0x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 215 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk10c2x4567, vget_low_f32(vi1x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 228 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk20c2x4567, vget_low_f32(vi2x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 98 float32x4_t vo0x1c4567 = vo0x0c4567; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 119 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk00c0x4567, vget_high_f32(vi0x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 132 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk10c0x4567, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 145 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk20c0x4567, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 158 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk00c1x4567, vget_high_f32(vi0x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 171 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk10c1x4567, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 184 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk20c1x4567, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 204 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk00c2x4567, vget_low_f32(vi0x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 217 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk10c2x4567, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 230 vo0x1c4567 = vfmaq_lane_f32(vo0x1c4567, vk20c2x4567, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 96 float32x4_t vo0x1c4567 = vo0x0c4567; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 117 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk00c0x4567, vget_high_f32(vi0x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 130 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk10c0x4567, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 143 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk20c0x4567, vget_high_f32(vi2x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 156 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk00c1x4567, vget_high_f32(vi0x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 169 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk10c1x4567, vget_high_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 182 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk20c1x4567, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 202 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk00c2x4567, vget_low_f32(vi0x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 215 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk10c2x4567, vget_low_f32(vi1x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 228 vo0x1c4567 = vmlaq_lane_f32(vo0x1c4567, vk20c2x4567, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() [all …]
|