/external/XNNPACK/src/f32-dwconv-spchw/
D | 5x5p2-scalar.c | vi4x2 is a local in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar(); all hits below are in that function:
      85  float vi4x2 = *i4; i4 = (const float*) ((uintptr_t) i4 + input_tuple_stride);   (definition, local)
     128  …const float vrow4_accum = vw21 * vi4x0 + vw22 * vi4x1 + vw23 * vi4x2 + vw24 * vi4x3 + vw25 * vi4x4;
     130  vi4x1 = vi4x2;
     131  vi4x2 = vi4x3;
     158  const float vrow4_accum = vw21 * vi4x0 + vw22 * vi4x1 + vw23 * vi4x2 + vw24 * vi4x3;
     160  vi4x1 = vi4x2;
     161  vi4x2 = vi4x3;
     177  const float vrow4_accum = vw21 * vi4x0 + vw22 * vi4x1 + vw23 * vi4x2;
D | 5x5s2p2-scalar.c | vi4x2 is a local in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar(); all hits below are in that function:
      85  float vi4x2 = *i4; i4 = (const float*) ((uintptr_t) i4 + input_tuple_stride);   (definition, local)
     118  …const float vrow4_accum = vw21 * vi4x0 + vw22 * vi4x1 + vw23 * vi4x2 + vw24 * vi4x3 + vw25 * vi4x4;
     119  vi4x0 = vi4x2;
     121  vi4x2 = vi4x4;
     141  const float vrow4_accum = vw21 * vi4x0 + vw22 * vi4x1 + vw23 * vi4x2 + vw24 * vi4x3;
     155  const float vrow4_accum = vw21 * vi4x0 + vw22 * vi4x1 + vw23 * vi4x2;
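Note: the hits above show the scalar kernels' sliding-window trick. The five live inputs of row 4 are kept in the scalars vi4x0…vi4x4; after each output pixel the window is rotated in registers instead of being re-read from memory. The stride-1 kernel (5x5p2-scalar.c) shifts by one register, the stride-2 kernel (5x5s2p2-scalar.c) by two. A minimal standalone sketch of that pattern, with invented names (row_conv5_stride1, w, x, y are not XNNPACK's):

#include <stddef.h>

/* Sketch only: one row of a 5-tap convolution using the same register
 * rotation as 5x5p2-scalar.c.  Assumes out_len >= 1 and that x holds
 * out_len + 4 already-padded elements. */
void row_conv5_stride1(const float* x, const float w[5], float* y, size_t out_len)
{
  float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4];
  const float* p = x + 5;
  for (size_t i = 0; i < out_len; i++) {
    /* Same shape as the vrow4_accum lines at 128/158/177 above. */
    y[i] = w[0]*x0 + w[1]*x1 + w[2]*x2 + w[3]*x3 + w[4]*x4;
    if (i + 1 < out_len) {
      x0 = x1; x1 = x2; x2 = x3; x3 = x4;  /* shift the window by one (stride 1) */
      x4 = *p++;                           /* one fresh load per output */
    }
  }
}

For stride 2 the rotation skips a register (x0 = x2; x1 = x3; x2 = x4;) and two fresh values are loaded per output, which is exactly the vi4x0 = vi4x2; / vi4x2 = vi4x4; pattern in 5x5s2p2-scalar.c.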
/external/XNNPACK/src/f32-conv-hwc/
D | 3x3s2p1c3x8-neonfma-2x2.c | vi4x2 is a local in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2(); all hits below are in that function:
     148  const float32x4_t vi4x2 = vld1q_f32(i4); i4 += 4;   (definition, local)
     182  vo1x1c0123 = vfmaq_laneq_f32(vo1x1c0123, vk20c1x0123, vi4x2, 0);
     184  vo1x1c4567 = vfmaq_laneq_f32(vo1x1c4567, vk20c1x4567, vi4x2, 0);
     221  vo1x1c0123 = vfmaq_laneq_f32(vo1x1c0123, vk20c2x0123, vi4x2, 1);
     223  vo1x1c4567 = vfmaq_laneq_f32(vo1x1c4567, vk20c2x4567, vi4x2, 1);
     260  vo1x1c0123 = vfmaq_laneq_f32(vo1x1c0123, vk21c0x0123, vi4x2, 2);
     262  vo1x1c4567 = vfmaq_laneq_f32(vo1x1c4567, vk21c0x4567, vi4x2, 2);
     299  vo1x1c0123 = vfmaq_laneq_f32(vo1x1c0123, vk21c1x0123, vi4x2, 3);
     301  vo1x1c4567 = vfmaq_laneq_f32(vo1x1c4567, vk21c1x4567, vi4x2, 3);
     418  vo1x0c0123 = vfmaq_laneq_f32(vo1x0c0123, vk22c1x0123, vi4x2, 0);
     [all …]
D | 3x3s2p1c3x4-neonfma-2x2.c | vi4x2 is a local (twice) in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2(); all hits below are in that function:
     125  const float32x4_t vi4x2 = vld1q_f32(i4); i4 += 4;   (definition, local)
     144  vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0);
     165  vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c2, vi4x2, 1);
     186  vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c0, vi4x2, 2);
     207  vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c1, vi4x2, 3);
     275  vo1x0 = vfmaq_laneq_f32(vo1x0, vk22c1, vi4x2, 0);
     296  vo1x0 = vfmaq_laneq_f32(vo1x0, vk22c2, vi4x2, 1);
     402  float32x4_t vi4x2 = vmovq_n_f32(0.0f);   (definition, local)
     409  vi4x2 = vld1q_f32(i4 + 4);
     429  vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0);
     [all …]
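Note: every conv-hwc hit is the same primitive. vfmaq_laneq_f32(acc, w, v, lane) returns acc + w * v[lane]: one scalar input is broadcast and fused-multiply-accumulated against a vector of four output-channel weights. With 3 interleaved input channels in HWC, a 4-wide load such as vi4x2 straddles pixel boundaries, which is consistent with its four lanes being consumed against four different weight vectors (vk20c1, vk20c2, vk21c0, vk21c1 above). A sketch under those assumptions, AArch64 NEON, names invented:

#include <arm_neon.h>

/* Sketch only (accumulate_four_inputs is not an XNNPACK function).  Each
 * lane of the packed input vector is one scalar input value; the lane index
 * must be a compile-time constant, so the four steps are written out, as in
 * the vo1x1 = vfmaq_laneq_f32(...) hits above. */
static float32x4_t accumulate_four_inputs(float32x4_t acc,
                                          float32x4_t w0, float32x4_t w1,
                                          float32x4_t w2, float32x4_t w3,
                                          float32x4_t vin)
{
  acc = vfmaq_laneq_f32(acc, w0, vin, 0);  /* acc += w0 * vin[0] */
  acc = vfmaq_laneq_f32(acc, w1, vin, 1);  /* acc += w1 * vin[1] */
  acc = vfmaq_laneq_f32(acc, w2, vin, 2);  /* acc += w2 * vin[2] */
  acc = vfmaq_laneq_f32(acc, w3, vin, 3);  /* acc += w3 * vin[3] */
  return acc;
}

The c3x8 variant does the same work with two accumulator/weight pairs per output pixel (the c0123 and c4567 suffixes above) to produce eight output channels per step.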
/external/XNNPACK/src/f32-conv-hwc2spchw/
D | 3x3s2p1c3x4-neonfma-2x2.c | vi4x2 is a local (twice) in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2(); all hits below are in that function:
     144  const float32x4_t vi4x2 = vld1q_f32(i4); i4 += 4;   (definition, local)
     163  vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0);
     184  vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c2, vi4x2, 1);
     205  vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c0, vi4x2, 2);
     226  vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c1, vi4x2, 3);
     294  vo1x0 = vfmaq_laneq_f32(vo1x0, vk22c1, vi4x2, 0);
     315  vo1x0 = vfmaq_laneq_f32(vo1x0, vk22c2, vi4x2, 1);
     398  float32x4_t vi4x2 = vmovq_n_f32(0.0f);   (definition, local)
     405  vi4x2 = vld1q_f32(i4 + 4);
     425  vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0);
     [all …]
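Note: in both 3x3s2p1c3x4 files the second definition (vi4x2 = vmovq_n_f32(0.0f), conditionally followed by vi4x2 = vld1q_f32(i4 + 4)) is the right-edge idiom: zero the register, overwrite it with a real load only when the row actually extends that far, then run the FMA sequence unconditionally so the zeros act as padding. Sketched with an invented helper name:

#include <arm_neon.h>

/* Sketch only (load_or_zero is not an XNNPACK function).  Pre-zeroed
 * registers let the main FMA sequence run unconditionally at the edge:
 * lanes past the end of the row simply contribute 0 to the accumulators. */
static float32x4_t load_or_zero(const float* p, int in_bounds)
{
  float32x4_t v = vmovq_n_f32(0.0f);  /* implicit zero padding */
  if (in_bounds) {
    v = vld1q_f32(p);                 /* overwrite only when 4 floats exist */
  }
  return v;
}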