/external/XNNPACK/src/f32-dwconv-spchw/ |
D | 5x5p2-scalar.c | 84 float vi3x2 = *i3; i3 = (const float*) ((uintptr_t) i3 + input_tuple_stride); in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() local 123 …const float vrow3_accum = vw16 * vi3x0 + vw17 * vi3x1 + vw18 * vi3x2 + vw19 * vi3x3 + vw20 * vi3x4; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() 125 vi3x1 = vi3x2; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() 126 vi3x2 = vi3x3; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() 154 const float vrow3_accum = vw16 * vi3x0 + vw17 * vi3x1 + vw18 * vi3x2 + vw19 * vi3x3; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() 156 vi3x1 = vi3x2; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() 157 vi3x2 = vi3x3; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() 176 const float vrow3_accum = vw16 * vi3x0 + vw17 * vi3x1 + vw18 * vi3x2; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar()
|
D | 5x5s2p2-scalar.c | 84 float vi3x2 = *i3; i3 = (const float*) ((uintptr_t) i3 + input_tuple_stride); in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar() local 114 …const float vrow3_accum = vw16 * vi3x0 + vw17 * vi3x1 + vw18 * vi3x2 + vw19 * vi3x3 + vw20 * vi3x4; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar() 115 vi3x0 = vi3x2; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar() 117 vi3x2 = vi3x4; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar() 140 const float vrow3_accum = vw16 * vi3x0 + vw17 * vi3x1 + vw18 * vi3x2 + vw19 * vi3x3; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar() 154 const float vrow3_accum = vw16 * vi3x0 + vw17 * vi3x1 + vw18 * vi3x2; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar()
|
/external/XNNPACK/src/f32-conv-hwc/ |
D | 3x3s2p1c3x8-neonfma-2x2.c | 147 const float32x4_t vi3x2 = vld1q_f32(i3); i3 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 169 vo1x1c0123 = vfmaq_laneq_f32(vo1x1c0123, vk10c1x0123, vi3x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 171 vo1x1c4567 = vfmaq_laneq_f32(vo1x1c4567, vk10c1x4567, vi3x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 208 vo1x1c0123 = vfmaq_laneq_f32(vo1x1c0123, vk10c2x0123, vi3x2, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 210 vo1x1c4567 = vfmaq_laneq_f32(vo1x1c4567, vk10c2x4567, vi3x2, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 247 vo1x1c0123 = vfmaq_laneq_f32(vo1x1c0123, vk11c0x0123, vi3x2, 2); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 249 vo1x1c4567 = vfmaq_laneq_f32(vo1x1c4567, vk11c0x4567, vi3x2, 2); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 286 vo1x1c0123 = vfmaq_laneq_f32(vo1x1c0123, vk11c1x0123, vi3x2, 3); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 288 vo1x1c4567 = vfmaq_laneq_f32(vo1x1c4567, vk11c1x4567, vi3x2, 3); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 405 vo1x0c0123 = vfmaq_laneq_f32(vo1x0c0123, vk12c1x0123, vi3x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 124 const float32x4_t vi3x2 = vld1q_f32(i3); i3 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 137 vo1x1 = vfmaq_laneq_f32(vo1x1, vk10c1, vi3x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 158 vo1x1 = vfmaq_laneq_f32(vo1x1, vk10c2, vi3x2, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 179 vo1x1 = vfmaq_laneq_f32(vo1x1, vk11c0, vi3x2, 2); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 200 vo1x1 = vfmaq_laneq_f32(vo1x1, vk11c1, vi3x2, 3); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 268 vo1x0 = vfmaq_laneq_f32(vo1x0, vk12c1, vi3x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 289 vo1x0 = vfmaq_laneq_f32(vo1x0, vk12c2, vi3x2, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 401 float32x4_t vi3x2 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 408 vi3x2 = vld1q_f32(i3 + 4); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 422 vo1x1 = vfmaq_laneq_f32(vo1x1, vk10c1, vi3x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
/external/XNNPACK/src/f32-conv-hwc2spchw/ |
D | 3x3s2p1c3x4-neonfma-2x2.c | 143 const float32x4_t vi3x2 = vld1q_f32(i3); i3 += 4; in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() local 156 vo1x1 = vfmaq_laneq_f32(vo1x1, vk10c1, vi3x2, 0); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 177 vo1x1 = vfmaq_laneq_f32(vo1x1, vk10c2, vi3x2, 1); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 198 vo1x1 = vfmaq_laneq_f32(vo1x1, vk11c0, vi3x2, 2); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 219 vo1x1 = vfmaq_laneq_f32(vo1x1, vk11c1, vi3x2, 3); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 287 vo1x0 = vfmaq_laneq_f32(vo1x0, vk12c1, vi3x2, 0); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 308 vo1x0 = vfmaq_laneq_f32(vo1x0, vk12c2, vi3x2, 1); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 397 float32x4_t vi3x2 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() local 404 vi3x2 = vld1q_f32(i3 + 4); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() 418 vo1x1 = vfmaq_laneq_f32(vo1x1, vk10c1, vi3x2, 0); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|