Home
last modified time | relevance | path

Searched refs:vi4x2 (Results 1 – 5 of 5) sorted by relevance

/external/XNNPACK/src/f32-dwconv-spchw/
D5x5p2-scalar.c85 float vi4x2 = *i4; i4 = (const float*) ((uintptr_t) i4 + input_tuple_stride); in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar() local
128 …const float vrow4_accum = vw21 * vi4x0 + vw22 * vi4x1 + vw23 * vi4x2 + vw24 * vi4x3 + vw25 * vi4x4; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar()
130 vi4x1 = vi4x2; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar()
131 vi4x2 = vi4x3; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar()
158 const float vrow4_accum = vw21 * vi4x0 + vw22 * vi4x1 + vw23 * vi4x2 + vw24 * vi4x3; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar()
160 vi4x1 = vi4x2; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar()
161 vi4x2 = vi4x3; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar()
177 const float vrow4_accum = vw21 * vi4x0 + vw22 * vi4x1 + vw23 * vi4x2; in xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar()
D5x5s2p2-scalar.c85 float vi4x2 = *i4; i4 = (const float*) ((uintptr_t) i4 + input_tuple_stride); in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar() local
118 …const float vrow4_accum = vw21 * vi4x0 + vw22 * vi4x1 + vw23 * vi4x2 + vw24 * vi4x3 + vw25 * vi4x4; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar()
119 vi4x0 = vi4x2; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar()
121 vi4x2 = vi4x4; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar()
141 const float vrow4_accum = vw21 * vi4x0 + vw22 * vi4x1 + vw23 * vi4x2 + vw24 * vi4x3; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar()
155 const float vrow4_accum = vw21 * vi4x0 + vw22 * vi4x1 + vw23 * vi4x2; in xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar()
/external/XNNPACK/src/f32-conv-hwc/
D3x3s2p1c3x8-neonfma-2x2.c148 const float32x4_t vi4x2 = vld1q_f32(i4); i4 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local
182 vo1x1c0123 = vfmaq_laneq_f32(vo1x1c0123, vk20c1x0123, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
184 vo1x1c4567 = vfmaq_laneq_f32(vo1x1c4567, vk20c1x4567, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
221 vo1x1c0123 = vfmaq_laneq_f32(vo1x1c0123, vk20c2x0123, vi4x2, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
223 vo1x1c4567 = vfmaq_laneq_f32(vo1x1c4567, vk20c2x4567, vi4x2, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
260 vo1x1c0123 = vfmaq_laneq_f32(vo1x1c0123, vk21c0x0123, vi4x2, 2); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
262 vo1x1c4567 = vfmaq_laneq_f32(vo1x1c4567, vk21c0x4567, vi4x2, 2); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
299 vo1x1c0123 = vfmaq_laneq_f32(vo1x1c0123, vk21c1x0123, vi4x2, 3); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
301 vo1x1c4567 = vfmaq_laneq_f32(vo1x1c4567, vk21c1x4567, vi4x2, 3); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
418 vo1x0c0123 = vfmaq_laneq_f32(vo1x0c0123, vk22c1x0123, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
[all …]
D3x3s2p1c3x4-neonfma-2x2.c125 const float32x4_t vi4x2 = vld1q_f32(i4); i4 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local
144 vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
165 vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c2, vi4x2, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
186 vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c0, vi4x2, 2); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
207 vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c1, vi4x2, 3); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
275 vo1x0 = vfmaq_laneq_f32(vo1x0, vk22c1, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
296 vo1x0 = vfmaq_laneq_f32(vo1x0, vk22c2, vi4x2, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
402 float32x4_t vi4x2 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local
409 vi4x2 = vld1q_f32(i4 + 4); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
429 vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
[all …]
/external/XNNPACK/src/f32-conv-hwc2spchw/
D3x3s2p1c3x4-neonfma-2x2.c144 const float32x4_t vi4x2 = vld1q_f32(i4); i4 += 4; in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
163 vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2()
184 vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c2, vi4x2, 1); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2()
205 vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c0, vi4x2, 2); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2()
226 vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c1, vi4x2, 3); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2()
294 vo1x0 = vfmaq_laneq_f32(vo1x0, vk22c1, vi4x2, 0); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2()
315 vo1x0 = vfmaq_laneq_f32(vo1x0, vk22c2, vi4x2, 1); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2()
398 float32x4_t vi4x2 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
405 vi4x2 = vld1q_f32(i4 + 4); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2()
425 vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0); in xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2()
[all …]