/external/XNNPACK/src/f32-dwconv-spchw/ |
D | 5x5p2-neonfma.c | 69 float32x4_t vi5x4567 = vld1q_f32(i5); i5 = (const float*) ((uintptr_t) i5 + input_tuple_stride); in xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma() local 100 vo4567p20 = vfmaq_laneq_f32(vo4567p20, vi5x4567, vwGHIJ, 2); in xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma() 103 vo4567p10 = vfmaq_laneq_f32(vo4567p10, vi5x4567, vwKLMN, 3); in xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma() 112 const float32x4_t vi5x3456 = vextq_f32(vi5x0123, vi5x4567, 3); in xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma() 140 const float32x4_t vi5x2345 = vextq_f32(vi5x0123, vi5x4567, 2); in xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma() 168 vi5x0123 = vi5x4567; in xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma() 176 const float32x4_t vi5x5678 = vextq_f32(vi5x4567, vi5x89AB, 1); in xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma() 204 const float32x4_t vi5x6789 = vextq_f32(vi5x4567, vi5x89AB, 2); in xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma() 232 vi5x4567 = vi5x89AB; in xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma() 287 vo4567p20 = vfmaq_laneq_f32(vo4567p20, vi5x4567, vwGHIJ, 2); in xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma() [all …]
|
/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x9-psimd.c | 106 const psimd_f32 vi5x4567 = psimd_load_f32(i5 + 4); in xnn_f32_dwconv_ukernel_up8x9__psimd() local 112 vacc4567p0 = psimd_qfma_f32(vacc4567p0, vi5x4567, vk5x4567); in xnn_f32_dwconv_ukernel_up8x9__psimd()
|
D | up8x9-neon-acc2.c | 96 const float32x4_t vi5x4567 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local 100 vacc4567p1 = vmlaq_f32(vacc4567p1, vi5x4567, vk5x4567); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
|
D | up8x9-neonfma-acc2.c | 96 const float32x4_t vi5x4567 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local 100 vacc4567p1 = vfmaq_f32(vacc4567p1, vi5x4567, vk5x4567); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
|
D | up8x9-sse-acc2.c | 106 const __m128 vi5x4567 = _mm_loadu_ps(i5 + 4); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local 112 vacc4567p1 = _mm_add_ps(vacc4567p1, _mm_mul_ps(vi5x4567, vk5x4567)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
|
D | up8x9-neon.c | 96 const float32x4_t vi5x4567 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_ukernel_up8x9__neon() local 100 vacc4567p0 = vmlaq_f32(vacc4567p0, vi5x4567, vk5x4567); in xnn_f32_dwconv_ukernel_up8x9__neon()
|
D | up8x9-sse.c | 106 const __m128 vi5x4567 = _mm_loadu_ps(i5 + 4); in xnn_f32_dwconv_ukernel_up8x9__sse() local 112 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi5x4567, vk5x4567)); in xnn_f32_dwconv_ukernel_up8x9__sse()
|
D | up8x9-psimd-acc2.c | 106 const psimd_f32 vi5x4567 = psimd_load_f32(i5 + 4); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local 112 vacc4567p1 = psimd_qfma_f32(vacc4567p1, vi5x4567, vk5x4567); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
|
D | up8x9-neonfma.c | 96 const float32x4_t vi5x4567 = vld1q_f32(i5); i5 += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma() local 100 vacc4567p0 = vfmaq_f32(vacc4567p0, vi5x4567, vk5x4567); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
|
D | up8x25-psimd.c | 138 const psimd_f32 vi5x4567 = psimd_load_f32(i5 + 4); in xnn_f32_dwconv_ukernel_up8x25__psimd() local 144 vacc4567p0 = psimd_qfma_f32(vacc4567p0, vi5x4567, vk5x4567); in xnn_f32_dwconv_ukernel_up8x25__psimd()
|
D | up8x25-psimd-acc2.c | 138 const psimd_f32 vi5x4567 = psimd_load_f32(i5 + 4); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() local 144 vacc4567p1 = psimd_qfma_f32(vacc4567p1, vi5x4567, vk5x4567); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
|
D | up8x25-sse.c | 138 const __m128 vi5x4567 = _mm_loadu_ps(i5 + 4); in xnn_f32_dwconv_ukernel_up8x25__sse() local 144 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi5x4567, vk5x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse()
|
D | up8x25-sse-acc2.c | 138 const __m128 vi5x4567 = _mm_loadu_ps(i5 + 4); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() local 144 vacc4567p1 = _mm_add_ps(vacc4567p1, _mm_mul_ps(vi5x4567, vk5x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
|