/external/XNNPACK/src/f32-prelu/gen/ |
D | sse41-2x8.c | 54 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse41_2x8() local 61 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x8() 66 __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x8() 92 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse41_2x8() local 97 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x8() 100 __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x8() 118 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse41_2x8() local 123 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x8() 126 __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x8()
|
D | psimd-2x8.c | 54 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_prelu_ukernel__psimd_2x8() local 61 psimd_f32 vacc0x0123 = psimd_mul_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__psimd_2x8() 66 vacc0x0123 = psimd_signblend_f32(vi0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__psimd_2x8() 92 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_prelu_ukernel__psimd_2x8() local 97 psimd_f32 vacc0x0123 = psimd_mul_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__psimd_2x8() 100 vacc0x0123 = psimd_signblend_f32(vi0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__psimd_2x8() 118 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_prelu_ukernel__psimd_2x8() local 123 psimd_f32 vacc0x0123 = psimd_mul_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__psimd_2x8() 126 vacc0x0123 = psimd_signblend_f32(vi0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__psimd_2x8()
|
D | neon-2x8.c | 53 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_prelu_ukernel__neon_2x8() local 58 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x8() 59 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x8() 67 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_2x8() 90 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_2x8() local 95 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x8() 96 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x8() 100 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_2x8() 115 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_2x8() local 120 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x8() [all …]
|
D | sse2-2x8.c | 54 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse2_2x8() local 61 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x8() 62 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x8() 70 …vacc0x0123 = _mm_or_ps(_mm_and_ps(vprod0x0123, vmask0x0123), _mm_andnot_ps(vmask0x0123, vi0x0123)); in xnn_f32_prelu_ukernel__sse2_2x8() 96 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse2_2x8() local 101 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x8() 102 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x8() 106 …vacc0x0123 = _mm_or_ps(_mm_and_ps(vprod0x0123, vmask0x0123), _mm_andnot_ps(vmask0x0123, vi0x0123)); in xnn_f32_prelu_ukernel__sse2_2x8() 124 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse2_2x8() local 129 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x8() [all …]
|
D | sse41-2x4.c | 53 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse41_2x4() local 58 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x4() 61 __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x4() 79 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse41_2x4() local 84 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x4() 87 __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x4()
|
D | psimd-2x4.c | 53 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_prelu_ukernel__psimd_2x4() local 58 psimd_f32 vacc0x0123 = psimd_mul_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__psimd_2x4() 61 vacc0x0123 = psimd_signblend_f32(vi0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__psimd_2x4() 79 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_prelu_ukernel__psimd_2x4() local 84 psimd_f32 vacc0x0123 = psimd_mul_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__psimd_2x4() 87 vacc0x0123 = psimd_signblend_f32(vi0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__psimd_2x4()
|
D | neon-2x4.c | 52 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_prelu_ukernel__neon_2x4() local 55 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x4() 56 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x4() 60 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_2x4() 75 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_2x4() local 80 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x4() 81 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x4() 85 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_2x4()
|
D | sse2-2x4.c | 53 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse2_2x4() local 58 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x4() 59 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x4() 63 …vacc0x0123 = _mm_or_ps(_mm_and_ps(vprod0x0123, vmask0x0123), _mm_andnot_ps(vmask0x0123, vi0x0123)); in xnn_f32_prelu_ukernel__sse2_2x4() 81 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse2_2x4() local 86 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x4() 87 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x4() 91 …vacc0x0123 = _mm_or_ps(_mm_and_ps(vprod0x0123, vmask0x0123), _mm_andnot_ps(vmask0x0123, vi0x0123)); in xnn_f32_prelu_ukernel__sse2_2x4()
|
/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x4-psimd.c | 50 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up8x4__psimd() local 56 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd() 101 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up8x4__psimd() local 105 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd() 137 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up8x4__psimd() local 139 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd()
|
D | up8x4-psimd-acc2.c | 50 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() local 56 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() 104 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() local 108 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() 142 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2() local 144 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd_acc2()
|
D | up8x4-sse.c | 50 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up8x4__sse() local 56 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse() 101 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up8x4__sse() local 105 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse() 137 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up8x4__sse() local 139 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse()
|
D | up8x4-sse-acc2.c | 50 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() local 56 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 104 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() local 108 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() 142 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2() local 144 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse_acc2()
|
D | up4x4-psimd.c | 49 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up4x4__psimd() local 53 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd() 85 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up4x4__psimd() local 87 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd()
|
D | up4x4-psimd-acc2.c | 49 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up4x4__psimd_acc2() local 53 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd_acc2() 87 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up4x4__psimd_acc2() local 89 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd_acc2()
|
D | up4x4-sse.c | 49 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up4x4__sse() local 53 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x4__sse() 85 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up4x4__sse() local 87 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x4__sse()
|
D | up4x4-sse-acc2.c | 49 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() local 53 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() 87 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2() local 89 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x4__sse_acc2()
|
D | up8x9-psimd.c | 60 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up8x9__psimd() local 66 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd() 156 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up8x9__psimd() local 160 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd() 222 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up8x9__psimd() local 224 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
|
D | up8x9-neon-acc2.c | 60 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local 64 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() 139 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local 141 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() 187 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local 189 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
|
D | up8x9-neonfma-acc2.c | 60 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local 64 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() 139 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local 141 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() 187 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local 189 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
|
D | up8x9-sse-acc2.c | 60 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local 66 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 159 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local 163 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() 227 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local 229 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
|
D | up8x9-neon.c | 60 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_ukernel_up8x9__neon() local 64 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neon() 136 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_ukernel_up8x9__neon() local 138 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neon() 182 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_dwconv_ukernel_up8x9__neon() local 184 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
|
D | up8x9-sse.c | 60 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up8x9__sse() local 66 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse() 156 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up8x9__sse() local 160 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse() 222 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_dwconv_ukernel_up8x9__sse() local 224 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
|
D | up8x9-psimd-acc2.c | 60 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local 66 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() 159 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local 163 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() 227 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local 229 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
|
D | up8x9-neonfma.c | 60 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma() local 64 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma() 136 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma() local 138 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma() 182 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_dwconv_ukernel_up8x9__neonfma() local 184 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
|
D | up4x9-psimd-acc2.c | 59 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() local 63 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() 127 const psimd_f32 vi0x0123 = psimd_load_f32(i0); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() local 129 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
|