Home
last modified time | relevance | path

Searched refs:vk8x0123 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/f32-dwconv/gen/
Dup8x9-psimd.c136 const psimd_f32 vk8x0123 = psimd_load_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x9__psimd() local
138 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
207 const psimd_f32 vk8x0123 = psimd_load_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x9__psimd() local
208 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
255 const psimd_f32 vk8x0123 = psimd_load_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x9__psimd() local
256 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
Dup8x9-neon-acc2.c118 const float32x4_t vk8x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local
120 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
172 const float32x4_t vk8x0123 = vld1q_f32(w + 68); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local
173 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
220 const float32x4_t vk8x0123 = vld1q_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local
221 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
Dup8x9-neonfma-acc2.c118 const float32x4_t vk8x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local
120 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
172 const float32x4_t vk8x0123 = vld1q_f32(w + 68); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local
173 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
220 const float32x4_t vk8x0123 = vld1q_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local
221 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
Dup8x9-sse-acc2.c136 const __m128 vk8x0123 = _mm_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local
138 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
210 const __m128 vk8x0123 = _mm_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local
211 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
260 const __m128 vk8x0123 = _mm_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local
261 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
Dup8x9-neon.c118 const float32x4_t vk8x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neon() local
120 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
169 const float32x4_t vk8x0123 = vld1q_f32(w + 68); in xnn_f32_dwconv_ukernel_up8x9__neon() local
170 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
215 const float32x4_t vk8x0123 = vld1q_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x9__neon() local
216 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
Dup8x9-sse.c136 const __m128 vk8x0123 = _mm_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__sse() local
138 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
207 const __m128 vk8x0123 = _mm_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__sse() local
208 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
255 const __m128 vk8x0123 = _mm_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x9__sse() local
256 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
Dup8x9-psimd-acc2.c136 const psimd_f32 vk8x0123 = psimd_load_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local
138 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
210 const psimd_f32 vk8x0123 = psimd_load_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local
211 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
260 const psimd_f32 vk8x0123 = psimd_load_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local
261 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
Dup8x9-neonfma.c118 const float32x4_t vk8x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma() local
120 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
169 const float32x4_t vk8x0123 = vld1q_f32(w + 68); in xnn_f32_dwconv_ukernel_up8x9__neonfma() local
170 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
215 const float32x4_t vk8x0123 = vld1q_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x9__neonfma() local
216 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
Dup4x9-psimd-acc2.c110 const psimd_f32 vk8x0123 = psimd_load_f32(w + 36); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() local
111 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
160 const psimd_f32 vk8x0123 = psimd_load_f32(w + 36); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() local
161 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
Dup4x9-sse-acc2.c110 const __m128 vk8x0123 = _mm_load_ps(w + 36); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() local
111 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
160 const __m128 vk8x0123 = _mm_load_ps(w + 36); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() local
161 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
Dup4x9-neon.c92 const float32x4_t vk8x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neon() local
93 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
138 const float32x4_t vk8x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neon() local
139 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
Dup4x9-neonfma.c92 const float32x4_t vk8x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neonfma() local
93 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
138 const float32x4_t vk8x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neonfma() local
139 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
Dup4x9-sse.c110 const __m128 vk8x0123 = _mm_load_ps(w + 36); in xnn_f32_dwconv_ukernel_up4x9__sse() local
111 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
158 const __m128 vk8x0123 = _mm_load_ps(w + 36); in xnn_f32_dwconv_ukernel_up4x9__sse() local
159 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
Dup4x9-psimd.c110 const psimd_f32 vk8x0123 = psimd_load_f32(w + 36); in xnn_f32_dwconv_ukernel_up4x9__psimd() local
111 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
158 const psimd_f32 vk8x0123 = psimd_load_f32(w + 36); in xnn_f32_dwconv_ukernel_up4x9__psimd() local
159 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
Dup4x9-neonfma-acc2.c92 const float32x4_t vk8x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() local
93 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2()
140 const float32x4_t vk8x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() local
141 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2()
Dup4x9-neon-acc2.c92 const float32x4_t vk8x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() local
93 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2()
140 const float32x4_t vk8x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() local
141 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2()
Dup4x25-psimd-acc2.c142 const psimd_f32 vk8x0123 = psimd_load_f32(w + 36); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() local
143 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
288 const psimd_f32 vk8x0123 = psimd_load_f32(w + 36); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() local
289 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
Dup4x25-psimd.c142 const psimd_f32 vk8x0123 = psimd_load_f32(w + 36); in xnn_f32_dwconv_ukernel_up4x25__psimd() local
143 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
286 const psimd_f32 vk8x0123 = psimd_load_f32(w + 36); in xnn_f32_dwconv_ukernel_up4x25__psimd() local
287 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
Dup4x25-sse.c142 const __m128 vk8x0123 = _mm_load_ps(w + 36); in xnn_f32_dwconv_ukernel_up4x25__sse() local
143 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
286 const __m128 vk8x0123 = _mm_load_ps(w + 36); in xnn_f32_dwconv_ukernel_up4x25__sse() local
287 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
Dup4x25-sse-acc2.c142 const __m128 vk8x0123 = _mm_load_ps(w + 36); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() local
143 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
288 const __m128 vk8x0123 = _mm_load_ps(w + 36); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() local
289 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
Dup8x25-psimd.c168 const psimd_f32 vk8x0123 = psimd_load_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x25__psimd() local
170 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
383 const psimd_f32 vk8x0123 = psimd_load_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x25__psimd() local
384 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
527 const psimd_f32 vk8x0123 = psimd_load_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x25__psimd() local
528 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
Dup8x25-psimd-acc2.c168 const psimd_f32 vk8x0123 = psimd_load_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() local
170 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
386 const psimd_f32 vk8x0123 = psimd_load_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() local
387 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
532 const psimd_f32 vk8x0123 = psimd_load_f32(w + 72); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() local
533 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
Dup8x25-sse.c168 const __m128 vk8x0123 = _mm_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__sse() local
170 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
383 const __m128 vk8x0123 = _mm_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__sse() local
384 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
527 const __m128 vk8x0123 = _mm_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__sse() local
528 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
Dup8x25-sse-acc2.c168 const __m128 vk8x0123 = _mm_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() local
170 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
386 const __m128 vk8x0123 = _mm_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() local
387 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
532 const __m128 vk8x0123 = _mm_load_ps(w + 72); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() local
533 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()