Home
last modified time | relevance | path

Searched refs:vk4x0123 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/f32-dwconv/gen/
Dup8x9-psimd.c100 const psimd_f32 vk4x0123 = psimd_load_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x9__psimd() local
102 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
183 const psimd_f32 vk4x0123 = psimd_load_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x9__psimd() local
184 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
239 const psimd_f32 vk4x0123 = psimd_load_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x9__psimd() local
240 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
Dup8x9-neon-acc2.c90 const float32x4_t vk4x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local
92 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
156 const float32x4_t vk4x0123 = vld1q_f32(w + 36); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local
157 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
204 const float32x4_t vk4x0123 = vld1q_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local
205 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
Dup8x9-neonfma-acc2.c90 const float32x4_t vk4x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local
92 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
156 const float32x4_t vk4x0123 = vld1q_f32(w + 36); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local
157 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
204 const float32x4_t vk4x0123 = vld1q_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local
205 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
Dup8x9-sse-acc2.c100 const __m128 vk4x0123 = _mm_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local
102 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
186 const __m128 vk4x0123 = _mm_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local
187 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
244 const __m128 vk4x0123 = _mm_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local
245 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
Dup8x9-neon.c90 const float32x4_t vk4x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neon() local
92 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
153 const float32x4_t vk4x0123 = vld1q_f32(w + 36); in xnn_f32_dwconv_ukernel_up8x9__neon() local
154 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
199 const float32x4_t vk4x0123 = vld1q_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x9__neon() local
200 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
Dup8x9-sse.c100 const __m128 vk4x0123 = _mm_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__sse() local
102 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
183 const __m128 vk4x0123 = _mm_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__sse() local
184 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
239 const __m128 vk4x0123 = _mm_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x9__sse() local
240 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
Dup8x9-psimd-acc2.c100 const psimd_f32 vk4x0123 = psimd_load_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local
102 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
186 const psimd_f32 vk4x0123 = psimd_load_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local
187 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
244 const psimd_f32 vk4x0123 = psimd_load_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local
245 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
Dup8x9-neonfma.c90 const float32x4_t vk4x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma() local
92 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
153 const float32x4_t vk4x0123 = vld1q_f32(w + 36); in xnn_f32_dwconv_ukernel_up8x9__neonfma() local
154 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
199 const float32x4_t vk4x0123 = vld1q_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x9__neonfma() local
200 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
Dup4x9-psimd-acc2.c86 const psimd_f32 vk4x0123 = psimd_load_f32(w + 20); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() local
87 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
144 const psimd_f32 vk4x0123 = psimd_load_f32(w + 20); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() local
145 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
Dup4x9-sse-acc2.c86 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() local
87 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
144 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() local
145 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
Dup4x9-neon.c76 const float32x4_t vk4x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neon() local
77 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
122 const float32x4_t vk4x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neon() local
123 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
Dup4x9-neonfma.c76 const float32x4_t vk4x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neonfma() local
77 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
122 const float32x4_t vk4x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neonfma() local
123 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
Dup4x9-sse.c86 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x9__sse() local
87 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
142 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x9__sse() local
143 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
Dup4x9-psimd.c86 const psimd_f32 vk4x0123 = psimd_load_f32(w + 20); in xnn_f32_dwconv_ukernel_up4x9__psimd() local
87 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
142 const psimd_f32 vk4x0123 = psimd_load_f32(w + 20); in xnn_f32_dwconv_ukernel_up4x9__psimd() local
143 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
Dup4x9-neonfma-acc2.c76 const float32x4_t vk4x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() local
77 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2()
124 const float32x4_t vk4x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2() local
125 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2()
Dup4x9-neon-acc2.c76 const float32x4_t vk4x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() local
77 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2()
124 const float32x4_t vk4x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neon_acc2() local
125 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__neon_acc2()
Dup4x25-psimd-acc2.c118 const psimd_f32 vk4x0123 = psimd_load_f32(w + 20); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() local
119 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
272 const psimd_f32 vk4x0123 = psimd_load_f32(w + 20); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() local
273 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
Dup4x25-psimd.c118 const psimd_f32 vk4x0123 = psimd_load_f32(w + 20); in xnn_f32_dwconv_ukernel_up4x25__psimd() local
119 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
270 const psimd_f32 vk4x0123 = psimd_load_f32(w + 20); in xnn_f32_dwconv_ukernel_up4x25__psimd() local
271 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
Dup4x25-sse.c118 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x25__sse() local
119 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
270 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x25__sse() local
271 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
Dup4x25-sse-acc2.c118 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() local
119 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
272 const __m128 vk4x0123 = _mm_load_ps(w + 20); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() local
273 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
Dup8x25-psimd.c132 const psimd_f32 vk4x0123 = psimd_load_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x25__psimd() local
134 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
359 const psimd_f32 vk4x0123 = psimd_load_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x25__psimd() local
360 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
511 const psimd_f32 vk4x0123 = psimd_load_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x25__psimd() local
512 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
Dup8x25-psimd-acc2.c132 const psimd_f32 vk4x0123 = psimd_load_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() local
134 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
362 const psimd_f32 vk4x0123 = psimd_load_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() local
363 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
516 const psimd_f32 vk4x0123 = psimd_load_f32(w + 40); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() local
517 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
Dup8x25-sse.c132 const __m128 vk4x0123 = _mm_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__sse() local
134 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
359 const __m128 vk4x0123 = _mm_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__sse() local
360 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
511 const __m128 vk4x0123 = _mm_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__sse() local
512 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
Dup8x25-sse-acc2.c132 const __m128 vk4x0123 = _mm_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() local
134 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
362 const __m128 vk4x0123 = _mm_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() local
363 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
516 const __m128 vk4x0123 = _mm_load_ps(w + 40); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() local
517 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()